feat(skins): add bunnny — barbie-pink coquette theme ♡

Adds a built-in 'bunnny' skin preset with a hot-pink coquette palette:

- Hot pink (#FF3366) borders with Barbie-pink (#FF69B4) accents
- Lavender-blush (#FFF0F5) text on deep-plum (#2A0E1E) surfaces
- Coquette spinner verbs (sparkling, twirling, tying a little bow)
- Heart/sparkle/flower spinner faces (♡ ✧ ✿ ❀ ෆ)
- Heart (♡) prompt symbol and tool prefix
- (ﾉ◕ヮ◕)ﾉ*:･ﾟ✧ kaomoji in welcome + help header
- Custom HERMES <3 banner_logo in pink gradient
- banner_hero of twin coquette bunnies holding paws, framed with floating sparkles, hearts, and flowers to fill the banner width

Skin is cosmetic only — agent_name stays 'Hermes Agent'. Adds entry to the skins.md docs table and ignores .venv/ in .gitignore.
feat(review): active-update bias, loaded-skill-first, support-file variants (#17213)
2026-04-29 19:23:31 -05:00 · 2026-04-28 21:11:48 -07:00 · 2026-04-28 21:04:35 -07:00 · 2026-04-28 20:22:44 -07:00 · 2026-04-28 22:21:44 -05:00 · 2026-04-28 22:18:26 -05:00
450 changed files with 40638 additions and 3597 deletions
@@ -5,7 +5,9 @@

 # Dependencies
 node_modules
+**/node_modules
 .venv
+**/.venv

 # CI/CD
 .github
@@ -13,7 +13,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  check:
+  nix-lockfile-check:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
@@ -36,6 +36,12 @@ jobs:
          LINK_SHA: ${{ steps.sha.outputs.full }}
        run: nix run .#fix-lockfiles -- --check

+      - name: Fail if check crashed without reporting
+        if: steps.check.outputs.stale != 'true' && steps.check.outputs.stale != 'false'
+        run: |
+          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
+          exit 1
+
      - name: Post sticky PR comment (stale)
        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
@@ -1,6 +1,13 @@
 name: Nix Lockfile Fix

 on:
+  push:
+    branches: [main]
+    paths:
+      - 'ui-tui/package-lock.json'
+      - 'ui-tui/package.json'
+      - 'web/package-lock.json'
+      - 'web/package.json'
  workflow_dispatch:
    inputs:
      pr_number:
@@ -19,9 +26,103 @@ concurrency:
  cancel-in-progress: false

 jobs:
+  # ── Auto-fix on main ───────────────────────────────────────────────
+  # Fires when a push to main touches package.json or package-lock.json
+  # in ui-tui/ or web/. Runs fix-lockfiles --apply and pushes the hash
+  # update commit directly to main so Nix builds never stay broken.
+  #
+  # Safety invariants:
+  #   1. The fix commit only touches nix/*.nix files, which are NOT in
+  #      the paths filter above, so this cannot re-trigger itself.
+  #   2. An explicit file-whitelist check before commit aborts if
+  #      fix-lockfiles ever modifies unexpected files.
+  #   3. Job-level concurrency with cancel-in-progress: true ensures
+  #      back-to-back pushes collapse to the newest; ref: main checkout
+  #      always operates on the latest branch state.
+  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
+  #      triggers downstream nix.yml verification.
+  auto-fix-main:
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    concurrency:
+      group: auto-fix-main
+      cancel-in-progress: true
+    steps:
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
+        with:
+          app-id: ${{ secrets.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          ref: main
+          token: ${{ steps.app-token.outputs.token }}
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      # Commits the refreshed nix hash files and pushes them to main. Runs only
+      # when the "apply" step reported changed == 'true'. Exits non-zero if
+      # files other than nix/tui.nix / nix/web.nix were modified or if every
+      # push attempt is rejected; exits zero without pushing when package
+      # files changed upstream while this job was running.
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Ensure only nix files were modified — prevents accidental
+          # self-triggering if fix-lockfiles ever touches package files.
+          # `|| true` keeps grep's "no match" exit status from tripping set -e.
+          unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
+          if [ -n "$unexpected" ]; then
+            echo "::error::Unexpected modified files: $unexpected"
+            exit 1
+          fi
+
+          # Record the base SHA before committing — used to detect package
+          # file changes if we need to rebase after a non-fast-forward push.
+          BASE_SHA="$(git rev-parse HEAD)"
+
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
+            -m "Source: $GITHUB_SHA" \
+            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+
+          # Retry push with rebase in case main advanced with an unrelated
+          # commit during the nix build. Without this, a non-fast-forward
+          # rejection silently loses the fix. If package files changed during
+          # the rebase, abort — a fresh auto-fix run will handle the new state.
+          for attempt in 1 2 3; do
+            if git push origin HEAD:main; then
+              exit 0
+            fi
+            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
+            git fetch origin main
+
+            # If package files changed between our base and the new main,
+            # our computed hashes are stale. Abort and let the next triggered
+            # run recompute from the correct package-lock state.
+            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
+              'ui-tui/package-lock.json' 'ui-tui/package.json' \
+              'web/package-lock.json' 'web/package.json' || true)"
+            if [ -n "$pkg_changed" ]; then
+              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
+              exit 0
+            fi
+
+            git rebase origin/main
+          done
+
+          # Every loop iteration ends with a rebase, so the result of the
+          # third rebase has not been pushed yet. Try it once more before
+          # giving up; otherwise that last rebase would be thrown away.
+          if git push origin HEAD:main; then
+            exit 0
+          fi
+          echo "::error::Failed to push after 3 rebase attempts"
+          exit 1
+
+  # ── PR fix (manual / checkbox) ─────────────────────────────────────
+  # Existing behavior: run on manual dispatch OR when a task-list
+  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
  fix:
-    # Run on manual dispatch OR when a task-list checkbox in the sticky
-    # lockfile-check comment flips from `[ ]` to `[x]`.
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment'
@@ -69,3 +69,5 @@ mini-swe-agent/
 .nix-stamps/
 result
 website/static/api/skills-index.json
+models-dev-upstream/
+.venv
@@ -38,7 +38,7 @@ hermes-agent/
 │   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
 │   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
 │   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
-│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
+│   └── builtin_hooks/    # Extension point for always-registered gateway hooks (none shipped)
 ├── plugins/              # Plugin system (see "Plugins" section below)
 │   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
 │   ├── context_engine/   # Context-engine plugins
@@ -494,7 +494,7 @@ branding:
  agent_name: "My Agent"
  welcome: "Welcome message"
  response_label: " ⚔ Agent "
-  prompt_symbol: "⚔ ❯ "
+  prompt_symbol: "⚔"

 tool_prefix: "╎"             # Tool output line prefix
 ```
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
+    build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -30,18 +30,28 @@ WORKDIR /opt/hermes
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
 COPY web/package.json web/package-lock.json web/
+COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
+COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
    (cd web && npm install --prefer-offline --no-audit) && \
+    (cd ui-tui && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
 COPY --chown=hermes:hermes . .

-# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
-RUN cd web && npm run build
+# Build browser dashboard and terminal UI assets.
+RUN cd web && npm run build && \
+    cd ../ui-tui && npm run build && \
+    rm -rf node_modules/@hermes/ink && \
+    rm -rf packages/hermes-ink/node_modules && \
+    cp -R packages/hermes-ink node_modules/@hermes/ink && \
+    npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
+    rm -rf node_modules/@hermes/ink/node_modules/react && \
+    node --input-type=module -e "await import('@hermes/ink')"

 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
@@ -112,6 +112,17 @@ def main() -> None:
    import acp
    from .server import HermesACPAgent

+    # MCP tool discovery from config.yaml — run before asyncio.run() so
+    # it's safe to use blocking waits.  (ACP also registers per-session
+    # MCP servers dynamically via asyncio.to_thread inside the event
+    # loop; that path is unaffected.)  Moved from model_tools.py module
+    # scope to avoid freezing the gateway's loop on lazy import (#16856).
+    try:
+        from tools.mcp_tool import discover_mcp_tools
+        discover_mcp_tools()
+    except Exception:
+        logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
+
    agent = HermesACPAgent()
    try:
        asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
@@ -3,6 +3,7 @@
 from __future__ import annotations

 import asyncio
+import contextvars
 import logging
 import os
 from collections import defaultdict, deque
@@ -574,6 +575,22 @@ class HermesACPAgent(acp.Agent):

        def _run_agent() -> dict:
            nonlocal previous_approval_cb, previous_interactive
+            # Bind HERMES_SESSION_KEY for this session so per-session caches
+            # (e.g. the interactive sudo password cache in tools.terminal_tool)
+            # scope to the ACP session rather than leaking across sessions
+            # that land on the same reused executor thread. This call runs
+            # inside a contextvars.copy_context() below, so the ContextVar
+            # write is isolated from other concurrent ACP sessions.
+            try:
+                from gateway.session_context import (
+                    clear_session_vars,
+                    set_session_vars,
+                )
+                session_tokens = set_session_vars(session_key=session_id)
+            except Exception:
+                session_tokens = None
+                clear_session_vars = None  # type: ignore[assignment]
+                logger.debug("Could not set ACP session context", exc_info=True)
            if approval_cb:
                try:
                    from tools import terminal_tool as _terminal_tool
@@ -607,9 +624,19 @@ class HermesACPAgent(acp.Agent):
                        _terminal_tool.set_approval_callback(previous_approval_cb)
                    except Exception:
                        logger.debug("Could not restore approval callback", exc_info=True)
+                if session_tokens is not None and clear_session_vars is not None:
+                    try:
+                        clear_session_vars(session_tokens)
+                    except Exception:
+                        logger.debug("Could not clear ACP session context", exc_info=True)

        try:
-            result = await loop.run_in_executor(_executor, _run_agent)
+            # Wrap the executor call in a fresh copy of the current context so
+            # concurrent ACP sessions on the shared ThreadPoolExecutor don't
+            # stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
+            # particular — used by the interactive sudo password cache scope).
+            ctx = contextvars.copy_context()
+            result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
        except Exception:
            logger.exception("Executor error for session %s", session_id)
            return PromptResponse(stop_reason="end_turn")
@@ -22,10 +22,25 @@ from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple
 from utils import normalize_proxy_env_vars

-try:
-    import anthropic as _anthropic_sdk
-except ImportError:
-    _anthropic_sdk = None  # type: ignore[assignment]
+# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
+# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
+# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
+# read_claude_code_credentials_from_keychain) are all on cold user-triggered
+# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
+# the module after the first call and returns None on ImportError.
+_anthropic_sdk: Any = ...  # sentinel — None means "tried and missing"
+
+
+def _get_anthropic_sdk():
+    """Return the ``anthropic`` SDK module, importing lazily. None if not installed.
+
+    The first call attempts ``import anthropic`` and stores the outcome in the
+    module-level ``_anthropic_sdk``; later calls return that stored value
+    without importing again.
+    """
+    global _anthropic_sdk
+    # ``...`` (Ellipsis) means the import has not been attempted yet.
+    if _anthropic_sdk is ...:
+        try:
+            import anthropic as _sdk
+            _anthropic_sdk = _sdk
+        except ImportError:
+            # Remember the failure so the import is not retried on every call.
+            _anthropic_sdk = None
+    return _anthropic_sdk

 logger = logging.getLogger(__name__)

@@ -202,19 +217,33 @@ def _forbids_sampling_params(model: str) -> bool:


 # Beta headers for enhanced features (sent with ALL auth types).
-# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
+# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
 # beta headers are still accepted (harmless no-op) but not required. Kept
 # here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
 # that still gate on the headers continue to get the enhanced features.
-# Migration guide: remove these if you no longer support ≤4.5 models.
+#
+# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
+# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
+# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
+# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
+# at 200K even though model_metadata.py advertises 1M. The header is a harmless
+# no-op on endpoints where 1M is GA.
+#
+# Migration guide: remove these if you no longer support ≤4.5 models or once
+# Bedrock/Azure promote 1M to GA.
 _COMMON_BETAS = [
    "interleaved-thinking-2025-05-14",
    "fine-grained-tool-streaming-2025-05-14",
+    "context-1m-2025-08-07",
 ]
 # MiniMax's Anthropic-compatible endpoints fail tool-use requests when
 # the fine-grained tool streaming beta is present.  Omit it so tool calls
 # fall back to the provider's default response path.
 _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
+# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
+# Bearer-auth (MiniMax) endpoints since they host their own models and
+# unknown Anthropic beta headers risk request rejection.
+_CONTEXT_1M_BETA = "context-1m-2025-08-07"

 # Fast mode beta — enables the ``speed: "fast"`` request parameter for
 # significantly higher output token throughput on Opus 4.6 (~2.5x).
@@ -228,10 +257,11 @@ _OAUTH_ONLY_BETAS = [
    "oauth-2025-04-20",
 ]

-# Claude Code identity — required for OAuth requests to be routed correctly.
-# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
-# The version must stay reasonably current — Anthropic rejects OAuth requests
-# when the spoofed user-agent version is too far behind the actual release.
+# Claude Code version — sent on OAuth token-exchange / refresh requests
+# (platform.claude.com/v1/oauth/token) as the client's user-agent. Anthropic's
+# OAuth flow validates the UA and may reject requests with a version that's
+# too old, so detecting dynamically keeps users on a current Claude Code
+# install from hitting stale-version errors during login/refresh.
 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
 _claude_code_version_cache: Optional[str] = None

@@ -239,9 +269,9 @@ _claude_code_version_cache: Optional[str] = None
 def _detect_claude_code_version() -> str:
    """Detect the installed Claude Code version, fall back to a static constant.

-    Anthropic's OAuth infrastructure validates the user-agent version and may
-    reject requests with a version that's too old.  Detecting dynamically means
-    users who keep Claude Code updated never hit stale-version 400s.
+    Used only by the OAuth token-exchange / refresh flow
+    (``platform.claude.com/v1/oauth/token``). The Messages API client no
+    longer sends a claude-cli user-agent.
    """
    import subprocess as _sp

@@ -261,12 +291,13 @@ def _detect_claude_code_version() -> str:
    return _CLAUDE_CODE_VERSION_FALLBACK


-_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
-_MCP_TOOL_PREFIX = "mcp_"
-
-
 def _get_claude_code_version() -> str:
-    """Lazily detect the installed Claude Code version when OAuth headers need it."""
+    """Lazily detect the installed Claude Code version for OAuth flow headers.
+
+    Used only on the OAuth token-exchange and refresh endpoints
+    (``platform.claude.com/v1/oauth/token``). The Messages API client does
+    not send a claude-cli user-agent.
+    """
    global _claude_code_version_cache
    if _claude_code_version_cache is None:
        _claude_code_version_cache = _detect_claude_code_version()
@@ -357,9 +388,14 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    that include Anthropic's ``fine-grained-tool-streaming`` beta — every
    tool-use message triggers a connection error.  Strip that beta for
    Bearer-auth endpoints while keeping all other betas intact.
+
+    The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
+    endpoints — MiniMax hosts its own models, not Claude, so the header is
+    irrelevant at best and risks request rejection at worst.
    """
    if _requires_bearer_auth(base_url):
-        return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
+        _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
+        return [b for b in _COMMON_BETAS if b not in _stripped]
    return _COMMON_BETAS


@@ -374,6 +410,7 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =

    Returns an anthropic.Anthropic instance.
    """
+    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
@@ -430,15 +467,21 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_oauth_token(api_key):
-        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
-        # Anthropic routes OAuth requests based on user-agent and headers;
-        # without Claude Code's fingerprint, requests get intermittent 500s.
-        all_betas = common_betas + _OAUTH_ONLY_BETAS
+        # OAuth access token / setup-token → Bearer auth + OAuth-only betas.
+        # The OAuth-specific beta headers are still required by Anthropic's
+        # OAuth-gated Messages API path; the Claude Code user-agent / x-app
+        # spoofing is deliberately NOT sent — Hermes identifies as itself.
+        #
+        # ``context-1m-2025-08-07`` is stripped here: Anthropic rejects
+        # OAuth requests that carry it with
+        #   "This authentication style is incompatible with the long
+        #    context beta header."
+        # Subscription-gated OAuth traffic gets the 200K default window.
+        oauth_safe_common = [b for b in common_betas if b != _CONTEXT_1M_BETA]
+        all_betas = oauth_safe_common + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
-            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
-            "x-app": "cli",
        }
    else:
        # Regular API key → x-api-key header + common betas
@@ -456,8 +499,16 @@ def build_anthropic_bedrock_client(region: str):
    Claude feature parity: prompt caching, thinking budgets, adaptive
    thinking, fast mode — features not available via the Converse API.

+    Attaches the common Anthropic beta headers as client-level defaults so
+    that Bedrock-hosted Claude models get the same enhanced features as
+    native Anthropic. The ``context-1m-2025-08-07`` beta in particular
+    unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
+    it, Bedrock caps these models at 200K even though the Anthropic API
+    serves them with 1M natively.
+
    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
    """
+    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
@@ -473,6 +524,7 @@ def build_anthropic_bedrock_client(region: str):
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
+        default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
    )


@@ -488,9 +540,6 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
-    import platform
-    import subprocess
-
    if platform.system() != "Darwin":
        return None

@@ -776,17 +825,45 @@ def resolve_anthropic_token() -> Optional[str]:
    """Resolve an Anthropic token from all available sources.

    Priority:
-      1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
-      2. CLAUDE_CODE_OAUTH_TOKEN env var
-      3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
+      1. Hermes credential pool (``~/.hermes/auth.json`` →
+         ``credential_pool.anthropic``) — OAuth tokens minted by Hermes'
+         own PKCE login flow. Entries are auto-refreshed when near
+         expiry. Env-sourced pool entries (``source="env:..."``) are
+         skipped here so the env-var priority logic below still runs.
+      2. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
+      3. CLAUDE_CODE_OAUTH_TOKEN env var
+      4. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
         — with automatic refresh if expired and a refresh token is available
-      4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
+      5. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)

    Returns the token string or None.
    """
+    # 1. Hermes credential pool — the live source of truth for tokens
+    #    minted via ``hermes login anthropic`` / the dashboard PKCE flow.
+    #    ``select()`` picks the best available entry and refreshes it if
+    #    it's near expiry, so callers always get a fresh token.
+    #
+    #    Skip env-sourced pool entries (``env:ANTHROPIC_TOKEN``, etc.) —
+    #    those are passthroughs of the env var, and the env-var branches
+    #    below have richer priority logic (``_prefer_refreshable_claude_code_token``)
+    #    that can upgrade a static env OAuth token to a refreshed
+    #    Claude Code token. Letting the pool win here would short-circuit
+    #    that upgrade.
+    try:
+        from agent.credential_pool import load_pool
+        pool = load_pool("anthropic")
+        entry = pool.select()
+        if entry and entry.access_token and not entry.source.startswith("env:"):
+            return entry.access_token
+    except Exception as exc:
+        # Pool lookup is best-effort — fall through to env/file sources
+        # if anything goes wrong (e.g. auth.json corruption during a
+        # concurrent write).
+        logger.debug("Credential-pool lookup failed for anthropic: %s", exc)
+
    creds = read_claude_code_credentials()

-    # 1. Hermes-managed OAuth/setup token env var
+    # 2. Hermes-managed OAuth/setup token env var
    token = os.getenv("ANTHROPIC_TOKEN", "").strip()
    if token:
        preferred = _prefer_refreshable_claude_code_token(token, creds)
@@ -794,7 +871,7 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return token

-    # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
+    # 3. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
    cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
    if cc_token:
        preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
@@ -802,12 +879,12 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return cc_token

-    # 3. Claude Code credential file
+    # 4. Claude Code credential file
    resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
    if resolved_claude_token:
        return resolved_claude_token

-    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
+    # 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    # This remains as a compatibility fallback for pre-migration Hermes configs.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if api_key:
@@ -1054,6 +1131,33 @@ def _sanitize_tool_id(tool_id: str) -> str:
    return sanitized or "tool_0"


+def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
+    """Normalize tool schemas before sending them to Anthropic.
+
+    Anthropic's tool schema validator rejects nullable unions such as
+    ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
+    commonly emits for optional fields. Tool optionality is represented by
+    the parent ``required`` array, so we delegate to the shared
+    ``strip_nullable_unions`` helper to collapse nullable unions to the
+    non-null branch while preserving metadata like description/default.
+
+    ``keep_nullable_hint=False`` because the Anthropic validator does not
+    recognize the OpenAPI-style ``nullable: true`` extension and strict
+    schema-to-grammar converters may reject unknown keywords.
+
+    Args:
+        schema: The tool's parameter schema; may be falsy (``None``, ``{}``).
+
+    Returns:
+        The normalized schema dict. An empty object schema
+        (``{"type": "object", "properties": {}}``) is returned when the
+        input is falsy or the helper returns something that is not a dict.
+    """
+    # Missing or empty schema: fall back to an object with no properties.
+    if not schema:
+        return {"type": "object", "properties": {}}
+
+    # Imported at call time rather than at module import.
+    from tools.schema_sanitizer import strip_nullable_unions
+
+    normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
+    if not isinstance(normalized, dict):
+        return {"type": "object", "properties": {}}
+    # Object schemas whose "properties" is absent or not a dict get an empty
+    # mapping; a new dict is built so the helper's result is not mutated.
+    if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
+        normalized = {**normalized, "properties": {}}
+    return normalized
+
+
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
@@ -1064,7 +1168,9 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
        result.append({
            "name": fn.get("name", ""),
            "description": fn.get("description", ""),
-            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+            "input_schema": _normalize_tool_input_schema(
+                fn.get("parameters", {"type": "object", "properties": {}})
+            ),
        })
    return result

@@ -1543,8 +1649,10 @@ def build_anthropic_kwargs(
    "max_tokens too large given prompt" errors and retry with a smaller cap
    (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).

-    When *is_oauth* is True, applies Claude Code compatibility transforms:
-    system prompt prefix, tool name prefixing, and prompt sanitization.
+    When *is_oauth* is True, enables the OAuth-only beta headers required by
+    Anthropic's subscription-gated Messages endpoint (fast-mode branch only;
+    the default headers are set by build_anthropic_client). No system-prompt
+    or tool-name rewriting is performed — Hermes identifies as itself.

    When *preserve_dots* is True, model name dots are not converted to hyphens
    (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
@@ -1577,45 +1685,11 @@ def build_anthropic_kwargs(
    if context_length and effective_max_tokens > context_length:
        effective_max_tokens = max(context_length - 1, 1)

-    # ── OAuth: Claude Code identity ──────────────────────────────────
-    if is_oauth:
-        # 1. Prepend Claude Code system prompt identity
-        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
-        if isinstance(system, list):
-            system = [cc_block] + system
-        elif isinstance(system, str) and system:
-            system = [cc_block, {"type": "text", "text": system}]
-        else:
-            system = [cc_block]
-
-        # 2. Sanitize system prompt — replace product name references
-        #    to avoid Anthropic's server-side content filters.
-        for block in system:
-            if isinstance(block, dict) and block.get("type") == "text":
-                text = block.get("text", "")
-                text = text.replace("Hermes Agent", "Claude Code")
-                text = text.replace("Hermes agent", "Claude Code")
-                text = text.replace("hermes-agent", "claude-code")
-                text = text.replace("Nous Research", "Anthropic")
-                block["text"] = text
-
-        # 3. Prefix tool names with mcp_ (Claude Code convention)
-        if anthropic_tools:
-            for tool in anthropic_tools:
-                if "name" in tool:
-                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
-
-        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
-        for msg in anthropic_messages:
-            content = msg.get("content")
-            if isinstance(content, list):
-                for block in content:
-                    if isinstance(block, dict):
-                        if block.get("type") == "tool_use" and "name" in block:
-                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
-                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
-                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
-                            pass  # tool_result uses ID, not name
+    # OAuth requests go through Anthropic's subscription-gated Messages
+    # endpoint but otherwise send the real Hermes system prompt and real
+    # Hermes tool names — the only OAuth-specific wire differences are
+    # Bearer auth and the _OAUTH_ONLY_BETAS header (applied in
+    # build_anthropic_client and the fast-mode branch below).

    kwargs: Dict[str, Any] = {
        "model": model,
@@ -1706,6 +1780,9 @@ def build_anthropic_kwargs(
        # extra_headers override the client-level anthropic-beta header).
        betas = list(_common_betas_for_base_url(base_url))
        if is_oauth:
+            # Strip context-1m — incompatible with OAuth auth. See matching
+            # comment in build_anthropic_client().
+            betas = [b for b in betas if b != _CONTEXT_1M_BETA]
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
@@ -41,10 +41,57 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
 from urllib.parse import urlparse, parse_qs, urlunparse

-from openai import OpenAI
+# NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
+# openai SDK pulls a large type tree (~240 ms cold, including responses/*,
+# graders/*). We expose `OpenAI` here as a thin proxy that imports the SDK on
+# first call and forwards, so:
+#   (a) the 15+ in-module `OpenAI(...)` construction sites work unchanged
+#       (Python's function-scope name lookup resolves `OpenAI` to the proxy
+#       object bound in module globals here, without triggering any import);
+#   (b) external code can still do `auxiliary_client.OpenAI` or
+#       `patch("agent.auxiliary_client.OpenAI", ...)` — tests see the proxy,
+#       and patch replaces the module attribute as usual;
+#   (c) `OpenAI` as a type annotation resolves at runtime to the proxy object
+#       (which is harmless — annotations aren't type-checked at runtime).
+# See tests/agent/test_auxiliary_client.py for patch patterns this supports.
+if TYPE_CHECKING:
+    from openai import OpenAI  # noqa: F401 — type hints only
+
+# Holds the real ``openai.OpenAI`` class once imported; ``None`` until first use.
+_OPENAI_CLS_CACHE: Optional[type] = None
+
+
+def _load_openai_cls() -> type:
+    """Return ``openai.OpenAI``, importing the SDK on first call and caching it.
+
+    Subsequent calls return the class stored in ``_OPENAI_CLS_CACHE`` without
+    re-running the import statement.
+    """
+    global _OPENAI_CLS_CACHE
+    if _OPENAI_CLS_CACHE is None:
+        # Function-scope import keeps the SDK off the module import path.
+        from openai import OpenAI as _cls
+        _OPENAI_CLS_CACHE = _cls
+    return _OPENAI_CLS_CACHE
+
+
+class _OpenAIProxy:
+    """Module-level proxy that looks like the ``openai.OpenAI`` class.
+
+    Forwards ``OpenAI(...)`` calls and ``isinstance(x, OpenAI)`` checks to the
+    real SDK class, importing the SDK lazily on first use.
+
+    Only calling, ``isinstance`` and ``repr`` are implemented here; the proxy
+    defines no other attribute forwarding.
+    """
+
+    # No per-instance state: the resolved class is cached at module level by
+    # ``_load_openai_cls``.
+    __slots__ = ()
+
+    def __call__(self, *args, **kwargs):
+        """Construct a real ``openai.OpenAI`` client with the given arguments."""
+        return _load_openai_cls()(*args, **kwargs)
+
+    def __instancecheck__(self, obj):
+        """Answer ``isinstance(obj, OpenAI)`` against the real SDK class."""
+        return isinstance(obj, _load_openai_cls())
+
+    def __repr__(self):
+        return "<lazy openai.OpenAI proxy>"
+
+
+OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance

 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
@@ -82,6 +129,8 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
+    "gmi-cloud": "gmi",
+    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -92,6 +141,10 @@ _PROVIDER_ALIASES = {
    "github-models": "copilot",
    "github-copilot-acp": "copilot-acp",
    "copilot-acp-agent": "copilot-acp",
+    "tencent": "tencent-tokenhub",
+    "tokenhub": "tencent-tokenhub",
+    "tencent-cloud": "tencent-tokenhub",
+    "tencentmaas": "tencent-tokenhub",
 }


@@ -155,6 +208,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "kimi-coding": "kimi-k2-turbo-preview",
    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
+    "gmi": "google/gemini-3.1-flash-lite-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
@@ -163,6 +217,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "opencode-go": "glm-5",
    "kilocode": "google/gemini-3-flash-preview",
    "ollama-cloud": "nemotron-3-nano:30b",
+    "tencent-tokenhub": "hy3-preview",
 }

 # Vision-specific model overrides for direct providers.
@@ -402,6 +457,33 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

+        # Translate extra_body.reasoning (chat.completions shape) into the
+        # Responses API's top-level reasoning + include fields.  Mirrors
+        # agent/transports/codex.py::build_kwargs() so auxiliary callers
+        # that configure reasoning via auxiliary.<task>.extra_body get the
+        # same behavior as the main agent's Codex transport.
+        extra_body = kwargs.get("extra_body") or {}
+        if isinstance(extra_body, dict):
+            reasoning_cfg = extra_body.get("reasoning")
+            if isinstance(reasoning_cfg, dict):
+                if reasoning_cfg.get("enabled") is False:
+                    # Reasoning explicitly disabled — do not set reasoning
+                    # or include.  The Codex backend still thinks by
+                    # default, but we honor the caller's intent where the
+                    # API allows it.
+                    pass
+                else:
+                    effort = reasoning_cfg.get("effort", "medium")
+                    # Codex backend rejects "minimal"; clamp to "low" to
+                    # match the main-agent Codex transport behavior.
+                    if effort == "minimal":
+                        effort = "low"
+                    resp_kwargs["reasoning"] = {
+                        "effort": effort,
+                        "summary": "auto",
+                    }
+                    resp_kwargs["include"] = ["reasoning.encrypted_content"]
+
        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
        tools = kwargs.get("tools")
        if tools:
@@ -631,9 +713,7 @@ class _AnthropicCompletionsAdapter:

        response = self._client.messages.create(**anthropic_kwargs)
        _transport = get_transport("anthropic_messages")
-        _nr = _transport.normalize_response(
-            response, strip_tool_prefix=self._is_oauth
-        )
+        _nr = _transport.normalize_response(response)

        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
        # .function.arguments) via properties, so no wrapping needed.
@@ -711,6 +791,116 @@ class AsyncAnthropicAuxiliaryClient:
        self.base_url = sync_wrapper.base_url


+def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
+    """True if the endpoint at ``base_url`` speaks the Anthropic Messages
+    protocol instead of OpenAI chat.completions.
+
+    Mirrors ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the
+    auxiliary client and the main agent stay in sync on transport selection.
+    Covers:
+
+    - Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
+      Anthropic-compatible gateways).
+    - ``api.kimi.com/coding`` (Kimi Coding Plan — the /coding route only
+      speaks Claude-Code's native Anthropic shape; ``chat.completions``
+      returns 404 on Anthropic-only model aliases like ``kimi-for-coding``).
+    - ``api.anthropic.com`` (native Anthropic).
+
+    Returns ``False`` for an empty or missing ``base_url`` and for any URL
+    that matches none of the cases above.
+    """
+    # Trim whitespace, lower-case and drop trailing slashes so the suffix and
+    # substring checks below ignore case and a trailing "/".
+    normalized = (base_url or "").strip().lower().rstrip("/")
+    if not normalized:
+        return False
+    if normalized.endswith("/anthropic"):
+        return True
+    hostname = base_url_hostname(normalized)
+    if hostname == "api.anthropic.com":
+        return True
+    # "/coding" is matched as a substring of the whole normalized URL, not
+    # only of its path component.
+    if hostname == "api.kimi.com" and "/coding" in normalized:
+        return True
+    return False
+
+
+def _maybe_wrap_anthropic(
+    client_obj: Any,
+    model: str,
+    api_key: str,
+    base_url: str,
+    api_mode: Optional[str] = None,
+) -> Any:
+    """Rewrap a plain OpenAI client in ``AnthropicAuxiliaryClient`` when
+    the endpoint actually speaks Anthropic Messages.
+
+    This is the single chokepoint for aux-client transport correction.
+    It is called from ``_resolve_api_key_provider``, ``_try_custom_endpoint``
+    and the ``_wrap_if_needed`` helper inside ``resolve_provider_client`` so
+    that api_key providers (Kimi Coding Plan), the ``custom`` endpoint, and
+    future /anthropic gateways all land on the right wire format
+    regardless of which branch built the client.
+
+    Args:
+        client_obj: The client built by the caller; returned as-is in every
+            pass-through case listed below.
+        model: Model name handed to ``AnthropicAuxiliaryClient``.
+        api_key: Key used to build the Anthropic client and handed to the
+            wrapper.
+        base_url: Endpoint URL used for detection, to build the Anthropic
+            client, and handed to the wrapper.
+        api_mode: Optional explicit transport. ``"anthropic_messages"``
+            forces wrapping; any other non-empty value disables it; ``None``
+            or empty defers to URL detection.
+
+    Returns ``client_obj`` unchanged when:
+
+    - It's already an Anthropic/Codex/Gemini/CopilotACP wrapper.
+    - The endpoint is an OpenAI-wire endpoint.
+    - ``api_mode`` is explicitly set to a non-Anthropic transport.
+    - The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
+    - Building the Anthropic client raises (logged, falls back to OpenAI wire).
+
+    Otherwise returns a new ``AnthropicAuxiliaryClient`` constructed with
+    ``is_oauth=False``.
+    """
+    # Already wrapped — don't double-wrap.
+    if isinstance(client_obj, AnthropicAuxiliaryClient):
+        return client_obj
+    # Other specialized adapters we should never re-dispatch.
+    if isinstance(client_obj, CodexAuxiliaryClient):
+        return client_obj
+    # The Gemini and Copilot-ACP adapters are imported lazily; if either module
+    # cannot be imported, its isinstance check is simply skipped.
+    try:
+        from agent.gemini_native_adapter import GeminiNativeClient
+        if isinstance(client_obj, GeminiNativeClient):
+            return client_obj
+    except ImportError:
+        pass
+    try:
+        from agent.copilot_acp_client import CopilotACPClient
+        if isinstance(client_obj, CopilotACPClient):
+            return client_obj
+    except ImportError:
+        pass
+
+    # Explicit non-anthropic api_mode wins over URL heuristics.
+    if api_mode and api_mode != "anthropic_messages":
+        return client_obj
+
+    should_wrap = (
+        api_mode == "anthropic_messages"
+        or _endpoint_speaks_anthropic_messages(base_url)
+    )
+    if not should_wrap:
+        return client_obj
+
+    # An ImportError from the adapter module is reported as the anthropic SDK
+    # being missing; the caller keeps its original OpenAI-wire client.
+    try:
+        from agent.anthropic_adapter import build_anthropic_client
+    except ImportError:
+        logger.warning(
+            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
+            "not installed — falling back to OpenAI-wire (will likely 404).",
+            base_url,
+        )
+        return client_obj
+
+    # Best-effort: any failure while building the client is logged and the
+    # original client is returned rather than raising to the caller.
+    try:
+        real_client = build_anthropic_client(api_key, base_url)
+    except Exception as exc:
+        logger.warning(
+            "Failed to build Anthropic client for %s (%s) — falling back to "
+            "OpenAI-wire client.", base_url, exc,
+        )
+        return client_obj
+
+    # Only the first 60 characters of the base URL are written to the log.
+    logger.debug(
+        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
+        "(model=%s, base_url=%s, api_mode=%s)",
+        model, base_url[:60] if base_url else "", api_mode or "auto-detected",
+    )
+    return AnthropicAuxiliaryClient(
+        real_client, model, api_key, base_url, is_oauth=False,
+    )
+
+
 def _read_nous_auth() -> Optional[dict]:
    """Read and validate ~/.hermes/auth.json for an active Nous provider.

@@ -881,7 +1071,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
-            return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
+            _client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
+            return _client, model

        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
@@ -907,7 +1099,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
-        return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
+        _client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
+        return _client, model

    return None, None

@@ -1191,7 +1385,13 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
            model,
        )
-    return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
+    # URL-based anthropic detection for custom endpoints that didn't set
+    # api_mode explicitly (e.g. kimi.com/coding reached via custom config).
+    _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
+    _fallback_client = _maybe_wrap_anthropic(
+        _fallback_client, model, custom_key, custom_base, custom_mode,
+    )
+    return _fallback_client, model


 def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
@@ -1617,8 +1817,14 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
 # below — never look up auth env vars ad-hoc.


-def _to_async_client(sync_client, model: str):
-    """Convert a sync client to its async counterpart, preserving Codex routing."""
+def _to_async_client(sync_client, model: str, is_vision: bool = False):
+    """Convert a sync client to its async counterpart, preserving Codex routing.
+
+    When ``is_vision=True`` and the underlying base URL is Copilot, the
+    resulting async client carries the ``Copilot-Vision-Request: true``
+    header so the request is routed to Copilot's vision-capable
+    infrastructure (otherwise vision payloads silently time out).
+    """
    from openai import AsyncOpenAI

    if isinstance(sync_client, CodexAuxiliaryClient):
@@ -1647,9 +1853,11 @@ def _to_async_client(sync_client, model: str):
    if base_url_host_matches(sync_base_url, "openrouter.ai"):
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
-        from hermes_cli.models import copilot_default_headers
+        from hermes_cli.copilot_auth import copilot_request_headers

-        async_kwargs["default_headers"] = copilot_default_headers()
+        async_kwargs["default_headers"] = copilot_request_headers(
+            is_agent_turn=True, is_vision=is_vision
+        )
    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
    return AsyncOpenAI(**async_kwargs), model
@@ -1676,6 +1884,7 @@ def resolve_provider_client(
    explicit_api_key: str = None,
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
+    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -1733,8 +1942,20 @@ def resolve_provider_client(
                return True
        return False

-    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
-        """Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
+    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "",
+                        api_key_str: str = ""):
+        """Wrap a plain OpenAI client in the correct transport adapter.
+
+        Handles two cases:
+        - ``CodexAuxiliaryClient`` when the endpoint needs the Responses API
+          (explicit ``api_mode=codex_responses`` or api.openai.com + codex
+          model name).
+        - ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic
+          Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic``
+          suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``).
+
+        Clients that are already specialized wrappers pass through unchanged.
+        """
        if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
            logger.debug(
                "resolve_provider_client: wrapping client in CodexAuxiliaryClient "
@@ -1742,7 +1963,11 @@ def resolve_provider_client(
                api_mode or "auto-detected", final_model_str,
                base_url_str[:60] if base_url_str else "")
            return CodexAuxiliaryClient(client_obj, final_model_str)
-        return client_obj
+        # Anthropic-wire endpoints: rewrap plain OpenAI clients so
+        # chat.completions.create() is translated to /v1/messages.
+        return _maybe_wrap_anthropic(
+            client_obj, final_model_str, api_key_str, base_url_str, api_mode,
+        )

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
@@ -1759,7 +1984,7 @@ def resolve_provider_client(
                "auxiliary provider (using %r instead)", model, resolved)
            model = None
        final_model = model or resolved
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    # ── OpenRouter ───────────────────────────────────────────────────
@@ -1772,7 +1997,7 @@ def resolve_provider_client(
            )
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
@@ -1789,7 +2014,7 @@ def resolve_provider_client(
                           "but Nous Portal not configured (run: hermes auth)")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
@@ -1816,13 +2041,13 @@ def resolve_provider_client(
                           "but no Codex OAuth token found (run: hermes model)")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        if explicit_base_url:
-            custom_base = explicit_base_url.strip()
+            custom_base = _to_openai_base_url(explicit_base_url).strip()
            custom_key = (
                (explicit_api_key or "").strip()
                or os.getenv("OPENAI_API_KEY", "").strip()
@@ -1835,7 +2060,7 @@ def resolve_provider_client(
                )
                return None, None
            final_model = _normalize_resolved_model(
-                model or _read_main_model() or "gpt-4o-mini",
+                model or (main_runtime.get("model") if main_runtime else None) or "gpt-4o-mini",
                provider,
            )
            extra = {}
@@ -1845,11 +2070,13 @@ def resolve_provider_client(
            if base_url_host_matches(custom_base, "api.kimi.com"):
                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
-                from hermes_cli.models import copilot_default_headers
-                extra["default_headers"] = copilot_default_headers()
+                from hermes_cli.copilot_auth import copilot_request_headers
+                extra["default_headers"] = copilot_request_headers(
+                    is_agent_turn=True, is_vision=is_vision
+                )
            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
-            client = _wrap_if_needed(client, final_model, custom_base)
-            return (_to_async_client(client, final_model) if async_mode
+            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
+            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
        # Try custom first, then codex, then API-key providers
        for try_fn in (_try_custom_endpoint, _try_codex,
@@ -1858,8 +2085,9 @@ def resolve_provider_client(
            if client is not None:
                final_model = _normalize_resolved_model(model or default, provider)
                _cbase = str(getattr(client, "base_url", "") or "")
-                client = _wrap_if_needed(client, final_model, _cbase)
-                return (_to_async_client(client, final_model) if async_mode
+                _ckey = str(getattr(client, "api_key", "") or "")
+                client = _wrap_if_needed(client, final_model, _cbase, _ckey)
+                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
        logger.warning("resolve_provider_client: custom/main requested "
                       "but no endpoint credentials found")
@@ -1881,10 +2109,22 @@ def resolve_provider_client(
            entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
            if custom_base:
                final_model = _normalize_resolved_model(
-                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
+                    model
+                    or custom_entry.get("model")
+                    or (main_runtime.get("model") if main_runtime else None)
+                    or _read_main_model()
+                    or "gpt-4o-mini",
                    provider,
                )
-                _clean_base2, _dq2 = _extract_url_query_params(custom_base)
+                # anthropic_messages talks to the /anthropic surface directly;
+                # OpenAI-wire paths (chat_completions / codex_responses) need the
+                # /v1 equivalent.  Rewrite only on the OpenAI-wire path so the
+                # Anthropic fallback SDK still sees the original URL.
+                if entry_api_mode == "anthropic_messages":
+                    openai_base = custom_base
+                else:
+                    openai_base = _to_openai_base_url(custom_base)
+                _clean_base2, _dq2 = _extract_url_query_params(openai_base)
                _extra2 = {"default_query": _dq2} if _dq2 else {}
                logger.debug(
                    "resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
@@ -1903,8 +2143,13 @@ def resolve_provider_client(
                            "installed — falling back to OpenAI-wire.",
                            provider,
                        )
-                        client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
-                        return (_to_async_client(client, final_model) if async_mode
+                        # Fallback went OpenAI-wire after all — redo the query
+                        # extraction against the rewritten /v1 URL.
+                        _fallback_base = _to_openai_base_url(custom_base)
+                        _fb_clean, _fb_dq = _extract_url_query_params(_fallback_base)
+                        _fb_extra = {"default_query": _fb_dq} if _fb_dq else {}
+                        client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
+                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
                        real_client, final_model, custom_key, custom_base, is_oauth=False,
@@ -1922,8 +2167,8 @@ def resolve_provider_client(
                ):
                    client = CodexAuxiliaryClient(client, final_model)
                else:
-                    client = _wrap_if_needed(client, final_model, custom_base)
-                return (_to_async_client(client, final_model) if async_mode
+                    client = _wrap_if_needed(client, final_model, openai_base, custom_key)
+                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
            logger.warning(
                "resolve_provider_client: named custom provider %r has no base_url",
@@ -1955,7 +2200,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
                return None, None
            final_model = _normalize_resolved_model(model or default_model, provider)
-            return (_to_async_client(client, final_model) if async_mode else (client, final_model))
+            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model))

        creds = resolve_api_key_provider_credentials(provider)
        api_key = str(creds.get("api_key", "")).strip()
@@ -1981,7 +2226,7 @@ def resolve_provider_client(
            if is_native_gemini_base_url(base_url):
                client = GeminiNativeClient(api_key=api_key, base_url=base_url)
                logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-                return (_to_async_client(client, final_model) if async_mode
+                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))

        # Provider-specific headers
@@ -1989,9 +2234,11 @@ def resolve_provider_client(
        if base_url_host_matches(base_url, "api.kimi.com"):
            headers["User-Agent"] = "claude-code/0.1.0"
        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-            from hermes_cli.models import copilot_default_headers
+            from hermes_cli.copilot_auth import copilot_request_headers

-            headers.update(copilot_default_headers())
+            headers.update(copilot_request_headers(
+                is_agent_turn=True, is_vision=is_vision
+            ))
        client = OpenAI(api_key=api_key, base_url=base_url,
                        **({"default_headers": headers} if headers else {}))

@@ -2013,16 +2260,24 @@ def resolve_provider_client(

        # Honor api_mode for any API-key provider (e.g. direct OpenAI with
        # codex-family models).  The copilot-specific wrapping above handles
-        # copilot; this covers the general case (#6800).
-        client = _wrap_if_needed(client, final_model, base_url)
+        # copilot; this covers the general case (#6800).  Also rewraps
+        # Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
+        # /anthropic-suffixed gateways) so named providers like kimi-coding
+        # land on the right transport without needing per-provider branches.
+        client = _wrap_if_needed(client, final_model, base_url, api_key)

        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    if pconfig.auth_type == "external_process":
        creds = resolve_external_process_provider_credentials(provider)
-        final_model = _normalize_resolved_model(model or _read_main_model(), provider)
+        final_model = _normalize_resolved_model(
+            model
+            or (main_runtime.get("model") if main_runtime else None)
+            or _read_main_model(),
+            provider,
+        )
        if provider == "copilot-acp":
            api_key = str(creds.get("api_key", "")).strip()
            base_url = str(creds.get("base_url", "")).strip()
@@ -2049,7 +2304,7 @@ def resolve_provider_client(
                args=args,
            )
            logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-            return (_to_async_client(client, final_model) if async_mode
+            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
        logger.warning("resolve_provider_client: external-process provider %s not "
                       "directly supported", provider)
@@ -2085,7 +2340,7 @@ def resolve_provider_client(
            base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
        )
        logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
@@ -2160,8 +2415,13 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
    return _normalize_aux_provider(provider)


-def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
+def _resolve_strict_vision_backend(
+    provider: str,
+    model: Optional[str] = None,
+) -> Tuple[Optional[Any], Optional[str]]:
    provider = _normalize_vision_provider(provider)
+    if provider == "copilot":
+        return resolve_provider_client("copilot", model, is_vision=True)
    if provider == "openrouter":
        return _try_openrouter()
    if provider == "nous":
@@ -2229,7 +2489,7 @@ def resolve_vision_provider_client(
            return resolved_provider, None, None
        final_model = resolved_model or default_model
        if async_mode:
-            async_client, async_model = _to_async_client(sync_client, final_model)
+            async_client, async_model = _to_async_client(sync_client, final_model, is_vision=True)
            return resolved_provider, async_client, async_model
        return resolved_provider, sync_client, final_model

@@ -2261,8 +2521,11 @@ def resolve_vision_provider_client(
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
+            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
            if main_provider == "nous":
-                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
+                sync_client, default_model = _resolve_strict_vision_backend(
+                    main_provider, vision_model
+                )
                if sync_client is not None:
                    logger.info(
                        "Vision auto-detect: using main provider %s (%s)",
@@ -2270,10 +2533,10 @@ def resolve_vision_provider_client(
                    )
                    return _finalize(main_provider, sync_client, default_model)
            else:
-                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
-                    api_mode=resolved_api_mode)
+                    api_mode=resolved_api_mode,
+                    is_vision=True)
                if rpc_client is not None:
                    logger.info(
                        "Vision auto-detect: using main provider %s (%s)",
@@ -2295,11 +2558,14 @@ def resolve_vision_provider_client(
        return None, None, None

    if requested in _VISION_AUTO_PROVIDER_ORDER:
-        sync_client, default_model = _resolve_strict_vision_backend(requested)
+        sync_client, default_model = _resolve_strict_vision_backend(
+            requested, resolved_model
+        )
        return _finalize(requested, sync_client, default_model)

    client, final_model = _get_cached_client(requested, resolved_model, async_mode,
-                                             api_mode=resolved_api_mode)
+                                             api_mode=resolved_api_mode,
+                                             is_vision=True)
    if client is None:
        return requested, None, None
    return requested, client, final_model
@@ -2363,10 +2629,11 @@ def _client_cache_key(
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
+    is_vision: bool = False,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -2392,6 +2659,7 @@ def _refresh_nous_auxiliary_client(
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
+    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
    runtime = _resolve_nous_runtime_api(force_refresh=True)
@@ -2409,7 +2677,7 @@ def _refresh_nous_auxiliary_client(
            current_loop = _aio.get_event_loop()
        except RuntimeError:
            pass
-        client, final_model = _to_async_client(sync_client, final_model or "")
+        client, final_model = _to_async_client(sync_client, final_model or "", is_vision=is_vision)
    else:
        client = sync_client

@@ -2420,6 +2688,7 @@ def _refresh_nous_auxiliary_client(
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
+        is_vision=is_vision,
    )
    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
    return client, final_model
@@ -2531,12 +2800,19 @@ def _is_openrouter_client(client: Any) -> bool:
    return False


+def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
+    """Report whether a cached client is expected to take ``vendor/model`` IDs.
+
+    Heuristic only: an OpenRouter client always qualifies; any other client
+    qualifies when its cached default model itself contains a ``/``.
+    """
+    default_has_slash = bool(cached_default) and "/" in cached_default
+    return True if _is_openrouter_client(client) else default_has_slash
+
+
 def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
-    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
+    """Keep slash-bearing model IDs only for cached clients that support them.
 
     Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
     """
-    if model and "/" in model and not _is_openrouter_client(client):
+    # A ``vendor/model`` ID that this client is not expected to accept is
+    # swapped for the cached default instead of being sent through.
+    if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
         return cached_default
     return model or cached_default

@@ -2549,6 +2825,7 @@ def _get_cached_client(
    api_key: str = None,
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
+    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -2585,6 +2862,7 @@ def _get_cached_client(
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
+        is_vision=is_vision,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -2616,6 +2894,7 @@ def _get_cached_client(
        explicit_api_key=api_key,
        api_mode=api_mode,
        main_runtime=runtime,
+        is_vision=is_vision,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -3079,6 +3358,7 @@ def call_llm(
                api_key=resolved_api_key,
                api_mode=resolved_api_mode,
                main_runtime=main_runtime,
+                is_vision=(task == "vision"),
            )
            if refreshed_client is not None:
                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
@@ -3369,6 +3649,7 @@ async def async_call_llm(
                base_url=resolved_base_url,
                api_key=resolved_api_key,
                api_mode=resolved_api_mode,
+                is_vision=(task == "vision"),
            )
            if refreshed_client is not None:
                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
@@ -3437,7 +3718,9 @@ async def async_call_llm(
                    extra_body=effective_extra_body,
                    base_url=str(getattr(fb_client, "base_url", "") or ""))
                # Convert sync fallback client to async
-                async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
+                async_fb, async_fb_model = _to_async_client(
+                    fb_client, fb_model or "", is_vision=(task == "vision")
+                )
                if async_fb_model and async_fb_model != fb_kwargs.get("model"):
                    fb_kwargs["model"] = async_fb_model
                return _validate_llm_response(
@@ -291,14 +291,52 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
 def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
     """Resolve the AWS region for Bedrock API calls.
 
-    Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
+    Priority:
+      1. AWS_REGION env var
+      2. AWS_DEFAULT_REGION env var
+      3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
+      4. us-east-1 (hard fallback)
+
+    The boto3 fallback is critical for EU/AP users who configure their region
+    in ~/.aws/config via a named profile rather than env vars — without it,
+    live model discovery would always return us.* profile IDs regardless of
+    the user's actual region.
     """
     env = env if env is not None else os.environ
-    return (
+    explicit = (
         env.get("AWS_REGION", "").strip()
         or env.get("AWS_DEFAULT_REGION", "").strip()
-        or "us-east-1"
     )
+    # An env-var region (whitespace-stripped) always wins over any lookup.
+    if explicit:
+        return explicit
+    # The import sits inside the try so that a missing botocore is handled
+    # the same way as a failed lookup.
+    try:
+        import botocore.session
+        region = botocore.session.get_session().get_config_variable("region")
+        if region:
+            return region
+    except Exception:
+        # Best-effort: any failure here falls through to the default below.
+        pass
+    return "us-east-1"
+
+
+def bedrock_model_ids_or_none() -> Optional[List[str]]:
+    """Return live-discovered Bedrock model IDs for the resolved region.
+
+    Wraps the discover → extract-ids → fallback sequence so callers do not
+    have to repeat it.
+
+    Returns:
+        The ``"id"`` of every discovered model when discovery yields at least
+        one entry; ``None`` when discovery raises or comes back empty.  On
+        ``None`` callers are expected to use the static curated list instead.
+    """
+    try:
+        models = discover_bedrock_models(resolve_bedrock_region())
+        model_ids = [entry["id"] for entry in models] if models else None
+    except Exception:
+        # Discovery is best-effort; any failure is reported as "no result".
+        return None
+    return model_ids


 # ---------------------------------------------------------------------------
@@ -61,9 +61,52 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"

 # Chars per token rough estimate
 _CHARS_PER_TOKEN = 4
+# Flat token cost per attached image part.  Real cost varies by provider and
+# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
+# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
+# that keeps compression budgeting honest for multi-image conversations.
+# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
+_IMAGE_TOKEN_ESTIMATE = 1600
+# Same figure expressed in the char-budget currency the rest of the
+# compressor speaks in.  Used when accumulating message "content length"
+# for tail-cut decisions.
+_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _content_length_for_budget(raw_content: Any) -> int:
+    """Measure a message's content in chars for token-budget arithmetic.
+
+    * ``str`` content counts as ``len(content)``.
+    * Any other non-list content counts as the length of its ``str()`` form
+      (falsy values count as zero).
+    * Multimodal lists are summed part by part: image parts (``image_url`` /
+      ``input_image`` / Anthropic-style ``image``) cost a flat
+      ``_IMAGE_CHAR_EQUIVALENT`` each, every other dict part costs the length
+      of its ``text`` field, and bare strings / other objects cost their own
+      (stringified) length.
+
+    The flat image charge stops a turn that carries several images but no
+    text from being budgeted as if it were nearly empty.
+    """
+    if isinstance(raw_content, str):
+        return len(raw_content)
+    if not isinstance(raw_content, list):
+        return len(str(raw_content or ""))
+
+    def _part_length(part: Any) -> int:
+        if isinstance(part, str):
+            return len(part)
+        if not isinstance(part, dict):
+            return len(str(part))
+        if part.get("type") in {"image_url", "input_image", "image"}:
+            # Only the presence of an image matters; the base64 payload and
+            # its dimensions are deliberately not measured.
+            return _IMAGE_CHAR_EQUIVALENT
+        # text / input_text / tool_result-with-text / anything else that
+        # carries a text field.
+        return len(part.get("text", "") or "")
+
+    return sum(_part_length(part) for part in raw_content)
+
+
 def _content_text_for_contains(content: Any) -> str:
    """Return a best-effort text view of message content.

@@ -295,6 +338,10 @@ class ContextCompressor(ContextEngine):
        self._context_probe_persistable = False
        self._previous_summary = None
        self._last_summary_error = None
+        self._last_summary_dropped_count = 0
+        self._last_summary_fallback_used = False
+        self._last_aux_model_failure_error = None
+        self._last_aux_model_failure_model = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -398,6 +445,17 @@ class ContextCompressor(ContextEngine):
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
        self._last_summary_error: Optional[str] = None
+        # When summary generation fails and a static fallback is inserted,
+        # record how many turns were unrecoverably dropped so callers
+        # (gateway hygiene, /compress) can surface a visible warning.
+        self._last_summary_dropped_count: int = 0
+        self._last_summary_fallback_used: bool = False
+        # When a user-configured summary model fails and we recover by
+        # retrying on the main model, record the failure so gateway /
+        # CLI callers can still warn the user even though compression
+        # succeeded.  Silent recovery would hide the broken config.
+        self._last_aux_model_failure_error: Optional[str] = None
+        self._last_aux_model_failure_model: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -484,18 +542,7 @@ class ContextCompressor(ContextEngine):
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
                raw_content = msg.get("content") or ""
-                content_len = (
-                    sum(
-                        len(p.get("text", ""))
-                        if isinstance(p, dict)
-                        else len(p)
-                        if isinstance(p, str)
-                        else len(str(p))
-                        for p in raw_content
-                    )
-                    if isinstance(raw_content, list)
-                    else len(raw_content)
-                )
+                content_len = _content_length_for_budget(raw_content)
                msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                for tc in msg.get("tool_calls") or []:
                    if isinstance(tc, dict):
@@ -868,10 +915,50 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    "Falling back to main model '%s' for compression.",
                    self.summary_model, e, self.model,
                )
+                # Record the aux-model failure so callers can warn the user
+                # even if the retry-on-main succeeds — a misconfigured aux
+                # model is something the user needs to fix.
+                _err_text = str(e).strip() or e.__class__.__name__
+                if len(_err_text) > 220:
+                    _err_text = _err_text[:217].rstrip() + "..."
+                self._last_aux_model_failure_error = _err_text
+                self._last_aux_model_failure_model = self.summary_model
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately

+            # Unknown-error best-effort retry on main model.  Losing N turns of
+            # context is almost always worse than one extra summary attempt, so
+            # if we haven't already fallen back and the summary model differs
+            # from the main model, try once more on main before entering
+            # cooldown.  Errors that DID match _is_model_not_found above are
+            # already handled by the fast-path retry; this branch catches
+            # everything else (400s, provider-specific "no route" strings,
+            # aggregator rejections, etc.) where auto-retry is still safer
+            # than dropping the turns.
+            if (
+                self.summary_model
+                and self.summary_model != self.model
+                and not getattr(self, "_summary_model_fallen_back", False)
+            ):
+                self._summary_model_fallen_back = True
+                logging.warning(
+                    "Summary model '%s' failed (%s). "
+                    "Retrying on main model '%s' before giving up.",
+                    self.summary_model, e, self.model,
+                )
+                # Record the aux-model failure (see 404 branch above) — user
+                # should know their configured model is broken even if main
+                # recovers the call.
+                _err_text = str(e).strip() or e.__class__.__name__
+                if len(_err_text) > 220:
+                    _err_text = _err_text[:217].rstrip() + "..."
+                self._last_aux_model_failure_error = _err_text
+                self._last_aux_model_failure_model = self.summary_model
+                self.summary_model = ""  # empty = use main model
+                self._summary_failure_cooldown_until = 0.0
+                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
+
            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
@@ -1094,18 +1181,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(n - 1, head_end - 1, -1):
            msg = messages[i]
            raw_content = msg.get("content") or ""
-            content_len = (
-                sum(
-                    len(p.get("text", ""))
-                    if isinstance(p, dict)
-                    else len(p)
-                    if isinstance(p, str)
-                    else len(str(p))
-                    for p in raw_content
-                )
-                if isinstance(raw_content, list)
-                else len(raw_content)
-            )
+            content_len = _content_length_for_budget(raw_content)
            msg_tokens = content_len // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
            # Include tool call arguments in estimate
            for tc in msg.get("tool_calls") or []:
@@ -1175,6 +1251,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                related to this topic and be more aggressive about compressing
                everything else.  Inspired by Claude Code's ``/compact``.
        """
+        # Reset per-call summary failure state — callers inspect these fields
+        # after compress() returns to decide whether to surface a warning.
+        self._last_summary_dropped_count = 0
+        self._last_summary_fallback_used = False
+        self._last_summary_error = None
+        self._last_aux_model_failure_error = None
+        self._last_aux_model_failure_model = None
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
        _min_for_compress = self.protect_first_n + 3 + 1
@@ -1253,11 +1336,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            if not self.quiet_mode:
                logger.warning("Summary generation failed — inserting static fallback context marker")
            n_dropped = compress_end - compress_start
+            self._last_summary_dropped_count = n_dropped
+            self._last_summary_fallback_used = True
            summary = (
                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} conversation turns were "
+                f"Summary generation was unavailable. {n_dropped} message(s) were "
                f"removed to free context space but could not be summarized. The removed "
-                f"turns contained earlier work in this session. Continue based on the "
+                f"messages contained earlier work in this session. Continue based on the "
                f"recent messages below and the current state of any files or resources."
            )

@@ -7,7 +7,6 @@ import random
 import threading
 import time
 import uuid
-import os
 import re
 from dataclasses import dataclass, fields, replace
 from datetime import datetime
@@ -456,6 +455,70 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

+    def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
+        """Sync a Codex device_code pool entry from auth.json if tokens differ.
+
+        When a Codex OAuth access token expires (or the ChatGPT account hits
+        its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
+        with a ``last_error_reset_at`` that can be many hours in the future.
+        Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
+        performs a fresh device-code login and writes new tokens to
+        ``auth.json`` under ``_auth_store_lock``.  Without this sync the pool
+        entry stays frozen until ``last_error_reset_at`` elapses — even
+        though fresh credentials are sitting on disk — and every request
+        fails with "no available entries (all exhausted or empty)".
+
+        Mirrors the Nous/Anthropic resync paths above.  Only applies to
+        device_code-sourced entries; env/API-key-sourced entries have no
+        auth.json shadow to sync from.
+
+        Args:
+            entry: Pool entry to reconcile against the on-disk auth store.
+
+        Returns:
+            A new entry carrying the auth.json tokens with its status/error
+            fields reset to ``None`` (returned after ``_replace_entry`` and
+            ``_persist`` have run), or ``entry`` itself when nothing was
+            adopted or the sync raised.
+        """
+        if self.provider != "openai-codex" or entry.source != "device_code":
+            return entry
+        try:
+            # Only the read of auth.json happens under the auth-store lock.
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                state = _load_provider_state(auth_store, "openai-codex")
+            if not isinstance(state, dict):
+                return entry
+            tokens = state.get("tokens")
+            if not isinstance(tokens, dict):
+                return entry
+            store_access = tokens.get("access_token", "")
+            store_refresh = tokens.get("refresh_token", "")
+            # Adopt auth.json tokens when either side differs.  Codex refresh
+            # tokens are single-use too, so a fresh refresh_token from
+            # another process means our entry's pair is consumed/stale.
+            entry_access = entry.access_token or ""
+            entry_refresh = entry.refresh_token or ""
+            if store_access and (
+                store_access != entry_access
+                or (store_refresh and store_refresh != entry_refresh)
+            ):
+                logger.debug(
+                    "Pool entry %s: syncing Codex tokens from auth.json "
+                    "(refreshed by another process)",
+                    entry.id,
+                )
+                # Reset the status/error bookkeeping together with the new
+                # tokens; keep the old refresh token if the store has none.
+                field_updates: Dict[str, Any] = {
+                    "access_token": store_access,
+                    "refresh_token": store_refresh or entry.refresh_token,
+                    "last_status": None,
+                    "last_status_at": None,
+                    "last_error_code": None,
+                    "last_error_reason": None,
+                    "last_error_message": None,
+                    "last_error_reset_at": None,
+                }
+                if state.get("last_refresh"):
+                    field_updates["last_refresh"] = state["last_refresh"]
+                updated = replace(entry, **field_updates)
+                self._replace_entry(entry, updated)
+                self._persist()
+                return updated
+        except Exception as exc:
+            # Best-effort: a failed sync is logged at debug level and the
+            # caller gets the unchanged entry back.
+            logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
+        return entry
+
    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
        """Sync a Nous pool entry from auth.json if tokens differ.

@@ -788,6 +851,18 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
+            # For openai-codex entries, same pattern: the user may have
+            # re-authed via `hermes model` / `hermes auth` after a 429/401,
+            # leaving fresh tokens on disk while the pool entry is still
+            # frozen behind last_error_reset_at (can be hours in the
+            # future for ChatGPT weekly windows).
+            if (self.provider == "openai-codex"
+                    and entry.source == "device_code"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_codex_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -47,7 +47,6 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass, field
-from pathlib import Path
 from typing import Callable, List, Optional


@@ -42,6 +42,7 @@ class FailoverReason(enum.Enum):
    # Context / payload
    context_overflow = "context_overflow"  # Context too large — compress, not failover
    payload_too_large = "payload_too_large"  # 413 — compress payload
+    image_too_large = "image_too_large"   # Native image part exceeds provider's per-image limit — shrink and retry

    # Model
    model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
@@ -90,6 +91,7 @@ class ClassifiedError:
 _BILLING_PATTERNS = [
    "insufficient credits",
    "insufficient_quota",
+    "insufficient balance",
    "credit balance",
    "credits have been exhausted",
    "top up your credits",
@@ -147,6 +149,20 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
    "error code: 413",
 ]

+# Image-size patterns.  Matched against 400 bodies (not 413) because most
+# providers return a 400 with a specific image-too-big message before the
+# whole request hits the 413 size limit.  Anthropic's wording is the most
+# important here (hard 5 MB per image, returned as
+# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
+_IMAGE_TOO_LARGE_PATTERNS = [
+    "image exceeds",        # Anthropic: "image exceeds 5 MB maximum"
+    "image too large",      # generic
+    "image_too_large",      # error_code variant
+    "image size exceeds",   # variant
+    # "request_too_large" on a request known to contain an image → image is
+    # the likely culprit; we still try the shrink path before giving up.
+]
+
 # Context overflow patterns
 _CONTEXT_OVERFLOW_PATTERNS = [
    "context length",
@@ -671,6 +687,15 @@ def _classify_400(
 ) -> ClassifiedError:
    """Classify 400 Bad Request — context overflow, format error, or generic."""

+    # Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
+    # Must be checked BEFORE context_overflow because messages can trip both
+    # patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
+    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
+        return result_fn(
+            FailoverReason.image_too_large,
+            retryable=True,
+        )
+
    # Context overflow from 400
    if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
        return result_fn(
@@ -798,6 +823,13 @@ def _classify_by_message(
            should_compress=True,
        )

+    # Image-too-large patterns (from message text when no status_code)
+    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
+        return result_fn(
+            FailoverReason.image_too_large,
+            retryable=True,
+        )
+
    # Usage-limit patterns need the same disambiguation as 402: some providers
    # surface "usage limit" errors without an HTTP status code.  A transient
    # signal ("try again", "resets at", …) means it's a periodic quota, not
@@ -30,7 +30,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import uuid
 from types import SimpleNamespace
@@ -42,7 +41,6 @@ from agent import google_oauth
 from agent.gemini_schema import sanitize_gemini_tool_parameters
 from agent.google_code_assist import (
    CODE_ASSIST_ENDPOINT,
-    FREE_TIER_ID,
    CodeAssistError,
    ProjectContext,
    resolve_project_context,
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import Any, Dict, List
+from typing import Any, Dict

 # Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
 # object, which is only a subset of OpenAPI 3.0 / JSON Schema.  Strip fields
@@ -29,7 +29,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import urllib.error
 import urllib.parse
@@ -49,14 +49,13 @@ import json
 import logging
 import os
 import secrets
-import socket
 import stat
 import threading
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple

@@ -98,6 +97,7 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"

 # Regex patterns for fallback scraping from an installed gemini-cli.
 import re as _re
+from utils import atomic_replace
 _CLIENT_ID_PATTERN = _re.compile(
    r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
 )
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
                fh.flush()
                os.fsync(fh.fileno())
            os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
-            os.replace(tmp_path, path)
+            atomic_replace(tmp_path, path)
        finally:
            try:
                if tmp_path.exists():
@@ -0,0 +1,236 @@
+"""Routing helpers for inbound user-attached images.
+
+Two modes:
+
+  native  — attach images as OpenAI-style ``image_url`` content parts on the
+            user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
+            OpenAI chat.completions) already translate these into their
+            vendor-specific multimodal formats.
+
+  text    — run ``vision_analyze`` on each image up-front and prepend the
+            description to the user's text. The model never sees the pixels;
+            it only sees a lossy text summary. This is the pre-existing
+            behaviour and still the right choice for non-vision models.
+
+The decision is made once per message turn by :func:`decide_image_input_mode`.
+It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
+| ``text``, default ``auto``) and the active model's capability metadata.
+
+In ``auto`` mode:
+  - If the user has explicitly configured an auxiliary vision backend
+    (``auxiliary.vision.provider`` set to something other than ``auto`` /
+    empty, or a non-empty ``auxiliary.vision.model`` / ``base_url``), we
+    assume they want the text pipeline regardless of the main model —
+    they've opted in to a specific vision backend for a reason (cost,
+    quality, local-only, etc.).
+  - Otherwise, if the active model reports ``supports_vision=True`` in its
+    models.dev metadata, we attach natively.
+  - Otherwise (non-vision model, no explicit override), we fall back to text.
+
+This keeps ``vision_analyze`` surfaced as a tool in every session — skills
+and agent flows that chain it (browser screenshots, deeper inspection of
+URL-referenced images, style-gating loops) keep working. The routing only
+affects *how user-attached images on the current turn* are presented to the
+main model.
+"""
+
+from __future__ import annotations
+
+import base64
+import logging
+import mimetypes
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+_VALID_MODES = frozenset({"auto", "native", "text"})
+
+
+def _coerce_mode(raw: Any) -> str:
+    """Map a raw config value onto a valid mode, defaulting to ``"auto"``.
+
+    Strings are trimmed and lower-cased before being checked against
+    ``_VALID_MODES``; non-strings and unrecognised strings yield ``"auto"``.
+    """
+    if isinstance(raw, str):
+        candidate = raw.strip().lower()
+        if candidate in _VALID_MODES:
+            return candidate
+    return "auto"
+
+
+def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
+    """Tell whether ``auxiliary.vision`` names a specific vision backend.
+
+    The override counts as explicit when the provider is anything other than
+    blank / ``auto``, or when a non-blank ``model`` or ``base_url`` is set.
+    Such a user has chosen a dedicated vision model, so the text pipeline
+    must not be silently bypassed.  A missing or malformed config section
+    is treated as "no override".
+    """
+    # Walk cfg -> auxiliary -> vision, bailing out on any non-dict level.
+    section: Any = cfg
+    for key in ("auxiliary", "vision"):
+        if not isinstance(section, dict):
+            return False
+        section = section.get(key) or {}
+    if not isinstance(section, dict):
+        return False
+
+    def _clean(field_name: str) -> str:
+        return str(section.get(field_name) or "").strip()
+
+    provider_is_default = _clean("provider").lower() in ("", "auto")
+    has_model = bool(_clean("model"))
+    has_base_url = bool(_clean("base_url"))
+
+    # "auto" / "" / blank provider with nothing else set = not explicit
+    return not provider_is_default or has_model or has_base_url
+
+
+def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
+    """Look up whether ``provider``/``model`` supports vision.
+
+    Returns:
+        ``True`` / ``False`` from the model's capability record, or ``None``
+        when either argument is empty, the lookup raises, or no record is
+        returned.
+    """
+    if not (provider and model):
+        return None
+    try:
+        from agent.models_dev import get_model_capabilities
+        capabilities = get_model_capabilities(provider, model)
+    except Exception as exc:  # pragma: no cover - defensive
+        logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
+        return None
+    return None if capabilities is None else bool(capabilities.supports_vision)
+
+
+def decide_image_input_mode(
+    provider: str,
+    model: str,
+    cfg: Optional[Dict[str, Any]],
+) -> str:
+    """Choose ``"native"`` or ``"text"`` image handling for the current turn.
+
+    An explicit ``agent.image_input_mode`` of ``native`` or ``text`` is
+    honoured as-is.  In ``auto`` mode an explicit auxiliary vision backend
+    forces ``"text"``; otherwise the result is ``"native"`` only when the
+    capability lookup positively reports vision support.
+
+    Args:
+      provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
+      model:    active model slug as it would be sent to the provider.
+      cfg:      loaded config.yaml dict, or None. When None, behaves as auto.
+    """
+    configured = "auto"
+    agent_section = (cfg.get("agent") or {}) if isinstance(cfg, dict) else None
+    if isinstance(agent_section, dict):
+        configured = _coerce_mode(agent_section.get("image_input_mode"))
+
+    if configured in ("native", "text"):
+        return configured
+
+    # auto
+    if _explicit_aux_vision_override(cfg):
+        return "text"
+    return "native" if _lookup_supports_vision(provider, model) is True else "text"
+
+
+# Image size handling is REACTIVE rather than proactive: we attempt native
+# attachment at full size regardless of provider, and rely on
+# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
+# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
+# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
+#
+# Why reactive: our knowledge of provider ceilings is partial and evolving
+# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
+# A proactive per-provider table would be stale the moment a provider raises
+# or lowers its limit, and silently degrading quality for users on providers
+# that would have accepted the full image is the worse failure mode.
+# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
+# it fires, which is cheaper than permanent quality loss.
+
+
+def _guess_mime(path: Path) -> str:
+    """Return an ``image/*`` MIME type for ``path``.
+
+    The ``mimetypes`` guess is used when it is an image type; otherwise the
+    lower-cased suffix is looked up in a small table, and anything not in
+    the table falls back to ``image/jpeg``.
+    """
+    guessed = mimetypes.guess_type(str(path))[0]
+    if guessed and guessed.startswith("image/"):
+        return guessed
+    # mimetypes on some Linux distros mis-maps .jpg, so keep our own table
+    # for the common image suffixes.
+    fallback_by_suffix = {
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".png": "image/png",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+    }
+    return fallback_by_suffix.get(path.suffix.lower(), "image/jpeg")
+
+
+def _file_to_data_url(path: Path) -> Optional[str]:
+    """Read a local image and return it as a base64 ``data:`` URL.
+
+    The bytes are encoded exactly as read; no size limit is applied here.
+    Oversized images are dealt with by the agent retry loop
+    (``run_agent._try_shrink_image_parts_in_messages``) after a provider
+    rejects the request, so providers that accept large images
+    (OpenAI 49 MB+, Gemini 100 MB) keep full quality.
+
+    Returns None only if the file can't be read (missing, permission
+    denied, etc.); the caller reports those paths in ``skipped``.
+    """
+    try:
+        payload = path.read_bytes()
+    except Exception as exc:
+        logger.warning("image_routing: failed to read %s — %s", path, exc)
+        return None
+    encoded = base64.b64encode(payload).decode("ascii")
+    return "data:" + _guess_mime(path) + ";base64," + encoded
+
+
+def build_native_content_parts(
+    user_text: str,
+    image_paths: List[str],
+) -> Tuple[List[Dict[str, Any]], List[str]]:
+    """Assemble an OpenAI-style ``content`` list for a user turn with images.
+
+    Shape:
+      [{"type": "text", "text": "..."},
+       {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
+       ...]
+
+    Images are embedded at their native size. When a provider rejects an
+    image as too large (e.g. Anthropic's 5 MB per-image ceiling), the
+    agent's retry loop shrinks and retries once — see
+    ``run_agent._try_shrink_image_parts_in_messages``.
+
+    Returns (content_parts, skipped_paths). Skipped paths are entries that
+    are not existing regular files or that could not be read.
+    """
+    prompt = (user_text or "").strip()
+    image_parts: List[Dict[str, Any]] = []
+    skipped: List[str] = []
+
+    for raw_path in image_paths:
+        candidate = Path(raw_path)
+        data_url = None
+        if candidate.exists() and candidate.is_file():
+            data_url = _file_to_data_url(candidate)
+        if not data_url:
+            skipped.append(str(raw_path))
+            continue
+        image_parts.append({
+            "type": "image_url",
+            "image_url": {"url": data_url},
+        })
+
+    parts: List[Dict[str, Any]] = []
+    if prompt:
+        parts.append({"type": "text", "text": prompt})
+    elif image_parts:
+        # No user text: lead with a neutral prompt so the turn isn't just images.
+        parts.append({"type": "text", "text": "What do you see in this image?"})
+    parts.extend(image_parts)
+
+    return parts, skipped
+
+
+__all__ = [
+    "decide_image_input_mode",
+    "build_native_content_parts",
+]
@@ -0,0 +1,48 @@
+"""LM Studio reasoning-effort resolution shared by the chat-completions
+transport and run_agent's iteration-limit summary path.
+
+LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
+``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
+graduated models). We map the user's ``reasoning_config`` onto LM Studio's
+OpenAI-compatible vocabulary, then clamp against the model's allowed set so
+the server doesn't 400 on an unsupported effort.
+"""
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+# LM Studio accepts these top-level reasoning_effort values via its
+# OpenAI-compatible chat.completions endpoint.
+_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
+
+# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
+# Map them onto the OpenAI-compatible request vocabulary.
+_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
+
+
+def resolve_lmstudio_effort(
+    reasoning_config: Optional[dict],
+    allowed_options: Optional[List[str]],
+) -> Optional[str]:
+    """Resolve the ``reasoning_effort`` value to send to LM Studio, if any.
+
+    The effort starts at ``"medium"``. A dict ``reasoning_config`` with
+    ``enabled`` set to ``False`` yields ``"none"``; otherwise its ``effort``
+    (alias-mapped, lower-cased) is used when it is a valid LM Studio effort.
+
+    A ``None`` result means "omit the field": the chosen level is not in the
+    model's allowed set, so LM Studio should use the model's own default
+    instead of a silently substituted effort. When ``allowed_options`` is
+    falsy (probe failed), no clamping happens and the effort is returned.
+    """
+    chosen = "medium"
+    if reasoning_config and isinstance(reasoning_config, dict):
+        if reasoning_config.get("enabled") is False:
+            chosen = "none"
+        else:
+            requested = (reasoning_config.get("effort") or "").strip().lower()
+            requested = _LM_EFFORT_ALIASES.get(requested, requested)
+            # Unknown values keep the "medium" default.
+            chosen = requested if requested in _LM_VALID_EFFORTS else chosen
+
+    if not allowed_options:
+        return chosen
+
+    # Translate the model's published options (e.g. "off"/"on") into the
+    # request vocabulary before comparing.
+    permitted = set()
+    for option in allowed_options:
+        permitted.add(_LM_EFFORT_ALIASES.get(option, option))
+    return chosen if chosen in permitted else None
@@ -28,7 +28,6 @@ Usage in run_agent.py:

 from __future__ import annotations

-import json
 import logging
 import re
 import inspect
@@ -63,15 +62,124 @@ def sanitize_context(text: str) -> str:
    return text


-def build_memory_context_block(raw_context: str) -> str:
-    """Wrap prefetched memory in a fenced block with system note.
+class StreamingContextScrubber:
+    """Stateful scrubber for streaming text that may contain split memory-context spans.

-    The fence prevents the model from treating recalled context as user
-    discourse.  Injected at API-call time only — never persisted.
+    The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
+    a ``<memory-context>`` opened in one delta and closed in a later delta
+    leaks its payload to the UI because the non-greedy block regex needs
+    both tags in one string.  This scrubber runs a small state machine
+    across deltas, holding back partial-tag tails and discarding
+    everything inside a span (including the system-note line).
+
+    Usage::
+
+        scrubber = StreamingContextScrubber()
+        for delta in stream:
+            visible = scrubber.feed(delta)
+            if visible:
+                emit(visible)
+        trailing = scrubber.flush()  # at end of stream
+        if trailing:
+            emit(trailing)
+
+    The scrubber is re-entrant per agent instance.  Callers building new
+    top-level responses (new turn) should create a fresh scrubber or call
+    ``reset()``.
    """
+
+    _OPEN_TAG = "<memory-context>"
+    _CLOSE_TAG = "</memory-context>"
+
+    def __init__(self) -> None:
+        """Start outside any span with nothing held back."""
+        # Held-back text that may be the beginning of an open/close tag.
+        self._buf: str = ""
+        # True while the stream is inside a <memory-context> span.
+        self._in_span: bool = False
+
+    def reset(self) -> None:
+        """Return to the initial state: outside any span, empty buffer."""
+        self._buf, self._in_span = "", False
+
+    def feed(self, text: str) -> str:
+        """Return the visible portion of ``text`` after scrubbing.
+
+        Text outside ``<memory-context>`` spans is returned; text inside a
+        span (and the tags themselves) is dropped.  Tags are matched
+        case-insensitively.  Any trailing fragment that could be the start
+        of an open/close tag is held back in the internal buffer and
+        surfaced on the next ``feed()`` call or discarded/emitted by
+        ``flush()``.
+
+        Args:
+            text: the next streamed delta; empty/falsy input returns ``""``
+                without touching the held-back buffer.
+
+        Returns:
+            The text from this call (plus any previously held-back tail)
+            that is safe to show.
+        """
+        if not text:
+            return ""
+        buf = self._buf + text
+        self._buf = ""
+        out: list[str] = []
+
+        while buf:
+            tag = self._CLOSE_TAG if self._in_span else self._OPEN_TAG
+            # Search the original string with a case-insensitive regex
+            # rather than searching ``buf.lower()``: lower-casing can change
+            # the string's length (e.g. U+0130), which would make the found
+            # offset point at the wrong place in ``buf``.
+            match = re.search(re.escape(tag), buf, re.IGNORECASE)
+            if match is None:
+                # No complete tag: keep a possible partial tag for the next
+                # delta.  Inside a span the rest is dropped; outside it is
+                # emitted.
+                held = self._max_partial_suffix(buf, tag)
+                if self._in_span:
+                    self._buf = buf[-held:] if held else ""
+                elif held:
+                    out.append(buf[:-held])
+                    self._buf = buf[-held:]
+                else:
+                    out.append(buf)
+                break
+            if not self._in_span and match.start() > 0:
+                # Text before an opening tag is visible.
+                out.append(buf[:match.start()])
+            # Skip past the tag and flip between inside/outside the span.
+            buf = buf[match.end():]
+            self._in_span = not self._in_span
+
+        return "".join(out)
+
+    def flush(self) -> str:
+        """Finish the stream and return whatever held-back text is visible.
+
+        Inside an unterminated span the buffered content is discarded and
+        ``""`` is returned (leaking partial memory context is worse than a
+        truncated answer).  Outside a span the held-back partial-tag tail is
+        returned verbatim, since it never completed into a real tag.  In
+        both cases the buffer is emptied and the scrubber ends up outside
+        any span.
+        """
+        pending, self._buf = self._buf, ""
+        if not self._in_span:
+            return pending
+        self._in_span = False
+        return ""
+
+    @staticmethod
+    def _max_partial_suffix(buf: str, tag: str) -> int:
+        """Length of the longest suffix of ``buf`` that is a proper prefix of ``tag``.
+
+        Comparison is done on lower-cased copies of both strings.  A full
+        tag is never counted (at most ``len(tag) - 1`` characters are
+        considered).  Returns 0 when no suffix could begin the tag.
+        """
+        needle = tag.lower()
+        haystack = buf.lower()
+        longest = min(len(haystack), len(needle) - 1)
+        # Try the longest candidate first so the first hit is the maximum.
+        candidates = (
+            size
+            for size in range(longest, 0, -1)
+            if needle.startswith(haystack[-size:])
+        )
+        return next(candidates, 0)
+
+
+def build_memory_context_block(raw_context: str) -> str:
+    """Wrap prefetched memory in a fenced block with system note."""
    if not raw_context or not raw_context.strip():
        return ""
    clean = sanitize_context(raw_context)
+    if clean != raw_context:
+        logger.warning("memory provider returned pre-wrapped context; stripped")
    return (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, "
@@ -51,6 +51,8 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
+    "gmi",
+    "tencent-tokenhub",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -59,7 +61,9 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "ollama",
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
+    "tencent", "tokenhub", "tencent-cloud", "tencentmaas",
    "arcee-ai", "arceeai",
+    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
    "nvidia", "nim", "nvidia-nim", "nemotron",
    "qwen-portal",
@@ -206,6 +210,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "grok": 131072,             # catch-all (grok-beta, unknown grok-*)
    # Kimi
    "kimi": 262144,
+    # Tencent — Hy3 Preview (Hunyuan) with 256K context window
+    "hy3-preview": 256000,
    # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
    "nemotron": 131072,
    # Arcee
@@ -307,6 +313,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
+    "api.gmi-serving.com": "gmi",
+    "tokenhub.tencentmaas.com": "tencent-tokenhub",
    "ollama.com": "ollama-cloud",
 }

@@ -617,8 +625,6 @@ def fetch_endpoint_model_metadata(
                        if isinstance(ctx, int) and ctx > 0:
                            context_length = ctx
                            break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
                    if context_length is not None:
                        entry["context_length"] = context_length

@@ -702,6 +708,29 @@ def fetch_endpoint_model_metadata(
    return {}


+def _resolve_endpoint_context_length(
+    model: str,
+    base_url: str,
+    api_key: str = "",
+) -> Optional[int]:
+    """Resolve context length from an endpoint's live ``/models`` metadata.
+
+    Match order: exact model key; else the only entry when the endpoint
+    reports exactly one model; else the first entry whose key contains
+    ``model`` or is contained in it.  Returns the matched entry's integer
+    ``context_length``, or ``None`` when nothing usable is found.
+    """
+    metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
+    entry = metadata.get(model)
+    if not entry:
+        if len(metadata) == 1:
+            # Single-model servers: use whatever is loaded.
+            entry = next(iter(metadata.values()))
+        else:
+            # Fuzzy match: substring in either direction, first hit wins.
+            entry = next(
+                (
+                    candidate
+                    for name, candidate in metadata.items()
+                    if model in name or name in model
+                ),
+                entry,
+            )
+    if not entry:
+        return None
+    reported = entry.get("context_length")
+    return reported if isinstance(reported, int) else None
+
+
 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
    from hermes_constants import get_hermes_home
@@ -985,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
                                ctx = cfg.get("context_length")
                                if ctx and isinstance(ctx, (int, float)):
                                    return int(ctx)
-                            # Fall back to max_context_length (theoretical model max)
-                            ctx = m.get("max_context_length") or m.get("context_length")
-                            if ctx and isinstance(ctx, (int, float)):
-                                return int(ctx)
+                            break

            # LM Studio / vLLM / llama.cpp: try /v1/models/{model}
            resp = client.get(f"{server_url}/v1/models/{model}")
@@ -1250,7 +1276,10 @@ def get_model_context_length(
    model = _strip_provider_prefix(model)

    # 1. Check persistent cache (model+provider)
-    if base_url:
+    # LM Studio is excluded — its loaded context length is transient (the
+    # user can reload the model with a different context_length at any time
+    # via /api/v1/models/load), so a stale cached value would mask reloads.
+    if base_url and provider != "lmstudio":
        cached = get_cached_context_length(model, base_url)
        if cached is not None:
            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
@@ -1295,28 +1324,16 @@ def get_model_context_length(
    # returns 128k) instead of the model's full context (400k).  models.dev
    # has the correct per-provider values and is checked at step 5+.
    if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
-        endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
-        matched = endpoint_metadata.get(model)
-        if not matched:
-            # Single-model servers: if only one model is loaded, use it
-            if len(endpoint_metadata) == 1:
-                matched = next(iter(endpoint_metadata.values()))
-            else:
-                # Fuzzy match: substring in either direction
-                for key, entry in endpoint_metadata.items():
-                    if model in key or key in model:
-                        matched = entry
-                        break
-        if matched:
-            context_length = matched.get("context_length")
-            if isinstance(context_length, int):
-                return context_length
+        context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
+        if context_length is not None:
+            return context_length
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
-                    save_context_length(model, base_url, local_ctx)
+                    if provider != "lmstudio":
+                        save_context_length(model, base_url, local_ctx)
                    return local_ctx
            logger.info(
                "Could not detect context length for model %r at %s — "
@@ -1374,6 +1391,12 @@ def get_model_context_length(
            if base_url:
                save_context_length(model, base_url, codex_ctx)
            return codex_ctx
+    if effective_provider == "gmi" and base_url:
+        # GMI exposes authoritative context_length via /models, but it is not
+        # in models.dev yet. Preserve that higher-fidelity endpoint lookup.
+        ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
+        if ctx is not None:
+            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
@@ -1400,7 +1423,8 @@ def get_model_context_length(
    if base_url and is_local_endpoint(base_url):
        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
-            save_context_length(model, base_url, local_ctx)
+            if provider != "lmstudio":
+                save_context_length(model, base_url, local_ctx)
            return local_ctx

    # 10. Default fallback — 128K
@@ -18,6 +18,7 @@ import os
 import tempfile
 import time
 from typing import Any, Mapping, Optional
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -118,7 +119,7 @@ def record_nous_rate_limit(
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(state, f)
-            os.replace(tmp_path, path)
+            atomic_replace(tmp_path, path)
        except Exception:
            # Clean up temp file on failure
            try:
@@ -141,6 +141,12 @@ DEFAULT_AGENT_IDENTITY = (
    "Be targeted and efficient in your exploration and investigations."
 )

+HERMES_AGENT_HELP_GUIDANCE = (
+    "If the user asks about configuring, setting up, or using Hermes Agent "
+    "itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
+    "before answering. Docs: https://hermes-agent.nousresearch.com/docs"
+)
+
 MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
@@ -304,6 +310,10 @@ PLATFORM_HINTS = {
        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
+        "Telegram has NO table syntax — prefer bullet lists or labeled "
+        "key: value pairs over pipe tables (any tables you do emit are "
+        "auto-rewritten into row-group bullets, which you can produce "
+        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -56,8 +56,12 @@ _SENSITIVE_BODY_KEYS = frozenset({
 })

 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
-# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
-_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
+# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
+# mid-session.  OFF by default — user must opt in via
+# `security.redact_secrets: true` in config.yaml (bridged to this env var
+# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
+# in ~/.hermes/.env.
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")

 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
@@ -180,11 +184,59 @@ _PREFIX_RE = re.compile(
 )


+def mask_secret(
+    value: str,
+    *,
+    head: int = 4,
+    tail: int = 4,
+    floor: int = 12,
+    placeholder: str = "***",
+    empty: str = "",
+) -> str:
+    """Mask a secret for display, preserving ``head`` and ``tail`` characters.
+
+    Canonical helper for display-time redaction across Hermes — used by
+    ``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
+    a secret needs to be shown truncated for debuggability while still
+    keeping the bulk hidden.
+
+    Args:
+        value:       The secret to mask. ``None``/empty returns ``empty``.
+        head:        Leading characters to preserve. Default 4. Negative
+                     values are treated as 0.
+        tail:        Trailing characters to preserve. Default 4. Negative
+                     values are treated as 0.
+        floor:       Values shorter than ``floor`` characters are fully
+                     masked (returns ``placeholder``). Default 12 —
+                     matches the existing config/status/dump convention.
+        placeholder: Value returned for inputs that are too short, or for
+                     which ``head + tail`` would expose the entire value.
+                     Default ``"***"``.
+        empty:       Value returned when ``value`` is falsy (None, ""). The
+                     caller can override this to e.g. ``color("(not set)",
+                     Colors.DIM)`` for user-facing display.
+
+    Returns:
+        ``empty``, ``placeholder``, or ``"<head chars>...<tail chars>"``.
+
+    Examples:
+        >>> mask_secret("sk-proj-abcdef1234567890")
+        'sk-p...7890'
+        >>> mask_secret("short")                         # fully masked
+        '***'
+        >>> mask_secret("")                              # empty default
+        ''
+        >>> mask_secret("", empty="(not set)")           # empty override
+        '(not set)'
+        >>> mask_secret("long-token", head=6, tail=4, floor=18)
+        '***'
+        >>> mask_secret("abcdefghijklmnop", tail=0)      # no suffix shown
+        'abcd...'
+    """
+    if not value:
+        return empty
+    # A negative count would turn the slices below into "all but N
+    # characters" and expose most of the secret; clamp to zero.
+    head = max(head, 0)
+    tail = max(tail, 0)
+    # Fully mask when the value is short, or when the preserved head and
+    # tail together would cover (and so reveal) the whole value.
+    if len(value) < floor or head + tail >= len(value):
+        return placeholder
+    prefix = value[:head]
+    # ``value[-0:]`` is the entire string, so tail == 0 needs an explicit
+    # empty suffix.
+    suffix = value[-tail:] if tail else ""
+    return f"{prefix}...{suffix}"
+
+
 def _mask_token(token: str) -> str:
-    """Mask a token, preserving prefix for long tokens."""
-    if len(token) < 18:
+    """Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
+    # Empty input: historically this returned "***" rather than "". Preserve.
+    if not token:
        return "***"
-    return f"{token[:6]}...{token[-4:]}"
+    return mask_secret(token, head=6, tail=4, floor=18)


 def _redact_query_string(query: str) -> str:
@@ -257,7 +309,7 @@ def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
-    Disabled when security.redact_secrets is false in config.yaml.
+    Disabled by default — enable via security.redact_secrets: true in config.yaml.
    """
    if text is None:
        return None
@@ -76,6 +76,7 @@ except ImportError:  # pragma: no cover
    fcntl = None  # type: ignore[assignment]

 from hermes_constants import get_hermes_home
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -568,7 +569,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
        try:
            with os.fdopen(fd, "w") as fh:
                fh.write(json.dumps(data, indent=2, sort_keys=True))
-            os.replace(tmp_path, p)
+            atomic_replace(tmp_path, p)
        except Exception:
            try:
                os.unlink(tmp_path)
@@ -30,10 +30,12 @@ def generate_title(
    assistant_response: str,
    timeout: float = 30.0,
    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> Optional[str]:
    """Generate a session title from the first exchange.

-    Uses the auxiliary LLM client (cheapest/fastest available model).
+    Uses the main runtime's model when available, falling back to the
+    auxiliary LLM client (cheapest/fastest available model).
    Returns the title string or None on failure.

    ``failure_callback`` is invoked with ``(task, exception)`` when the
@@ -57,6 +59,7 @@ def generate_title(
            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
+            main_runtime=main_runtime,
        )
        title = (response.choices[0].message.content or "").strip()
        # Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
@@ -86,6 +89,7 @@ def auto_title_session(
    user_message: str,
    assistant_response: str,
    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -107,7 +111,7 @@ def auto_title_session(
        return

    title = generate_title(
-        user_message, assistant_response, failure_callback=failure_callback
+        user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
    )
    if not title:
        return
@@ -126,6 +130,7 @@ def maybe_auto_title(
    assistant_response: str,
    conversation_history: list,
    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -147,7 +152,7 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
-        kwargs={"failure_callback": failure_callback},
+        kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
        daemon=True,
        name="auto-title",
    )
@@ -85,9 +85,6 @@ class AnthropicTransport(ProviderTransport):
        from agent.anthropic_adapter import _to_plain_data
        from agent.transports.types import ToolCall

-        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        _MCP_PREFIX = "mcp_"
-
        text_parts = []
        reasoning_parts = []
        reasoning_details = []
@@ -102,13 +99,10 @@ class AnthropicTransport(ProviderTransport):
                if isinstance(block_dict, dict):
                    reasoning_details.append(block_dict)
            elif block.type == "tool_use":
-                name = block.name
-                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
-                    name = name[len(_MCP_PREFIX):]
                tool_calls.append(
                    ToolCall(
                        id=block.id,
-                        name=name,
+                        name=block.name,
                        arguments=json.dumps(block.input),
                    )
                )
@@ -12,12 +12,65 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

+from agent.lmstudio_reasoning import resolve_lmstudio_effort
 from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage


+def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
+    """Map a Hermes/OpenRouter-style reasoning config onto Gemini ``thinkingConfig``.
+
+    Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
+    They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
+    then request thought parts with ``includeThoughts`` enabled.
+
+    Returns ``None`` when ``reasoning_config`` is not a dict,
+    ``{"includeThoughts": False}`` when reasoning is disabled or the effort
+    is ``"none"``, and otherwise ``{"includeThoughts": True}`` with a
+    ``thinkingLevel`` added only for Gemini 3 flash/pro models.
+    """
+    if not isinstance(reasoning_config, dict):
+        return None
+
+    # Gemini can hide thought parts even when internal thinking still
+    # happens; omit thinkingLevel to avoid model-specific validation quirks.
+    if reasoning_config.get("enabled") is False:
+        return {"includeThoughts": False}
+
+    effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
+    if effort == "none":
+        return {"includeThoughts": False}
+
+    result: Dict[str, Any] = {"includeThoughts": True}
+
+    model_id = (model or "").strip().lower()
+    if model_id.startswith("google/"):
+        model_id = model_id.split("/", 1)[1]
+
+    # Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
+    # coarse effort levels. ``includeThoughts`` alone is enough to surface
+    # thought parts without risking request validation errors.
+    if model_id.startswith("gemini-2.5-"):
+        return result
+
+    # Only Gemini 3.x flash/pro models get an explicit thinkingLevel.
+    # (The "gemini-3" prefix also covers "gemini-3.1".)
+    if not model_id.startswith("gemini-3"):
+        return result
+
+    # Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
+    # is stricter (low/high). Clamp Hermes' wider effort set to what each
+    # family accepts so we never forward an undocumented level verbatim.
+    flash_levels = {
+        "minimal": "low",
+        "low": "low",
+        "medium": "medium",
+        "high": "high",
+        "xhigh": "high",
+    }
+    if "flash" in model_id:
+        # Unrecognised efforts behave like "medium".
+        result["thinkingLevel"] = flash_levels.get(effort, "medium")
+    elif "pro" in model_id:
+        result["thinkingLevel"] = "high" if effort in ("high", "xhigh") else "low"
+
+    return result
+
+
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@@ -101,6 +154,7 @@ class ChatCompletionsTransport(ProviderTransport):
            is_github_models: bool
            is_nvidia_nim: bool
            is_kimi: bool
+            is_lmstudio: bool
            is_custom_provider: bool
            ollama_num_ctx: int | None
            # Provider routing
@@ -114,6 +168,7 @@ class ChatCompletionsTransport(ProviderTransport):
            # Reasoning
            supports_reasoning: bool
            github_reasoning_extra: dict | None
+            lmstudio_reasoning_options: list[str] | None  # raw allowed_options from /api/v1/models
            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
            # Extra
@@ -188,6 +243,7 @@ class ChatCompletionsTransport(ProviderTransport):
        anthropic_max_out = params.get("anthropic_max_output")
        is_nvidia_nim = params.get("is_nvidia_nim", False)
        is_kimi = params.get("is_kimi", False)
+        is_tokenhub = params.get("is_tokenhub", False)
        reasoning_config = params.get("reasoning_config")

        if ephemeral is not None and max_tokens_fn:
@@ -219,12 +275,40 @@ class ChatCompletionsTransport(ProviderTransport):
                        _kimi_effort = _e
                api_kwargs["reasoning_effort"] = _kimi_effort

+        # Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
+        if is_tokenhub:
+            _tokenhub_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _tokenhub_thinking_off:
+                _tokenhub_effort = "high"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _tokenhub_effort = _e
+                api_kwargs["reasoning_effort"] = _tokenhub_effort
+
+        # LM Studio: top-level reasoning_effort. Only emit when the model
+        # declares reasoning support via /api/v1/models capabilities (gated
+        # upstream by params["supports_reasoning"]). resolve_lmstudio_effort
+        # is shared with run_agent's summary path so both stay in sync.
+        if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
+            _lm_effort = resolve_lmstudio_effort(
+                reasoning_config,
+                params.get("lmstudio_reasoning_options"),
+            )
+            if _lm_effort is not None:
+                api_kwargs["reasoning_effort"] = _lm_effort
+
        # extra_body assembly
        extra_body: Dict[str, Any] = {}

        is_openrouter = params.get("is_openrouter", False)
        is_nous = params.get("is_nous", False)
        is_github_models = params.get("is_github_models", False)
+        provider_name = str(params.get("provider_name") or "").strip().lower()

        provider_prefs = params.get("provider_preferences")
        if provider_prefs and is_openrouter:
@@ -240,8 +324,9 @@ class ChatCompletionsTransport(ProviderTransport):
                "type": "enabled" if _kimi_thinking_enabled else "disabled",
            }

-        # Reasoning
-        if params.get("supports_reasoning", False):
+        # Reasoning. LM Studio is handled above via top-level reasoning_effort,
+        # so skip emitting extra_body.reasoning for it.
+        if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
            if is_github_models:
                gh_reasoning = params.get("github_reasoning_extra")
                if gh_reasoning is not None:
@@ -277,6 +362,11 @@ class ChatCompletionsTransport(ProviderTransport):
        if is_qwen:
            extra_body["vl_high_resolution_images"] = True

+        if provider_name in {"gemini", "google-gemini-cli"}:
+            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if thinking_config:
+                extra_body["thinking_config"] = thinking_config
+
        # Merge any pre-built extra_body additions
        additions = params.get("extra_body_additions")
        if additions:
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
 from typing import Any, Dict, List, Optional

 from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
+from agent.transports.types import NormalizedResponse, ToolCall


 class ResponsesApiTransport(ProviderTransport):
@@ -151,8 +151,6 @@ class ResponsesApiTransport(ProviderTransport):
        """Normalize Codex Responses API response to NormalizedResponse."""
        from agent.codex_responses_adapter import (
            _normalize_codex_response,
-            _extract_responses_message_text,
-            _extract_responses_reasoning_text,
        )

        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
@@ -30,14 +30,13 @@ model:
  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
+  #   "lmstudio"     - LM Studio local server (optional: LM_API_KEY; base_url defaults to http://127.0.0.1:1234/v1)
  #
  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
-  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
-  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
-  #   Example for LM Studio:
-  #     provider: "lmstudio"
-  #     base_url: "http://localhost:1234/v1"
-  #   No API key needed — local servers typically ignore auth.
+  #   "custom"       - Any other OpenAI-compatible endpoint. Set base_url below.
+  #   Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
+  #   LM Studio is first-class and uses provider: "lmstudio".
+  #   It works with both no-auth and auth-enabled server modes.
  #
  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  provider: "auto"
@@ -928,7 +927,7 @@ display:
  #     agent_name: "My Agent"               # Banner title and branding
  #     welcome: "Welcome message"           # Shown at CLI startup
  #     response_label: " ⚔ Agent "         # Response box header label
-  #     prompt_symbol: "⚔ ❯ "              # Prompt symbol
+  #     prompt_symbol: "⚔"                  # Prompt symbol (bare token; renderers add trailing space)
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default
@@ -69,7 +69,9 @@ from agent.usage_pricing import (
    format_duration_compact,
    format_token_count_compact,
 )
-from agent.account_usage import fetch_account_usage, render_account_usage_lines
+# NOTE: `from agent.account_usage import ...` is deliberately NOT at module
+# top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
+# needed when the user runs `/limits`. Lazy-imported inside the handler below.
 from hermes_cli.banner import _format_context_length, format_banner_version_label

 _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
@@ -759,9 +761,17 @@ def _run_cleanup():
        pass
    try:
        if _active_agent_ref and hasattr(_active_agent_ref, 'shutdown_memory_provider'):
-            _active_agent_ref.shutdown_memory_provider(
-                getattr(_active_agent_ref, 'conversation_history', None) or []
-            )
+            # Forward the agent's own transcript so memory providers'
+            # ``on_session_end`` hooks see the real conversation instead of
+            # an empty list (#15165). ``_session_messages`` is set on
+            # ``AIAgent.__init__`` and refreshed every turn via
+            # ``_persist_session``. Fall back to no-arg on test stubs /
+            # partially-initialised agents where the attribute is missing.
+            _session_msgs = getattr(_active_agent_ref, '_session_messages', None)
+            if isinstance(_session_msgs, list):
+                _active_agent_ref.shutdown_memory_provider(_session_msgs)
+            else:
+                _active_agent_ref.shutdown_memory_provider()
    except Exception:
        pass

@@ -1574,6 +1584,34 @@ def _strip_leaked_bracketed_paste_wrappers(text: str) -> str:
    return text


+# Cursor Position Report (CPR / DSR) response, format ``ESC[<row>;<col>R``.
+# prompt_toolkit's _on_resize() + renderer send ``ESC[6n`` queries to the
+# terminal; under resize storms or tab switches the terminal's reply can
+# race past the input parser and end up in the input buffer as literal
+# text (see issue #14692). Also matches the visible-form ``^[[<row>;<col>R``
+# that appears when the ESC byte was stripped by a prior filter.
+_DSR_CPR_ESC_RE = re.compile(r"\x1b\[\d+;\d+R")
+_DSR_CPR_VISIBLE_RE = re.compile(r"\^\[\[\d+;\d+R")
+
+
+def _strip_leaked_terminal_responses(text: str) -> str:
+    """Remove stray cursor-position replies from text the user entered.
+
+    Two spellings of the same reply are deleted wherever they occur:
+    the raw ``ESC[<row>;<col>R`` sequence and its printable rendering
+    ``^[[<row>;<col>R``. Everything else is left untouched.
+
+    Args:
+        text: Input text that may contain leaked replies.
+
+    Returns:
+        ``text`` with every match removed. Falsy input (empty string,
+        ``None``) is handed back unchanged.
+    """
+    if text:
+        # Raw escape form first, then the caret-notation form.
+        for _pattern in (_DSR_CPR_ESC_RE, _DSR_CPR_VISIBLE_RE):
+            text = _pattern.sub("", text)
+    return text
+
+
 def _collect_query_images(query: str | None, image_arg: str | None = None) -> tuple[str, list[Path]]:
    """Collect local image attachments for single-query CLI flows."""
    message = query or ""
@@ -2182,6 +2220,42 @@ class HermesCLI:
            self._last_invalidate = now
            self._app.invalidate()

+    def _force_full_redraw(self) -> None:
+        """Wipe the terminal and make prompt_toolkit repaint from scratch.
+
+        Recovery path for cases where something outside our control
+        repainted or cleared the terminal without a SIGWINCH — macOS
+        cmux / tmux tab switches, ``clear`` run from a subshell, SSH
+        window restores. After such an event prompt_toolkit's tracked
+        ``_cursor_pos`` no longer matches the real screen, so the next
+        incremental redraw lands on top of stale content (ghost status
+        bars, duplicated prompts).
+
+        Reachable through Ctrl+L and the ``/redraw`` slash command,
+        following the usual terminal convention (bash, zsh, fish, vim,
+        htop). Does nothing when no application is attached; every
+        failure is swallowed so a redraw attempt can never crash the UI.
+        """
+        tui = getattr(self, "_app", None)
+        if tui:
+            try:
+                painter = tui.renderer
+                screen = painter.output
+                # Clear the physical screen and park the cursor at the origin.
+                screen.reset_attributes()
+                screen.erase_screen()
+                screen.cursor_goto(0, 0)
+                screen.flush()
+                # Discard the renderer's cached screen and cursor state so
+                # the following redraw paints every cell from (0, 0)
+                # instead of diffing against stale content.
+                painter.reset(leave_alternate_screen=False)
+            except Exception:
+                pass
+            # Schedule the repaint even if the wipe above failed part-way.
+            try:
+                tui.invalidate()
+            except Exception:
+                pass
+
    def _status_bar_context_style(self, percent_used: Optional[int]) -> str:
        if percent_used is None:
            return "class:status-bar-dim"
@@ -5385,6 +5459,8 @@ class HermesCLI:
            try:
                providers = list_authenticated_providers(
                    current_provider=self.provider or "",
+                    current_base_url=self.base_url or "",
+                    current_model=self.model or "",
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=50,
@@ -5928,6 +6004,7 @@ class HermesCLI:
            platform_status = {
                Platform.TELEGRAM: ("Telegram", "TELEGRAM_BOT_TOKEN"),
                Platform.DISCORD: ("Discord", "DISCORD_BOT_TOKEN"),
+                Platform.SLACK: ("Slack", "SLACK_BOT_TOKEN"),
                Platform.WHATSAPP: ("WhatsApp", "WHATSAPP_ENABLED"),
            }
            
@@ -5998,6 +6075,12 @@ class HermesCLI:
            self.show_toolsets()
        elif canonical == "config":
            self.show_config()
+        elif canonical == "redraw":
+            # Manual recovery for terminal buffer drift from multiplexer
+            # tab switches, subshell ``clear``, SSH window restores, etc.
+            # See issue #8688 (cmux). Ctrl+L is bound to the same helper.
+            self._force_full_redraw()
+            _cprint(f"  {_DIM}✓ UI redrawn{_RST}")
        elif canonical == "clear":
            self.new_session(silent=True)
            # Clear terminal screen.  Inside the TUI, Rich's console.clear()
@@ -6153,6 +6236,8 @@ class HermesCLI:
            self._console_print(f"  Status bar {state}")
        elif canonical == "verbose":
            self._toggle_verbose()
+        elif canonical == "footer":
+            self._handle_footer_command(cmd_original)
        elif canonical == "yolo":
            self._toggle_yolo()
        elif canonical == "reasoning":
@@ -6780,6 +6865,58 @@ class HermesCLI:
        if self._apply_tui_skin_style():
            print("  Prompt + TUI colors updated.")

+    def _handle_footer_command(self, cmd_original: str) -> None:
+        """Show, flip, or explicitly set the runtime footer flag.
+
+        Reads ``display.runtime_footer`` from the loaded config and, unless
+        only a status report was requested, stores the new value of
+        ``display.runtime_footer.enabled`` through ``save_config_value``.
+
+        Accepted forms:
+            /footer                        → flip the current value
+            /footer on|enable|true|1       → enable
+            /footer off|disable|false|0    → disable
+            /footer status|?               → print state and fields only
+
+        Args:
+            cmd_original: The slash command as typed; whatever follows the
+                first run of whitespace is taken as the argument.
+        """
+        from hermes_cli.config import load_config
+        from hermes_cli.colors import Colors as _Colors
+
+        # Argument = text after the command word, lower-cased. Falls back
+        # to "" when there is none or the input cannot be parsed.
+        try:
+            _tokens = (cmd_original or "").strip().split(None, 1)
+            arg = _tokens[1].strip().lower() if len(_tokens) > 1 else ""
+        except Exception:
+            arg = ""
+
+        _display = (load_config() or {}).get("display") or {}
+        footer_cfg = _display.get("runtime_footer") or {}
+        current = bool(footer_cfg.get("enabled", False))
+        fields = footer_cfg.get("fields") or ["model", "context_pct", "cwd"]
+
+        # Read-only report: nothing is written to the config.
+        if arg in ("status", "?"):
+            _label = "ON" if current else "OFF"
+            _cprint(
+                f"  {_Colors.BOLD}Runtime footer:{_Colors.RESET} {_label}\n"
+                f"  Fields: {', '.join(fields)}"
+            )
+            return
+
+        if arg == "":
+            new_state = not current
+        elif arg in ("on", "enable", "true", "1"):
+            new_state = True
+        elif arg in ("off", "disable", "false", "0"):
+            new_state = False
+        else:
+            _cprint("  Usage: /footer [on|off|status]")
+            return
+
+        if not save_config_value("display.runtime_footer.enabled", new_state):
+            _cprint("  Failed to save runtime_footer setting to config.yaml")
+            return
+
+        if new_state:
+            state = f"{_Colors.GREEN}ON{_Colors.RESET}"
+        else:
+            state = f"{_Colors.DIM}OFF{_Colors.RESET}"
+        _cprint(f"  Runtime footer: {state}")
+
    def _toggle_verbose(self):
        """Cycle tool progress mode: off → new → all → verbose → off."""
        cycle = ["off", "new", "all", "verbose"]
@@ -7020,9 +7157,15 @@ class HermesCLI:
                else:
                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")

+                # Pass None as system_message so _compress_context rebuilds
+                # the system prompt from scratch via _build_system_prompt(None).
+                # Passing _cached_system_prompt caused duplication because
+                # _build_system_prompt appends system_message to prompt_parts
+                # which already contain the agent identity — resulting in the
+                # identity block appearing twice (issue #15281).
                compressed, _ = self.agent._compress_context(
                    original_history,
-                    self.agent._cached_system_prompt or "",
+                    None,
                    approx_tokens=approx_tokens,
                    focus_topic=focus_topic or None,
                )
@@ -7146,6 +7289,8 @@ class HermesCLI:
        provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
        base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
        api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
+        # Lazy import — pulls the OpenAI SDK chain, only needed here.
+        from agent.account_usage import fetch_account_usage, render_account_usage_lines
        account_snapshot = None
        if provider:
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
@@ -8363,13 +8508,62 @@ class HermesCLI:
        ):
            return None
        
-        # Pre-process images through the vision tool (Gemini Flash) so the
-        # main model receives text descriptions instead of raw base64 image
-        # content — works with any model, not just vision-capable ones.
+        # Route image attachments based on the active model's vision capability.
+        # "native" → pass pixels as OpenAI-style content parts (adapters
+        #            translate for Anthropic/Gemini/Bedrock).
+        # "text"   → pre-analyze each image with vision_analyze and prepend the
+        #            description as text — works with non-vision models.
+        # See agent/image_routing.py for the decision table.
        if images:
-            message = self._preprocess_images_with_vision(
-                message if isinstance(message, str) else "", images
-            )
+            try:
+                from agent.image_routing import (
+                    build_native_content_parts,
+                    decide_image_input_mode,
+                )
+                from hermes_cli.config import load_config
+
+                _img_mode = decide_image_input_mode(
+                    (self.provider or "").strip(),
+                    (self.model or "").strip(),
+                    load_config(),
+                )
+            except Exception as _img_exc:
+                logging.debug("image_routing decision failed, defaulting to text: %s", _img_exc)
+                _img_mode = "text"
+
+            if _img_mode == "native":
+                try:
+                    _text_for_parts = message if isinstance(message, str) else ""
+                    _img_str_paths = [str(p) for p in images]
+                    _parts, _skipped = build_native_content_parts(
+                        _text_for_parts,
+                        _img_str_paths,
+                    )
+                    if _skipped:
+                        _cprint(
+                            f"  {_DIM}⚠ skipped {len(_skipped)} unreadable image path(s){_RST}"
+                        )
+                    if any(p.get("type") == "image_url" for p in _parts):
+                        _img_names = ", ".join(Path(p).name for p in _img_str_paths)
+                        _cprint(
+                            f"  {_DIM}📎 attaching {len(images)} image(s) natively "
+                            f"(model supports vision): {_img_names}{_RST}"
+                        )
+                        message = _parts
+                    else:
+                        # All images unreadable — fall back to text enrichment.
+                        message = self._preprocess_images_with_vision(
+                            message if isinstance(message, str) else "", images
+                        )
+                except Exception as _img_exc:
+                    logging.warning("native image attach failed, falling back to text: %s", _img_exc)
+                    message = self._preprocess_images_with_vision(
+                        message if isinstance(message, str) else "", images
+                    )
+            else:
+                message = self._preprocess_images_with_vision(
+                    message if isinstance(message, str) else "", images
+                )

        # Expand @ context references (e.g. @file:main.py, @diff, @folder:src/)
        if isinstance(message, str) and "@" in message:
@@ -8686,6 +8880,13 @@ class HermesCLI:
                        response,
                        self.conversation_history,
                        failure_callback=_title_failure_cb,
+                        main_runtime={
+                            "model": self.model,
+                            "provider": self.provider,
+                            "base_url": self.base_url,
+                            "api_key": self.api_key,
+                            "api_mode": self.api_mode,
+                        },
                    )
                except Exception:
                    pass
@@ -9563,6 +9764,17 @@ class HermesCLI:
            """Down arrow: browse history when on last line, else move cursor down."""
            event.app.current_buffer.auto_down(count=event.arg)

+        @kb.add('c-l')
+        def handle_ctrl_l(event):
+            """Ctrl+L: force a clean full-screen repaint.
+
+            Recovers the UI after external terminal buffer drift — tmux /
+            cmux tab switches, ``clear`` from a subshell, SSH window
+            restores, etc. — that prompt_toolkit can't detect on its own.
+            Matches the universal bash/zsh/fish/vim/htop convention.
+
+            ``event`` is the argument passed in by the key binding; it is
+            not used here — all the work is delegated to
+            ``self._force_full_redraw()``.
+            """
+            self._force_full_redraw()
+
        @kb.add('c-c')
        def handle_ctrl_c(event):
            """Handle Ctrl+C - cancel interactive prompts, interrupt agent, or exit.
@@ -9790,11 +10002,18 @@ class HermesCLI:
            placeholder while preserving any existing user text in the
            buffer.
            """
+            # Diagnostic canary: measure how long the paste handler blocks
+            # the prompt_toolkit event loop. If this exceeds ~500ms we log
+            # it so recurring "CLI freezes on paste" reports (issue #16263,
+            # macOS Tahoe 26 + iTerm2/Ghostty) arrive with data attached.
+            _paste_handler_start = time.perf_counter()
+            _paste_raw_size = len(event.data or "")
            pasted_text = event.data or ""
            # Normalise line endings — Windows \r\n and old Mac \r both become \n
            # so the 5-line collapse threshold and display are consistent.
            pasted_text = pasted_text.replace('\r\n', '\n').replace('\r', '\n')
            pasted_text = _strip_leaked_bracketed_paste_wrappers(pasted_text)
+            pasted_text = _strip_leaked_terminal_responses(pasted_text)
            if _should_auto_attach_clipboard_image_on_paste(pasted_text) and self._try_attach_clipboard_image():
                event.app.invalidate()
            if pasted_text:
@@ -9817,6 +10036,17 @@ class HermesCLI:
                    buf.insert_text(prefix + placeholder)
                else:
                    buf.insert_text(pasted_text)
+            _paste_handler_elapsed_ms = (time.perf_counter() - _paste_handler_start) * 1000.0
+            if _paste_handler_elapsed_ms > 500.0:
+                logger.warning(
+                    "Slow bracketed-paste handler: %.1fms to process %d bytes "
+                    "(%d lines) on %s. If the input becomes unresponsive after "
+                    "this, attach this log line to the bug report.",
+                    _paste_handler_elapsed_ms,
+                    _paste_raw_size,
+                    pasted_text.count('\n') + 1 if pasted_text else 0,
+                    sys.platform,
+                )

        @kb.add('c-v')
        def handle_ctrl_v(event):
@@ -9937,6 +10167,7 @@ class HermesCLI:
               event so it never triggers this.
            """
            text = _strip_leaked_bracketed_paste_wrappers(buf.text)
+            text = _strip_leaked_terminal_responses(text)
            if text != buf.text:
                cursor = min(buf.cursor_position, len(text))
                _paste_just_collapsed[0] = True
@@ -10601,36 +10832,30 @@ class HermesCLI:
        # only cursor_up()s by the stored layout height, missing the extra
        # rows created by reflow — leaving ghost duplicates visible.
        #
-        # Fix: before the standard erase, inflate _cursor_pos.y so the
-        # cursor moves up far enough to cover the reflowed ghost content.
+        # It's not just column-shrink: widening, row-shrinking, and
+        # multiplexer-driven SIGWINCH-less redraws (cmux / tmux tab switch)
+        # all produce the same class of drift, where the renderer's tracked
+        # _cursor_pos.y no longer matches terminal reality. The only reliable
+        # recovery is a full screen-clear (\x1b[2J\x1b[H) before the next
+        # redraw, so we force one on every resize rather than trying to
+        # compute the exact drift.
        _original_on_resize = app._on_resize

        def _resize_clear_ghosts():
-            from prompt_toolkit.data_structures import Point as _Pt
            renderer = app.renderer
            try:
-                old_size = renderer._last_size
-                new_size = renderer.output.get_size()
-                if (
-                    old_size
-                    and new_size.columns < old_size.columns
-                    and new_size.columns > 0
-                ):
-                    reflow_factor = (
-                        (old_size.columns + new_size.columns - 1)
-                        // new_size.columns
-                    )
-                    last_h = (
-                        renderer._last_screen.height
-                        if renderer._last_screen
-                        else 0
-                    )
-                    extra = last_h * (reflow_factor - 1)
-                    if extra > 0:
-                        renderer._cursor_pos = _Pt(
-                            x=renderer._cursor_pos.x,
-                            y=renderer._cursor_pos.y + extra,
-                        )
+                out = renderer.output
+                # Reset attributes, erase the entire screen, and home the
+                # cursor. This overwrites any reflowed status-bar rows or
+                # stale content the terminal kept from the prior layout.
+                out.reset_attributes()
+                out.erase_screen()
+                out.cursor_goto(0, 0)
+                out.flush()
+                # Tell the renderer its tracked position is fresh so its
+                # own erase() inside _on_resize doesn't cursor_up() past
+                # the top of the screen.
+                renderer.reset(leave_alternate_screen=False)
            except Exception:
                pass  # never break resize handling
            _original_on_resize()
@@ -10638,7 +10863,6 @@ class HermesCLI:
        app._on_resize = _resize_clear_ghosts

        def spinner_loop():
-            last_idle_refresh = 0.0
            while not self._should_exit:
                if not self._app:
                    time.sleep(0.1)
@@ -10647,10 +10871,11 @@ class HermesCLI:
                    self._invalidate(min_interval=0.1)
                    time.sleep(0.1)
                else:
-                    now = time.monotonic()
-                    if now - last_idle_refresh >= 1.0:
-                        last_idle_refresh = now
-                        self._invalidate(min_interval=1.0)
+                    # Do not repaint the idle prompt every second. In non-full-screen
+                    # prompt_toolkit mode, background redraws can fight tmux/Ghostty/cmux
+                    # viewport restoration after focus changes and visually move the
+                    # command input area. Keep idle stable; input/agent events still
+                    # invalidate explicitly when the UI actually changes.
                    time.sleep(0.2)

        spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
@@ -10695,6 +10920,7 @@ class HermesCLI:

                    if isinstance(user_input, str):
                        user_input = _strip_leaked_bracketed_paste_wrappers(user_input)
+                        user_input = _strip_leaked_terminal_responses(user_input)
                    
                    # Check for commands — but detect dragged/pasted file paths first.
                    # See _detect_file_drop() for details.
@@ -21,6 +21,7 @@ from typing import Optional, Dict, List, Any, Union
 logger = logging.getLogger(__name__)

 from hermes_time import now as _hermes_now
+from utils import atomic_replace

 try:
    from croniter import croniter
@@ -367,7 +368,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
            json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, JOBS_FILE)
+        atomic_replace(tmp_path, JOBS_FILE)
        _secure_file(JOBS_FILE)
    except BaseException:
        try:
@@ -863,7 +864,7 @@ def save_job_output(job_id: str, output: str):
            f.write(output)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, output_file)
+        atomic_replace(tmp_path, output_file)
        _secure_file(output_file)
    except BaseException:
        try:
@@ -198,7 +198,9 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
            if resolved:
                parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
                if resolved_is_explicit:
-                    chat_id, thread_id = parsed_chat_id, parsed_thread_id
+                    chat_id = parsed_chat_id
+                    if parsed_thread_id is not None:
+                        thread_id = parsed_thread_id
                else:
                    chat_id = resolved
        except Exception:
@@ -822,6 +824,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
    logger.info("Prompt: %s", prompt[:100])

+    agent = None
+
    # Mark this as a cron session so the approval system can apply cron_mode.
    # This env var is process-wide and persists for the lifetime of the
    # scheduler process — every job this process runs is a cron job.
@@ -1170,6 +1174,24 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                _session_db.close()
            except (Exception, KeyboardInterrupt) as e:
                logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
+        # Release subprocesses, terminal sandboxes, browser daemons, and the
+        # main OpenAI/httpx client held by this ephemeral cron agent. Without
+        # this, a gateway that ticks cron every N minutes leaks fds per job
+        # until it hits EMFILE (#10200 / "too many open files").
+        try:
+            if agent is not None:
+                agent.close()
+        except (Exception, KeyboardInterrupt) as e:
+            logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
+        # Each cron run spins up a short-lived worker thread whose event loop
+        # dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
+        # httpx clients cached under that loop are now unusable — reap them
+        # so their transports don't accumulate in the process-global cache.
+        try:
+            from agent.auxiliary_client import cleanup_stale_async_clients
+            cleanup_stale_async_clients()
+        except Exception as e:
+            logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)


 def tick(verbose: bool = True, adapters=None, loop=None) -> int:
@@ -36,6 +36,7 @@

      imports = [
        ./nix/packages.nix
+        ./nix/overlays.nix
        ./nix/nixosModules.nix
        ./nix/checks.nix
        ./nix/devShell.nix
@@ -1,85 +0,0 @@
-"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
-
-This hook is always registered. It silently skips if no BOOT.md exists.
-To activate, create ``~/.hermes/BOOT.md`` with instructions for the
-agent to execute on every gateway restart.
-
-Example BOOT.md::
-
-    # Startup Checklist
-
-    1. Check if any cron jobs failed overnight
-    2. Send a status update to Discord #general
-    3. If there are errors in /opt/app/deploy.log, summarize them
-
-The agent runs in a background thread so it doesn't block gateway
-startup. If nothing needs attention, it replies with [SILENT] to
-suppress delivery.
-"""
-
-import logging
-import threading
-
-logger = logging.getLogger("hooks.boot-md")
-
-from hermes_constants import get_hermes_home
-HERMES_HOME = get_hermes_home()
-BOOT_FILE = HERMES_HOME / "BOOT.md"
-
-
-def _build_boot_prompt(content: str) -> str:
-    """Wrap BOOT.md content in a system-level instruction."""
-    return (
-        "You are running a startup boot checklist. Follow the BOOT.md "
-        "instructions below exactly.\n\n"
-        "---\n"
-        f"{content}\n"
-        "---\n\n"
-        "Execute each instruction. If you need to send a message to a "
-        "platform, use the send_message tool.\n"
-        "If nothing needs attention and there is nothing to report, "
-        "reply with ONLY: [SILENT]"
-    )
-
-
-def _run_boot_agent(content: str) -> None:
-    """Spawn a one-shot agent session to execute the boot instructions."""
-    try:
-        from run_agent import AIAgent
-
-        prompt = _build_boot_prompt(content)
-        agent = AIAgent(
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-            max_iterations=20,
-        )
-        result = agent.run_conversation(prompt)
-        response = result.get("final_response", "")
-        if response and "[SILENT]" not in response:
-            logger.info("boot-md completed: %s", response[:200])
-        else:
-            logger.info("boot-md completed (nothing to report)")
-    except Exception as e:
-        logger.error("boot-md agent failed: %s", e)
-
-
-async def handle(event_type: str, context: dict) -> None:
-    """Gateway startup handler — run BOOT.md if it exists."""
-    if not BOOT_FILE.exists():
-        return
-
-    content = BOOT_FILE.read_text(encoding="utf-8").strip()
-    if not content:
-        return
-
-    logger.info("Running BOOT.md (%d chars)", len(content))
-
-    # Run in a background thread so we don't block gateway startup.
-    thread = threading.Thread(
-        target=_run_boot_agent,
-        args=(content,),
-        name="boot-md",
-        daemon=True,
-    )
-    thread.start()
@@ -566,6 +566,8 @@ def load_gateway_config() -> GatewayConfig:
                        existing = {}
                    # Deep-merge extra dicts so gateway.json defaults survive
                    merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
+                    if plat_name == Platform.SLACK.value and "enabled" in plat_block:
+                        merged_extra["_enabled_explicit"] = True
                    merged = {**existing, **plat_block}
                    if merged_extra:
                        merged["extra"] = merged_extra
@@ -610,16 +612,21 @@ def load_gateway_config() -> GatewayConfig:
                        bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
                    else:
                        bridged["channel_prompts"] = channel_prompts
-                if not bridged:
+                enabled_was_explicit = "enabled" in platform_cfg
+                if not bridged and not enabled_was_explicit:
                    continue
                plat_data = platforms_data.setdefault(plat.value, {})
                if not isinstance(plat_data, dict):
                    plat_data = {}
                    platforms_data[plat.value] = plat_data
+                if enabled_was_explicit:
+                    plat_data["enabled"] = platform_cfg["enabled"]
                extra = plat_data.setdefault("extra", {})
                if not isinstance(extra, dict):
                    extra = {}
                    plat_data["extra"] = extra
+                if plat == Platform.SLACK and enabled_was_explicit:
+                    extra["_enabled_explicit"] = True
                extra.update(bridged)

            # Slack settings → env vars (env vars take precedence)
@@ -941,6 +948,14 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            # No yaml config for Slack — env-only setup, enable it
            config.platforms[Platform.SLACK] = PlatformConfig()
            config.platforms[Platform.SLACK].enabled = True
+        else:
+            slack_config = config.platforms[Platform.SLACK]
+            enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
+            if not slack_config.enabled and not enabled_was_explicit:
+                # Top-level Slack settings such as channel prompts should not
+                # turn an env-token setup into a disabled platform. Only an
+                # explicit slack.enabled/platforms.slack.enabled false should.
+                slack_config.enabled = True
        # If yaml config exists, respect its enabled flag (don't override
        # explicit enabled: false). Token is still stored so skills that
        # send Slack messages can use it without activating the gateway adapter.
@@ -52,19 +52,13 @@ class HookRegistry:
        return list(self._loaded_hooks)

    def _register_builtin_hooks(self) -> None:
-        """Register built-in hooks that are always active."""
-        try:
-            from gateway.builtin_hooks.boot_md import handle as boot_md_handle
+        """Register built-in hooks that are always active.

-            self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
-            self._loaded_hooks.append({
-                "name": "boot-md",
-                "description": "Run ~/.hermes/BOOT.md on gateway startup",
-                "events": ["gateway:startup"],
-                "path": "(builtin)",
-            })
-        except Exception as e:
-            print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
+        Currently empty — no shipped built-in hooks. Kept as the extension
+        point for future always-on gateway hooks so they drop in without
+        re-plumbing discover_and_load().
+        """
+        return

    def discover_and_load(self) -> None:
        """
@@ -28,6 +28,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_dir
+from utils import atomic_replace


 # Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
@@ -59,7 +60,7 @@ def _secure_write(path: Path, data: str) -> None:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, str(path))
+        atomic_replace(tmp_path, path)
        try:
            os.chmod(path, 0o600)
        except OSError:
@@ -307,9 +307,14 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.

    Returns ``(session_kwargs, request_kwargs)`` where:
-      - SOCKS → ``({"connector": ProxyConnector(...)}, {})``
-      - HTTP  → ``({}, {"proxy": url})``
-      - None  → ``({}, {})``
+      - With aiohttp-socks → ``({"connector": ProxyConnector(...)}, {})``
+        for *all* proxy schemes (SOCKS **and** HTTP/HTTPS).
+      - HTTP without aiohttp-socks → ``({}, {"proxy": url})``.
+      - None → ``({}, {})``.
+
+    Prefer the connector path: it works transparently with libraries
+    (like mautrix) that call ``session.request()`` without forwarding
+    per-request ``proxy=`` kwargs.

    Usage::

@@ -320,20 +325,20 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """
    if not proxy_url:
        return {}, {}
-    if proxy_url.lower().startswith("socks"):
-        try:
-            from aiohttp_socks import ProxyConnector
+    try:
+        from aiohttp_socks import ProxyConnector

-            connector = ProxyConnector.from_url(proxy_url, rdns=True)
-            return {"connector": connector}, {}
-        except ImportError:
+        connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return {"connector": connector}, {}
+    except ImportError:
+        if proxy_url.lower().startswith("socks"):
            logger.warning(
                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
                "Run: pip install aiohttp-socks",
                proxy_url,
            )
            return {}, {}
-    return {}, {"proxy": proxy_url}
+        return {}, {"proxy": proxy_url}


 def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
@@ -902,6 +907,41 @@ class MessageEvent:
        return args


+# Anchored, case-insensitive phrases that are rewritten to ``/restart``.
+# Each allows an optional leading "please" and only ".", "!", "?" or
+# whitespace after the phrase.
+_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
+    re.compile(
+        r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$",
+        re.IGNORECASE,
+    ),
+    re.compile(
+        r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$",
+        re.IGNORECASE,
+    ),
+    re.compile(
+        r"^(?:please\s+)?restart\s+hermes[.!?\s]*$",
+        re.IGNORECASE,
+    ),
+)
+
+
+def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
+    """Turn a DM's plaintext restart phrase into the ``/restart`` command.
+
+    Operational phrases such as ``restart gateway`` are intercepted here so
+    they never reach the LLM/tool path, where a self-restart issued from
+    inside the running agent can leave the gateway stuck in ``draining``
+    while it waits for that same agent to finish.
+
+    The rewrite is deliberately narrow.  ``event.text`` is replaced (in
+    place) only when all of the following hold:
+
+    * the event is a ``MessageType.TEXT`` message,
+    * the stripped text is non-empty and is not already a slash command,
+    * ``event.source.chat_type`` is ``"dm"`` (group chats keep their
+      natural-language semantics),
+    * the stripped text matches one of
+      ``_PLAINTEXT_GATEWAY_RESTART_PATTERNS``.
+
+    Best-effort: any exception leaves the event untouched.
+    """
+    try:
+        if event is None:
+            return
+        if event.message_type != MessageType.TEXT:
+            return
+
+        stripped = (event.text or "").strip()
+        if not stripped or stripped.startswith("/"):
+            return
+
+        origin = getattr(event, "source", None)
+        if getattr(origin, "chat_type", None) != "dm":
+            return
+
+        if any(rx.match(stripped) for rx in _PLAINTEXT_GATEWAY_RESTART_PATTERNS):
+            event.text = "/restart"
+    except Exception:
+        # Never let the coercion helper break message handling.
+        return
+
+
@dataclass 
 class SendResult:
    """Result of sending a message."""
@@ -1702,13 +1742,41 @@ class BasePlatformAdapter(ABC):
        the agent is waiting for dangerous-command approval).  This is critical
        for Slack's Assistant API where ``assistant_threads_setStatus`` disables
        the compose box — pausing lets the user type ``/approve`` or ``/deny``.
+
+        Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
+        network round-trip can't stall the refresh cadence.  Telegram- and
+        Discord-side typing expire after ~5s; if any individual send_typing
+        takes longer than the refresh interval, the bubble would die and
+        stay dead until that call returns.  Abandoning the slow call lets
+        the next tick fire a fresh send_typing on schedule — as long as
+        one of them succeeds within the 5s platform-side window, the bubble
+        stays visible across provider stalls / upstream API timeouts.
        """
+        # Bound each send_typing round-trip so the refresh cadence isn't
+        # gated on network health.  Must stay below ``interval`` so a slow
+        # call gets abandoned before the next scheduled tick.
+        _send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
        try:
            while True:
                if stop_event is not None and stop_event.is_set():
                    return
                if chat_id not in self._typing_paused:
-                    await self.send_typing(chat_id, metadata=metadata)
+                    try:
+                        await asyncio.wait_for(
+                            self.send_typing(chat_id, metadata=metadata),
+                            timeout=_send_typing_timeout,
+                        )
+                    except asyncio.TimeoutError:
+                        # Slow network — abandon this tick, keep the loop
+                        # on schedule so the next send_typing fires fresh.
+                        pass
+                    except asyncio.CancelledError:
+                        raise
+                    except Exception as typing_err:
+                        logger.debug(
+                            "[%s] send_typing error (non-fatal): %s",
+                            self.name, typing_err,
+                        )
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
@@ -2160,6 +2228,8 @@ class BasePlatformAdapter(ABC):
        """
        if not self._message_handler:
            return
+
+        coerce_plaintext_gateway_command(event)
        
        session_key = build_session_key(
            event.source,
@@ -305,7 +305,7 @@ class VoiceReceiver:
        encrypted = bytes(payload_with_nonce[:-4])

        try:
-            import nacl.secret  # noqa: delayed import – only in voice path
+            import nacl.secret  # noqa: E402 — delayed import, only in voice path
            box = nacl.secret.Aead(self._secret_key)
            decrypted = box.decrypt(encrypted, header, bytes(nonce))
        except Exception as e:
@@ -813,7 +813,14 @@ class DiscordAdapter(BasePlatformAdapter):
                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
                return

-            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
+            # Discord's per-app command-management bucket is ~5 writes / 20 s,
+            # so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
+            # desired = 107 writes) takes several minutes of forced waits.
+            # A flat 30 s budget blew up reliably under bucket pressure and
+            # left slash commands broken for ~60 min until the bucket fully
+            # recovered. Use a wide ceiling; the cap still guards against a
+            # true hang. (#16713)
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
            logger.info(
                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
                self.name,
@@ -825,7 +832,11 @@ class DiscordAdapter(BasePlatformAdapter):
                summary["deleted"],
            )
        except asyncio.TimeoutError:
-            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
+            logger.warning(
+                "[%s] Slash command sync timed out — Discord rate-limit bucket "
+                "may be saturated; will retry on next reconnect",
+                self.name,
+            )
        except asyncio.CancelledError:
            raise
        except Exception as e:  # pragma: no cover - defensive logging
@@ -3294,6 +3305,7 @@ class DiscordAdapter(BasePlatformAdapter):
        chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)

        # Build source
+        guild = getattr(message, "guild", None)
        source = self.build_source(
            chat_id=str(effective_channel.id),
            chat_name=chat_name,
@@ -3303,7 +3315,7 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
-            guild_id=str(message.guild.id) if message.guild else None,
+            guild_id=str(guild.id) if guild else None,
            parent_chat_id=parent_channel_id,
            message_id=str(message.id),
        )
@@ -28,6 +28,7 @@ from email.header import decode_header
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.mime.base import MIMEBase
+from email.utils import formatdate
 from email import encoders
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -504,6 +505,7 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

+        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -586,6 +588,7 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

+        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -974,7 +974,6 @@ def build_whole_comment_prompt(

 def _resolve_model_and_runtime() -> Tuple[str, dict]:
    """Resolve model and provider credentials, same as gateway message handling."""
-    import os
    from gateway.run import _load_gateway_config, _resolve_gateway_model

    user_config = _load_gateway_config()
@@ -11,10 +11,10 @@ import logging
 import re
 import time
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, Optional
+from typing import TYPE_CHECKING, Dict

 if TYPE_CHECKING:
-    from gateway.platforms.base import BasePlatformAdapter, MessageEvent
+    from gateway.platforms.base import MessageEvent

 logger = logging.getLogger(__name__)

@@ -11,6 +11,7 @@ Environment variables:
    MATRIX_PASSWORD             Password (alternative to access token)
    MATRIX_ENCRYPTION           Set "true" to enable E2EE
    MATRIX_DEVICE_ID            Stable device ID for E2EE persistence across restarts
+    MATRIX_PROXY                HTTP(S) or SOCKS proxy URL for Matrix traffic
    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
    MATRIX_REACTIONS        Set "false" to disable processing lifecycle reactions
@@ -18,6 +19,7 @@ Environment variables:
    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
+    MATRIX_DM_AUTO_THREAD       Auto-create threads for DM messages (default: false)
    MATRIX_RECOVERY_KEY         Recovery key for cross-signing verification after device key rotation
    MATRIX_DM_MENTION_THREADS   Create a thread when bot is @mentioned in a DM (default: false)
 """
@@ -30,6 +32,8 @@ import mimetypes
 import os
 import re
 import time
+from dataclasses import dataclass
+
 from html import escape as _html_escape
 from pathlib import Path
 from typing import Any, Dict, Optional, Set
@@ -95,11 +99,25 @@ from gateway.platforms.base import (
    MessageType,
    ProcessingOutcome,
    SendResult,
+    resolve_proxy_url,
+    proxy_kwargs_for_aiohttp,
 )
 from gateway.platforms.helpers import ThreadParticipationTracker

 logger = logging.getLogger(__name__)

+
+@dataclass(init=False)
+class _MatrixApprovalPrompt:
+    """Tracks a pending Matrix reaction-based exec approval prompt.
+
+    The attributes are declared as dataclass fields so the generated
+    ``__repr__`` and ``__eq__`` actually cover them.  With a bare
+    ``@dataclass`` and no annotated fields, every instance compared equal
+    to every other one and the repr carried no data.  ``init=False`` keeps
+    the hand-written constructor, so the call signature is unchanged.
+    """
+
+    session_key: str
+    chat_id: str
+    message_id: str
+    resolved: bool
+    bot_reaction_events: Dict[str, str]  # emoji -> event_id
+
+    def __init__(self, session_key: str, chat_id: str, message_id: str, resolved: bool = False):
+        self.session_key = session_key
+        self.chat_id = chat_id
+        self.message_id = message_id
+        self.resolved = resolved
+        # Always starts empty; every instance gets its own dict.
+        self.bot_reaction_events = {}
+
 # Matrix message size limit (4000 chars practical, spec has no hard limit
 # but clients render poorly above this).
 MAX_MESSAGE_LENGTH = 4000
@@ -114,11 +132,85 @@ _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5

+# Matches a Matrix-style user ID of the form ``@localpart:server`` with an
+# optional ``:port`` suffix, captured whole as group 1.  The lookbehind
+# rejects a match directly preceded by a word character or ``/``.
+# NOTE(review): presumably used to find mentions in outgoing message text;
+# the call site is not visible in this hunk — confirm.
+_OUTBOUND_MENTION_RE = re.compile(
+    r"(?<![\w/])(@[0-9A-Za-z._=/-]+:[0-9A-Za-z.-]+(?::\d+)?)"
+)

 _E2EE_INSTALL_HINT = (
    "Install with: pip install 'mautrix[encryption]'  (requires libolm C library)"
 )

+# Image extensions accepted even when ``mimetypes`` does not map them to an
+# ``image/*`` type.
+_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
+    ".jpg", ".jpeg", ".png", ".gif", ".webp",
+    ".bmp", ".svg", ".heic", ".heif", ".avif",
+})
+
+
+def _looks_like_matrix_image_filename(text: str) -> bool:
+    """Tell whether an ``m.image`` body is most likely just the upload's filename.
+
+    When a user sends an image without a caption, Matrix clients commonly
+    put the uploaded filename into ``content.body``.  Passing that filename
+    on as if the user had typed it confuses downstream vision enrichment,
+    so callers use this check to recognise it.
+
+    Returns ``True`` only for a single-line, bare filename (no directory
+    part, no trailing ``/``) that has an extension and is either guessed as
+    ``image/*`` by ``mimetypes`` or listed in
+    ``_MATRIX_IMAGE_FILENAME_EXTS``.  Empty or ``None`` input yields
+    ``False``.
+    """
+    candidate = str(text or "").strip()
+    if not candidate:
+        return False
+    if "\n" in candidate or candidate.endswith("/"):
+        return False
+
+    # A bare filename is its own final path component; anything with a
+    # directory part (or that collapses to an empty name) is rejected.
+    as_path = Path(candidate)
+    if as_path.name != candidate:
+        return False
+
+    ext = as_path.suffix.lower()
+    if not ext:
+        return False
+
+    mime, _encoding = mimetypes.guess_type(candidate)
+    guessed_as_image = bool(mime) and mime.startswith("image/")
+    return guessed_as_image or ext in _MATRIX_IMAGE_FILENAME_EXTS
+
+
+def _create_matrix_session(proxy_url: str | None):
+    """Build the ``aiohttp.ClientSession`` used for Matrix HTTP traffic.
+
+    mautrix's ``HTTPAPI._send()`` calls ``session.request()`` without
+    forwarding per-request ``proxy=`` kwargs, so the proxy has to be
+    configured on the session itself:
+
+    * no proxy   -> plain session with ``trust_env=True`` so the standard
+      ``HTTP_PROXY`` / ``HTTPS_PROXY`` env vars are honoured;
+    * SOCKS      -> connector-level ``aiohttp_socks.ProxyConnector``; if
+      ``aiohttp_socks`` is not installed a warning is logged and the
+      ``trust_env=True`` session is returned instead;
+    * any other  -> aiohttp's session-level ``proxy=`` parameter, which
+      sets a default for every request.
+
+    Args:
+        proxy_url: Proxy URL, or ``None``/empty for no explicit proxy.
+
+    Returns:
+        A new ``aiohttp.ClientSession``.
+    """
+    import aiohttp
+
+    if not proxy_url:
+        return aiohttp.ClientSession(trust_env=True)
+
+    scheme = proxy_url.split("://")[0].lower()
+    if not scheme.startswith("socks"):
+        return aiohttp.ClientSession(proxy=proxy_url)
+
+    try:
+        from aiohttp_socks import ProxyConnector
+
+        socks_connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return aiohttp.ClientSession(connector=socks_connector)
+    except ImportError:
+        logger.warning(
+            "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+            "Run: pip install aiohttp-socks",
+            proxy_url,
+        )
+        return aiohttp.ClientSession(trust_env=True)
+

 def _check_e2ee_deps() -> bool:
    """Return True if mautrix E2EE dependencies (python-olm) are available."""
@@ -260,6 +352,9 @@ class MatrixAdapter(BasePlatformAdapter):
            "1",
            "yes",
        )
+        self._dm_auto_thread: bool = os.getenv(
+            "MATRIX_DM_AUTO_THREAD", "false"
+        ).lower() in ("true", "1", "yes")
        self._dm_mention_threads: bool = os.getenv(
            "MATRIX_DM_MENTION_THREADS", "false"
        ).lower() in ("true", "1", "yes")
@@ -270,6 +365,11 @@ class MatrixAdapter(BasePlatformAdapter):
        ).lower() not in ("false", "0", "no")
        self._pending_reactions: dict[tuple[str, str], str] = {}

+        # Proxy support — resolve once at init, reuse for all HTTP traffic.
+        self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY")
+        if self._proxy_url:
+            logger.info("Matrix: proxy configured — %s", self._proxy_url)
+
        # Text batching: merge rapid successive messages (Telegram-style).
        # Matrix clients split long messages around 4000 chars.
        self._text_batch_delay_seconds = float(
@@ -281,6 +381,18 @@ class MatrixAdapter(BasePlatformAdapter):
        self._pending_text_batches: Dict[str, MessageEvent] = {}
        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}

+        # Matrix reaction-based dangerous command approvals.
+        self._approval_reaction_map = {
+            "✅": "once",
+            "❎": "deny",
+        }
+        self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {}
+        self._approval_prompt_by_session: Dict[str, str] = {}
+        allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "")
+        self._allowed_user_ids: Set[str] = {
+            u.strip() for u in allowed_users_raw.split(",") if u.strip()
+        }
+
    def _is_duplicate_event(self, event_id) -> bool:
        """Return True if this event was already processed. Tracks the ID otherwise."""
        if not event_id:
@@ -326,7 +438,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    )
                    return False
        except Exception as exc:
-            logger.error("Matrix: post-upload key verification failed: %s", exc)
+            logger.error("Matrix: post-upload key verification failed: %s", exc, exc_info=True)
            return False
        return True

@@ -342,6 +454,7 @@ class MatrixAdapter(BasePlatformAdapter):
            logger.error(
                "Matrix: cannot verify device keys on server: %s — refusing E2EE",
                exc,
+                exc_info=True,
            )
            return False

@@ -356,7 +469,7 @@ class MatrixAdapter(BasePlatformAdapter):
            try:
                await olm.share_keys()
            except Exception as exc:
-                logger.error("Matrix: failed to re-upload device keys: %s", exc)
+                logger.error("Matrix: failed to re-upload device keys: %s", exc, exc_info=True)
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)

@@ -396,6 +509,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    "Try generating a new access token to get a fresh device.",
                    client.device_id,
                    exc,
+                    exc_info=True,
                )
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)
@@ -420,9 +534,11 @@ class MatrixAdapter(BasePlatformAdapter):
        _STORE_DIR.mkdir(parents=True, exist_ok=True)

        # Create the HTTP API layer.
+        client_session = _create_matrix_session(self._proxy_url)
        api = HTTPAPI(
            base_url=self._homeserver,
            token=self._access_token or "",
+            client_session=client_session,
        )

        # Create the client.
@@ -465,6 +581,7 @@ class MatrixAdapter(BasePlatformAdapter):
                logger.error(
                    "Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER: %s",
                    exc,
+                    exc_info=True,
                )
                await api.session.close()
                return False
@@ -607,6 +724,44 @@ class MatrixAdapter(BasePlatformAdapter):
                        logger.warning(
                            "Matrix: recovery key verification failed: %s", exc
                        )
+                else:
+                    # No recovery key — bootstrap cross-signing if the bot
+                    # has none yet. Without this, Element shows "Encrypted
+                    # by a device not verified by its owner" on every
+                    # message from this bot, indefinitely. mautrix's
+                    # generate_recovery_key does the full flow: generates
+                    # MSK/SSK/USK, uploads private keys to SSSS, publishes
+                    # public keys to the homeserver, and signs the current
+                    # device with the new SSK. Some homeservers require UIA
+                    # for /keys/device_signing/upload — those will need an
+                    # alternate path; Continuwuity and Synapse-with-shared-
+                    # secret accept the unauthenticated upload.
+                    try:
+                        own_xsign = await olm.get_own_cross_signing_public_keys()
+                    except Exception as exc:
+                        own_xsign = None
+                        logger.warning(
+                            "Matrix: cross-signing key lookup failed: %s", exc
+                        )
+                    if own_xsign is None:
+                        try:
+                            new_recovery_key = await olm.generate_recovery_key()
+                            logger.warning(
+                                "Matrix: bootstrapped cross-signing for %s. "
+                                "SAVE THIS RECOVERY KEY — set "
+                                "MATRIX_RECOVERY_KEY for future restarts so "
+                                "the bot can re-sign its device after key "
+                                "rotation: %s",
+                                client.mxid,
+                                new_recovery_key,
+                            )
+                        except Exception as exc:
+                            logger.warning(
+                                "Matrix: cross-signing bootstrap failed "
+                                "(non-fatal — Element will show 'not "
+                                "verified by its owner'): %s",
+                                exc,
+                            )

                client.crypto = olm
                logger.info(
@@ -664,6 +819,7 @@ class MatrixAdapter(BasePlatformAdapter):
                        await asyncio.gather(*tasks)
                except Exception as exc:
                    logger.warning("Matrix: initial sync event dispatch error: %s", exc)
+                await self._join_pending_invites(sync_data)
            else:
                logger.warning(
                    "Matrix: initial sync returned unexpected type %s",
@@ -727,17 +883,8 @@ class MatrixAdapter(BasePlatformAdapter):
        chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)

        last_event_id = None
-        for chunk in chunks:
-            msg_content: Dict[str, Any] = {
-                "msgtype": "m.text",
-                "body": chunk,
-            }
-
-            # Convert markdown to HTML for rich rendering.
-            html = self._markdown_to_html(chunk)
-            if html and html != chunk:
-                msg_content["format"] = "org.matrix.custom.html"
-                msg_content["formatted_body"] = html
+        for i, chunk in enumerate(chunks):
+            msg_content = self._build_text_message_content(chunk)

            # Reply-to support.
            if reply_to:
@@ -844,25 +991,21 @@ class MatrixAdapter(BasePlatformAdapter):
        """Edit an existing message (via m.replace)."""

        formatted = self.format_message(content)
+        new_content = self._build_text_message_content(formatted)
        msg_content: Dict[str, Any] = {
            "msgtype": "m.text",
            "body": f"* {formatted}",
-            "m.new_content": {
-                "msgtype": "m.text",
-                "body": formatted,
-            },
-            "m.relates_to": {
-                "rel_type": "m.replace",
-                "event_id": message_id,
-            },
+            "m.new_content": new_content,
        }
-
-        html = self._markdown_to_html(formatted)
-        if html and html != formatted:
-            msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
-            msg_content["m.new_content"]["formatted_body"] = html
+        if "m.mentions" in new_content:
+            msg_content["m.mentions"] = new_content["m.mentions"]
+        if "formatted_body" in new_content:
            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = f"* {html}"
+            msg_content["formatted_body"] = f'* {new_content["formatted_body"]}'
+        msg_content["m.relates_to"] = {
+            "rel_type": "m.replace",
+            "event_id": message_id,
+        }

        try:
            event_id = await self._client.send_message_event(
@@ -895,10 +1038,12 @@ class MatrixAdapter(BasePlatformAdapter):
            # Try aiohttp first (always available), fall back to httpx
            try:
                import aiohttp as _aiohttp
-
-                async with _aiohttp.ClientSession(trust_env=True) as http:
+                _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url)
+                async with _aiohttp.ClientSession(**_sess_kw) as http:
                    async with http.get(
-                        image_url, timeout=_aiohttp.ClientTimeout(total=30)
+                        image_url,
+                        timeout=_aiohttp.ClientTimeout(total=30),
+                        **_req_kw,
                    ) as resp:
                        resp.raise_for_status()
                        data = await resp.read()
@@ -908,8 +1053,10 @@ class MatrixAdapter(BasePlatformAdapter):
                        )
            except ImportError:
                import httpx
-
-                async with httpx.AsyncClient() as http:
+                _httpx_kw: dict = {}
+                if self._proxy_url:
+                    _httpx_kw["proxy"] = self._proxy_url
+                async with httpx.AsyncClient(**_httpx_kw) as http:
                    resp = await http.get(image_url, follow_redirects=True, timeout=30)
                    resp.raise_for_status()
                    data = resp.content
@@ -984,6 +1131,56 @@ class MatrixAdapter(BasePlatformAdapter):
            chat_id, video_path, "m.video", caption, reply_to, metadata=metadata
        )

+    async def send_exec_approval(
+        self,
+        chat_id: str,
+        command: str,
+        session_key: str,
+        description: str = "dangerous command",
+        metadata: Optional[dict] = None,
+    ) -> SendResult:
+        """Send a reaction-based exec approval prompt for Matrix."""
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+
+        cmd_preview = command[:2000] + "..." if len(command) > 2000 else command
+        text = (
+            "⚠️ **Dangerous command requires approval**\n"
+            f"```\n{cmd_preview}\n```\n"
+            f"Reason: {description}\n\n"
+            "Reply `/approve` to execute, `/approve session` to approve this pattern for the session, "
+            "`/approve always` to approve permanently, or `/deny` to cancel.\n\n"
+            "You can also click the reaction to approve:\n"
+            "✅ = /approve\n"
+            "❎ = /deny"
+        )
+
+        result = await self.send(chat_id, text, metadata=metadata)
+        if not result.success or not result.message_id:
+            return result
+
+        prompt = _MatrixApprovalPrompt(
+            session_key=session_key,
+            chat_id=chat_id,
+            message_id=result.message_id,
+        )
+        old_event = self._approval_prompt_by_session.get(session_key)
+        if old_event:
+            self._approval_prompts_by_event.pop(old_event, None)
+        self._approval_prompts_by_event[result.message_id] = prompt
+        self._approval_prompt_by_session[session_key] = result.message_id
+
+        for emoji in ("✅", "❎"):
+            try:
+                reaction_result = await self._send_reaction(chat_id, result.message_id, emoji)
+                # Save the bot's reaction event_id for later cleanup
+                if reaction_result:
+                    prompt.bot_reaction_events[emoji] = str(reaction_result)
+            except Exception as exc:
+                logger.debug("Matrix: failed to add approval reaction %s: %s", emoji, exc)
+
+        return result
+
    def format_message(self, content: str) -> str:
        """Pass-through — Matrix supports standard Markdown natively."""
        # Strip image markdown; media is uploaded separately.
@@ -1115,9 +1312,15 @@ class MatrixAdapter(BasePlatformAdapter):
        next_batch = await client.sync_store.get_next_batch()
        while not self._closing:
            try:
-                sync_data = await client.sync(
-                    since=next_batch,
-                    timeout=30000,
+                # Wrap in asyncio.wait_for to guard against TCP-level hangs
+                # that the Matrix long-poll timeout cannot catch. Long-poll
+                # is 30s, so 45s gives 15s slack for network drain.
+                sync_data = await asyncio.wait_for(
+                    client.sync(
+                        since=next_batch,
+                        timeout=30000,
+                    ),
+                    timeout=45.0,
                )

                # nio returns SyncError objects (not exceptions) for auth
@@ -1153,6 +1356,7 @@ class MatrixAdapter(BasePlatformAdapter):
                            await asyncio.gather(*tasks)
                    except Exception as exc:
                        logger.warning("Matrix: sync event dispatch error: %s", exc)
+                    await self._join_pending_invites(sync_data)

            except asyncio.CancelledError:
                return
@@ -1239,6 +1443,15 @@ class MatrixAdapter(BasePlatformAdapter):
        room_id = str(getattr(event, "room_id", ""))
        sender = str(getattr(event, "sender", ""))

+        # Diagnostic: confirm the callback is firing at all when DEBUG is on.
+        # Helps users troubleshoot silent inbound issues like #5819, #7914, #12614.
+        logger.debug(
+            "Matrix: callback fired — event %s from %s in %s",
+            getattr(event, "event_id", "?"),
+            sender,
+            room_id,
+        )
+
        # Ignore own messages (case-insensitive; also drops when our own
        # user_id hasn't been resolved yet — see _is_self_sender docstring
        # and issue #15763).
@@ -1350,6 +1563,12 @@ class MatrixAdapter(BasePlatformAdapter):
            in_bot_thread = bool(thread_id and thread_id in self._threads)
            if self._require_mention and not is_free_room and not in_bot_thread:
                if not is_mentioned:
+                    logger.debug(
+                        "Matrix: ignoring message %s in %s — no @mention "
+                        "(set MATRIX_REQUIRE_MENTION=false to disable)",
+                        event_id,
+                        room_id,
+                    )
                    return None

        # DM mention-thread.
@@ -1362,7 +1581,7 @@ class MatrixAdapter(BasePlatformAdapter):
            body = self._strip_mention(body)

        # Auto-thread.
-        if not is_dm and not thread_id and self._auto_thread:
+        if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)):
            thread_id = event_id
            self._threads.mark(thread_id)

@@ -1604,6 +1823,9 @@ class MatrixAdapter(BasePlatformAdapter):
            return
        body, is_dm, chat_type, thread_id, display_name, source = ctx

+        if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
+            body = ""
+
        allow_http_fallback = bool(http_url) and not is_encrypted_media
        media_urls = (
            [cached_path]
@@ -1633,13 +1855,35 @@ class MatrixAdapter(BasePlatformAdapter):
            "Matrix: invited to %s — joining",
            room_id,
        )
+        await self._join_room_by_id(room_id)
+
+    async def _join_room_by_id(self, room_id: str) -> bool:
+        """Join a room by ID and refresh local caches on success."""
+        if not room_id:
+            return False
+        if room_id in self._joined_rooms:
+            return True
        try:
            await self._client.join_room(RoomID(room_id))
            self._joined_rooms.add(room_id)
            logger.info("Matrix: joined %s", room_id)
            await self._refresh_dm_cache()
+            return True
        except Exception as exc:
            logger.warning("Matrix: error joining %s: %s", room_id, exc)
+            return False
+
+    async def _join_pending_invites(self, sync_data: Dict[str, Any]) -> None:
+        """Join rooms still present in rooms.invite after sync processing."""
+        # Tolerate a missing, null, or non-dict "rooms" / "invite" section.
+        rooms = sync_data.get("rooms") if isinstance(sync_data, dict) else None
+        invites = rooms.get("invite") if isinstance(rooms, dict) else None
+        if not isinstance(invites, dict):
+            return
+        for room_id in invites:
+            if room_id not in self._joined_rooms:
+                logger.info("Matrix: reconciling pending invite for %s", room_id)
+                await self._join_room_by_id(str(room_id))

    # ------------------------------------------------------------------
    # Reactions (send, receive, processing lifecycle)
@@ -1754,6 +1998,51 @@ class MatrixAdapter(BasePlatformAdapter):
                room_id,
            )

+            # Check if this reaction resolves a pending approval prompt.
+            prompt = self._approval_prompts_by_event.get(reacts_to)
+            if prompt and not prompt.resolved:
+                if room_id != prompt.chat_id:
+                    return
+                if self._allowed_user_ids and sender not in self._allowed_user_ids:
+                    logger.info(
+                        "Matrix: ignoring approval reaction from unauthorized user %s on %s",
+                        sender, reacts_to,
+                    )
+                    return
+                choice = self._approval_reaction_map.get(key)
+                if not choice:
+                    return
+                try:
+                    from tools.approval import resolve_gateway_approval
+
+                    count = resolve_gateway_approval(prompt.session_key, choice)
+                    if count:
+                        prompt.resolved = True
+                        self._approval_prompts_by_event.pop(reacts_to, None)
+                        self._approval_prompt_by_session.pop(prompt.session_key, None)
+                        logger.info(
+                            "Matrix reaction resolved %d approval(s) for session %s "
+                            "(choice=%s, user=%s)",
+                            count, prompt.session_key, choice, sender,
+                        )
+                        # Redact bot's seed reactions, leaving only the user's
+                        await self._redact_bot_approval_reactions(room_id, prompt)
+                except Exception as exc:
+                    logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc)
+
+    async def _redact_bot_approval_reactions(
+        self,
+        room_id: str,
+        prompt: "_MatrixApprovalPrompt",
+    ) -> None:
+        """Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction."""
+        for emoji, evt_id in prompt.bot_reaction_events.items():
+            try:
+                await self.redact_message(room_id, evt_id, "approval resolved")
+                logger.debug("Matrix: redacted bot reaction %s (%s)", emoji, evt_id)
+            except Exception as exc:
+                logger.debug("Matrix: failed to redact bot reaction %s: %s", emoji, exc)
+
    # ------------------------------------------------------------------
    # Text message aggregation (handles Matrix client-side splits)
    # ------------------------------------------------------------------
@@ -1979,11 +2268,7 @@ class MatrixAdapter(BasePlatformAdapter):
        if not self._client or not text:
            return SendResult(success=False, error="No client or empty text")

-        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
-        html = self._markdown_to_html(text)
-        if html and html != text:
-            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = html
+        msg_content = self._build_text_message_content(text, msgtype=msgtype)

        try:
            event_id = await self._client.send_message_event(
@@ -2046,6 +2331,77 @@ class MatrixAdapter(BasePlatformAdapter):
    # Mention detection helpers
    # ------------------------------------------------------------------

+    def _build_text_message_content(self, text: str, msgtype: str = "m.text") -> Dict[str, Any]:
+        """Build Matrix text content with HTML and outbound mention metadata."""
+        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
+        mention_user_ids = self._extract_outbound_mentions(text)
+        if mention_user_ids:
+            msg_content["m.mentions"] = {"user_ids": mention_user_ids}
+
+        html_source = self._inject_outbound_mention_links(text)
+        html = self._markdown_to_html(html_source)
+        if html and html != text:
+            msg_content["format"] = "org.matrix.custom.html"
+            msg_content["formatted_body"] = html
+
+        return msg_content
+
+    def _extract_outbound_mentions(self, text: str) -> list[str]:
+        """Return unique Matrix user IDs mentioned in outbound text."""
+        protected, _ = self._protect_outbound_mention_regions(text)
+        seen: Set[str] = set()
+        mentions: list[str] = []
+        for match in _OUTBOUND_MENTION_RE.finditer(protected):
+            user_id = match.group(1)
+            if user_id not in seen:
+                seen.add(user_id)
+                mentions.append(user_id)
+        return mentions
+
+    def _inject_outbound_mention_links(self, text: str) -> str:
+        """Wrap outbound Matrix mentions in markdown links outside code spans."""
+        if not text:
+            return text
+
+        protected, placeholders = self._protect_outbound_mention_regions(text)
+        linked = _OUTBOUND_MENTION_RE.sub(
+            lambda match: f"[{match.group(1)}](https://matrix.to/#/{match.group(1)})",
+            protected,
+        )
+
+        # Restore newest-first: a later placeholder (e.g. a markdown link) can
+        # wrap an earlier one (inline code inside the link text).
+        for idx in reversed(range(len(placeholders))):
+            linked = linked.replace(f"\x00MENTION_PROTECTED{idx}\x00", placeholders[idx])
+        return linked
+
+    def _protect_outbound_mention_regions(self, text: str) -> tuple[str, list[str]]:
+        """Protect markdown regions where outbound mentions should stay literal."""
+        placeholders: list[str] = []
+
+        def _protect(fragment: str) -> str:
+            idx = len(placeholders)
+            placeholders.append(fragment)
+            return f"\x00MENTION_PROTECTED{idx}\x00"
+
+        protected = re.sub(
+            r"```[\s\S]*?```",
+            lambda match: _protect(match.group(0)),
+            text or "",
+        )
+        protected = re.sub(
+            r"`[^`\n]+`",
+            lambda match: _protect(match.group(0)),
+            protected,
+        )
+        protected = re.sub(
+            r"\[[^\]]+\]\([^)]+\)",
+            lambda match: _protect(match.group(0)),
+            protected,
+        )
+
+        return protected, placeholders
+
    def _is_bot_mentioned(
        self,
        body: str,
@@ -2080,13 +2436,33 @@ class MatrixAdapter(BasePlatformAdapter):
        return False

    def _strip_mention(self, body: str) -> str:
-        """Strip the bot's full MXID (``@user:server``) from *body*.
+        """Remove explicit bot mentions from message body.

-        The bare localpart is intentionally *not* stripped — it would
-        mangle file paths like ``/home/hermes/media/file.png``.
+        Important: only strip explicit mention tokens (``@user:server`` or
+        ``@localpart``). Do NOT strip bare words matching the bot localpart,
+        otherwise normal phrases like "Hermes Agent" become "Agent".
        """
+        if not body:
+            return ""
+
+        # Strip explicit full MXID mentions.
        if self._user_id:
            body = body.replace(self._user_id, "")
+
+        # Strip explicit @localpart mentions only (not bare localpart words).
+        if self._user_id and ":" in self._user_id:
+            localpart = self._user_id.split(":")[0].lstrip("@")
+            if localpart:
+                body = re.sub(
+                    r'(?<![\w])@' + re.escape(localpart) + r'\b',
+                    '',
+                    body,
+                    flags=re.IGNORECASE,
+                )
+
+        # Normalize spacing left by removal; keep ".env", "./x", ":tag:" intact.
+        body = re.sub(r'[ \t]{2,}', ' ', body)
+        body = re.sub(r'\s+([,.;:!?])(?=\s|$)', r'\1', body)
        return body.strip()

    async def _get_display_name(self, room_id: str, user_id: str) -> str:
@@ -412,7 +412,6 @@ class MattermostAdapter(BasePlatformAdapter):

        import aiohttp

-        last_exc = None
        file_data = None
        ct = "application/octet-stream"
        fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
@@ -1957,7 +1957,7 @@ class QQAdapter(BasePlatformAdapter):
            self, openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a C2C user via REST API."""
-        msg_seq = self._next_msg_seq(reply_to or openid)
+        self._next_msg_seq(reply_to or openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -1970,7 +1970,7 @@ class QQAdapter(BasePlatformAdapter):
            self, group_openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a group via REST API."""
-        msg_seq = self._next_msg_seq(reply_to or group_openid)
+        self._next_msg_seq(reply_to or group_openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -2135,11 +2135,6 @@ class QQAdapter(BasePlatformAdapter):

            # Route
            chat_type = self._guess_chat_type(chat_id)
-            target_path = (
-                f"/v2/users/{chat_id}/files"
-                if chat_type == "c2c"
-                else f"/v2/groups/{chat_id}/files"
-            )

            if chat_type == "guild":
                # Guild channels don't support native media upload in the same way
@@ -84,6 +84,7 @@ from gateway.platforms.telegram_network import (
    discover_fallback_ips,
    parse_fallback_ip_env,
 )
+from utils import atomic_replace


 def check_telegram_requirements() -> bool:
@@ -122,12 +123,12 @@ def _strip_mdv2(text: str) -> str:


 # ---------------------------------------------------------------------------
-# Markdown table → code block conversion
+# Markdown table → Telegram-friendly row groups
 # ---------------------------------------------------------------------------
 # Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
 # so pipe tables render as noisy backslash-pipe text with no alignment.
-# Wrapping the table in a fenced code block makes Telegram render it as
-# monospace preformatted text with columns intact.
+# Reformatting each row into a bold heading plus bullet list keeps the content
+# readable on mobile clients while preserving the source data.

 # Matches a GFM table delimiter row: optional outer pipes, cells containing
 # only dashes (with optional leading/trailing colons for alignment) separated
@@ -144,13 +145,49 @@ def _is_table_row(line: str) -> bool:
    return bool(stripped) and '|' in stripped


+def _split_markdown_table_row(line: str) -> list[str]:
+    """Split a simple GFM table row into stripped cell values."""
+    stripped = line.strip()
+    if stripped.startswith("|"):
+        stripped = stripped[1:]
+    if stripped.endswith("|"):
+        stripped = stripped[:-1]
+    return [cell.strip() for cell in stripped.split("|")]
+
+
+def _render_table_block_for_telegram(table_block: list[str]) -> str:
+    """Render a detected GFM table as Telegram-friendly row groups."""
+    if len(table_block) < 3:
+        return "\n".join(table_block)
+
+    headers = _split_markdown_table_row(table_block[0])
+    if len(headers) < 2:
+        return "\n".join(table_block)
+
+    rendered_rows: list[str] = []
+    for index, row in enumerate(table_block[2:], start=1):
+        cells = _split_markdown_table_row(row)
+        if len(cells) < len(headers):
+            cells.extend([""] * (len(headers) - len(cells)))
+        elif len(cells) > len(headers):
+            cells = cells[: len(headers)]
+
+        heading = next((cell for cell in cells if cell), f"Row {index}")
+        rendered_rows.append(f"**{heading}**")
+        rendered_rows.extend(
+            f"• {header}: {value}" for header, value in zip(headers, cells)
+        )
+
+    return "\n\n".join(rendered_rows)
+
+
 def _wrap_markdown_tables(text: str) -> str:
-    """Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
+    """Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.

    Detected by a row containing '|' immediately followed by a delimiter
    row matching :data:`_TABLE_SEPARATOR_RE`.  Subsequent pipe-containing
-    non-blank lines are consumed as the table body and included in the
-    wrapped block.  Tables inside existing fenced code blocks are left
+    non-blank lines are consumed as the table body and rewritten as
+    per-row bullet groups. Tables inside existing fenced code blocks are left
    alone.
    """
    if '|' not in text or '-' not in text:
@@ -187,9 +224,7 @@ def _wrap_markdown_tables(text: str) -> str:
            while j < len(lines) and _is_table_row(lines[j]):
                table_block.append(lines[j])
                j += 1
-            out.append('```')
-            out.extend(table_block)
-            out.append('```')
+            out.append(_render_table_block_for_telegram(table_block))
            i = j
            continue

@@ -334,6 +369,49 @@ class TelegramAdapter(BasePlatformAdapter):
            return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
        return {"disable_web_page_preview": True}

+    async def _drain_polling_connections(self) -> None:
+        """Reset the httpx connection pool used for getUpdates polling.
+
+        Network errors (especially through proxies like sing-box) can leave
+        httpx connections in a half-closed state that still occupy pool slots.
+        After enough reconnect cycles the pool fills up entirely, causing
+        ``Pool timeout: All connections in the connection pool are occupied.``
+
+        We reset ONLY ``_request[0]`` (the getUpdates request) — the general
+        request (``_request[1]``) is left untouched so concurrent
+        ``send_message`` / ``edit_message`` calls are never interrupted.
+
+        Implementation note: accesses ``Bot._request[0]`` which is the
+        get-updates ``BaseRequest`` in the PTB 22.x internal tuple
+        ``(get_updates_request, general_request)``.  There is no public
+        accessor for the polling request; review if upgrading to PTB 23+.
+        """
+        if not (self._app and self._app.bot):
+            return
+        try:
+            # PTB 22.x: _request is a (get_updates, general) tuple;
+            # no public accessor exists for the polling request.
+            polling_req = self._app.bot._request[0]  # noqa: SLF001
+        except Exception:
+            return
+        try:
+            await polling_req.shutdown()
+        except Exception:
+            logger.debug(
+                "[%s] Polling request shutdown failed (non-fatal)",
+                self.name, exc_info=True,
+            )
+        try:
+            await polling_req.initialize()
+            logger.debug(
+                "[%s] Polling request pool drained before reconnect", self.name
+            )
+        except Exception:
+            logger.debug(
+                "[%s] Polling request re-initialize failed (non-fatal)",
+                self.name, exc_info=True,
+            )
+
    async def _handle_polling_network_error(self, error: Exception) -> None:
        """Reconnect polling after a transient network interruption.

@@ -379,6 +457,8 @@ class TelegramAdapter(BasePlatformAdapter):
        except Exception:
            pass

+        await self._drain_polling_connections()
+
        try:
            await self._app.updater.start_polling(
                allowed_updates=Update.ALL_TYPES,
@@ -426,6 +506,7 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception:
                pass
            await asyncio.sleep(RETRY_DELAY)
+            await self._drain_polling_connections()
            try:
                await self._app.updater.start_polling(
                    allowed_updates=Update.ALL_TYPES,
@@ -554,7 +635,7 @@ class TelegramAdapter(BasePlatformAdapter):
                        _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
                        f.flush()
                        os.fsync(f.fileno())
-                    os.replace(tmp_path, config_path)
+                    atomic_replace(tmp_path, config_path)
                except BaseException:
                    try:
                        os.unlink(tmp_path)
@@ -2080,10 +2161,8 @@ class TelegramAdapter(BasePlatformAdapter):

        text = content

-        # 0) Pre-wrap GFM-style pipe tables in ``` fences.  Telegram can't
-        #    render tables natively, but fenced code blocks render as
-        #    monospace preformatted text with columns intact.  The wrapped
-        #    tables then flow through step (1) below as protected regions.
+        # 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
+        #    before the normal MarkdownV2 conversions run.
        text = _wrap_markdown_tables(text)

        # 1) Protect fenced code blocks (``` ... ```)
@@ -2353,6 +2432,26 @@ class TelegramAdapter(BasePlatformAdapter):
                    user = getattr(entity, "user", None)
                    if user and getattr(user, "id", None) == bot_id:
                        return True
+                elif entity_type == "bot_command" and expected:
+                    # Telegram's official group-disambiguation form for slash
+                    # commands (``/cmd@botname``) is emitted as a single
+                    # ``bot_command`` entity covering the whole span — there
+                    # is no accompanying ``mention`` entity. Treat it as a
+                    # direct address to this bot when the ``@botname`` suffix
+                    # matches. This is the form Telegram's own command menu
+                    # autocomplete produces in groups, so dropping it at the
+                    # mention gate would break /new, /reset, /help, ... for
+                    # every group that has ``require_mention`` enabled (#15415).
+                    offset = int(getattr(entity, "offset", -1))
+                    length = int(getattr(entity, "length", 0))
+                    if offset < 0 or length <= 0:
+                        continue
+                    command_text = source_text[offset:offset + length]
+                    at_index = command_text.find("@")
+                    if at_index < 0:
+                        continue
+                    if command_text[at_index:].strip().lower() == expected:
+                        return True
        return False

    def _message_matches_mention_patterns(self, message: Message) -> bool:
@@ -89,6 +89,7 @@ MAX_CONSECUTIVE_FAILURES = 3
 RETRY_DELAY_SECONDS = 2
 BACKOFF_DELAY_SECONDS = 30
 SESSION_EXPIRED_ERRCODE = -14
+RATE_LIMIT_ERRCODE = -2  # iLink frequency limit — backoff and retry
 MESSAGE_DEDUP_TTL_SECONDS = 300

 MEDIA_IMAGE = 1
@@ -1113,7 +1114,7 @@ async def qr_login(
 class WeixinAdapter(BasePlatformAdapter):
    """Native Hermes adapter for Weixin personal accounts."""

-    MAX_MESSAGE_LENGTH = 4000
+    MAX_MESSAGE_LENGTH = 2000

    # WeChat does not support editing sent messages — streaming must use the
    # fallback "send-final-only" path so the cursor (▉) is never left visible.
@@ -1138,10 +1139,10 @@ class WeixinAdapter(BasePlatformAdapter):
            extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
        ).strip().rstrip("/")
        self._send_chunk_delay_seconds = float(
-            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
+            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
        )
        self._send_chunk_retries = int(
-            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
+            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
        )
        self._send_chunk_retry_delay_seconds = float(
            extra.get("send_chunk_retry_delay_seconds")
@@ -1531,6 +1532,28 @@ class WeixinAdapter(BasePlatformAdapter):
                                self.name, _safe_id(chat_id),
                            )
                            continue
+                        # Rate limit (-2) — backoff and retry
+                        is_rate_limited = (
+                            ret == RATE_LIMIT_ERRCODE
+                            or errcode == RATE_LIMIT_ERRCODE
+                        )
+                        if is_rate_limited:
+                            errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
+                            # Record the error so we raise a descriptive
+                            # RuntimeError (instead of AssertionError) if the
+                            # loop exhausts with the server still rate-limiting.
+                            last_error = RuntimeError(
+                                f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
+                            )
+                            if attempt >= self._send_chunk_retries:
+                                break
+                            wait = self._send_chunk_retry_delay_seconds * 3  # 3x backoff for rate limit
+                            logger.warning(
+                                "[%s] rate limited for %s; backing off %.1fs before retry",
+                                self.name, _safe_id(chat_id), wait,
+                            )
+                            await asyncio.sleep(wait)
+                            continue
                        errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
                        raise RuntimeError(
                            f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
@@ -90,7 +90,7 @@ from gateway.platforms.yuanbao_proto import (
    encode_get_group_member_list,
    next_seq_no,
 )
-from gateway.session import SessionSource, build_session_key
+from gateway.session import build_session_key

 logger = logging.getLogger(__name__)

@@ -1897,7 +1897,7 @@ class OwnerCommandMiddleware(InboundMiddleware):
            return None, None, False

        # Sender identity check: bot owner <-> push.from_account == push.bot_owner_id
-        owner_id = (push or {}).get("bot_owner_id") or ""
+        # owner_id = (push or {}).get("bot_owner_id") or ""
        # is_owner = bool(owner_id) and owner_id == from_account
        is_owner = True
        return cmd, cmd_line, is_owner
@@ -21,12 +21,10 @@ import hashlib
 import hmac
 import logging
 import os
-import re
 import secrets
 import struct
 import time
 import urllib.parse
-from datetime import datetime, timezone, timedelta
 from typing import Optional, Any

 import httpx
@@ -19,9 +19,8 @@ yuanbao_proto.py - Yuanbao WebSocket 协议编解码（纯 Python 实现）
 from __future__ import annotations

 import logging
-import struct
 import threading
-from typing import Optional, Union
+from typing import Optional

 logger = logging.getLogger(__name__)

@@ -0,0 +1,150 @@
+"""Gateway runtime-metadata footer.
+
+Renders a compact footer showing runtime state (model, context %, cwd) and
+appends it to the FINAL message of an agent turn when enabled.  Off by default
+to keep replies minimal.
+
+Config (``~/.hermes/config.yaml``)::
+
+    display:
+      runtime_footer:
+        enabled: true                       # off by default
+        fields: [model, context_pct, cwd]   # order shown; drop any to hide
+
+Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
+Users can toggle the global setting with ``/footer on|off`` from both the CLI
+and any gateway platform.
+
+The footer is appended to the final response text in ``gateway/run.py`` right
+before returning the response to the adapter send path — so it only lands on
+the final message a user sees, not on tool-progress updates or streaming
+partials.  When streaming is on and the final text has already been delivered
+piecemeal, the footer is sent as a separate trailing message via
+``send_trailing_footer()``.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
+_SEP = " · "
+
+
+def _home_relative_cwd(cwd: str) -> str:
+    """Abbreviate *cwd* for display by swapping the home directory for ``~``.
+
+    Args:
+        cwd: Directory path to shorten; it is made absolute before comparing.
+
+    Returns:
+        ``""`` when *cwd* is empty.  Otherwise the absolute path, with the
+        home prefix replaced by ``~`` when the path is the home directory or
+        lies beneath it.  If path handling raises, *cwd* is returned as given.
+    """
+    if not cwd:
+        return ""
+    try:
+        home_dir = os.path.expanduser("~")
+        absolute = os.path.abspath(cwd)
+        # Require an exact match or a separator right after the home prefix so
+        # a sibling such as ``/home/alice2`` is not mistaken for ``/home/alice``.
+        under_home = bool(home_dir) and (
+            absolute == home_dir or absolute.startswith(home_dir + os.sep)
+        )
+        if not under_home:
+            return absolute
+        return "~" + absolute[len(home_dir):]
+    except Exception:
+        return cwd
+
+
+def _model_short(model: Optional[str]) -> str:
+    """Return the model name without its ``vendor/`` prefix.
+
+    Only the text after the last ``/`` is kept (``openai/gpt-5.4`` →
+    ``gpt-5.4``).  A name without ``/`` comes back unchanged, and ``None`` or
+    an empty string yields ``""``.
+    """
+    if model:
+        _vendor, _slash, short_name = model.rpartition("/")
+        return short_name
+    return ""
+
+
+def resolve_footer_config(
+    user_config: dict[str, Any] | None,
+    platform_key: str | None = None,
+) -> dict[str, Any]:
+    """Resolve effective runtime-footer config for *platform_key*.
+
+    Merge order (later wins):
+        1. Built-in defaults (enabled=False)
+        2. ``display.runtime_footer``
+        3. ``display.platforms.<platform_key>.runtime_footer``
+
+    Args:
+        user_config: Parsed user configuration, or ``None``.
+        platform_key: Platform whose override layer should be applied; falsy
+            values skip the per-platform layer.
+
+    Returns:
+        A dict with ``enabled`` (bool) and ``fields`` (list of str).  Any
+        level of the config that is not a mapping is ignored rather than
+        raising, so a hand-edited config with the wrong shape falls back to
+        the previous layer.
+    """
+    resolved: dict[str, Any] = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
+
+    def _overlay(layer: Any) -> None:
+        """Apply one ``runtime_footer`` mapping on top of ``resolved``."""
+        if not isinstance(layer, dict):
+            return
+        if "enabled" in layer:
+            resolved["enabled"] = bool(layer.get("enabled"))
+        fields = layer.get("fields")
+        # An empty or non-list ``fields`` keeps whatever the previous layer chose.
+        if isinstance(fields, list) and fields:
+            resolved["fields"] = [str(f) for f in fields]
+
+    display = user_config.get("display") if isinstance(user_config, dict) else None
+    if not isinstance(display, dict):
+        # Missing or wrongly-typed ``display`` section: defaults only.
+        return resolved
+
+    _overlay(display.get("runtime_footer"))
+
+    if platform_key:
+        platforms = display.get("platforms")
+        if isinstance(platforms, dict):
+            plat_cfg = platforms.get(platform_key)
+            if isinstance(plat_cfg, dict):
+                _overlay(plat_cfg.get("runtime_footer"))
+
+    return resolved
+
+
+def format_runtime_footer(
+    *,
+    model: Optional[str],
+    context_tokens: int,
+    context_length: Optional[int],
+    cwd: Optional[str] = None,
+    fields: Iterable[str] = _DEFAULT_FIELDS,
+) -> str:
+    """Render the footer line, or return "" if no fields have data.
+
+    Fields are skipped silently when their underlying data is missing — a
+    partially-populated footer is better than a line with ``?%`` or empty slots.
+
+    Args:
+        model: Model identifier; any ``vendor/`` prefix is dropped for display.
+        context_tokens: Tokens currently in context.  ``None`` is tolerated
+            and treated as missing data.
+        context_length: Context window size; falsy or non-positive values
+            hide the percentage.
+        cwd: Working directory; when falsy, the ``TERMINAL_CWD`` environment
+            variable is used instead.
+        fields: Field names to render, in display order.
+
+    Returns:
+        The populated fields joined by ``_SEP``, or ``""`` when none has data.
+    """
+    parts: list[str] = []
+    for field in fields:
+        if field == "model":
+            m = _model_short(model)
+            if m:
+                parts.append(m)
+        elif field == "context_pct":
+            # A missing token count must hide the field, not raise TypeError
+            # on the ``>=`` comparison.
+            has_usage = (
+                bool(context_length)
+                and context_length > 0
+                and context_tokens is not None
+                and context_tokens >= 0
+            )
+            if has_usage:
+                # Clamp so an over-full context never shows more than 100%.
+                pct = max(0, min(100, round((context_tokens / context_length) * 100)))
+                parts.append(f"{pct}%")
+        elif field == "cwd":
+            rel = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
+            if rel:
+                parts.append(rel)
+        # Unknown field names are silently ignored.
+
+    if not parts:
+        return ""
+    return _SEP.join(parts)
+
+
+def build_footer_line(
+    *,
+    user_config: dict[str, Any] | None,
+    platform_key: str | None,
+    model: Optional[str],
+    context_tokens: int,
+    context_length: Optional[int],
+    cwd: Optional[str] = None,
+) -> str:
+    """Resolve the footer config for a platform and render the footer text.
+
+    This is the entry point intended for ``gateway/run.py``.  The result is
+    ``""`` when the footer is disabled for *platform_key* or when no field
+    has data.  Appending the text to the final response (with a single blank
+    line of separation) is left to the caller.
+    """
+    effective = resolve_footer_config(user_config, platform_key)
+    if effective.get("enabled"):
+        # An empty field list falls back to the built-in default order.
+        chosen_fields = effective.get("fields") or _DEFAULT_FIELDS
+        return format_runtime_footer(
+            model=model,
+            context_tokens=context_tokens,
+            context_length=context_length,
+            cwd=cwd,
+            fields=chosen_fields,
+        )
+    return ""
@@ -62,8 +62,8 @@ from .config import (
 )
 from .whatsapp_identity import (
    canonical_whatsapp_identifier,
-    normalize_whatsapp_identifier,
 )
+from utils import atomic_replace


@dataclass
@@ -705,7 +705,7 @@ class SessionStore:
                json.dump(data, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
-            os.replace(tmp_path, sessions_file)
+            atomic_replace(tmp_path, sessions_file)
        except BaseException:
            try:
                os.unlink(tmp_path)
@@ -1257,25 +1257,11 @@ class SessionStore:
        Used by /retry, /undo, and /compress to persist modified conversation history.
        Rewrites both SQLite and legacy JSONL storage.
        """
-        # SQLite: clear old messages and re-insert
+        # SQLite: replace atomically so a mid-rewrite failure doesn't leave
+        # the session half-empty in the DB while JSONL still has history.
        if self._db:
            try:
-                self._db.clear_messages(session_id)
-                for msg in messages:
-                    role = msg.get("role", "unknown")
-                    self._db.append_message(
-                        session_id=session_id,
-                        role=role,
-                        content=msg.get("content"),
-                        tool_name=msg.get("tool_name"),
-                        tool_calls=msg.get("tool_calls"),
-                        tool_call_id=msg.get("tool_call_id"),
-                        reasoning=msg.get("reasoning") if role == "assistant" else None,
-                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
-                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
-                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
-                        codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
-                    )
+                self._db.replace_messages(session_id, messages)
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
        
@@ -43,6 +43,7 @@ import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -109,6 +110,12 @@ SERVICE_PROVIDER_NAMES: Dict[str, str] = {
 DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
 GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60  # refresh 60s before expiry

+# LM Studio's default no-auth mode still requires *some* non-empty bearer for
+# the API-key code paths (auxiliary_client, runtime resolver) to treat the
+# provider as configured. This sentinel is sent only to LM Studio, never to
+# any remote service.
+LMSTUDIO_NOAUTH_PLACEHOLDER = "dummy-lm-api-key"
+

 # =============================================================================
 # Provider Registry
@@ -159,6 +166,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="oauth_external",
        inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
    ),
+    "lmstudio": ProviderConfig(
+        id="lmstudio",
+        name="LM Studio",
+        auth_type="api_key",
+        inference_base_url="http://127.0.0.1:1234/v1",
+        api_key_env_vars=("LM_API_KEY",),
+        base_url_env_var="LM_BASE_URL",
+    ),
    "copilot": ProviderConfig(
        id="copilot",
        name="GitHub Copilot",
@@ -224,6 +239,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("ARCEEAI_API_KEY",),
        base_url_env_var="ARCEE_BASE_URL",
    ),
+    "gmi": ProviderConfig(
+        id="gmi",
+        name="GMI Cloud",
+        auth_type="api_key",
+        inference_base_url="https://api.gmi-serving.com/v1",
+        api_key_env_vars=("GMI_API_KEY",),
+        base_url_env_var="GMI_BASE_URL",
+    ),
    "minimax": ProviderConfig(
        id="minimax",
        name="MiniMax",
@@ -340,6 +363,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XIAOMI_API_KEY",),
        base_url_env_var="XIAOMI_BASE_URL",
    ),
+    "tencent-tokenhub": ProviderConfig(
+        id="tencent-tokenhub",
+        name="Tencent TokenHub",
+        auth_type="api_key",
+        inference_base_url="https://tokenhub.tencentmaas.com/v1",
+        api_key_env_vars=("TOKENHUB_API_KEY",),
+        base_url_env_var="TOKENHUB_BASE_URL",
+    ),
    "ollama-cloud": ProviderConfig(
        id="ollama-cloud",
        name="Ollama Cloud",
@@ -812,7 +843,7 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
            handle.write(payload)
            handle.flush()
            os.fsync(handle.fileno())
-        os.replace(tmp_path, auth_file)
+        atomic_replace(tmp_path, auth_file)
        try:
            dir_fd = os.open(str(auth_file.parent), os.O_RDONLY)
        except OSError:
@@ -1120,6 +1151,7 @@ def resolve_provider(
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
+        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
@@ -1132,11 +1164,13 @@ def resolve_provider(
        "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
+        "tencent": "tencent-tokenhub", "tokenhub": "tencent-tokenhub",
+        "tencent-cloud": "tencent-tokenhub", "tencentmaas": "tencent-tokenhub",
        "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
+        "lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio",
        # Local server aliases — route through the generic custom provider
-        "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
        "ollama": "custom", "ollama_cloud": "ollama-cloud",
        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
@@ -1183,8 +1217,11 @@ def resolve_provider(
            continue
        # GitHub tokens are commonly present for repo/tool access but should not
        # hijack inference auto-selection unless the user explicitly chooses
-        # Copilot/GitHub Models as the provider.
-        if pid == "copilot":
+        # Copilot/GitHub Models as the provider. LM Studio is a local server
+        # whose availability isn't implied by LM_API_KEY presence (it may be
+        # offline, and the no-auth setup uses a placeholder value), so it
+        # also requires explicit selection.
+        if pid in ("copilot", "lmstudio"):
            continue
        for env_var in pconfig.api_key_env_vars:
            if has_usable_secret(os.getenv(env_var, "")):
@@ -3462,6 +3499,13 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    key_source = ""
    api_key, key_source = _resolve_api_key_provider_secret(provider_id, pconfig)

+    # No-auth LM Studio: substitute a placeholder so runtime / auxiliary_client
+    # see the local server as configured. doctor still reports unconfigured
+    # because get_api_key_provider_status uses the raw secret resolver.
+    if not api_key and provider_id == "lmstudio":
+        api_key = LMSTUDIO_NOAUTH_PLACEHOLDER
+        key_source = key_source or "default"
+
    env_url = ""
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()
@@ -34,7 +34,7 @@ from dataclasses import dataclass, field
 from typing import Optional
 from urllib import request as urllib_request
 from urllib.error import HTTPError, URLError
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlparse

 logger = logging.getLogger(__name__)

@@ -36,12 +36,23 @@ _EXCLUDED_DIRS = {
    "__pycache__",      # bytecode caches — regenerated on import
    ".git",             # nested git dirs (profiles shouldn't have these, but safety)
    "node_modules",     # js deps if website/ somehow leaks in
+    "backups",          # prior auto-backups — don't nest backups exponentially
+    "checkpoints",      # session-local trajectory caches — regenerated per-session,
+                        # session-hash-keyed so they don't port to another machine anyway
 }

 # File-name suffixes to skip
 _EXCLUDED_SUFFIXES = (
    ".pyc",
    ".pyo",
+    # SQLite sidecar files — the backup takes a consistent snapshot of ``*.db``
+    # via ``sqlite3.backup()``, so shipping the live WAL / shared-memory /
+    # rollback-journal alongside would pair a fresh snapshot with stale sidecar
+    # state and produce a torn restore on the next open. They're transient and
+    # regenerated on first connection anyway.
+    ".db-wal",
+    ".db-shm",
+    ".db-journal",
 )

 # File names to skip (runtime state that's meaningless on another machine)
@@ -454,6 +465,12 @@ def run_import(args) -> None:
 # Critical state files to include in quick snapshots (relative to HERMES_HOME).
 # Everything else is either regeneratable (logs, cache) or managed separately
 # (skills, repo, sessions/).
+#
+# Entries may be individual files OR directories.  Directories are captured
+# recursively; missing entries are silently skipped.  Pairing data lives in
+# platform-specific JSON blobs outside state.db, so it's listed here explicitly
+# — `hermes update` snapshots this set before pulling so approved-user lists
+# are recoverable if anything goes wrong (issue #15733).
 _QUICK_STATE_FILES = (
    "state.db",
    "config.yaml",
@@ -463,6 +480,10 @@ _QUICK_STATE_FILES = (
    "gateway_state.json",
    "channel_directory.json",
    "processes.json",
+    # Pairing stores (generic + per-platform JSONs outside state.db)
+    "pairing",                          # legacy location (gateway/pairing.py)
+    "platforms/pairing",                # new location (gateway/pairing.py)
+    "feishu_comment_pairing.json",      # Feishu comment subscription pairings
 )

 _QUICK_SNAPSHOTS_DIR = "state-snapshots"
@@ -498,7 +519,27 @@ def create_quick_snapshot(

    for rel in _QUICK_STATE_FILES:
        src = home / rel
-        if not src.exists() or not src.is_file():
+        if not src.exists():
+            continue
+
+        if src.is_dir():
+            # Walk the directory and record each file individually in the
+            # manifest so restore can treat them uniformly.  Empty dirs are
+            # skipped (nothing to snapshot).
+            for sub in src.rglob("*"):
+                if not sub.is_file():
+                    continue
+                sub_rel = sub.relative_to(home).as_posix()
+                dst = snap_dir / sub_rel
+                dst.parent.mkdir(parents=True, exist_ok=True)
+                try:
+                    shutil.copy2(sub, dst)
+                    manifest[sub_rel] = dst.stat().st_size
+                except (OSError, PermissionError) as exc:
+                    logger.warning("Could not snapshot %s: %s", sub_rel, exc)
+            continue
+
+        if not src.is_file():
            continue

        dst = snap_dir / rel
@@ -653,3 +694,233 @@ def run_quick_backup(args) -> None:
        print(f"  Restore with: /snapshot restore {snap_id}")
    else:
        print("No state files found to snapshot.")
+
+
+# ---------------------------------------------------------------------------
+# Shared full-zip backup helper
+# ---------------------------------------------------------------------------
+
+def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
+    """Write a full zip snapshot of ``hermes_root`` to ``out_path``.
+
+    Uses the same exclusion rules and SQLite safe-copy as :func:`run_backup`.
+
+    Args:
+        out_path: Destination zip file.  If it lies inside ``hermes_root`` it
+            is left out of the archive.
+        hermes_root: Directory tree to archive; symlinked directories are not
+            followed.
+
+    Returns:
+        ``out_path`` on success, ``None`` on failure (nothing to back up, or
+        walk/write error — caller should surface the outcome but not raise).
+        Files that cannot be read are skipped individually and do not fail
+        the whole backup.
+    """
+    # The destination is constant for the whole walk, so resolve it once
+    # instead of once per candidate file.
+    try:
+        resolved_out: Optional[Path] = out_path.resolve()
+    except (OSError, ValueError):
+        resolved_out = None  # cannot compare; every candidate is kept
+
+    files_to_add: list[tuple[Path, Path]] = []
+    try:
+        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
+            dp = Path(dirpath)
+            # Prune excluded directories in-place so os.walk doesn't descend
+            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
+
+            for fname in filenames:
+                fpath = dp / fname
+                try:
+                    rel = fpath.relative_to(hermes_root)
+                except ValueError:
+                    continue
+
+                if _should_exclude(rel):
+                    continue
+
+                # Skip the output zip itself if it already exists inside root.
+                if resolved_out is not None:
+                    try:
+                        if fpath.resolve() == resolved_out:
+                            continue
+                    except (OSError, ValueError):
+                        pass
+
+                files_to_add.append((fpath, rel))
+    except OSError as exc:
+        logger.warning("Full-zip backup: walk failed: %s", exc)
+        return None
+
+    if not files_to_add:
+        return None
+
+    try:
+        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
+            for abs_path, rel_path in files_to_add:
+                try:
+                    if abs_path.suffix == ".db":
+                        # Databases are archived from a temporary safe copy
+                        # rather than from the live file.
+                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                            tmp_db = Path(tmp.name)
+                        try:
+                            if _safe_copy_db(abs_path, tmp_db):
+                                zf.write(tmp_db, arcname=str(rel_path))
+                            else:
+                                # Previously dropped without a trace; a backup
+                                # missing its database should be visible.
+                                logger.warning(
+                                    "Full-zip backup: could not snapshot database %s; "
+                                    "omitted from archive",
+                                    rel_path,
+                                )
+                        finally:
+                            tmp_db.unlink(missing_ok=True)
+                    else:
+                        zf.write(abs_path, arcname=str(rel_path))
+                except (PermissionError, OSError, ValueError) as exc:
+                    logger.debug("Skipping %s in zip backup: %s", rel_path, exc)
+                    continue
+    except OSError as exc:
+        logger.warning("Full-zip backup: zip write failed: %s", exc)
+        # Best-effort cleanup of partial file
+        try:
+            out_path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        return None
+
+    return out_path
+
+
+# ---------------------------------------------------------------------------
+# Pre-update auto-backup
+# ---------------------------------------------------------------------------
+
+_PRE_UPDATE_BACKUPS_DIR = "backups"
+_PRE_UPDATE_PREFIX = "pre-update-"
+_PRE_UPDATE_DEFAULT_KEEP = 5
+
+
+def _pre_update_backup_dir(hermes_home: Optional[Path] = None) -> Path:
+    """Return the ``backups`` directory under *hermes_home*.
+
+    When *hermes_home* is falsy, ``get_hermes_home()`` supplies the base
+    directory.  The directory itself is not created here.
+    """
+    base_dir = hermes_home or get_hermes_home()
+    return base_dir / _PRE_UPDATE_BACKUPS_DIR
+
+
+def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
+    """Delete all but the newest *keep* pre-update backup archives.
+
+    Only regular files named ``pre-update-*.zip`` are considered, so
+    hand-made zips dropped in the same directory are never touched.
+
+    Args:
+        backup_dir: Directory holding the archives; a missing directory is a
+            no-op.
+        keep: Number of archives to retain; negative values count as ``0``.
+
+    Returns:
+        The number of files actually deleted.  Individual deletion failures
+        are logged and skipped.
+    """
+    keep = max(keep, 0)
+    if not backup_dir.exists():
+        return 0
+
+    candidates = [
+        entry
+        for entry in backup_dir.iterdir()
+        if entry.is_file()
+        and entry.name.startswith(_PRE_UPDATE_PREFIX)
+        and entry.suffix.lower() == ".zip"
+    ]
+    # Archive names embed a ``%Y-%m-%d-%H%M%S`` stamp, so reverse name order
+    # puts the newest first.
+    candidates.sort(key=lambda entry: entry.name, reverse=True)
+
+    removed = 0
+    for stale in candidates[keep:]:
+        try:
+            stale.unlink()
+        except OSError as exc:
+            logger.warning("Failed to prune backup %s: %s", stale.name, exc)
+        else:
+            removed += 1
+
+    return removed
+
+
+def create_pre_update_backup(
+    hermes_home: Optional[Path] = None,
+    keep: int = _PRE_UPDATE_DEFAULT_KEEP,
+) -> Optional[Path]:
+    """Create a full zip backup of HERMES_HOME under ``backups/``.
+
+    Mirrors :func:`run_backup` (same exclusion rules, same SQLite safe-copy)
+    but writes to ``<HERMES_HOME>/backups/pre-update-<timestamp>.zip`` and
+    auto-prunes old pre-update backups.
+
+    Args:
+        hermes_home: Root to back up; when falsy, ``get_default_hermes_root()``
+            is used.
+        keep: How many pre-update archives to retain after this one is written.
+
+    Returns:
+        The path to the created zip, or ``None`` if no files were found or
+        the backup could not be created.  Never raises — the caller
+        (``hermes update``) should continue even if the backup fails.
+    """
+    hermes_root = hermes_home or get_default_hermes_root()
+    if not hermes_root.is_dir():
+        return None
+
+    backup_dir = _pre_update_backup_dir(hermes_root)
+    try:
+        backup_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as exc:
+        logger.warning("Could not create pre-update backup dir %s: %s", backup_dir, exc)
+        return None
+
+    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
+
+    result = _write_full_zip_backup(out_path, hermes_root)
+    if result is None:
+        return None
+
+    # The archive already exists at this point.  A failure while listing the
+    # directory for pruning must not turn a successful backup into an
+    # exception, so it is logged and the archive path is still returned.
+    try:
+        _prune_pre_update_backups(backup_dir, keep=keep)
+    except OSError as exc:
+        logger.warning("Could not prune old pre-update backups in %s: %s", backup_dir, exc)
+    return out_path
+
+
+# ---------------------------------------------------------------------------
+# Pre-migration auto-backup (used by `hermes claw migrate`)
+# ---------------------------------------------------------------------------
+
+_PRE_MIGRATION_PREFIX = "pre-migration-"
+_PRE_MIGRATION_DEFAULT_KEEP = 5
+
+
+def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
+    """Delete all but the newest *keep* pre-migration backup archives.
+
+    Only regular files named ``pre-migration-*.zip`` are considered, so other
+    backups in the same directory are never touched.
+
+    Args:
+        backup_dir: Directory holding the archives; a missing directory is a
+            no-op.
+        keep: Number of archives to retain; negative values count as ``0``.
+
+    Returns:
+        The number of files actually deleted.  Individual deletion failures
+        are logged and skipped.
+    """
+    if not backup_dir.exists():
+        return 0
+    retain = keep if keep > 0 else 0
+
+    def _is_migration_archive(entry: Path) -> bool:
+        """True for regular ``pre-migration-*.zip`` files."""
+        if not entry.is_file():
+            return False
+        if not entry.name.startswith(_PRE_MIGRATION_PREFIX):
+            return False
+        return entry.suffix.lower() == ".zip"
+
+    # Reverse name order puts the most recent timestamped archive first.
+    newest_first = sorted(
+        filter(_is_migration_archive, backup_dir.iterdir()),
+        key=lambda entry: entry.name,
+        reverse=True,
+    )
+
+    removed = 0
+    for stale in newest_first[retain:]:
+        try:
+            stale.unlink()
+        except OSError as exc:
+            logger.warning("Failed to prune pre-migration backup %s: %s", stale.name, exc)
+            continue
+        removed += 1
+
+    return removed
+
+
+def create_pre_migration_backup(
+    hermes_home: Optional[Path] = None,
+    keep: int = _PRE_MIGRATION_DEFAULT_KEEP,
+) -> Optional[Path]:
+    """Create a full zip backup of HERMES_HOME under ``backups/`` before a
+    ``hermes claw migrate`` apply.
+
+    Shares implementation with :func:`create_pre_update_backup` via
+    ``_write_full_zip_backup`` — same exclusions, same SQLite safe-copy,
+    restorable with ``hermes import <archive>``.  Writes to
+    ``<HERMES_HOME>/backups/pre-migration-<timestamp>.zip`` and auto-prunes
+    old pre-migration backups.
+
+    Args:
+        hermes_home: Root to back up; when falsy, ``get_default_hermes_root()``
+            is used.
+        keep: How many pre-migration archives to retain after this one is
+            written.
+
+    Returns:
+        The path to the created zip, or ``None`` if nothing was found to back
+        up (fresh install) or the write failed.  Never raises — the caller
+        decides whether to abort or proceed.
+    """
+    hermes_root = hermes_home or get_default_hermes_root()
+    if not hermes_root.is_dir():
+        return None
+
+    # Reuses the shared backups/ directory so `hermes import` and the
+    # update-backup listing pick up pre-migration archives too.
+    backup_dir = _pre_update_backup_dir(hermes_root)
+    try:
+        backup_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as exc:
+        logger.warning("Could not create pre-migration backup dir %s: %s", backup_dir, exc)
+        return None
+
+    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    out_path = backup_dir / f"{_PRE_MIGRATION_PREFIX}{stamp}.zip"
+
+    result = _write_full_zip_backup(out_path, hermes_root)
+    if result is None:
+        return None
+
+    # The restore point is already on disk.  If pruning old archives fails,
+    # log it and still return the path, so the caller does not see a
+    # successful backup reported as an error.
+    try:
+        _prune_pre_migration_backups(backup_dir, keep=keep)
+    except OSError as exc:
+        logger.warning("Could not prune old pre-migration backups in %s: %s", backup_dir, exc)
+    return out_path
@@ -562,7 +562,6 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    right_content = "\n".join(right_lines)
    layout_table.add_row(left_content, right_content)

-    agent_name = _skin_branding("agent_name", "Hermes Agent")
    title_color = _skin_color("banner_title", "#FFD700")
    border_color = _skin_color("banner_border", "#CD7F32")
    version_label = format_banner_version_label()
@@ -4,7 +4,8 @@ Usage:
    hermes claw migrate              # Preview then migrate (always shows preview first)
    hermes claw migrate --dry-run    # Preview only, no changes
    hermes claw migrate --yes        # Skip confirmation prompt
-    hermes claw migrate --preset full --overwrite  # Full migration, overwrite conflicts
+    hermes claw migrate --preset full --overwrite --migrate-secrets  # Full run w/ secrets
+    hermes claw migrate --no-backup  # Skip pre-migration snapshot
    hermes claw cleanup              # Archive leftover OpenClaw directories
    hermes claw cleanup --dry-run    # Preview what would be archived
 """
@@ -15,6 +16,7 @@ import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
+from typing import Optional

 from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
 from hermes_constants import get_optional_skills_dir
@@ -321,10 +323,13 @@ def _cmd_migrate(args):
    migrate_secrets = getattr(args, "migrate_secrets", False)
    workspace_target = getattr(args, "workspace_target", None)
    skill_conflict = getattr(args, "skill_conflict", "skip")
+    no_backup = getattr(args, "no_backup", False)

-    # If using the "full" preset, secrets are included by default
-    if preset == "full":
-        migrate_secrets = True
+    # Secrets are never included implicitly — they must be explicitly requested
+    # via --migrate-secrets, even under --preset full.  This mirrors OpenClaw's
+    # migrate-hermes posture (two-phase: run once without secrets, rerun with
+    # --include-secrets) and prevents a --preset full invocation from silently
+    # importing API keys that the user may not have intended to copy.

    print()
    print(
@@ -431,15 +436,24 @@ def _cmd_migrate(args):

    preview_summary = preview_report.get("summary", {})
    preview_count = preview_summary.get("migrated", 0)
+    preview_conflicts = preview_summary.get("conflict", 0)

-    if preview_count == 0:
+    # "Nothing to migrate" means nothing migrated AND nothing blocked by
+    # conflicts.  If there are conflicts, we still want to show the plan and
+    # surface the refusal/--overwrite guidance instead of silently bailing.
+    if preview_count == 0 and preview_conflicts == 0:
        print()
        print_info("Nothing to migrate from OpenClaw.")
        _print_migration_report(preview_report, dry_run=True)
        return

    print()
-    print_header(f"Migration Preview — {preview_count} item(s) would be imported")
+    if preview_count > 0:
+        print_header(f"Migration Preview — {preview_count} item(s) would be imported")
+    else:
+        print_header(
+            f"Migration Preview — {preview_conflicts} conflict(s), nothing would be imported"
+        )
    print_info("No changes have been made yet. Review the list below:")
    _print_migration_report(preview_report, dry_run=True)

@@ -447,6 +461,24 @@ def _cmd_migrate(args):
    if dry_run:
        return

+    # ── Phase 1b: Refuse if the plan has conflicts and --overwrite is not set ─
+    # Modelled on OpenClaw's assertConflictFreePlan() — apply is a safe no-op
+    # on conflicts unless the user explicitly opts in to overwriting.  Without
+    # this guard, the user would answer "yes, proceed" and silently end up
+    # with a migration that skipped every conflicting item.
+    if preview_conflicts > 0 and not overwrite:
+        print()
+        print_error(
+            f"Plan has {preview_conflicts} conflict(s). Refusing to apply."
+        )
+        print_info(
+            "Each conflict is an item whose target already exists in ~/.hermes/. "
+            "Re-run with --overwrite to replace conflicting targets (item-level "
+            "backups are written to the migration report directory)."
+        )
+        print_info("Or re-run with --dry-run to review the full plan.")
+        return
+
    # ── Phase 2: Confirm and execute ───────────────────────────
    print()
    if not auto_yes:
@@ -458,6 +490,32 @@ def _cmd_migrate(args):
            print_info("Migration cancelled.")
            return

+    # ── Phase 2b: Pre-apply backup of the Hermes home ─────────
+    # Delegates to hermes_cli.backup.create_pre_migration_backup(), which
+    # shares implementation with the pre-update backup (same exclusion
+    # rules, same SQLite safe-copy, zip format) so the archive is
+    # restorable with `hermes import`.  Mirrors OpenClaw's
+    # createPreMigrationBackup posture — one atomic restore point before
+    # any mutation, auto-pruned to the last 5 pre-migration zips.
+    backup_archive: Optional[Path] = None
+    if not no_backup:
+        try:
+            from hermes_cli.backup import create_pre_migration_backup, _format_size
+            backup_archive = create_pre_migration_backup(hermes_home=hermes_home)
+            if backup_archive:
+                size_str = _format_size(backup_archive.stat().st_size)
+                print()
+                print_success(f"Pre-migration backup: {backup_archive} ({size_str})")
+                print_info(f"Restore with: hermes import {backup_archive.name}")
+        except Exception as e:
+            print()
+            print_error(f"Could not create pre-migration backup: {e}")
+            print_info(
+                "Re-run with --no-backup to skip, or free up disk space under the Hermes home."
+            )
+            logger.debug("Pre-migration backup error", exc_info=True)
+            return
+
    try:
        migrator = mod.Migrator(
            source_root=source_dir.resolve(),
@@ -476,6 +534,9 @@ def _cmd_migrate(args):
        print()
        print_error(f"Migration failed: {e}")
        logger.debug("OpenClaw migration error", exc_info=True)
+        if backup_archive:
+            print_info(f"A pre-migration backup is available at: {backup_archive}")
+            print_info(f"Restore with: hermes import {backup_archive.name}")
        return

    # Print results
@@ -62,6 +62,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
               aliases=("reset",)),
    CommandDef("clear", "Clear screen and start a new session", "Session",
               cli_only=True),
+    CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
+               cli_only=True),
    CommandDef("history", "Show conversation history", "Session",
               cli_only=True),
    CommandDef("save", "Save the current conversation", "Session",
@@ -113,6 +115,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
               "Configuration", cli_only=True,
               gateway_config_gate="display.tool_progress_command"),
+    CommandDef("footer", "Toggle gateway runtime-metadata footer on final replies",
+               "Configuration", args_hint="[on|off|status]",
+               subcommands=("on", "off", "status")),
    CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
               "Configuration"),
    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
@@ -123,6 +128,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
               subcommands=("normal", "fast", "status", "on", "off")),
    CommandDef("skin", "Show or change the display skin/theme", "Configuration",
               cli_only=True, args_hint="[name]"),
+    CommandDef("indicator", "Pick the TUI busy-indicator style", "Configuration",
+               cli_only=True, args_hint="[kaomoji|emoji|unicode|ascii]",
+               subcommands=("kaomoji", "emoji", "unicode", "ascii")),
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
@@ -941,6 +949,42 @@ def slack_subcommand_map() -> dict[str, str]:
 # Autocomplete
 # ---------------------------------------------------------------------------

+
+# Per-process cache for /model<space> LM Studio autocomplete. Probing on
+# every keystroke would block the UI; a short TTL keeps it live without
+# hammering the server.
+_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None
+
+
+def _lmstudio_completion_models() -> list[str]:
+    """Return LM Studio model names for /model autocomplete, cached for 30 s.
+
+    Returns [] without probing unless LM_API_KEY/LM_BASE_URL is set or the
+    auth store has an "lmstudio" entry; fetch errors are cached as [] too.
+    """
+    global _LMSTUDIO_COMPLETION_CACHE
+    if not (os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL")):
+        try:
+            from hermes_cli.auth import _load_auth_store
+            store = _load_auth_store() or {}
+            if not any("lmstudio" in (store.get(k) or {}) for k in ("providers", "credential_pool")):
+                return []
+        except Exception:
+            return []  # auth store unreadable: treat LM Studio as unconfigured
+    # Monotonic clock: the TTL must not be skewed by wall-clock adjustments.
+    now = time.monotonic()
+    if _LMSTUDIO_COMPLETION_CACHE is not None and (now - _LMSTUDIO_COMPLETION_CACHE[0]) < 30.0:
+        return _LMSTUDIO_COMPLETION_CACHE[1]
+    try:
+        from hermes_cli.models import fetch_lmstudio_models
+        base = os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1"
+        models = fetch_lmstudio_models(api_key=os.environ.get("LM_API_KEY", ""), base_url=base, timeout=0.8)
+    except Exception:
+        models = []
+    _LMSTUDIO_COMPLETION_CACHE = (now, models)
+    return models
+
+
 class SlashCommandCompleter(Completer):
    """Autocomplete for built-in slash commands, subcommands, and skill commands."""

@@ -1364,6 +1408,19 @@ class SlashCommandCompleter(Completer):
                    )
        except Exception:
            pass
+        # LM Studio: surface locally-loaded models. Gated on the user actually
+        # having LM Studio configured (env var or auth-store entry) so we
+        # don't probe 127.0.0.1 on every keystroke for users who don't use it.
+        for name in _lmstudio_completion_models():
+            if name in seen:
+                continue
+            if name.startswith(sub_lower) and name != sub_lower:
+                yield Completion(
+                    name,
+                    start_position=-len(sub_text),
+                    display=name,
+                    display_meta="LM Studio",
+                )

    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
@@ -30,34 +30,67 @@ logger = logging.getLogger(__name__)
 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 _LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
+# path -> (mtime_ns, size, cached expanded config dict).
+# load_config() returns a deepcopy of the cached value when the file
+# hasn't changed since the last load, skipping yaml.safe_load +
+# _deep_merge + _normalize_* + _expand_env_vars (~13 ms/call).
+# save_config() + migrate_config() write via atomic_yaml_write which
+# produces a fresh inode, so stat() sees a new mtime_ns and the next
+# load repopulates automatically — no explicit invalidation hook.
+_LOAD_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
+# path -> (mtime_ns, size, cached raw yaml dict). Same pattern as
+# _LOAD_CONFIG_CACHE but for read_raw_config() — used when callers want
+# the user's on-disk values without defaults merged in.
+_RAW_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
 # Env var names written to .env that aren't in OPTIONAL_ENV_VARS
 # (managed by setup/provider flows directly).
 _EXTRA_ENV_KEYS = frozenset({
    "OPENAI_API_KEY", "OPENAI_BASE_URL",
    "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
-    "DISCORD_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL",
+    "DISCORD_HOME_CHANNEL", "DISCORD_HOME_CHANNEL_NAME",
+    "TELEGRAM_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL_NAME",
+    "SLACK_HOME_CHANNEL", "SLACK_HOME_CHANNEL_NAME",
    "SIGNAL_ACCOUNT", "SIGNAL_HTTP_URL",
    "SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
+    "SIGNAL_HOME_CHANNEL", "SIGNAL_HOME_CHANNEL_NAME",
+    "SMS_HOME_CHANNEL", "SMS_HOME_CHANNEL_NAME",
    "DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
+    "DINGTALK_HOME_CHANNEL", "DINGTALK_HOME_CHANNEL_NAME",
    "FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
+    "FEISHU_HOME_CHANNEL", "FEISHU_HOME_CHANNEL_NAME",
+    "YUANBAO_HOME_CHANNEL", "YUANBAO_HOME_CHANNEL_NAME",
    "WECOM_BOT_ID", "WECOM_SECRET",
    "WECOM_CALLBACK_CORP_ID", "WECOM_CALLBACK_CORP_SECRET", "WECOM_CALLBACK_AGENT_ID",
    "WECOM_CALLBACK_TOKEN", "WECOM_CALLBACK_ENCODING_AES_KEY",
    "WECOM_CALLBACK_HOST", "WECOM_CALLBACK_PORT",
+    "WECOM_HOME_CHANNEL", "WECOM_HOME_CHANNEL_NAME",
    "WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
    "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
    "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
    "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
+    "BLUEBUBBLES_HOME_CHANNEL", "BLUEBUBBLES_HOME_CHANNEL_NAME",
    "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
    "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",  # legacy aliases (pre-rename, still read for back-compat)
    "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
    "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
-    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
+    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_HOME_CHANNEL_NAME", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
-    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
+    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD", "MATRIX_DM_AUTO_THREAD",
    "MATRIX_RECOVERY_KEY",
+    # Langfuse observability plugin — optional tuning keys + standard SDK vars.
+    # Activation is via plugins.enabled (opt-in through `hermes plugins enable
+    # observability/langfuse` or `hermes tools → Langfuse`); credentials gate
+    # the plugin at runtime.
+    "HERMES_LANGFUSE_ENV",
+    "HERMES_LANGFUSE_RELEASE",
+    "HERMES_LANGFUSE_SAMPLE_RATE",
+    "HERMES_LANGFUSE_MAX_CHARS",
+    "HERMES_LANGFUSE_DEBUG",
+    "LANGFUSE_PUBLIC_KEY",
+    "LANGFUSE_SECRET_KEY",
+    "LANGFUSE_BASE_URL",
 })
 import yaml

@@ -206,6 +239,7 @@ def get_container_exec_info() -> Optional[dict]:

 # Re-export from hermes_constants — canonical definition lives there.
 from hermes_constants import get_hermes_home  # noqa: F811,E402
+from utils import atomic_replace

 def get_config_path() -> Path:
    """Get the main config file path."""
@@ -389,6 +423,34 @@ DEFAULT_CONFIG = {
        # (60+ tool iterations with tiny output) before users assume the
        # bot is dead and /restart.
        "gateway_notify_interval": 180,
+        # Freshness window for the gateway auto-continue note (seconds).
+        # After a gateway crash/restart/SIGTERM mid-run, the next user
+        # message gets a "[System note: your previous turn was
+        # interrupted — process the unfinished tool result(s) first]"
+        # prepended so the model picks up where it left off.  That's the
+        # right behaviour while the interruption is fresh, but stale
+        # markers (transcript last touched hours or days ago) can revive
+        # an unrelated old task when the user's next message starts new
+        # work.  This window is the max age of the last persisted
+        # transcript row for which we still inject the continue note.
+        # Default 3600s comfortably covers a long turn (gateway_timeout
+        # default is 1800s) plus runtime slack.  Set to 0 to disable the
+        # gate and restore pre-fix behaviour (always inject).
+        "gateway_auto_continue_freshness": 3600,
+        # How user-attached images are presented to the main model on each turn.
+        #   "auto"   — attach natively when the active model reports
+        #              supports_vision=True AND the user hasn't explicitly
+        #              configured auxiliary.vision.provider.  Otherwise fall
+        #              back to text (vision_analyze pre-analysis).
+        #   "native" — always attach natively; non-vision models will either
+        #              error at the provider or get a last-chance text fallback
+        #              (see run_agent._prepare_messages_for_api).
+        #   "text"   — always pre-analyze with vision_analyze and prepend the
+        #              description as text; the main model never sees pixels.
+        # Affects gateway platforms, the TUI, and CLI /attach.  vision_analyze
+        # remains available as a tool regardless of this setting — the routing
+        # only controls how inbound user images are presented.
+        "image_input_mode": "auto",
    },
    
    "terminal": {
@@ -532,7 +594,7 @@ DEFAULT_CONFIG = {
        "threshold": 0.50,            # compress when context usage exceeds this ratio
        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
-
+        "hygiene_hard_message_limit": 400,  # gateway session-hygiene force-compress threshold by message count
    },

    # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
@@ -641,6 +703,11 @@ DEFAULT_CONFIG = {
        "personality": "kawaii",
        "resume_display": "full",
        "busy_input_mode": "interrupt",  # interrupt | queue | steer
+        # When true, `hermes --tui` auto-resumes the most recent human-
+        # facing session on launch instead of forging a fresh one.
+        # Mirrors `hermes -c` muscle memory.  Default off so existing
+        # users aren't surprised.  HERMES_TUI_RESUME=<id> always wins.
+        "tui_auto_resume_recent": False,
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
@@ -648,6 +715,9 @@ DEFAULT_CONFIG = {
        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
+        # TUI busy indicator style: kaomoji (default), emoji, unicode (braille
+        # spinner), or ascii.  Live-swappable via `/indicator <style>`.
+        "tui_status_indicator": "kaomoji",
        "user_message_preview": {  # CLI: how many submitted user-message lines to echo back in scrollback
            "first_lines": 2,
            "last_lines": 2,
@@ -657,6 +727,14 @@ DEFAULT_CONFIG = {
        "tool_progress_overrides": {},  # DEPRECATED — use display.platforms instead
        "tool_preview_length": 0,  # Max chars for tool call previews (0 = no limit, show full paths/commands)
        "platforms": {},  # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
+        # Gateway runtime-metadata footer appended to the FINAL message of a turn
+        # (disabled by default to keep replies minimal). When enabled, renders
+        # e.g. `model · 68% · ~/projects/hermes`. Per-platform overrides go under
+        # display.platforms.<platform>.runtime_footer.
+        "runtime_footer": {
+            "enabled": False,
+            "fields": ["model", "context_pct", "cwd"],  # Order shown; drop any to hide
+        },
    },

    # Web dashboard settings
@@ -874,6 +952,7 @@ DEFAULT_CONFIG = {

    # Telegram platform settings (gateway mode)
    "telegram": {
+        "reactions": False,            # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
    },

@@ -928,7 +1007,7 @@ DEFAULT_CONFIG = {
    # Pre-exec security scanning via tirith
    "security": {
        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
-        "redact_secrets": True,
+        "redact_secrets": False,
        "tirith_enabled": True,
        "tirith_path": "tirith",
        "tirith_timeout": 5,
@@ -1037,6 +1116,20 @@ DEFAULT_CONFIG = {
        "seen": {},
    },

+    # ``hermes update`` behaviour.
+    "updates": {
+        # Run a full ``hermes backup``-style zip of HERMES_HOME before every
+        # ``hermes update``.  Backups land in ``<HERMES_HOME>/backups/`` and
+        # can be restored with ``hermes import <path>``.  Off by default —
+        # on large HERMES_HOME directories the zip can add minutes to every
+        # update.  Set to true to re-enable, or pass ``--backup`` to opt in
+        # for a single update run.
+        "pre_update_backup": False,
+        # How many pre-update backup zips to retain.  Older ones are pruned
+        # automatically after each successful backup.
+        "backup_keep": 5,
+    },
+
    # Config schema version - bump this when adding new required fields
    "_config_version": 22,
 }
@@ -1138,6 +1231,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "LM_API_KEY": {
+        "description": "LM Studio bearer token for auth-enabled local servers",
+        "prompt": "LM Studio API key / bearer token",
+        "url": None,
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "LM_BASE_URL": {
+        "description": "LM Studio base URL override",
+        "prompt": "LM Studio base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "GLM_API_KEY": {
        "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
        "prompt": "Z.AI / GLM API key",
@@ -1226,6 +1335,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "GMI_API_KEY": {
+        "description": "GMI Cloud API key",
+        "prompt": "GMI Cloud API key",
+        "url": "https://www.gmicloud.ai/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "GMI_BASE_URL": {
+        "description": "GMI Cloud base URL override",
+        "prompt": "GMI Cloud base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "MINIMAX_API_KEY": {
        "description": "MiniMax API key (international)",
        "prompt": "MiniMax API key",
@@ -1648,6 +1773,30 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
    },

+    # ── Langfuse observability ──
+    "HERMES_LANGFUSE_PUBLIC_KEY": {
+        "description": "Langfuse project public key (pk-lf-...)",
+        "prompt": "Langfuse public key",
+        "url": "https://cloud.langfuse.com",
+        "password": False,
+        "category": "tool",
+    },
+    "HERMES_LANGFUSE_SECRET_KEY": {
+        "description": "Langfuse project secret key (sk-lf-...)",
+        "prompt": "Langfuse secret key",
+        "url": "https://cloud.langfuse.com",
+        "password": True,
+        "category": "tool",
+    },
+    "HERMES_LANGFUSE_BASE_URL": {
+        "description": "Langfuse server URL (default: https://cloud.langfuse.com)",
+        "prompt": "Langfuse server URL (leave empty for cloud.langfuse.com)",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
+
    # ── Messaging platforms ──
    "TELEGRAM_BOT_TOKEN": {
        "description": "Telegram bot token from @BotFather",
@@ -1795,6 +1944,14 @@ OPTIONAL_ENV_VARS = {
        "category": "messaging",
        "advanced": True,
    },
+    "MATRIX_DM_AUTO_THREAD": {
+        "description": "Auto-create threads for DM messages in Matrix (default: false)",
+        "prompt": "Auto-create threads in DMs (true/false)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+        "advanced": True,
+    },
    "MATRIX_DEVICE_ID": {
        "description": "Stable Matrix device ID for E2EE persistence across restarts (e.g. HERMES_BOT)",
        "prompt": "Matrix device ID (stable across restarts)",
@@ -2136,14 +2293,21 @@ def _normalize_custom_provider_entry(
        "baseUrl": "base_url",
        "apiMode": "api_mode",
        "keyEnv": "key_env",
+        "apiKeyEnv": "key_env",  # alias — OpenClaw-compatible + docs variant
        "defaultModel": "default_model",
        "contextLength": "context_length",
        "rateLimitDelay": "rate_limit_delay",
    }
+    # api_key_env is a documented snake_case alias for key_env (see
+    # website/docs/guides/azure-foundry.md).  Normalize it up front so the
+    # rest of the normalizer treats it as the canonical field.
+    if "api_key_env" in entry and "key_env" not in entry:
+        entry["key_env"] = entry["api_key_env"]
    _KNOWN_KEYS = {
-        "name", "api", "url", "base_url", "api_key", "key_env",
+        "name", "api", "url", "base_url", "api_key", "key_env", "api_key_env",
        "api_mode", "transport", "model", "default_model", "models",
        "context_length", "rate_limit_delay",
+        "request_timeout_seconds", "stale_timeout_seconds",
    }
    for camel, snake in _CAMEL_ALIASES.items():
        if camel in entry and snake not in entry:
@@ -2395,6 +2559,9 @@ _KNOWN_ROOT_KEYS = {
 _VALID_CUSTOM_PROVIDER_FIELDS = {
    "name", "base_url", "api_key", "api_mode", "model", "models",
    "context_length", "rate_limit_delay",
+    # key_env is read at runtime by runtime_provider.py and auxiliary_client.py
+    # — include it here so the set accurately describes the supported schema.
+    "key_env",
 }

 # Fields that look like they should be inside custom_providers, not at root
@@ -2471,10 +2638,32 @@ def validate_config_structure(config: Optional[Dict[str, Any]] = None) -> List["
                        "Add the API endpoint URL, e.g.: base_url: https://api.example.com/v1",
                    ))

-    # ── fallback_model must be a top-level dict with provider + model ────
+    # ── fallback_model: single dict OR list of dicts (chain) ─────────────
    fb = config.get("fallback_model")
    if fb is not None:
-        if not isinstance(fb, dict):
+        if isinstance(fb, list):
+            # Chain fallback — validate each entry
+            for i, entry in enumerate(fb):
+                if not isinstance(entry, dict):
+                    issues.append(ConfigIssue(
+                        "error",
+                        f"fallback_model[{i}] should be a dict, got {type(entry).__name__}",
+                        "Each entry needs provider + model",
+                    ))
+                else:
+                    if not entry.get("provider"):
+                        issues.append(ConfigIssue(
+                            "warning",
+                            f"fallback_model[{i}] is missing 'provider' field",
+                            "Add: provider: openrouter (or another provider)",
+                        ))
+                    if not entry.get("model"):
+                        issues.append(ConfigIssue(
+                            "warning",
+                            f"fallback_model[{i}] is missing 'model' field",
+                            "Add: model: <model-name>",
+                        ))
+        elif not isinstance(fb, dict):
            issues.append(ConfigIssue(
                "error",
                f"fallback_model should be a dict with 'provider' and 'model', got {type(fb).__name__}",
@@ -3267,25 +3456,62 @@ def read_raw_config() -> Dict[str, Any]:
    be parsed.  Use this for lightweight config reads where you just need a
    single value and don't want the overhead of ``load_config()``'s deep-merge
    + migration pipeline.
+
+    Cached on the config file's (mtime_ns, size) — same strategy as
+    ``load_config()``. Callers never receive the cached object itself,
+    since some mutate the result before passing it to ``save_config()``.
    """
    try:
        config_path = get_config_path()
-        if config_path.exists():
-            with open(config_path, encoding="utf-8") as f:
-                return yaml.safe_load(f) or {}
+        st = config_path.stat()
+        cache_key = (st.st_mtime_ns, st.st_size)
+    except (FileNotFoundError, OSError):
+        return {}
+
+    path_key = str(config_path)
+    cached = _RAW_CONFIG_CACHE.get(path_key)
+    if cached is not None and cached[:2] == cache_key:
+        return copy.deepcopy(cached[2])
+
+    try:
+        with open(config_path, encoding="utf-8") as f:
+            data = yaml.safe_load(f) or {}
    except Exception:
-        pass
-    return {}
+        return {}
+
+    if not isinstance(data, dict):
+        data = {}
+    _RAW_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(data))
+    return data


 def load_config() -> Dict[str, Any]:
-    """Load configuration from ~/.hermes/config.yaml."""
+    """Load configuration from ~/.hermes/config.yaml.
+
+    Cached on the config file's (mtime_ns, size). Returns a deepcopy of
+    the cached value when unchanged, since most call sites mutate the
+    result (e.g. ``cfg["model"]["default"] = ...`` before ``save_config``).
+    The cache is keyed on ``str(config_path)`` so profile switches
+    (which change ``HERMES_HOME`` and therefore ``get_config_path()``)
+    don't collide.
+    """
    ensure_hermes_home()
    config_path = get_config_path()
-    
+    path_key = str(config_path)
+
+    try:
+        st = config_path.stat()
+        cache_key: Optional[Tuple[int, int]] = (st.st_mtime_ns, st.st_size)
+    except FileNotFoundError:
+        cache_key = None
+
+    cached = _LOAD_CONFIG_CACHE.get(path_key)
+    if cached is not None and cache_key is not None and cached[:2] == cache_key:
+        return copy.deepcopy(cached[2])
+
    config = copy.deepcopy(DEFAULT_CONFIG)
-    
-    if config_path.exists():
+
+    if cache_key is not None:
        try:
            with open(config_path, encoding="utf-8") as f:
                user_config = yaml.safe_load(f) or {}
@@ -3303,20 +3529,26 @@ def load_config() -> Dict[str, Any]:

    normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
    expanded = _expand_env_vars(normalized)
-    _LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded)
+    _LAST_EXPANDED_CONFIG_BY_PATH[path_key] = copy.deepcopy(expanded)
+    if cache_key is not None:
+        _LOAD_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(expanded))
+    else:
+        _LOAD_CONFIG_CACHE.pop(path_key, None)
    return expanded


 _SECURITY_COMMENT = """
 # ── Security ──────────────────────────────────────────────────────────
-# API keys, tokens, and passwords are redacted from tool output by default.
-# Set to false to see full values (useful for debugging auth issues).
+# Secret redaction is OFF by default — tool output (terminal stdout,
+# read_file results, web content) passes through unmodified. Set
+# redact_secrets to true to mask strings that look like API keys, tokens,
+# and passwords before they enter the model context and logs.
 # tirith pre-exec scanning is enabled by default when the tirith binary
 # is available. Configure via security.tirith_* keys or env vars
 # (TIRITH_ENABLED, TIRITH_BIN, TIRITH_TIMEOUT, TIRITH_FAIL_OPEN).
 #
 # security:
-#   redact_secrets: false
+#   redact_secrets: true
 #   tirith_enabled: true
 #   tirith_path: "tirith"
 #   tirith_timeout: 5
@@ -3349,11 +3581,11 @@ _FALLBACK_COMMENT = """

 _COMMENTED_SECTIONS = """
 # ── Security ──────────────────────────────────────────────────────────
-# API keys, tokens, and passwords are redacted from tool output by default.
-# Set to false to see full values (useful for debugging auth issues).
+# Secret redaction is OFF by default. Set to true to mask strings that
+# look like API keys, tokens, and passwords in tool output and logs.
 #
 # security:
-#   redact_secrets: false
+#   redact_secrets: true

 # ── Fallback Model ────────────────────────────────────────────────────
 # Automatic provider failover when primary is unavailable.
@@ -3404,7 +3636,12 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
+    fb_is_valid = False
+    if isinstance(fb, list):
+        fb_is_valid = any(isinstance(e, dict) and e.get("provider") and e.get("model") for e in fb)
+    elif isinstance(fb, dict):
+        fb_is_valid = bool(fb.get("provider") and fb.get("model"))
+    if not fb_is_valid:
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -3530,7 +3767,7 @@ def sanitize_env_file() -> int:
            f.writelines(sanitized)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, env_path)
+        atomic_replace(tmp_path, env_path)
    except BaseException:
        try:
            os.unlink(tmp_path)
@@ -3593,7 +3830,7 @@ def save_env_value(key: str, value: str):
    value = _check_non_ascii_credential(key, value)
    ensure_hermes_home()
    env_path = get_env_path()
-    
+
    # On Windows, open() defaults to the system locale (cp1252) which can
    # cause OSError errno 22 on UTF-8 .env files.
    read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
@@ -3605,7 +3842,7 @@ def save_env_value(key: str, value: str):
            lines = f.readlines()
        # Sanitize on every read: split concatenated keys, drop stale placeholders
        lines = _sanitize_env_lines(lines)
-    
+
    # Find and update or append
    found = False
    for i, line in enumerate(lines):
@@ -3613,7 +3850,7 @@ def save_env_value(key: str, value: str):
            lines[i] = f"{key}={value}\n"
            found = True
            break
-    
+
    if not found:
        # Ensure there's a newline at the end of the file before appending
        if lines and not lines[-1].endswith("\n"):
@@ -3633,7 +3870,7 @@ def save_env_value(key: str, value: str):
            f.writelines(lines)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, env_path)
+        atomic_replace(tmp_path, env_path)
        # Restore original permissions before _secure_file may tighten them.
        if original_mode is not None:
            try:
@@ -3689,7 +3926,7 @@ def remove_env_value(key: str) -> bool:
                f.writelines(new_lines)
                f.flush()
                os.fsync(f.fileno())
-            os.replace(tmp_path, env_path)
+            atomic_replace(tmp_path, env_path)
            if original_mode is not None:
                try:
                    os.chmod(env_path, original_mode)
@@ -3776,12 +4013,13 @@ def get_env_value(key: str) -> Optional[str]:
 # =============================================================================

 def redact_key(key: str) -> str:
-    """Redact an API key for display."""
-    if not key:
-        return color("(not set)", Colors.DIM)
-    if len(key) < 12:
-        return "***"
-    return key[:4] + "..." + key[-4:]
+    """Redact an API key for display.
+
+    Thin wrapper over :func:`agent.redact.mask_secret` — preserves the
+    "(not set)" placeholder in dim color for the empty case.
+    """
+    from agent.redact import mask_secret
+    return mask_secret(key, empty=color("(not set)", Colors.DIM))


 def show_config():
@@ -7,7 +7,6 @@ Currently supports:

 import io
 import json
-import os
 import sys
 import time
 import urllib.error
@@ -18,6 +17,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_home
+from utils import atomic_replace


 # ---------------------------------------------------------------------------
@@ -45,8 +45,13 @@ def _pending_file() -> Path:
    Each entry: ``{"url": "...", "expire_at": <unix_ts>}``.  Scheduled
    DELETEs used to be handled by spawning a detached Python process per
    paste that slept for 6 hours; those accumulated forever if the user
-    ran ``hermes debug share`` repeatedly.  We now persist the schedule
-    to disk and sweep expired entries on the next debug invocation.
+    ran ``hermes debug share`` repeatedly.
+
+    Deletion is now driven by the gateway's cron ticker
+    (``gateway/run.py::_start_cron_ticker``) which calls
+    ``_sweep_expired_pastes`` once per hour.  ``hermes debug share`` also
+    runs an opportunistic sweep on entry as a fallback for CLI-only users
+    who never start the gateway.
    """
    return get_hermes_home() / "pastes" / "pending.json"

@@ -74,7 +79,7 @@ def _save_pending(entries: list[dict]) -> None:
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(".json.tmp")
        tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
-        os.replace(tmp, path)
+        atomic_replace(tmp, path)
    except OSError:
        # Non-fatal — worst case the user has to run ``hermes debug delete``
        # manually.
@@ -223,9 +228,10 @@ def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SEC
    interpreters that never exited until the sleep completed.

    The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
-    and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
-    every ``hermes debug`` invocation.  If the user never runs ``hermes debug``
-    again, paste.rs's own retention policy handles cleanup.
+    and the gateway's cron ticker sweeps expired entries once per hour.
+    ``hermes debug share`` also runs an opportunistic sweep as a fallback
+    for CLI-only users.  If neither runs again, paste.rs's own retention
+    policy handles cleanup.
    """
    _record_pending(urls, delay_seconds=delay_seconds)

@@ -13,7 +13,6 @@ automatically.

 from __future__ import annotations

-import io
 import os
 import sys
 import time
@@ -46,6 +46,7 @@ _PROVIDER_ENV_HINTS = (
    "Z_AI_API_KEY",
    "KIMI_API_KEY",
    "KIMI_CN_API_KEY",
+    "GMI_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
@@ -56,6 +57,7 @@ _PROVIDER_ENV_HINTS = (
    "OPENCODE_ZEN_API_KEY",
    "OPENCODE_GO_API_KEY",
    "XIAOMI_API_KEY",
+    "TOKENHUB_API_KEY",
 )


@@ -291,15 +293,23 @@ def run_doctor(args):

            known_providers: set = set()
            try:
-                from hermes_cli.auth import PROVIDER_REGISTRY
+                from hermes_cli.auth import (
+                    PROVIDER_REGISTRY,
+                    resolve_provider as _resolve_auth_provider,
+                )
                known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"}
            except Exception:
+                _resolve_auth_provider = None
                pass
            try:
                from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
-                from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
+                from hermes_cli.providers import (
+                    normalize_provider as _normalize_catalog_provider,
+                    resolve_provider_full as _resolve_provider_full,
+                )
            except Exception:
                _compatible_custom_providers = None
+                _normalize_catalog_provider = None
                _resolve_provider_full = None

            custom_providers = []
@@ -319,17 +329,43 @@ def run_doctor(args):
                if name:
                    known_providers.add("custom:" + name.lower().replace(" ", "-"))

-            canonical_provider = provider
+            valid_provider_ids = set(known_providers)
+            provider_ids_to_accept = {provider} if provider else set()
+            if _normalize_catalog_provider is not None:
+                for known_provider in known_providers:
+                    try:
+                        valid_provider_ids.add(_normalize_catalog_provider(known_provider))
+                    except Exception:
+                        continue
+
+            runtime_provider = provider
+            if (
+                provider
+                and _resolve_auth_provider is not None
+                and provider not in ("auto", "custom")
+            ):
+                try:
+                    runtime_provider = _resolve_auth_provider(provider)
+                    provider_ids_to_accept.add(runtime_provider)
+                except Exception:
+                    runtime_provider = provider
+
+            catalog_provider = provider
            if (
                provider
                and _resolve_provider_full is not None
                and provider not in ("auto", "custom")
            ):
                provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
-                canonical_provider = provider_def.id if provider_def is not None else None
+                catalog_provider = provider_def.id if provider_def is not None else None
+                if catalog_provider is not None:
+                    provider_ids_to_accept.add(catalog_provider)

            if provider and provider != "auto":
-                if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
+                if catalog_provider is None or (
+                    known_providers
+                    and not (provider_ids_to_accept & valid_provider_ids)
+                ):
                    known_list = ", ".join(sorted(known_providers)) if known_providers else "(unavailable)"
                    check_fail(
                        f"model.provider '{provider_raw}' is not a recognised provider",
@@ -342,7 +378,24 @@ def run_doctor(args):
                    )

            # Warn if model is set to a provider-prefixed name on a provider that doesn't use them
-            if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"):
+            provider_for_policy = runtime_provider or catalog_provider
+            providers_accepting_vendor_slugs = {
+                "openrouter",
+                "custom",
+                "auto",
+                "ai-gateway",
+                "kilocode",
+                "opencode-zen",
+                "huggingface",
+                "lmstudio",
+                "nous",
+            }
+            if (
+                default_model
+                and "/" in default_model
+                and provider_for_policy
+                and provider_for_policy not in providers_accepting_vendor_slugs
+            ):
                check_warn(
                    f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'",
                    "(vendor-prefixed slugs belong to aggregators like openrouter)",
@@ -358,20 +411,24 @@ def run_doctor(args):
            # own env-var checks elsewhere in doctor, and get_auth_status()
            # returns a bare {logged_in: False} for anything it doesn't
            # explicitly dispatch, which would produce false positives.
-            if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"):
+            if runtime_provider and runtime_provider not in ("auto", "custom", "openrouter"):
                try:
                    from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
-                    pconfig = PROVIDER_REGISTRY.get(canonical_provider)
+                    pconfig = PROVIDER_REGISTRY.get(runtime_provider)
                    if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
-                        status = get_auth_status(canonical_provider) or {}
-                        configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key"))
+                        status = get_auth_status(runtime_provider) or {}
+                        configured = bool(
+                            status.get("configured")
+                            or status.get("logged_in")
+                            or status.get("api_key")
+                        )
                        if not configured:
                            check_fail(
-                                f"model.provider '{canonical_provider}' is set but no API key is configured",
+                                f"model.provider '{runtime_provider}' is set but no API key is configured",
                                "(check ~/.hermes/.env or run 'hermes setup')",
                            )
                            issues.append(
-                                f"No credentials found for provider '{canonical_provider}'. "
+                                f"No credentials found for provider '{runtime_provider}'. "
                                f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
                                f"or switch providers with 'hermes config set model.provider <name>'"
                            )
@@ -515,7 +572,14 @@ def run_doctor(args):
    if shutil.which("codex"):
        check_ok("codex CLI")
    else:
-        check_warn("codex CLI not found", "(required for openai-codex login)")
+        # Native OAuth uses Hermes' own device-code flow — the Codex CLI is
+        # only needed if you want to import existing tokens from
+        # ~/.codex/auth.json.  Downgrade to info so users running
+        # `hermes auth openai-codex` aren't told they're missing something.
+        check_info(
+            "codex CLI not installed "
+            "(optional — only required to import tokens from an existing Codex CLI login)"
+        )

    # =========================================================================
    # Check: Directory structure
@@ -937,6 +1001,7 @@ def run_doctor(args):
        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
+        ("GMI Cloud",        ("GMI_API_KEY",),                                "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                             "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
@@ -33,12 +33,14 @@ def _get_git_commit(project_root: Path) -> str:


 def _redact(value: str) -> str:
-    """Redact all but first 4 and last 4 chars."""
-    if not value:
-        return ""
-    if len(value) < 12:
-        return "***"
-    return value[:4] + "..." + value[-4:]
+    """Redact all but first 4 and last 4 chars.
+
+    Thin wrapper over :func:`agent.redact.mask_secret`. Returns ``""`` for
+    an empty value (matches the historical behavior of this helper —
+    ``hermes dump`` formats empty values as blank, not as ``"(not set)"``).
+    """
+    from agent.redact import mask_secret
+    return mask_secret(value)


 def _gateway_status() -> str:
@@ -7,6 +7,7 @@ import sys
 from pathlib import Path

 from dotenv import load_dotenv
+from utils import atomic_replace


 # Env var name suffixes that indicate credential values.  These are the
@@ -127,7 +128,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
                    f.writelines(sanitized)
                    f.flush()
                    os.fsync(f.fileno())
-                os.replace(tmp, path)
+                atomic_replace(tmp, path)
            except BaseException:
                try:
                    os.unlink(tmp)
@@ -2953,7 +2953,7 @@ def _setup_sms():
 def _setup_dingtalk():
    """Configure DingTalk — QR scan (recommended) or manual credential entry."""
    from hermes_cli.setup import (
-        prompt_choice, prompt_yes_no, print_info, print_success, print_warning,
+        prompt_choice, prompt_yes_no, print_success, print_warning,
    )

    dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk")
@@ -3504,7 +3504,6 @@ def _setup_qqbot():
    method_idx = prompt_choice("  How would you like to set up QQ Bot?", method_choices, 0)

    credentials = None
-    used_qr = False

    if method_idx == 0:
        # ── QR scan-to-configure ──
@@ -3515,8 +3514,6 @@ def _setup_qqbot():
            print()
            print_warning("  QQ Bot setup cancelled.")
            return
-        if credentials:
-            used_qr = True
        if not credentials:
            print_info("  QR setup did not complete. Continuing with manual input.")

@@ -19,9 +19,8 @@ format) lives there.
 from __future__ import annotations

 import json
-import os
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List


 def hooks_command(args) -> None:
@@ -44,6 +44,7 @@ Usage:
 """

 import argparse
+import json
 import os
 import shutil
 import subprocess
@@ -595,17 +596,22 @@ def _session_browse_picker(sessions: list) -> Optional[str]:


 def _resolve_last_session(source: str = "cli") -> Optional[str]:
-    """Look up the most recent session ID for a source."""
+    """Look up the most recently-used session ID for a source."""
+    db = None
    try:
        from hermes_state import SessionDB

        db = SessionDB()
        sessions = db.search_sessions(source=source, limit=1)
-        db.close()
-        if sessions:
-            return sessions[0]["id"]
+        return sessions[0]["id"] if sessions else None
    except Exception:
        pass
+    finally:
+        if db is not None:
+            try:
+                db.close()
+            except Exception:
+                pass
    return None


@@ -760,9 +766,20 @@ def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]:
    return None


-def _print_tui_exit_summary(session_id: Optional[str]) -> None:
+def _read_tui_active_session_file(path: Optional[str]) -> Optional[str]:
+    """Return the stripped ``session_id`` from the JSON file at *path*, else None."""
+    try:
+        payload = json.loads(Path(path).read_text(encoding="utf-8")) if path else {}
+        session_id = str(payload.get("session_id") or "").strip()
+    except Exception:
+        # Unreadable file, malformed JSON, or non-object JSON: treat as absent.
+        return None
+    return session_id or None
+
+
+def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Optional[str] = None) -> None:
    """Print a shell-visible epilogue after TUI exits."""
-    target = session_id or _resolve_last_session(source="tui")
+    target = _read_tui_active_session_file(active_session_file) or session_id or _resolve_last_session(source="tui")
    if not target:
        return

@@ -812,8 +829,29 @@ def _print_tui_exit_summary(session_id: Optional[str]) -> None:
    )


+_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"})
+
+
 def _tui_need_npm_install(root: Path) -> bool:
-    """True when @hermes/ink is missing or node_modules is behind package-lock.json (post-pull)."""
+    """True when @hermes/ink is missing or node_modules is behind package-lock.json.
+
+    Compares ``package-lock.json`` against ``node_modules/.package-lock.json``
+    (npm's hidden lockfile) by **content**, not mtime: git checkouts and npm
+    rewrites can bump the root lockfile's timestamp even when installed deps
+    already match, which used to trigger a spurious "Installing TUI
+    dependencies" on every launch.
+
+    For each entry in the root lock's ``packages`` map:
+      - missing from hidden lock → reinstall (unless the entry is marked
+        ``optional`` or ``peer``, which npm may intentionally skip per platform)
+      - present but with differing fields (excluding npm-written runtime
+        annotations like ``ideallyInert``) → reinstall
+
+    Extra entries that exist only in the hidden lock are ignored — stale
+    transitives left over from a removed dependency don't break runtime and
+    we'd rather not force a reinstall for them. Falls back to mtime
+    comparison if either lockfile is unparseable.
+    """
    ink = root / "node_modules" / "@hermes" / "ink" / "package.json"
    if not ink.is_file():
        return True
@@ -823,7 +861,35 @@ def _tui_need_npm_install(root: Path) -> bool:
    marker = root / "node_modules" / ".package-lock.json"
    if not marker.is_file():
        return True
-    return lock.stat().st_mtime > marker.stat().st_mtime
+
+    # Compare lockfile contents, not mtimes: git checkouts and npm rewrites
+    # can bump the root lockfile timestamp even when installed deps already
+    # match. Fall back to mtime when either file is unparseable.
+    try:
+        wanted = json.loads(lock.read_text(encoding="utf-8")).get("packages") or {}
+        installed = json.loads(marker.read_text(encoding="utf-8")).get("packages") or {}
+    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
+        return lock.stat().st_mtime > marker.stat().st_mtime
+
+    def comparable(pkg: dict) -> dict:
+        return {k: v for k, v in pkg.items() if k not in _NPM_LOCK_RUNTIME_KEYS}
+
+    for name, pkg in wanted.items():
+        if not name:
+            continue
+
+        if not isinstance(pkg, dict):
+            continue
+
+        if name not in installed:
+            if pkg.get("optional") or pkg.get("peer"):
+                continue
+            return True
+
+        if isinstance(installed[name], dict) and comparable(pkg) != comparable(installed[name]):
+            return True
+
+    return False


 def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:
@@ -1037,7 +1103,14 @@ def _launch_tui(
    """Replace current process with the TUI."""
    tui_dir = PROJECT_ROOT / "ui-tui"

+    import tempfile
+
    env = os.environ.copy()
+    active_session_fd, active_session_file = tempfile.mkstemp(
+        prefix="hermes-tui-active-session-", suffix=".json"
+    )
+    os.close(active_session_fd)
+    env["HERMES_TUI_ACTIVE_SESSION_FILE"] = active_session_file
    env["HERMES_PYTHON_SRC_ROOT"] = os.environ.get(
        "HERMES_PYTHON_SRC_ROOT", str(PROJECT_ROOT)
    )
@@ -1065,13 +1138,20 @@ def _launch_tui(
        env["HERMES_TUI_RESUME"] = resume_session_id

    argv, cwd = _make_tui_argv(tui_dir, tui_dev)
+    code: Optional[int] = None
    try:
-        code = subprocess.call(argv, cwd=str(cwd), env=env)
-    except KeyboardInterrupt:
-        code = 130
+        try:
+            code = subprocess.call(argv, cwd=str(cwd), env=env)
+        except KeyboardInterrupt:
+            code = 130

-    if code in (0, 130):
-        _print_tui_exit_summary(resume_session_id)
+        if code in (0, 130):
+            _print_tui_exit_summary(resume_session_id, active_session_file)
+    finally:
+        try:
+            os.unlink(active_session_file)
+        except OSError:
+            pass

    sys.exit(code)

@@ -1737,8 +1817,11 @@ def select_provider_and_model(args=None):
        "huggingface",
        "xiaomi",
        "arcee",
+        "gmi",
        "nvidia",
        "ollama-cloud",
+        "tencent-tokenhub",
+        "lmstudio",
    ):
        _model_flow_api_key_provider(config, selected_provider, current_model)

@@ -1965,7 +2048,11 @@ def _aux_select_for_task(task: str) -> None:

    # Gather authenticated providers (has credentials + curated model list)
    try:
-        providers = list_authenticated_providers(current_provider=current_provider)
+        providers = list_authenticated_providers(
+            current_provider=current_provider,
+            current_model=current_model,
+            current_base_url=current_base_url,
+        )
    except Exception as exc:
        print(f"Could not detect authenticated providers: {exc}")
        providers = []
@@ -4295,6 +4382,7 @@ def _model_flow_bedrock(config, current_model=""):
 def _model_flow_api_key_provider(config, provider_id, current_model=""):
    """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
    from hermes_cli.auth import (
+        LMSTUDIO_NOAUTH_PLACEHOLDER,
        PROVIDER_REGISTRY,
        _prompt_model_selection,
        _save_model_choice,
@@ -4329,13 +4417,20 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
            try:
                import getpass

-                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
+                if provider_id == "lmstudio":
+                    prompt = f"{key_env} (Enter for no-auth default {LMSTUDIO_NOAUTH_PLACEHOLDER!r}): "
+                else:
+                    prompt = f"{key_env} (or Enter to cancel): "
+                new_key = getpass.getpass(prompt).strip()
            except (KeyboardInterrupt, EOFError):
                print()
                return
            if not new_key:
-                print("Cancelled.")
-                return
+                if provider_id == "lmstudio":
+                    new_key = LMSTUDIO_NOAUTH_PLACEHOLDER
+                else:
+                    print("Cancelled.")
+                    return
            save_env_value(key_env, new_key)
            existing_key = new_key
            print("API key saved.")
@@ -4402,10 +4497,21 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
                print("  Tier check: could not verify (proceeding anyway).")
            print()

-    # Optional base URL override
+    # Optional base URL override.
+    # Precedence: env var → config.yaml model.base_url → registry default.
+    # Reading config.yaml prevents silently overwriting a saved remote URL
+    # (e.g. a remote LM Studio endpoint) with localhost when the user just
+    # presses Enter at the prompt below.
    current_base = ""
    if base_url_env:
        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
+    if not current_base:
+        try:
+            _m = load_config().get("model") or {}
+            if str(_m.get("provider") or "").strip().lower() == provider_id:
+                current_base = str(_m.get("base_url") or "").strip()
+        except Exception:
+            pass
    effective_base = current_base or pconfig.inference_base_url

    try:
@@ -4427,8 +4533,22 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
    #   2. Curated static fallback list (offline insurance)
    #   3. Live /models endpoint probe (small providers without models.dev data)
    #
-    # Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
-    if provider_id == "ollama-cloud":
+    # LM Studio: live /api/v1/models probe (no models.dev catalog).
+    # Ollama Cloud: merged discovery (live API + models.dev + disk cache).
+    if provider_id == "lmstudio":
+        from hermes_cli.auth import AuthError
+        from hermes_cli.models import fetch_lmstudio_models
+
+        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
+        try:
+            model_list = fetch_lmstudio_models(api_key=api_key_for_probe, base_url=effective_base)
+        except AuthError as exc:
+            print(f"  LM Studio rejected the request: {exc}")
+            print("  Set LM_API_KEY (or update it) to match the server's bearer token.")
+            model_list = []
+        if model_list:
+            print(f"  Found {len(model_list)} model(s) from LM Studio")
+    elif provider_id == "ollama-cloud":
        from hermes_cli.models import fetch_ollama_cloud_models

        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
@@ -4650,7 +4770,6 @@ def _model_flow_anthropic(config, current_model=""):
            read_claude_code_credentials,
            is_claude_code_token_valid,
            _is_oauth_token,
-            _resolve_claude_code_token_from_credentials,
        )

        cc_creds = read_claude_code_credentials()
@@ -5132,6 +5251,93 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
    return True


+def _warn_stale_dashboard_processes() -> None:
+    """Warn about running dashboard processes that still hold pre-update code.
+
+    ``hermes dashboard`` is a long-lived server process commonly started and
+    forgotten.  When ``hermes update`` replaces files on disk, the running
+    process keeps the old Python backend in memory while the JS bundle on
+    disk is updated, causing a silent frontend/backend mismatch (e.g. new
+    auth headers the old backend doesn't recognise → every API call 401s).
+
+    Unlike the gateway, the dashboard has no service manager (systemd /
+    launchd), so we can only warn — we don't auto-kill user-managed
+    background processes.
+    """
+    patterns = [
+        "hermes dashboard",
+        "hermes_cli.main dashboard",
+        "hermes_cli/main.py dashboard",
+    ]
+    self_pid = os.getpid()
+    dashboard_pids: list[int] = []
+
+    try:
+        if sys.platform == "win32":
+            result = subprocess.run(
+                ["wmic", "process", "get", "ProcessId,CommandLine",
+                 "/FORMAT:LIST"],
+                capture_output=True, text=True, timeout=10,
+            )
+            if result.returncode != 0:
+                return
+            current_cmd = ""
+            for line in result.stdout.split("\n"):
+                line = line.strip()
+                if line.startswith("CommandLine="):
+                    current_cmd = line[len("CommandLine="):]
+                elif line.startswith("ProcessId="):
+                    # Parse first so a malformed PID skips the entry instead of raising.
+                    try:
+                        pid = int(line[len("ProcessId="):])
+                    except ValueError:
+                        continue
+                    if pid != self_pid and any(p in current_cmd for p in patterns):
+                        dashboard_pids.append(pid)
+        else:
+            # Linux / macOS: scan the process table via ps and match against
+            # the same explicit patterns list used on Windows.  Using ps
+            # (rather than `pgrep -f "hermes.*dashboard"`) keeps us consistent
+            # with `hermes_cli.gateway._scan_gateway_pids` and avoids the
+            # greedy regex matching unrelated cmdlines that merely contain
+            # both words (e.g. a chat session discussing "dashboard").
+            result = subprocess.run(
+                ["ps", "-A", "-o", "pid=,command="],
+                capture_output=True, text=True, timeout=10,
+            )
+            if result.returncode == 0:
+                for line in result.stdout.split("\n"):
+                    stripped = line.strip()
+                    if not stripped or "grep" in stripped:
+                        continue
+                    parts = stripped.split(None, 1)
+                    if len(parts) != 2:
+                        continue
+                    try:
+                        pid = int(parts[0])
+                    except ValueError:
+                        continue
+                    command = parts[1]
+                    if (any(p in command for p in patterns)
+                            and pid != self_pid):
+                        dashboard_pids.append(pid)
+    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
+        return
+
+    if not dashboard_pids:
+        return
+
+    print()
+    print(f"⚠ {len(dashboard_pids)} dashboard process(es) still running "
+          f"with the previous version:")
+    for pid in dashboard_pids:
+        print(f"    PID {pid}")
+    print("  The running backend may not match the updated frontend,")
+    print("  causing silent auth failures or empty data.")
+    print("  Restart them to pick up the changes:")
+    print("    kill <pid> && hermes dashboard --port <port> ...")
+
+
 def _update_via_zip(args):
    """Update Hermes Agent by downloading a ZIP archive.

@@ -5266,6 +5472,7 @@ def _update_via_zip(args):

    print()
    print("✓ Update complete!")
+    _warn_stale_dashboard_processes()


 def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[str]:
@@ -6142,6 +6349,96 @@ def _ensure_fhs_path_guard() -> None:
        print("    (reload your shell or run 'source ~/.bashrc' to pick it up)")


+def _run_pre_update_backup(args) -> None:
+    """Create a full zip backup of HERMES_HOME before running the update.
+
+    Gated on ``updates.pre_update_backup`` in config (default false).  Off
+    by default because the zip can add minutes to every update on large
+    HERMES_HOME directories.  The ``--backup`` flag on ``hermes update``
+    opts in for a single run; ``--no-backup`` forces it off when config
+    has it enabled.  Never raises — a backup failure should not block the
+    update itself.
+    """
+    # CLI flags win over config.  --no-backup beats --backup if both are set.
+    if getattr(args, "no_backup", False):
+        print("◆ Pre-update backup: skipped (--no-backup)")
+        print()
+        return
+
+    force_backup = bool(getattr(args, "backup", False))
+
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+    except Exception as exc:
+        logging.getLogger(__name__).debug("Could not load config for pre-update backup: %s", exc)
+        cfg = {}
+
+    updates_cfg = cfg.get("updates") if isinstance(cfg, dict) else None
+    if not isinstance(updates_cfg, dict):
+        updates_cfg = {}  # bare ``updates:`` (None) or a scalar must not raise
+    enabled = updates_cfg.get("pre_update_backup", False)
+    keep = updates_cfg.get("backup_keep", 5)
+
+    if not enabled and not force_backup:
+        # Silent by default — backups are off; opt in via --backup or config.
+        return
+
+    try:
+        from hermes_cli.backup import create_pre_update_backup
+    except Exception as exc:
+        print(f"⚠ Pre-update backup: could not load backup module ({exc}); continuing update.")
+        print()
+        return
+
+    print("◆ Creating pre-update backup...")
+    t0 = _time.monotonic()
+    try:
+        out_path = create_pre_update_backup(keep=int(keep))
+    except Exception as exc:  # defensive — helper already swallows, but just in case
+        print(f"  ⚠ Backup failed: {exc}")
+        print("  Continuing with update.")
+        print()
+        return
+
+    elapsed = _time.monotonic() - t0
+
+    if out_path is None:
+        print("  ⚠ Backup skipped (no files found or write failed); continuing update.")
+        print()
+        return
+
+    try:
+        size_bytes = out_path.stat().st_size
+    except OSError:
+        size_bytes = 0
+
+    # Human-readable size
+    size_str = f"{size_bytes} B"
+    for unit in ("KB", "MB", "GB"):
+        if size_bytes < 1024:
+            break
+        size_bytes /= 1024
+        size_str = f"{size_bytes:.1f} {unit}"
+
+    # Render path using display_hermes_home so the user sees ~/.hermes/...
+    try:
+        from hermes_constants import get_hermes_home, display_hermes_home
+        home = get_hermes_home()
+        try:
+            display_path = f"{display_hermes_home()}/{out_path.relative_to(home)}"
+        except ValueError:
+            display_path = str(out_path)
+    except Exception:
+        display_path = str(out_path)
+
+    print(f"  Saved:    {display_path} ({size_str}, {elapsed:.1f}s)")
+    print(f"  Restore:  hermes import {out_path}")
+    print("  Disable:  omit --backup (backups are off by default)")
+    print("            set updates.pre_update_backup: false in config.yaml")
+    print()
+
+
 def cmd_update(args):
    """Update Hermes Agent to the latest version.

@@ -6184,6 +6481,10 @@ def _cmd_update_impl(args, gateway_mode: bool):
    print("⚕ Updating Hermes Agent...")
    print()

+    # Pre-update backup — runs before any git/file mutation so users can
+    # always roll back to the exact state they had before this update.
+    _run_pre_update_backup(args)
+
    # Try git-based update first, fall back to ZIP download on Windows
    # when git file I/O is broken (antivirus, NTFS filter drivers, etc.)
    use_zip_update = False
@@ -6333,6 +6634,22 @@ def _cmd_update_impl(args, gateway_mode: bool):

        print(f"→ Found {commit_count} new commit(s)")

+        # Snapshot critical state (state.db, config, pairing JSONs, etc.)
+        # before pulling so a user can recover if something goes wrong.
+        # Issue #15733 reported missing pairing data after an update; even
+        # though `git pull` can't touch $HERMES_HOME, this is cheap
+        # belt-and-suspenders insurance and gives the user something to
+        # restore from via `/snapshot list` / `/snapshot restore <id>`.
+        try:
+            from hermes_cli.backup import create_quick_snapshot
+
+            snap_id = create_quick_snapshot(label="pre-update")
+            if snap_id:
+                print(f"  ✓ Pre-update snapshot: {snap_id}")
+        except Exception as exc:
+            # Never let a snapshot failure block an update.
+            logger.debug("Pre-update snapshot failed: %s", exc)
+
        print("→ Pulling updates...")
        update_succeeded = False
        try:
@@ -6857,7 +7174,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                    print(
                                        f"  ⚠ {svc_name} died after restart, retrying..."
                                    )
-                                    retry = subprocess.run(
+                                    subprocess.run(
                                        scope_cmd + ["restart", svc_name],
                                        capture_output=True,
                                        text=True,
@@ -6972,6 +7289,10 @@ def _cmd_update_impl(args, gateway_mode: bool):
        except Exception as e:
            logger.debug("Legacy unit check during update failed: %s", e)

+        # Warn about stale dashboard processes — the dashboard has no
+        # service manager, so we can only tell the user to restart them.
+        _warn_stale_dashboard_processes()
+
        print()
        print("Tip: You can now select a provider and model:")
        print("  hermes model              # Select provider and model")
@@ -7620,31 +7941,12 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        choices=[
-            "auto",
-            "openrouter",
-            "nous",
-            "openai-codex",
-            "copilot-acp",
-            "copilot",
-            "anthropic",
-            "gemini",
-            "xai",
-            "ollama-cloud",
-            "huggingface",
-            "zai",
-            "kimi-coding",
-            "kimi-coding-cn",
-            "stepfun",
-            "minimax",
-            "minimax-cn",
-            "kilocode",
-            "xiaomi",
-            "arcee",
-            "nvidia",
-        ],
+        # No `choices=` here: user-defined providers from config.yaml `providers:`
+        # are also valid values, and runtime resolution (resolve_runtime_provider)
+        # handles validation/error reporting consistently with the top-level
+        # `--provider` flag.
        default=None,
-        help="Inference provider (default: auto)",
+        help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
    )
    chat_parser.add_argument(
        "-v", "--verbose", action="store_true", help="Verbose output"
@@ -9484,17 +9786,26 @@ Examples:
        "--preset",
        choices=["user-data", "full"],
        default="full",
-        help="Migration preset (default: full). 'user-data' excludes secrets",
+        help="Migration preset (default: full). Neither preset imports secrets — "
+        "pass --migrate-secrets to include API keys.",
    )
    claw_migrate.add_argument(
        "--overwrite",
        action="store_true",
-        help="Overwrite existing files (default: skip conflicts)",
+        help="Overwrite existing files (default: refuse to apply when the plan has conflicts)",
    )
    claw_migrate.add_argument(
        "--migrate-secrets",
        action="store_true",
-        help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.)",
+        help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.). "
+        "Required even under --preset full.",
+    )
+    claw_migrate.add_argument(
+        "--no-backup",
+        action="store_true",
+        help="Skip the pre-migration zip snapshot of ~/.hermes/ (by default a "
+        "single restore-point archive is written to ~/.hermes/backups/ "
+        "before apply; restorable with 'hermes import').",
    )
    claw_migrate.add_argument(
        "--workspace-target", help="Absolute path to copy workspace instructions into"
@@ -9561,6 +9872,18 @@ Examples:
        default=False,
        help="Check whether an update is available without installing anything",
    )
+    update_parser.add_argument(
+        "--no-backup",
+        action="store_true",
+        default=False,
+        help="Skip the pre-update backup for this run (overrides updates.pre_update_backup)",
+    )
+    update_parser.add_argument(
+        "--backup",
+        action="store_true",
+        default=False,
+        help="Force a pre-update backup for this run (off by default; overrides updates.pre_update_backup)",
+    )
    update_parser.set_defaults(func=cmd_update)

    # =========================================================================
@@ -9897,6 +10220,17 @@ Examples:
            logger.debug(
                "plugin discovery failed at CLI startup", exc_info=True,
            )
+        try:
+            # MCP tool discovery — no event loop running in CLI/TUI startup,
+            # so inline is safe.  Moved here from model_tools.py module scope
+            # to avoid freezing the gateway's event loop on its first message
+            # via the same lazy import path (#16856).
+            from tools.mcp_tool import discover_mcp_tools
+            discover_mcp_tools()
+        except Exception:
+            logger.debug(
+                "MCP tool discovery failed at CLI startup", exc_info=True,
+            )
        try:
            from hermes_cli.config import load_config
            from agent.shell_hooks import register_from_config
@@ -46,7 +46,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import urllib.error
 import urllib.request
@@ -54,6 +53,7 @@ from pathlib import Path
 from typing import Any

 from hermes_cli import __version__ as _HERMES_VERSION
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -190,7 +190,7 @@ def _write_disk_cache(data: dict[str, Any]) -> None:
        with open(tmp, "w") as fh:
            json.dump(data, fh, indent=2)
            fh.write("\n")
-        os.replace(tmp, path)
+        atomic_replace(tmp, path)
    except OSError as exc:
        logger.info("model catalog cache write failed: %s", exc)

@@ -213,10 +213,15 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:


 def _ensure_direct_aliases() -> None:
-    """Lazy-load direct aliases on first use."""
-    global DIRECT_ALIASES
+    """Lazy-load direct aliases on first use.
+
+    Mutates the existing DIRECT_ALIASES dict in place rather than rebinding
+    the module attribute. This keeps `from hermes_cli.model_switch import
+    DIRECT_ALIASES` references valid in callers — rebinding would leave them
+    pointing at a stale empty dict.
+    """
    if not DIRECT_ALIASES:
-        DIRECT_ALIASES = _load_direct_aliases()
+        DIRECT_ALIASES.update(_load_direct_aliases())


 # ---------------------------------------------------------------------------
@@ -979,6 +984,7 @@ def list_authenticated_providers(
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
+    current_model: str = "",
 ) -> List[dict]:
    """Detect which providers have credentials and list their curated models.

@@ -1025,6 +1031,34 @@ def list_authenticated_providers(
    if "ollama-cloud" not in curated:
        from hermes_cli.models import fetch_ollama_cloud_models
        curated["ollama-cloud"] = fetch_ollama_cloud_models()
+    # LM Studio has no static catalog — probe its native /api/v1/models
+    # endpoint live so the picker reflects whatever the user has loaded.
+    # Base URL precedence: LM_BASE_URL env var > active config's base_url
+    # (when current provider is lmstudio) > 127.0.0.1 default.
+    # On auth rejection or unreachable server, fall back to the caller-supplied
+    # current model so the picker still shows something when offline / mis-keyed.
+    if "lmstudio" not in curated and (
+        os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL") or current_provider.strip().lower() == "lmstudio"
+    ):
+        from hermes_cli.models import fetch_lmstudio_models
+        from hermes_cli.auth import AuthError
+        is_current_lmstudio = current_provider.strip().lower() == "lmstudio"
+        lm_base = (
+            os.environ.get("LM_BASE_URL")
+            or (current_base_url if is_current_lmstudio and current_base_url else None)
+            or "http://127.0.0.1:1234/v1"
+        )
+        try:
+            live = fetch_lmstudio_models(
+                api_key=os.environ.get("LM_API_KEY", ""),
+                base_url=lm_base,
+                timeout=1.5, # Smaller timeout for picker
+            )
+        except AuthError:
+            live = []
+        if not live and is_current_lmstudio and current_model:
+            live = [current_model]
+        curated["lmstudio"] = live

    # --- 1. Check Hermes-mapped providers ---
    for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
@@ -1175,6 +1209,15 @@ def list_authenticated_providers(

        if hermes_slug in {"copilot", "copilot-acp"}:
            model_ids = provider_model_ids(hermes_slug)
+        # For aws_sdk providers (bedrock), use live discovery so the list
+        # reflects the active region (eu.*, ap.*) not the static us.* list.
+        elif overlay.auth_type == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import bedrock_model_ids_or_none
+                _ids = bedrock_model_ids_or_none()
+                model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
+            except Exception:
+                model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
        else:
            # Use curated list — look up by Hermes slug, fall back to overlay key
            model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
@@ -1237,10 +1280,30 @@ def list_authenticated_providers(
            except Exception:
                pass

+        # Special case: aws_sdk auth (bedrock) — no API key env vars,
+        # credentials come from the boto3 credential chain (env vars,
+        # ~/.aws/credentials, instance roles, etc.)
+        if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import has_aws_credentials
+                _cp_has_creds = has_aws_credentials()
+            except Exception:
+                pass
+
        if not _cp_has_creds:
            continue

-        _cp_model_ids = curated.get(_cp.slug, [])
+        # For bedrock, use live discovery so the list reflects the active
+        # region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
+        if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import bedrock_model_ids_or_none
+                _ids = bedrock_model_ids_or_none()
+                _cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
+            except Exception:
+                _cp_model_ids = curated.get(_cp.slug, [])
+        else:
+            _cp_model_ids = curated.get(_cp.slug, [])
        _cp_total = len(_cp_model_ids)
        _cp_top = _cp_model_ids[:max_models]

@@ -1312,8 +1375,23 @@ def list_authenticated_providers(
                    if fb:
                        models_list = list(fb)

-            # Try to probe /v1/models if URL is set (but don't block on it)
-            # For now just show what we know from config
+            # Prefer the endpoint's live /models list when credentials are
+            # available. This keeps OpenAI-compatible relays (for example CRS)
+            # in sync when the server catalog changes without requiring the
+            # user to mirror every model into config.yaml.
+            api_key = str(ep_cfg.get("api_key", "") or "").strip()
+            if not api_key:
+                key_env = str(ep_cfg.get("key_env", "") or "").strip()
+                api_key = os.environ.get(key_env, "").strip() if key_env else ""
+            if api_url and api_key:
+                try:
+                    from hermes_cli.models import fetch_api_models
+                    live_models = fetch_api_models(api_key, api_url)
+                    if live_models:
+                        models_list = live_models
+                except Exception:
+                    pass
+
            results.append({
                "slug": ep_name,
                "name": display_name,
@@ -44,6 +44,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openai/gpt-5.4-mini",             ""),
    ("xiaomi/mimo-v2.5-pro",             ""),
    ("xiaomi/mimo-v2.5",                 ""),
+    ("tencent/hy3-preview:free",         "free"),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -106,11 +107,57 @@ def _codex_curated_models() -> list[str]:
    return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS))


+# Static fallback for xAI when the models.dev disk cache is empty (fresh
+# install, offline first run, etc.). Mirrors the xAI-direct model IDs from
+# $HERMES_HOME/models_dev_cache.json as of 2026-04-28. Whenever xAI renames
+# or retires a model, the disk cache picks it up on the next refresh and the
+# fallback here only matters until that refresh lands.
+_XAI_STATIC_FALLBACK: list[str] = [
+    "grok-4.20-0309-reasoning",
+    "grok-4.20-0309-non-reasoning",
+    "grok-4.20-multi-agent-0309",
+    "grok-4-1-fast",
+    "grok-4-1-fast-non-reasoning",
+    "grok-4-fast",
+    "grok-4-fast-non-reasoning",
+    "grok-4",
+    "grok-code-fast-1",
+]
+
+
+def _xai_curated_models() -> list[str]:
+    """Build the curated xAI-direct model list from the models.dev disk cache.
+
+    The cache is read through ``agent.models_dev._load_disk_cache`` (the
+    on-disk copy under $HERMES_HOME, no network), which keeps this cheap
+    enough to call while the module is being imported.
+
+    Returns:
+        The string model IDs found under ``xai -> models`` in the cache,
+        sorted alphabetically. When the cache is missing, empty, malformed,
+        or cannot be loaded at all, a fresh copy of ``_XAI_STATIC_FALLBACK``
+        is returned instead.
+
+    Plays the same role for xAI that ``_codex_curated_models()`` plays for
+    openai-codex.
+    """
+    try:
+        from agent.models_dev import _load_disk_cache
+
+        cache = _load_disk_cache()
+        provider = cache.get("xai") if isinstance(cache, dict) else None
+        catalog = provider.get("models") if isinstance(provider, dict) else None
+        if isinstance(catalog, dict):
+            # Non-string keys are ignored; an empty result falls through
+            # to the static list below.
+            cached_ids = sorted(key for key in catalog if isinstance(key, str))
+            if cached_ids:
+                return cached_ids
+    except Exception:
+        # Best effort only: a missing file, malformed JSON, or an import
+        # error must never break module import.
+        pass
+    return list(_XAI_STATIC_FALLBACK)
+
+
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
        "xiaomi/mimo-v2.5-pro",
        "xiaomi/mimo-v2.5",
+        "tencent/hy3-preview",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
@@ -193,10 +240,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "glm-4.5",
        "glm-4.5-flash",
    ],
-    "xai": [
-        "grok-4.20-reasoning",
-        "grok-4-1-fast-reasoning",
-    ],
+    "xai": _xai_curated_models(),
    "nvidia": [
        # NVIDIA flagship reasoning models
        "nvidia/nemotron-3-super-120b-a12b",
@@ -273,11 +317,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "mimo-v2-flash",
    ],
+    "tencent-tokenhub": [
+        "hy3-preview",
+    ],
    "arcee": [
        "trinity-large-thinking",
        "trinity-large-preview",
        "trinity-mini",
    ],
+    "gmi": [
+        "zai-org/GLM-5.1-FP8",
+        "deepseek-ai/DeepSeek-V3.2",
+        "moonshotai/Kimi-K2.5",
+        "google/gemini-3.1-flash-lite-preview",
+        "anthropic/claude-sonnet-4.6",
+        "openai/gpt-5.4",
+    ],
    "opencode-zen": [
        "kimi-k2.5",
        "gpt-5.4-pro",
@@ -342,6 +397,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
    # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
    "alibaba": [
+        "qwen3.6-plus",
        "kimi-k2.5",
        "qwen3.5-plus",
        "qwen3-coder-plus",
@@ -709,14 +765,15 @@ class ProviderEntry(NamedTuple):
    label: str
    tui_desc: str   # detailed description for `hermes model` TUI

-
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
+    ProviderEntry("lmstudio",       "LM Studio",                "LM Studio (local desktop app with built-in model server)"),
    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
+    ProviderEntry("tencent-tokenhub", "Tencent TokenHub",       "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
    ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
@@ -735,6 +792,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
    ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
+    ProviderEntry("gmi",            "GMI Cloud",                "GMI Cloud (multi-model direct API)"),
    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
@@ -769,6 +827,8 @@ _PROVIDER_ALIASES = {
    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
+    "gmi-cloud": "gmi",
+    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -796,6 +856,10 @@ _PROVIDER_ALIASES = {
    "huggingface-hub": "huggingface",
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
+    "tencent": "tencent-tokenhub",
+    "tokenhub": "tencent-tokenhub",
+    "tencent-cloud": "tencent-tokenhub",
+    "tencentmaas": "tencent-tokenhub",
    "aws": "bedrock",
    "aws-bedrock": "bedrock",
    "amazon-bedrock": "bedrock",
@@ -807,6 +871,9 @@ _PROVIDER_ALIASES = {
    "nvidia-nim": "nvidia",
    "build-nvidia": "nvidia",
    "nemotron": "nvidia",
+    "lmstudio": "lmstudio",
+    "lm-studio": "lmstudio",
+    "lm_studio": "lmstudio",
    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "ollama_cloud": "ollama-cloud",
 }
@@ -1613,31 +1680,41 @@ def provider_label(provider: Optional[str]) -> str:

 # Models that support OpenAI Priority Processing (service_tier="priority").
 # See https://openai.com/api-priority-processing/ for the canonical list.
-# Only the bare model slug is stored (no vendor prefix).
-_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
-    "gpt-5.4",
-    "gpt-5.4-mini",
-    "gpt-5.2",
-    "gpt-5.1",
-    "gpt-5",
-    "gpt-5-mini",
-    "gpt-4.1",
-    "gpt-4.1-mini",
-    "gpt-4.1-nano",
-    "gpt-4o",
-    "gpt-4o-mini",
+#
+# Pattern-based matching — any OpenAI flagship model (gpt-*, o1*, o3*, o4*)
+# is assumed to support Priority Processing. service_tier=priority is silently
+# ignored by non-OpenAI endpoints (OpenRouter/Copilot/opencode-zen proxies
+# strip the field), so false positives are harmless. Codex-series models
+# (gpt-5-codex, gpt-5.3-codex, etc.) are excluded — they don't expose the
+# service_tier parameter through the Codex Responses API.
+_OPENAI_FAST_MODE_PREFIXES: tuple[str, ...] = (
+    "gpt-",
+    "o1",
    "o3",
-    "o4-mini",
-})
+    "o4",
+)
+
+
+def _is_openai_fast_model(model_id: Optional[str]) -> bool:
+    """Tell whether ``model_id`` names an OpenAI model eligible for Priority Processing.
+
+    The vendor prefix is removed with ``_strip_vendor_prefix`` and anything
+    after the first ``:`` (variant tags) is dropped before matching against
+    ``_OPENAI_FAST_MODE_PREFIXES``. ``None`` and empty IDs are not eligible.
+    """
+    bare = _strip_vendor_prefix(str(model_id or "")).split(":")[0]
+    # Codex-series models go through the Codex Responses API, which does
+    # not accept service_tier, so they are never eligible.
+    if not bare or "codex" in bare:
+        return False
+    # str.startswith accepts a tuple and matches if any prefix matches.
+    return bare.startswith(_OPENAI_FAST_MODE_PREFIXES)
+

 # Models that support Anthropic Fast Mode (speed="fast").
 # See https://platform.claude.com/docs/en/build-with-claude/fast-mode
-# Currently only Claude Opus 4.6.  Both hyphen and dot variants are stored
-# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
-_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
-    "claude-opus-4-6",
-    "claude-opus-4.6",
-})
+#
+# Pattern-based matching — any claude-* model is eligible. The anthropic
+# adapter gates speed=fast on native Anthropic endpoints only (see
+# _is_third_party_anthropic_endpoint in agent/anthropic_adapter.py), so
+# third-party proxies that would reject the beta header are protected.


 def _strip_vendor_prefix(model_id: str) -> str:
@@ -1650,20 +1727,14 @@ def _strip_vendor_prefix(model_id: str) -> str:

 def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model."""
-    raw = _strip_vendor_prefix(str(model_id or ""))
-    if raw in _PRIORITY_PROCESSING_MODELS:
-        return True
-    # Anthropic fast mode — strip date suffixes (e.g. claude-opus-4-6-20260401)
-    # and OpenRouter variant tags (:fast, :beta) for matching.
-    base = raw.split(":")[0]
-    return base in _ANTHROPIC_FAST_MODE_MODELS
+    return _is_anthropic_fast_model(model_id) or _is_openai_fast_model(model_id)


 def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
-    """Return True if the model supports Anthropic's fast mode (speed='fast')."""
+    """Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
    raw = _strip_vendor_prefix(str(model_id or ""))
    base = raw.split(":")[0]
-    return base in _ANTHROPIC_FAST_MODE_MODELS
+    return base.startswith("claude-")


 def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
@@ -1685,14 +1756,61 @@ def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | Non


 def _resolve_copilot_catalog_api_key() -> str:
-    """Best-effort GitHub token for fetching the Copilot model catalog."""
+    """Best-effort GitHub token for fetching the Copilot model catalog.
+
+    Resolution order:
+      1. ``resolve_api_key_provider_credentials("copilot")`` — env vars
+         (``COPILOT_GITHUB_TOKEN`` / ``GH_TOKEN`` / ``GITHUB_TOKEN``) plus
+         the ``gh auth token`` CLI fallback.
+      2. ``read_credential_pool("copilot")`` — a token (typically a
+         ``gho_*`` from device-code login, or a fine-grained PAT) stored in
+         ``auth.json`` under ``credential_pool.copilot[]``. The pool is
+         populated by ``hermes auth add copilot`` and by ``_seed_from_env``
+         when the env var is set in ``~/.hermes/.env``.
+
+    Without (2), users whose only Copilot credential is in the pool see
+    the ``/model`` picker fall back to a stale hardcoded list because the
+    live catalog fetch silently 401s. To avoid wedging on a malformed pool
+    entry, each candidate is exchanged via ``exchange_copilot_token`` —
+    only entries that actually exchange successfully are returned, so a
+    later valid entry is reachable when an earlier one is unsupported.
+    """
    try:
        from hermes_cli.auth import resolve_api_key_provider_credentials

        creds = resolve_api_key_provider_credentials("copilot")
-        return str(creds.get("api_key") or "").strip()
+        api_key = str(creds.get("api_key") or "").strip()
+        if api_key:
+            return api_key
    except Exception:
-        return ""
+        pass
+
+    try:
+        from hermes_cli.auth import read_credential_pool
+        from hermes_cli.copilot_auth import (
+            exchange_copilot_token,
+            validate_copilot_token,
+        )
+
+        for entry in read_credential_pool("copilot"):
+            if not isinstance(entry, dict):
+                continue
+            raw = str(entry.get("access_token") or "").strip()
+            if not raw:
+                continue
+            valid, _ = validate_copilot_token(raw)
+            if not valid:
+                continue
+            try:
+                api_token, _expires_at = exchange_copilot_token(raw)
+            except Exception:
+                continue
+            if api_token:
+                return api_token
+    except Exception:
+        pass
+
+    return ""


 # Providers where models.dev is treated as authoritative: curated static
@@ -1849,6 +1967,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                    return live
            except Exception:
                pass
+    if normalized == "gmi":
+        try:
+            from hermes_cli.auth import resolve_api_key_provider_credentials
+
+            creds = resolve_api_key_provider_credentials("gmi")
+            api_key = str(creds.get("api_key") or "").strip()
+            base_url = str(creds.get("base_url") or "").strip()
+            if api_key and base_url:
+                live = fetch_api_models(api_key, base_url)
+                if live:
+                    return live
+        except Exception:
+            pass
    if normalized == "custom":
        base_url = _get_custom_base_url()
        if base_url:
@@ -1861,6 +1992,18 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
+    # Bedrock uses live discovery keyed by the resolved AWS region so that
+    # EU/AP users see eu.*/ap.* model IDs instead of the static us.* list.
+    # Note: early return intentionally skips _MODELS_DEV_PREFERRED merge
+    # below — bedrock is not expected to appear in that table.
+    if normalized == "bedrock":
+        try:
+            from agent.bedrock_adapter import bedrock_model_ids_or_none
+            ids = bedrock_model_ids_or_none()
+            if ids is not None:
+                return ids
+        except Exception:
+            pass
    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
    if normalized in _MODELS_DEV_PREFERRED:
        return _merge_with_models_dev(normalized, curated_static)
@@ -2056,6 +2199,228 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
    )


+def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
+    """Derive LM Studio's native API root from an OpenAI-style base URL.
+
+    Surrounding whitespace and trailing slashes are removed, then a single
+    trailing ``/v1`` segment (plus any slashes left in front of it) is cut off.
+
+    Returns:
+        The cleaned root URL, or ``None`` when nothing is left (``None``,
+        empty, or a base URL that consisted only of ``/v1``).
+    """
+    suffix = "/v1"
+    trimmed = (base_url or "").strip().rstrip("/")
+    if trimmed.endswith(suffix):
+        trimmed = trimmed[: -len(suffix)].rstrip("/")
+    if not trimmed:
+        return None
+    return trimmed
+
+
+def _lmstudio_request_headers(api_key: Optional[str] = None) -> dict:
+    """Assemble the HTTP headers sent with LM Studio native API requests.
+
+    ``User-Agent`` is always present. An ``Authorization: Bearer ...`` header
+    is added only when ``api_key`` is non-empty after stripping whitespace.
+    """
+    bearer = str(api_key or "").strip()
+    if not bearer:
+        return {"User-Agent": _HERMES_USER_AGENT}
+    return {
+        "User-Agent": _HERMES_USER_AGENT,
+        "Authorization": f"Bearer {bearer}",
+    }
+
+
+def _lmstudio_fetch_raw_models(
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    timeout: float = 5.0,
+) -> Optional[list[dict]]:
+    """GET LM Studio's native ``/api/v1/models`` and return its raw ``models`` list.
+
+    Args:
+        api_key: Optional bearer token; sent only when non-empty.
+        base_url: LM Studio base URL; a trailing ``/v1`` is stripped to find
+            the native API root.
+        timeout: Timeout, in seconds, passed to ``urlopen``.
+
+    Returns:
+        The value of the payload's ``models`` key when it is a list. ``None``
+        when the base URL is empty, the request fails (network error, non-auth
+        HTTP error, undecodable body), or the payload has no ``models`` list.
+
+    Raises:
+        AuthError: when the server answers HTTP 401 or 403.
+    """
+    import logging
+
+    root = _lmstudio_server_root(base_url)
+    if not root:
+        return None
+
+    log = logging.getLogger(__name__)
+    probe = urllib.request.Request(
+        root + "/api/v1/models",
+        headers=_lmstudio_request_headers(api_key),
+    )
+    try:
+        with urllib.request.urlopen(probe, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except urllib.error.HTTPError as exc:
+        if exc.code not in (401, 403):
+            log.debug("LM Studio probe at %s failed with HTTP %s", root, exc.code)
+            return None
+        # Auth rejections are raised rather than swallowed so callers can
+        # tell a bad token apart from an unreachable server.
+        from hermes_cli.auth import AuthError
+        raise AuthError(
+            f"LM Studio rejected the request with HTTP {exc.code}.",
+            provider="lmstudio",
+            code="auth_rejected",
+        ) from exc
+    except Exception as exc:
+        log.debug("LM Studio probe at %s failed: %s", root, exc)
+        return None
+
+    listing = payload.get("models") if isinstance(payload, dict) else None
+    if isinstance(listing, list):
+        return listing
+    log.debug(
+        "LM Studio probe at %s returned malformed payload (no `models` list)",
+        root,
+    )
+    return None
+
+
+def probe_lmstudio_models(
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    timeout: float = 5.0,
+) -> Optional[list[str]]:
+    """List the non-embedding model keys LM Studio reports.
+
+    Returns:
+        De-duplicated model keys in the order the server listed them. Each key
+        is the entry's ``key`` field, falling back to ``id``. An empty list is
+        a valid result: the server answered but listed no non-embedding models.
+        ``None`` means the listing could not be obtained (network error,
+        malformed response, or empty/invalid base URL).
+
+    Raises:
+        AuthError: on HTTP 401/403, so callers can report token problems
+            separately from reachability problems.
+    """
+    entries = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
+    if entries is None:
+        return None
+
+    # A dict keeps insertion order, so it de-duplicates without reordering.
+    unique = {}
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        kind = str(entry.get("type") or "").strip().lower()
+        if kind == "embedding":
+            continue
+        name = str(entry.get("key") or entry.get("id") or "").strip()
+        if name:
+            unique.setdefault(name, None)
+    return list(unique)
+
+
+def fetch_lmstudio_models(
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    timeout: float = 5.0,
+) -> list[str]:
+    """Return LM Studio's non-embedding model keys, never ``None``.
+
+    Thin wrapper over ``probe_lmstudio_models`` that collapses the
+    "could not obtain a listing" result (``None``) into an empty list. Use
+    ``probe_lmstudio_models`` directly when the two cases must be told apart.
+
+    Raises:
+        AuthError: on HTTP 401/403, so a missing or wrong ``LM_API_KEY`` is
+            not mistaken for an unreachable server.
+    """
+    found = probe_lmstudio_models(api_key=api_key, base_url=base_url, timeout=timeout)
+    if not found:
+        return []
+    return found
+
+
+def ensure_lmstudio_model_loaded(
+    model: str,
+    base_url: Optional[str],
+    api_key: Optional[str],
+    target_context_length: int,
+    timeout: float = 120.0,
+) -> Optional[int]:
+    """Make sure LM Studio has ``model`` loaded with enough context.
+
+    Steps:
+      1. Probe ``/api/v1/models`` and find the entry whose ``key`` or ``id``
+         equals ``model``.
+      2. Cap the requested context at the entry's ``max_context_length``
+         when that is a positive integer.
+      3. If any loaded instance already has a ``config.context_length`` of
+         at least the (capped) target, return that instance's context length
+         without loading anything.
+      4. Otherwise POST ``/api/v1/models/load`` with the capped target.
+
+    Args:
+        model: Model key/id to match against the server listing.
+        base_url: LM Studio base URL (a trailing ``/v1`` is stripped).
+        api_key: Optional bearer token.
+        target_context_length: Desired minimum context length.
+        timeout: Timeout in seconds for the load request only; the initial
+            probe uses a fixed 10-second timeout.
+
+    Returns:
+        The context length of an already-sufficient instance, or the capped
+        target after a successful load request. ``None`` when the base URL
+        is empty, the probe fails, the model is not listed, or the load
+        request fails.
+    """
+    root = _lmstudio_server_root(base_url)
+    if not root:
+        return None
+
+    base_headers = _lmstudio_request_headers(api_key)
+
+    try:
+        listing = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=10)
+    except Exception:
+        # Includes auth rejections: any probe failure means "can't ensure".
+        return None
+    if listing is None:
+        return None
+
+    entry = next(
+        (
+            item
+            for item in listing
+            if isinstance(item, dict)
+            and (item.get("key") == model or item.get("id") == model)
+        ),
+        None,
+    )
+    if entry is None:
+        return None
+
+    wanted_ctx = target_context_length
+    ceiling = entry.get("max_context_length")
+    if isinstance(ceiling, int) and ceiling > 0:
+        wanted_ctx = min(wanted_ctx, ceiling)
+
+    # Reuse an instance that is already loaded with sufficient context.
+    for instance in entry.get("loaded_instances") or []:
+        if not isinstance(instance, dict):
+            continue
+        config = instance.get("config")
+        if not isinstance(config, dict):
+            continue
+        current_ctx = config.get("context_length")
+        if isinstance(current_ctx, int) and current_ctx >= wanted_ctx:
+            return current_ctx
+
+    payload = json.dumps({
+        "model": model,
+        "context_length": wanted_ctx,
+    }).encode()
+    post_headers = {**base_headers, "Content-Type": "application/json"}
+    try:
+        load_request = urllib.request.Request(
+            root + "/api/v1/models/load",
+            data=payload,
+            headers=post_headers,
+            method="POST",
+        )
+        with urllib.request.urlopen(load_request, timeout=timeout) as resp:
+            # Response body is drained but not inspected.
+            resp.read()
+    except Exception:
+        return None
+    return wanted_ctx
+
+
+def lmstudio_model_reasoning_options(
+    model: str,
+    base_url: Optional[str],
+    api_key: Optional[str] = None,
+    timeout: float = 5.0,
+) -> list[str]:
+    """Look up the reasoning options LM Studio advertises for ``model``.
+
+    Reads ``capabilities.reasoning.allowed_options`` from the first
+    ``/api/v1/models`` entry whose ``key`` or ``id`` equals ``model``.
+
+    Returns:
+        The string options, stripped and lower-cased (non-string items are
+        dropped). ``[]`` when the probe fails for any reason, the listing is
+        empty, the model is not listed, or the matched entry has no
+        ``allowed_options`` list.
+    """
+    try:
+        listing = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
+    except Exception:
+        return []
+    if not listing:
+        return []
+
+    for item in listing:
+        if not isinstance(item, dict):
+            continue
+        is_match = item.get("key") == model or item.get("id") == model
+        if not is_match:
+            continue
+        # Only the first matching entry is consulted; every path below returns.
+        capabilities = item.get("capabilities")
+        if not isinstance(capabilities, dict):
+            return []
+        reasoning = capabilities.get("reasoning")
+        if not isinstance(reasoning, dict):
+            return []
+        allowed = reasoning.get("allowed_options")
+        if not isinstance(allowed, list):
+            return []
+        return [str(opt).strip().lower() for opt in allowed if isinstance(opt, str)]
+    return []
+
+
 def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]:
    catalog = fetch_github_model_catalog(api_key=api_key, timeout=timeout)
    if not catalog:
@@ -2651,6 +3016,40 @@ def validate_requested_model(
            "message": "Model names cannot contain spaces.",
        }

+    if normalized == "lmstudio":
+        from hermes_cli.auth import AuthError
+        # Use probe_lmstudio_models so we can distinguish None (unreachable
+        # / malformed response) from [] (reachable, but no chat-capable models
+        # are loaded). fetch_lmstudio_models collapses both to [].
+        try:
+            models = probe_lmstudio_models(api_key=api_key, base_url=base_url)
+        except AuthError as exc:
+            return {
+                "accepted": False, "persist": False, "recognized": False,
+                "message": (
+                    f"{exc} Set `LM_API_KEY` (or update it) to match the server's bearer token."
+                ),
+            }
+        if models is None:
+            return {
+                "accepted": False, "persist": False, "recognized": False,
+                "message": f"Could not reach LM Studio's `/api/v1/models` to validate `{requested}`.",
+            }
+        if not models:
+            return {
+                "accepted": False, "persist": False, "recognized": False,
+                "message": (
+                    f"LM Studio is reachable but no chat-capable models are loaded. "
+                    f"Load `{requested}` in LM Studio (Developer tab → Load Model) and try again."
+                ),
+            }
+        if requested_for_lookup in set(models):
+            return {"accepted": True, "persist": True, "recognized": True, "message": None}
+        return {
+            "accepted": False, "persist": False, "recognized": False,
+            "message": f"Model `{requested}` was not found in LM Studio's model listing.",
+        }
+
    if normalized == "custom":
        # Try probing with correct auth for the api_mode.
        if api_mode == "anthropic_messages":
@@ -128,27 +128,44 @@ def _run_agent(
    # the user's configured default provider, which may not host the model
    # the caller just asked for.
    effective_provider = (provider or "").strip() or None
+    explicit_base_url_from_alias: Optional[str] = None
    if effective_provider is None and (model or env_model):
        # Only auto-detect when the model was explicitly requested via arg or
        # env var (not when it came from config — that's the "use my defaults"
        # path and the configured provider is already correct).
        explicit_model = (model or "").strip() or env_model
        if explicit_model:
-            cfg_provider = ""
-            if isinstance(model_cfg, dict):
-                cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-            current_provider = (
-                cfg_provider
-                or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
-                or "auto"
-            )
-            detected = detect_provider_for_model(explicit_model, current_provider)
-            if detected:
-                effective_provider, effective_model = detected
+            # First check DIRECT_ALIASES populated from config.yaml `model_aliases:`.
+            # These map a user-defined alias to (model, provider, base_url) for
+            # endpoints not in any catalog (local servers, custom proxies, etc.).
+            try:
+                from hermes_cli import model_switch as _ms
+                _ms._ensure_direct_aliases()
+                direct = _ms.DIRECT_ALIASES.get(explicit_model.strip().lower())
+            except Exception:
+                direct = None
+            if direct is not None:
+                effective_model = direct.model
+                effective_provider = direct.provider
+                if direct.base_url:
+                    explicit_base_url_from_alias = direct.base_url.rstrip("/")
+            else:
+                cfg_provider = ""
+                if isinstance(model_cfg, dict):
+                    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+                current_provider = (
+                    cfg_provider
+                    or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+                    or "auto"
+                )
+                detected = detect_provider_for_model(explicit_model, current_provider)
+                if detected:
+                    effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
+        explicit_base_url=explicit_base_url_from_alias,
    )

    # Pull in whatever toolsets the user has enabled for "cli".
@@ -79,6 +79,20 @@ VALID_HOOKS: Set[str] = {
    #   {"action": "allow"}  /  None             -> normal dispatch
    # Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store.
    "pre_gateway_dispatch",
+    # Approval lifecycle hooks. Fired by tools/approval.py when a dangerous
+    # command needs user approval -- fires BOTH for CLI-interactive prompts
+    # and for gateway/ACP approvals (Telegram, Discord, Slack, TUI, etc.).
+    # Observers only: return values are ignored. Plugins cannot veto or
+    # pre-answer an approval from these hooks (use pre_tool_call to block
+    # a tool before it reaches approval).
+    #
+    # Kwargs for pre_approval_request:
+    #   command: str, description: str, pattern_key: str, pattern_keys: list[str],
+    #   session_key: str, surface: "cli" | "gateway"
+    # Kwargs for post_approval_response: same as above plus
+    #   choice: "once" | "session" | "always" | "deny" | "timeout"
+    "pre_approval_request",
+    "post_approval_response",
 }

 ENTRY_POINTS_GROUP = "hermes_agent.plugins"
@@ -999,7 +999,6 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
            # We need to map logical cursor positions to screen rows
            # accounting for non-navigable separator/headers

-            draw_row = 0  # tracks navigable item index

            # --- General Plugins section ---
            if n_plugins > 0:
@@ -954,6 +954,59 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
 # Rename
 # ---------------------------------------------------------------------------

+def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) -> None:
+    """Rename Honcho host blocks for a renamed profile without changing peers.
+
+    Checks, in order, ``honcho.json`` inside ``new_dir``, ``honcho.json`` in
+    the default Hermes home, and ``~/.honcho/config.json``. In every file
+    whose top-level ``hosts`` mapping contains ``hermes.<old_name>``, that
+    block is moved to ``hermes.<new_name>`` and the file is rewritten through
+    a sibling ``.tmp`` file that then replaces the original.
+
+    If the moved block is a dict with no ``aiPeer`` key, ``aiPeer`` is set to
+    ``old_name`` first, so the rename does not implicitly change the peer.
+
+    This is best-effort and is not expected to abort the rename: files that
+    cannot be read, decoded, or parsed, or whose JSON is not shaped as
+    expected, are skipped. A pre-existing ``hermes.<new_name>`` block or a
+    failed write is reported with a warning and the file is left untouched.
+
+    Args:
+        old_name: Profile name before the rename.
+        new_name: Profile name after the rename.
+        new_dir: The profile directory at its new location.
+    """
+    old_host = f"hermes.{old_name}"
+    new_host = f"hermes.{new_name}"
+
+    candidates = [
+        new_dir / "honcho.json",
+        _get_default_hermes_home() / "honcho.json",
+        Path.home() / ".honcho" / "config.json",
+    ]
+
+    # Several candidates can resolve to the same file; process each once.
+    seen: set[Path] = set()
+    for path in candidates:
+        try:
+            resolved = path.resolve()
+        except OSError:
+            resolved = path
+        if resolved in seen or not path.is_file():
+            continue
+        seen.add(resolved)
+
+        try:
+            raw = json.loads(path.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            # ValueError covers both json.JSONDecodeError and the
+            # UnicodeDecodeError raised for a file that is not valid UTF-8.
+            continue
+
+        # Valid JSON need not be an object (e.g. a bare list); skip those
+        # instead of failing on a missing ``.get``.
+        hosts = raw.get("hosts") if isinstance(raw, dict) else None
+        if not isinstance(hosts, dict) or old_host not in hosts:
+            continue
+
+        if new_host in hosts:
+            print(f"⚠ Honcho host block not migrated: {new_host} already exists in {path}")
+            continue
+
+        block = hosts[old_host]
+        if isinstance(block, dict) and "aiPeer" not in block:
+            # Pin the peer to the old profile name so the identity survives
+            # the host-key rename.
+            block["aiPeer"] = old_name
+        hosts[new_host] = hosts.pop(old_host)
+        tmp = path.with_suffix(path.suffix + ".tmp")
+        try:
+            tmp.write_text(json.dumps(raw, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            tmp.replace(path)
+        except OSError as exc:
+            try:
+                tmp.unlink(missing_ok=True)
+            except OSError:
+                pass
+            print(f"⚠ Honcho host block not migrated: could not update {path} ({exc})")
+            continue
+
+        print(f"✓ Honcho host updated: {old_host} → {new_host}")
+
+
 def rename_profile(old_name: str, new_name: str) -> Path:
    """Rename a profile: directory, wrapper script, service, active_profile.

@@ -984,7 +1037,10 @@ def rename_profile(old_name: str, new_name: str) -> Path:
    old_dir.rename(new_dir)
    print(f"✓ Renamed {old_dir.name} → {new_dir.name}")

-    # 3. Update wrapper script
+    # 3. Update profile-scoped Honcho host blocks, preserving aiPeer identity
+    _migrate_honcho_profile_host(old_name, new_name, new_dir)
+
+    # 4. Update wrapper script
    remove_wrapper_script(old_name)
    collision = check_alias_collision(new_name)
    if not collision:
@@ -993,7 +1049,7 @@ def rename_profile(old_name: str, new_name: str) -> Path:
    else:
        print(f"⚠ Cannot create alias '{new_name}' — {collision}")

-    # 4. Update active_profile if it pointed to old name
+    # 5. Update active_profile if it pointed to old name
    try:
        if get_active_profile() == old_name:
            set_active_profile(new_name)
@@ -71,6 +71,13 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        auth_type="oauth_external",
        base_url_override="cloudcode-pa://google",
    ),
+    "lmstudio": HermesOverlay(
+        transport="openai_chat",
+        auth_type="api_key",
+        extra_env_vars=("LM_API_KEY",),
+        base_url_override="http://127.0.0.1:1234/v1",
+        base_url_env_var="LM_BASE_URL",
+    ),
    "copilot-acp": HermesOverlay(
        transport="codex_responses",
        auth_type="external_process",
@@ -158,11 +165,21 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="XIAOMI_BASE_URL",
    ),
+    "tencent-tokenhub": HermesOverlay(
+        transport="openai_chat",
+        base_url_env_var="TOKENHUB_BASE_URL",
+    ),
    "arcee": HermesOverlay(
        transport="openai_chat",
        base_url_override="https://api.arcee.ai/api/v1",
        base_url_env_var="ARCEE_BASE_URL",
    ),
+    "gmi": HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("GMI_API_KEY",),
+        base_url_override="https://api.gmi-serving.com/v1",
+        base_url_env_var="GMI_BASE_URL",
+    ),
    "ollama-cloud": HermesOverlay(
        transport="openai_chat",
        base_url_env_var="OLLAMA_BASE_URL",
@@ -173,6 +190,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",  # default; overridden by api_mode in config
        base_url_env_var="AZURE_FOUNDRY_BASE_URL",
    ),
+    "bedrock": HermesOverlay(
+        transport="bedrock_converse",
+        auth_type="aws_sdk",
+    ),
 }


@@ -287,6 +308,12 @@ ALIASES: Dict[str, str] = {
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",

+    # tencent
+    "tencent": "tencent-tokenhub",
+    "tokenhub": "tencent-tokenhub",
+    "tencent-cloud": "tencent-tokenhub",
+    "tencentmaas": "tencent-tokenhub",
+
    # bedrock
    "aws": "bedrock",
    "aws-bedrock": "bedrock",
@@ -297,6 +324,10 @@ ALIASES: Dict[str, str] = {
    "arcee-ai": "arcee",
    "arceeai": "arcee",

+    # gmi
+    "gmi-cloud": "gmi",
+    "gmicloud": "gmi",
+
    # Local server aliases → virtual "local" concept (resolved via user config)
    "lmstudio": "lmstudio",
    "lm-studio": "lmstudio",
@@ -319,6 +350,9 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "copilot-acp": "GitHub Copilot ACP",
    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
+    "gmi": "GMI Cloud",
+    "tencent-tokenhub": "Tencent TokenHub",
+    "lmstudio": "LM Studio",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -260,11 +260,16 @@ def _resolve_runtime_from_pool_entry(
            if cfg_base_url:
                base_url = cfg_base_url
        configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-        if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
-            api_mode = configured_mode
-        elif provider in ("opencode-zen", "opencode-go"):
+        if provider in ("opencode-zen", "opencode-go"):
+            # Re-derive api_mode from the effective model rather than the
+            # persisted api_mode: the opencode providers serve both
+            # anthropic_messages and chat_completions models, so the previous
+            # session's mode must not leak across /model switches.
+            # Refs #16878.
            from hermes_cli.models import opencode_model_api_mode
            api_mode = opencode_model_api_mode(provider, effective_model)
+        elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
+            api_mode = configured_mode
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
@@ -464,6 +469,30 @@ def _resolve_named_custom_runtime(
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
 ) -> Optional[Dict[str, Any]]:
+    # Bare `provider="custom"` with an explicit base_url (e.g. propagated
+    # from a `model_aliases:` direct-alias resolution) — build a runtime
+    # directly so the alias's base_url actually takes effect.
+    requested_norm = (requested_provider or "").strip().lower()
+    if requested_norm == "custom" and explicit_base_url:
+        base_url = explicit_base_url.strip().rstrip("/")
+        api_key_candidates = [
+            (explicit_api_key or "").strip(),
+            os.getenv("OPENAI_API_KEY", "").strip(),
+            os.getenv("OPENROUTER_API_KEY", "").strip(),
+        ]
+        api_key = next(
+            (c for c in api_key_candidates if has_usable_secret(c)),
+            "",
+        ) or "no-key-required"
+        return {
+            "provider": "custom",
+            "api_mode": _detect_api_mode_for_url(base_url) or "chat_completions",
+            "base_url": base_url,
+            "api_key": api_key,
+            "source": "direct-alias",
+            "requested_provider": requested_provider,
+        }
+
    custom_provider = _get_named_custom_provider(requested_provider)
    if not custom_provider:
        return None
@@ -1095,13 +1124,34 @@ def resolve_runtime_provider(
            cfg_base_url and "azure.com" in cfg_base_url.lower()
        )
        if _is_azure_endpoint:
-            token = (
-                os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
-                or os.getenv("ANTHROPIC_API_KEY", "").strip()
-            )
+            # Honor user-specified env var hints on the model config before
+            # falling back to the built-in AZURE_ANTHROPIC_KEY / ANTHROPIC_API_KEY
+            # chain.  Accept both `key_env` (Hermes canonical — matches the
+            # custom_providers field name) and `api_key_env` (documented in the
+            # Azure Foundry guide and read by most Hermes-compatible importers).
+            # Matches the config.yaml examples in website/docs/guides/azure-foundry.md.
+            token = ""
+            for hint_key in ("key_env", "api_key_env"):
+                env_var = str(model_cfg.get(hint_key) or "").strip()
+                if env_var:
+                    token = os.getenv(env_var, "").strip()
+                    if token:
+                        break
+            # Next: an inline api_key on the model config (useful in multi-profile
+            # setups that want to avoid env-var juggling).
+            if not token:
+                token = str(model_cfg.get("api_key") or "").strip()
+            # Finally fall back to the historical fixed names.
+            if not token:
+                token = (
+                    os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
+                    or os.getenv("ANTHROPIC_API_KEY", "").strip()
+                )
            if not token:
                raise AuthError(
-                    "No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or ANTHROPIC_API_KEY."
+                    "No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or "
+                    "ANTHROPIC_API_KEY, or point key_env/api_key_env in your "
+                    "config.yaml model section at a custom env var."
                )
        else:
            from agent.anthropic_adapter import resolve_anthropic_token
@@ -1212,15 +1262,20 @@ def resolve_runtime_provider(
            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
            # Only honor persisted api_mode when it belongs to the same provider family.
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-            if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
-                api_mode = configured_mode
-            elif provider in ("opencode-zen", "opencode-go"):
+            if provider in ("opencode-zen", "opencode-go"):
+                # opencode-zen/go must always re-derive api_mode from the
+                # target model (not the stale persisted api_mode), because
+                # the same provider serves both anthropic_messages
+                # (e.g. minimax-m2.7) and chat_completions (e.g.
+                # deepseek-v4-flash) and switching models via /model would
+                # otherwise carry the previous mode forward, stripping /v1
+                # from base_url for chat_completions models and 404'ing.
+                # Refs #16878.
                from hermes_cli.models import opencode_model_api_mode
-                # Prefer the target_model from the caller (explicit mid-session
-                # switch) over the stale model.default; see _resolve_runtime_from_pool_entry
-                # for the same rationale.
                _effective = target_model or model_cfg.get("default", "")
                api_mode = opencode_model_api_mode(provider, _effective)
+            elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
+                api_mode = configured_mode
            else:
                # Auto-detect Anthropic-compatible endpoints by URL convention
                # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
@@ -712,8 +712,6 @@ def setup_model_provider(config: dict, *, quick: bool = False):
    if isinstance(_m, dict):
        selected_provider = _m.get("provider")

-    nous_subscription_selected = selected_provider == "nous"
-
    # ── Same-provider fallback & rotation setup (full setup only) ──
    if not quick and _supports_same_provider_pool_setup(selected_provider):
        try:
@@ -68,7 +68,7 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      welcome: "Welcome message"          # Shown at CLI startup
      goodbye: "Goodbye! ⚕"              # Shown on exit
      response_label: " ⚕ Hermes "       # Response box header label
-      prompt_symbol: "❯ "                # Input prompt symbol
+      prompt_symbol: "❯"                 # Input prompt symbol (bare token; renderers add trailing space)
      help_header: "(^_^)? Commands"      # /help header text

    # Tool prefix: character for tool output lines (default: ┊)
@@ -103,6 +103,10 @@ BUILT-IN SKINS
 - ``slate``   — Cool blue developer-focused theme
 - ``daylight`` — Light background theme with dark text and blue accents
 - ``warm-lightmode`` — Warm brown/gold text for light terminal backgrounds
+- ``poseidon`` — Ocean-god theme (deep blue and seafoam)
+- ``sisyphus`` — Austere grayscale with boulder motif
+- ``charizard`` — Volcanic burnt-orange and ember
+- ``bunnny``   — Barbie-pink coquette theme (sparkles, hearts, bunnies)

 USER SKINS
 ==========
@@ -190,7 +194,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! ⚕",
            "response_label": " ⚕ Hermes ",
-            "prompt_symbol": "❯ ",
+            "prompt_symbol": "❯",
            "help_header": "(^_^)? Available Commands",
        },
        "tool_prefix": "┊",
@@ -242,7 +246,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Ares Agent! Type your message or /help for commands.",
            "goodbye": "Farewell, warrior! ⚔",
            "response_label": " ⚔ Ares ",
-            "prompt_symbol": "⚔ ❯ ",
+            "prompt_symbol": "⚔",
            "help_header": "(⚔) Available Commands",
        },
        "tool_prefix": "╎",
@@ -301,7 +305,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! ⚕",
            "response_label": " ⚕ Hermes ",
-            "prompt_symbol": "❯ ",
+            "prompt_symbol": "❯",
            "help_header": "[?] Available Commands",
        },
        "tool_prefix": "┊",
@@ -340,7 +344,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! ⚕",
            "response_label": " ⚕ Hermes ",
-            "prompt_symbol": "❯ ",
+            "prompt_symbol": "❯",
            "help_header": "(^_^)? Available Commands",
        },
        "tool_prefix": "┊",
@@ -377,7 +381,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! ⚕",
            "response_label": " ⚕ Hermes ",
-            "prompt_symbol": "❯ ",
+            "prompt_symbol": "❯",
            "help_header": "[?] Available Commands",
        },
        "tool_prefix": "│",
@@ -414,7 +418,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! \u2695",
            "response_label": " \u2695 Hermes ",
-            "prompt_symbol": "\u276f ",
+            "prompt_symbol": "\u276f",
            "help_header": "(^_^)? Available Commands",
        },
        "tool_prefix": "\u250a",
@@ -467,7 +471,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Poseidon Agent! Type your message or /help for commands.",
            "goodbye": "Fair winds! Ψ",
            "response_label": " Ψ Poseidon ",
-            "prompt_symbol": "Ψ ❯ ",
+            "prompt_symbol": "Ψ",
            "help_header": "(Ψ) Available Commands",
        },
        "tool_prefix": "│",
@@ -539,7 +543,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Sisyphus Agent! Type your message or /help for commands.",
            "goodbye": "The boulder waits. ◉",
            "response_label": " ◉ Sisyphus ",
-            "prompt_symbol": "◉ ❯ ",
+            "prompt_symbol": "◉",
            "help_header": "(◉) Available Commands",
        },
        "tool_prefix": "│",
@@ -612,7 +616,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Charizard Agent! Type your message or /help for commands.",
            "goodbye": "Flame out! ✦",
            "response_label": " ✦ Charizard ",
-            "prompt_symbol": "✦ ❯ ",
+            "prompt_symbol": "✦",
            "help_header": "(✦) Available Commands",
        },
        "tool_prefix": "│",
@@ -636,6 +640,83 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
 [#F29C38]⠀⠀⠀⠀⠀⠀⠀⣼⡟⠀⠀⢻⣧⠀⠀⠀⠀⠀⠀⠀⠀[/]
 [dim #7A3511]⠀⠀⠀⠀⠀⠀⠀tail flame lit⠀⠀⠀⠀⠀⠀⠀⠀[/]""",
    },
+    "bunnny": {
+        "name": "bunnny",
+        "description": "Barbie-pink coquette theme — sparkles, bows, and bubblegum",
+        "colors": {
+            "banner_border": "#E91E63",
+            "banner_title": "#FF3366",
+            "banner_accent": "#FF69B4",
+            "banner_dim": "#C2185B",
+            "banner_text": "#FFF0F5",
+            "ui_accent": "#FF3366",
+            "ui_label": "#FF69B4",
+            "ui_ok": "#FFB6C1",
+            "ui_error": "#FF1744",
+            "ui_warn": "#FFAB91",
+            "prompt": "#FFF0F5",
+            "input_rule": "#E91E63",
+            "response_border": "#FF69B4",
+            "status_bar_bg": "#2A0E1E",
+            "status_bar_text": "#FFE4EC",
+            "status_bar_strong": "#FF3366",
+            "status_bar_dim": "#8E4B6B",
+            "status_bar_good": "#FFB6C1",
+            "status_bar_warn": "#FF69B4",
+            "status_bar_bad": "#FF3366",
+            "status_bar_critical": "#FF1744",
+            "session_label": "#FF69B4",
+            "session_border": "#8E4B6B",
+            "voice_status_bg": "#2A0E1E",
+            "completion_menu_bg": "#2A0E1E",
+            "completion_menu_current_bg": "#5A1D3A",
+            "completion_menu_meta_bg": "#2A0E1E",
+            "completion_menu_meta_current_bg": "#5A1D3A",
+        },
+        "spinner": {
+            "waiting_faces": ["(♡)", "(✿)", "(✧)", "(❀)", "(ෆ)", "(˘ᵕ˘)", "(⑅)"],
+            "thinking_faces": ["(♡)", "(✧)", "(❀)", "(✿)", "(ෆ)", "(˘ᵕ˘)"],
+            "thinking_verbs": [
+                "sparkling", "twirling", "glittering", "frosting",
+                "bedazzling", "bowtying", "sprinkling sugar", "picking ribbons",
+                "glossing up", "curating the vibe", "dusting pink",
+                "tying a little bow", "making it cute",
+            ],
+            "wings": [
+                ["⟪♡", "♡⟫"],
+                ["⟪✧", "✧⟫"],
+                ["⟪✿", "✿⟫"],
+                ["⟪❀", "❀⟫"],
+                ["⟪ෆ", "ෆ⟫"],
+            ],
+        },
+        "branding": {
+            "agent_name": "Hermes Agent",
+            "welcome": "hi bestie ♡ welcome to Hermes Agent! type your message or /help for commands (ﾉ◕ヮ◕)ﾉ*:･ﾟ✧",
+            "goodbye": "bye bestie ♡ ✧",
+            "response_label": " ♡ Hermes ",
+            "prompt_symbol": "♡",
+            "help_header": "(ﾉ◕ヮ◕)ﾉ*:･ﾟ✧ Commands",
+        },
+        "tool_prefix": "♡",
+        "banner_logo": """[bold #FFB6C1]██╗  ██╗███████╗██████╗ ███╗   ███╗███████╗███████╗  ██╗  ██╗ [/]
+[bold #FF69B4]██║  ██║██╔════╝██╔══██╗████╗ ████║██╔════╝██╔════╝ ████████╗[/]
+[#FF3C7F]███████║█████╗  ██████╔╝██╔████╔██║█████╗  ███████╗ ╚██████╔╝[/]
+[#FF3366]██╔══██║██╔══╝  ██╔══██╗██║╚██╔╝██║██╔══╝  ╚════██║  ╚████╔╝ [/]
+[#E91E63]██║  ██║███████╗██║  ██║██║ ╚═╝ ██║███████╗███████║   ╚██╔╝  [/]
+[#C2185B]╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝╚═╝     ╚═╝╚══════╝╚══════╝    ╚═╝   [/]""",
+        "banner_hero": """[#FF69B4]⠀⠀✧⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀✧⠀⠀[/]
+[#FFB6C1]⠀⠀⠀⠀⠀⠀♡⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⠀⠀⠀⠀⠀⢀⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀♡⠀⠀⠀⠀[/]
+[#FF69B4]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢠⣯⢬⣷⡀⠀⠀⣴⡯⢌⣧⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
+[#FF3366]⠀✿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠸⣿♡⠹⣷⠀⢸⡝♡⢸⡿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀✿⠀[/]
+[#FF3C7F]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠻⣧⣀⣿⣦⣼⡁⣠⣿⠃⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
+[#FF3366]⠀⠀⠀⠀✧⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⡾⠋⠀⠀⠀⠈⣙⣯⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀✧[/]
+[#FF3366]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣾⠀⠀⠀⠀⠀⠀⠀⠸⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
+[#E91E63]⠀⠀⠀⠀⠀⠀⠀♡⠀⠀⠀⠀⠀⠀⠀⠀⢰⡧⢄⢰⡆⠀⢰⡆⡠⢄⣧⠀⠀⠀⠀⠀⠀⠀⠀♡⠀⠀⠀⠀⠀[/]
+[#C2185B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠳⣼⣤⣤⣤⣤⣤⣧⠾⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
+[#FF69B4]⠀⠀⠀⠀⠀✿⠀⠀⠀⠀⠀⠀❀⠀⠀⠀⠀⠀❀⠀⠀❀⠀⠀⠀⠀⠀❀⠀⠀⠀⠀⠀⠀✿⠀⠀⠀⠀⠀[/]
+[dim #C2185B]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀xoxo⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]""",
+    },
 }


@@ -780,12 +861,21 @@ def init_skin_from_config(config: dict) -> None:
 # =============================================================================


-def get_active_prompt_symbol(fallback: str = "❯ ") -> str:
-    """Get the interactive prompt symbol from the active skin."""
+def get_active_prompt_symbol(fallback: str = "❯") -> str:
+    """Return the interactive prompt symbol with a single trailing space.
+
+    Skins store ``prompt_symbol`` as a bare token (no spaces). The trailing
+    space is appended here so callers can drop it straight into a rendered
+    prompt without hand-rolling whitespace.
+
+    Args:
+        fallback: Symbol used when the active skin cannot be read or yields
+            an empty/whitespace-only value. Surrounding whitespace is
+            stripped, so legacy values such as ``"❯ "`` still work.
+
+    Returns:
+        The stripped symbol followed by exactly one space.
+    """
     try:
-        return get_active_skin().get_branding("prompt_symbol", fallback)
+        raw = get_active_skin().get_branding("prompt_symbol", fallback)
     except Exception:
-        return fallback
+        raw = fallback
+
+    # Coerce to str before stripping: the strip happens outside the try
+    # above, so a non-string branding value (or a None fallback) would
+    # otherwise raise AttributeError here.
+    default_token = str(fallback or "").strip()
+    cleaned = str(raw or fallback or "").strip()
+
+    return f"{cleaned or default_token} "



@@ -6,7 +6,7 @@ Shows the status of all Hermes Agent components.

 import os
 import sys
-import subprocess
+import subprocess  # noqa: F401 — re-exported for tests that monkeypatch status.subprocess to guard against regressions
 from pathlib import Path

 PROJECT_ROOT = Path(__file__).parent.parent.resolve()
@@ -26,12 +26,15 @@ def check_mark(ok: bool) -> str:
    return color("✗", Colors.RED)

 def redact_key(key: str) -> str:
-    """Redact an API key for display."""
-    if not key:
-        return "(not set)"
-    if len(key) < 12:
-        return "***"
-    return key[:4] + "..." + key[-4:]
+    """Mask an API key so it is safe to print.
+
+    Delegates to :func:`agent.redact.mask_secret`, supplying a dim-colored
+    ``"(not set)"`` as the ``empty`` placeholder. The inline implementation
+    this replaces returned that placeholder uncolored.
+    """
+    from agent.redact import mask_secret
+
+    placeholder = color("(not set)", Colors.DIM)
+    return mask_secret(key, empty=placeholder)


 def _format_iso_timestamp(value) -> str:
@@ -274,6 +277,23 @@ def show_status(args):
        label = "configured" if configured else "not configured (run: hermes model)"
        print(f"  {pname:<16} {check_mark(configured)} {label}")

+    # LM Studio reachability — only probe when it's the active provider so
+    # users with foreign configs don't see noise. Auth rejection vs. silent
+    # empty list is the most common LM Studio support case.
+    if _effective_provider_label() == "LM Studio":
+        from hermes_cli.models import probe_lmstudio_models
+        model_cfg = config.get("model")
+        base = (model_cfg.get("base_url") if isinstance(model_cfg, dict) else None) or get_env_value("LM_BASE_URL") or "http://127.0.0.1:1234/v1"
+        try:
+            models = probe_lmstudio_models(api_key=get_env_value("LM_API_KEY") or "", base_url=base, timeout=1.5)
+            if models is None:
+                ok, msg = False, f"unreachable at {base}"
+            else:
+                ok, msg = True, f"reachable ({len(models)} model(s)) at {base}"
+        except AuthError:
+            ok, msg = False, "auth rejected — set LM_API_KEY"
+        print(f"  {'LM Studio':<16} {check_mark(ok)} {msg}")
+
    # =========================================================================
    # Terminal Configuration
    # =========================================================================
@@ -263,7 +263,6 @@ TIPS = [
    "hermes status --deep runs deeper diagnostic checks across all components.",

    # --- Hidden Gems & Power-User Tricks ---
-    "BOOT.md at ~/.hermes/BOOT.md runs automatically on every gateway start — use it for startup checks.",
    "Cron jobs can attach a Python script (--script) whose stdout is injected into the prompt as context.",
    "Cron scripts live in ~/.hermes/scripts/ and run before the agent — perfect for data collection pipelines.",
    "prefill_messages_file in config.yaml injects few-shot examples into every API call, never saved to history.",
@@ -425,6 +425,31 @@ TOOL_CATEGORIES = {
            },
        ],
    },
+    "langfuse": {
+        "name": "Langfuse Observability",
+        "icon": "📊",
+        "providers": [
+            {
+                "name": "Langfuse Cloud",
+                "tag": "Hosted Langfuse (cloud.langfuse.com)",
+                "env_vars": [
+                    {"key": "HERMES_LANGFUSE_PUBLIC_KEY", "prompt": "Langfuse public key (pk-lf-...)", "url": "https://cloud.langfuse.com"},
+                    {"key": "HERMES_LANGFUSE_SECRET_KEY", "prompt": "Langfuse secret key (sk-lf-...)", "url": "https://cloud.langfuse.com"},
+                ],
+                "post_setup": "langfuse",
+            },
+            {
+                "name": "Langfuse Self-Hosted",
+                "tag": "Self-hosted Langfuse instance",
+                "env_vars": [
+                    {"key": "HERMES_LANGFUSE_PUBLIC_KEY", "prompt": "Langfuse public key (pk-lf-...)"},
+                    {"key": "HERMES_LANGFUSE_SECRET_KEY", "prompt": "Langfuse secret key (sk-lf-...)"},
+                    {"key": "HERMES_LANGFUSE_BASE_URL", "prompt": "Langfuse server URL (e.g. http://localhost:3000)", "default": "http://localhost:3000"},
+                ],
+                "post_setup": "langfuse",
+            },
+        ],
+    },
 }

 # Simple env-var requirements for toolsets NOT in TOOL_CATEGORIES.
@@ -442,7 +467,10 @@ def _run_post_setup(post_setup_key: str):
    import shutil
    if post_setup_key in ("agent_browser", "browserbase"):
        node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
-        if not node_modules.exists() and shutil.which("npm"):
+        npm_bin = shutil.which("npm")
+        npx_bin = shutil.which("npx")
+        # Step 1: install the agent-browser npm package into node_modules/
+        if not node_modules.exists() and npm_bin:
            _print_info("    Installing Node.js dependencies for browser tools...")
            import subprocess
            result = subprocess.run(
@@ -454,8 +482,94 @@ def _run_post_setup(post_setup_key: str):
            else:
                from hermes_constants import display_hermes_home
                _print_warning(f"    npm install failed - run manually: cd {display_hermes_home()}/hermes-agent && npm install")
+                if result.stderr:
+                    _print_info(f"      {result.stderr.strip()[:200]}")
        elif not node_modules.exists():
            _print_warning("    Node.js not found - browser tools require: npm install (in hermes-agent directory)")
+            return
+
+        # Step 2: only the local browser provider actually needs Chromium on
+        # disk. Cloud providers (Browserbase, Browser Use, Firecrawl) host
+        # their own Chromium and don't need the local install.
+        if post_setup_key != "agent_browser":
+            return
+
+        # Step 3: ensure the Chromium / headless-shell build agent-browser
+        # drives is actually installed. Without it the CLI hangs on first
+        # use until the command timeout fires. Skip inside Docker — the
+        # image bakes Chromium in at build time, and runtime users usually
+        # can't write to PLAYWRIGHT_BROWSERS_PATH anyway.
+        try:
+            # Import lazily so the tools_config UI doesn't pull in the full
+            # browser_tool module at import time.
+            from tools.browser_tool import (
+                _chromium_installed,
+                _running_in_docker,
+            )
+        except Exception as exc:  # pragma: no cover — defensive
+            _print_warning(f"    Could not check Chromium status: {exc}")
+            return
+
+        if _chromium_installed():
+            _print_success("    Chromium browser already installed")
+            return
+
+        if _running_in_docker():
+            _print_warning(
+                "    Chromium is missing but you're running in Docker."
+            )
+            _print_info(
+                "    Pull the latest image to get the bundled Chromium:"
+            )
+            _print_info(
+                "      docker pull ghcr.io/nousresearch/hermes-agent:latest"
+            )
+            return
+
+        if not npx_bin:
+            _print_warning(
+                "    npx not found - install Chromium manually: npx agent-browser install --with-deps"
+            )
+            return
+
+        _print_info("    Installing Chromium (~170MB one-time download)...")
+        import subprocess
+        # Prefer the bundled agent-browser install subcommand so the
+        # version of Chromium matches the CLI. Fall back to npx shim on
+        # setups where the local bin stub isn't present.
+        local_ab = PROJECT_ROOT / "node_modules" / ".bin" / "agent-browser"
+        if sys.platform == "win32":
+            local_ab_win = local_ab.with_suffix(".cmd")
+            if local_ab_win.exists():
+                local_ab = local_ab_win
+        install_cmd = (
+            [str(local_ab), "install", "--with-deps"]
+            if local_ab.exists()
+            else [npx_bin, "-y", "agent-browser", "install", "--with-deps"]
+        )
+        try:
+            result = subprocess.run(
+                install_cmd,
+                capture_output=True, text=True, cwd=str(PROJECT_ROOT), timeout=600,
+            )
+            if result.returncode == 0:
+                _print_success("    Chromium installed")
+                # Invalidate the cached "missing" result so subsequent
+                # check_browser_requirements() calls see the new install.
+                import tools.browser_tool as _bt
+                _bt._cached_chromium_installed = None
+            else:
+                _print_warning("    Chromium install failed:")
+                tail = (result.stderr or result.stdout or "").strip().splitlines()[-3:]
+                for line in tail:
+                    _print_info(f"      {line[:200]}")
+                _print_info("    Run manually: npx agent-browser install --with-deps")
+        except subprocess.TimeoutExpired:
+            _print_warning("    Chromium install timed out (>10min)")
+            _print_info("    Run manually: npx agent-browser install --with-deps")
+        except Exception as exc:
+            _print_warning(f"    Chromium install failed: {exc}")
+            _print_info("    Run manually: npx agent-browser install --with-deps")

    elif post_setup_key == "camofox":
        camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camofox-browser"
@@ -567,6 +681,40 @@ def _run_post_setup(post_setup_key: str):
                _print_info("      git submodule update --init --recursive")
                _print_info('      uv pip install -e "./tinker-atropos"')

+    elif post_setup_key == "langfuse":
+        # Install the langfuse SDK.
+        try:
+            __import__("langfuse")
+            _print_success("    langfuse SDK already installed")
+        except ImportError:
+            import subprocess
+            _print_info("    Installing langfuse SDK...")
+            result = subprocess.run(
+                [sys.executable, "-m", "pip", "install", "langfuse", "--quiet"],
+                capture_output=True, text=True, timeout=120,
+            )
+            if result.returncode == 0:
+                _print_success("    langfuse SDK installed")
+            else:
+                _print_warning("    langfuse SDK install failed — run manually: pip install langfuse")
+        # Opt the bundled observability/langfuse plugin into plugins.enabled.
+        # The plugin ships in the repo but doesn't load until the user enables
+        # it (standalone plugins are opt-in).
+        try:
+            from hermes_cli.plugins_cmd import _get_enabled_set, _save_enabled_set
+            enabled = _get_enabled_set()
+            if "observability/langfuse" in enabled or "langfuse" in enabled:
+                _print_success("    Plugin observability/langfuse already enabled")
+            else:
+                enabled.add("observability/langfuse")
+                _save_enabled_set(enabled)
+                _print_success("    Plugin observability/langfuse enabled")
+        except Exception as exc:
+            _print_warning(f"    Could not enable plugin automatically: {exc}")
+            _print_info("    Run manually: hermes plugins enable observability/langfuse")
+        _print_info("    Restart Hermes for tracing to take effect.")
+        _print_info("    Verify: hermes plugins list")
+

 # ─── Platform / Toolset Helpers ───────────────────────────────────────────────

@@ -777,6 +925,16 @@ def _get_platform_tools(
    else:
        enabled_toolsets.update(explicit_mcp_servers)

+    # Honor agent.disabled_toolsets from config.yaml — allows users to
+    # globally suppress specific toolsets (e.g. "memory") across all
+    # platforms without per-platform toolset configuration.  This runs
+    # last so it overrides everything above.
+    agent_cfg = config.get("agent") or {}
+    disabled_toolsets = agent_cfg.get("disabled_toolsets") or []
+    if disabled_toolsets:
+        disabled_set = {str(ts) for ts in disabled_toolsets}
+        enabled_toolsets -= disabled_set
+
    return enabled_toolsets


@@ -736,7 +736,7 @@ async def get_sessions(limit: int = 20, offset: int = 0):
            return {"sessions": sessions, "total": total, "limit": limit, "offset": offset}
        finally:
            db.close()
-    except Exception as e:
+    except Exception:
        _log.exception("GET /api/sessions failed")
        raise HTTPException(status_code=500, detail="Internal server error")

@@ -968,7 +968,7 @@ async def update_config(body: ConfigUpdate):
    try:
        save_config(_denormalize_config_from_web(body.config))
        return {"ok": True}
-    except Exception as e:
+    except Exception:
        _log.exception("PUT /api/config failed")
        raise HTTPException(status_code=500, detail="Internal server error")

@@ -997,7 +997,7 @@ async def set_env_var(body: EnvVarUpdate):
    try:
        save_env_value(body.key, body.value)
        return {"ok": True, "key": body.key}
-    except Exception as e:
+    except Exception:
        _log.exception("PUT /api/env failed")
        raise HTTPException(status_code=500, detail="Internal server error")

@@ -1011,7 +1011,7 @@ async def remove_env_var(body: EnvVarDelete):
        return {"ok": True, "key": body.key}
    except HTTPException:
        raise
-    except Exception as e:
+    except Exception:
        _log.exception("DELETE /api/env failed")
        raise HTTPException(status_code=500, detail="Internal server error")

@@ -1568,7 +1568,6 @@ async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
    then spawns a background poller. Returns the user-facing display fields
    so the UI can render the verification page link + user code.
    """
-    from hermes_cli import auth as hauth
    if provider_id == "nous":
        from hermes_cli.auth import _request_device_code, PROVIDER_REGISTRY
        import httpx
@@ -11,7 +11,6 @@ hot-reloaded by the webhook adapter without a gateway restart.
 """

 import json
-import os
 import re
 import secrets
 import time
@@ -19,6 +18,7 @@ from pathlib import Path
 from typing import Dict

 from hermes_constants import display_hermes_home
+from utils import atomic_replace


 _SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json"
@@ -52,7 +52,7 @@ def _save_subscriptions(subs: Dict[str, dict]) -> None:
        json.dumps(subs, indent=2, ensure_ascii=False),
        encoding="utf-8",
    )
-    os.replace(str(tmp_path), str(path))
+    atomic_replace(tmp_path, path)


 def _get_webhook_config() -> dict:
@@ -22,6 +22,8 @@ import sqlite3
 import threading
 import time
 from pathlib import Path
+
+from agent.memory_manager import sanitize_context
 from hermes_constants import get_hermes_home
 from typing import Any, Callable, Dict, List, Optional, TypeVar

@@ -31,7 +33,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 9
+SCHEMA_VERSION = 11

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -100,22 +102,56 @@ CREATE INDEX IF NOT EXISTS idx_messages_session ON messages(session_id, timestam

 FTS_SQL = """
 CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts USING fts5(
-    content,
-    content=messages,
-    content_rowid=id
+    content
 );

 CREATE TRIGGER IF NOT EXISTS messages_fts_insert AFTER INSERT ON messages BEGIN
-    INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
+    INSERT INTO messages_fts(rowid, content) VALUES (
+        new.id,
+        COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
+    );
 END;

 CREATE TRIGGER IF NOT EXISTS messages_fts_delete AFTER DELETE ON messages BEGIN
-    INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
+    DELETE FROM messages_fts WHERE rowid = old.id;
 END;

 CREATE TRIGGER IF NOT EXISTS messages_fts_update AFTER UPDATE ON messages BEGIN
-    INSERT INTO messages_fts(messages_fts, rowid, content) VALUES('delete', old.id, old.content);
-    INSERT INTO messages_fts(rowid, content) VALUES (new.id, new.content);
+    DELETE FROM messages_fts WHERE rowid = old.id;
+    INSERT INTO messages_fts(rowid, content) VALUES (
+        new.id,
+        COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
+    );
+END;
+"""
+
+# Trigram FTS5 table for CJK substring search.  The default unicode61
+# tokenizer splits CJK characters into individual tokens, breaking phrase
+# matching.  The trigram tokenizer creates overlapping 3-character sequences so
+# substring queries work natively for any script (CJK, Thai, etc.).
+FTS_TRIGRAM_SQL = """
+CREATE VIRTUAL TABLE IF NOT EXISTS messages_fts_trigram USING fts5(
+    content,
+    tokenize='trigram'
+);
+
+CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_insert AFTER INSERT ON messages BEGIN
+    INSERT INTO messages_fts_trigram(rowid, content) VALUES (
+        new.id,
+        COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
+    );
+END;
+
+CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_delete AFTER DELETE ON messages BEGIN
+    DELETE FROM messages_fts_trigram WHERE rowid = old.id;
+END;
+
+CREATE TRIGGER IF NOT EXISTS messages_fts_trigram_update AFTER UPDATE ON messages BEGIN
+    DELETE FROM messages_fts_trigram WHERE rowid = old.id;
+    INSERT INTO messages_fts_trigram(rowid, content) VALUES (
+        new.id,
+        COALESCE(new.content, '') || ' ' || COALESCE(new.tool_name, '') || ' ' || COALESCE(new.tool_calls, '')
+    );
 END;
 """

@@ -257,118 +293,201 @@ class SessionDB:
                self._conn.close()
                self._conn = None

+    @staticmethod
+    def _parse_schema_columns(schema_sql: str) -> Dict[str, Dict[str, str]]:
+        """Extract expected columns per table from SCHEMA_SQL.
+
+        Uses an in-memory SQLite database to parse the SQL — SQLite itself
+        handles all syntax (DEFAULT expressions with commas, inline
+        REFERENCES, CHECK constraints, etc.) so there are zero regex
+        edge cases.  The in-memory DB is opened, the schema DDL is
+        executed, and PRAGMA table_info extracts the column metadata.
+
+        Adding a column to SCHEMA_SQL is all that's needed; the
+        reconciliation loop picks it up automatically.
+        """
+        ref = sqlite3.connect(":memory:")
+        try:
+            ref.executescript(schema_sql)
+            table_columns: Dict[str, Dict[str, str]] = {}
+            for (tbl,) in ref.execute(
+                "SELECT name FROM sqlite_master "
+                "WHERE type='table' AND name NOT LIKE 'sqlite_%'"
+            ).fetchall():
+                cols: Dict[str, str] = {}
+                for row in ref.execute(
+                    f'PRAGMA table_info("{tbl}")'
+                ).fetchall():
+                    # row: (cid, name, type, notnull, dflt_value, pk)
+                    col_name = row[1]
+                    col_type = row[2] or ""
+                    notnull = row[3]
+                    default = row[4]
+                    pk = row[5]
+                    # Reconstruct the type expression for ALTER TABLE ADD COLUMN
+                    parts = [col_type] if col_type else []
+                    if notnull and not pk:
+                        parts.append("NOT NULL")
+                    if default is not None:
+                        parts.append(f"DEFAULT {default}")
+                    cols[col_name] = " ".join(parts)
+                table_columns[tbl] = cols
+            return table_columns
+        finally:
+            ref.close()
+
+    def _reconcile_columns(self, cursor: sqlite3.Cursor) -> None:
+        """Ensure live tables have every column declared in SCHEMA_SQL.
+
+        Follows the Beets/sqlite-utils pattern: the CREATE TABLE definition
+        in SCHEMA_SQL is the single source of truth for the desired schema.
+        On every startup this method diffs the live columns (via PRAGMA
+        table_info) against the declared columns, and ADDs any that are
+        missing.
+
+        This makes column additions a declarative operation — just add
+        the column to SCHEMA_SQL and it appears on the next startup.
+        Version-gated migration blocks are no longer needed for ADD COLUMN.
+        """
+        expected = self._parse_schema_columns(SCHEMA_SQL)
+        for table_name, declared_cols in expected.items():
+            # Get current columns from the live table
+            try:
+                rows = cursor.execute(
+                    f'PRAGMA table_info("{table_name}")'
+                ).fetchall()
+            except sqlite3.OperationalError:
+                continue  # Table doesn't exist yet (shouldn't happen after executescript)
+            live_cols = set()
+            for row in rows:
+                # PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk)
+                name = row[1] if isinstance(row, (tuple, list)) else row["name"]
+                live_cols.add(name)
+
+            for col_name, col_type in declared_cols.items():
+                if col_name not in live_cols:
+                    safe_name = col_name.replace('"', '""')
+                    try:
+                        cursor.execute(
+                            f'ALTER TABLE "{table_name}" ADD COLUMN "{safe_name}" {col_type}'
+                        )
+                    except sqlite3.OperationalError as exc:
+                        # Expected: "duplicate column name" from a race or
+                        # re-run.  Unexpected: "Cannot add a NOT NULL column
+                        # with default value NULL" from a schema mistake.
+                        # Log at DEBUG so it's visible in agent.log.
+                        logger.debug(
+                            "reconcile %s.%s: %s", table_name, col_name, exc,
+                        )
+
    def _init_schema(self):
-        """Create tables and FTS if they don't exist, run migrations."""
+        """Create tables and FTS if they don't exist, reconcile columns.
+
+        Schema management follows the declarative reconciliation pattern
+        (Beets, sqlite-utils): SCHEMA_SQL is the single source of truth.
+        On existing databases, _reconcile_columns() diffs live columns
+        against SCHEMA_SQL and ADDs any missing ones.  This eliminates
+        the version-gated migration chain for column additions, making
+        it impossible for reordered or inserted migrations to skip columns.
+
+        The schema_version table is retained for future data migrations
+        (transforming existing rows) which cannot be handled declaratively.
+        """
        cursor = self._conn.cursor()

        cursor.executescript(SCHEMA_SQL)

-        # Check schema version and run migrations
+        # ── Declarative column reconciliation ──────────────────────────
+        # Diff live tables against SCHEMA_SQL and ADD any missing columns.
+        # This is idempotent and self-healing: even if a version-gated
+        # migration was skipped (e.g. due to version renumbering), the
+        # column gets created here.
+        self._reconcile_columns(cursor)
+
+        # ── Schema version bookkeeping ─────────────────────────────────
+        # Bump to current so future data migrations (if any) can gate on
+        # version.  No version-gated column additions remain.
        cursor.execute("SELECT version FROM schema_version LIMIT 1")
        row = cursor.fetchone()
        if row is None:
-            cursor.execute("INSERT INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,))
+            cursor.execute(
+                "INSERT INTO schema_version (version) VALUES (?)",
+                (SCHEMA_VERSION,),
+            )
        else:
            current_version = row["version"] if isinstance(row, sqlite3.Row) else row[0]
-            if current_version < 2:
-                # v2: add finish_reason column to messages
+            # Data migrations that can't be expressed declaratively (row
+            # backfills, index changes tied to a specific version step) stay
+            # in a version-gated chain. Column additions are handled by
+            # _reconcile_columns() above and no longer need entries here.
+            if current_version < 10:
+                # v10: trigram FTS5 table for CJK/substring search. The
+                # virtual table + triggers are created unconditionally via
+                # FTS_TRIGRAM_SQL below, but existing rows need a one-time
+                # backfill into the FTS index.
                try:
-                    cursor.execute("ALTER TABLE messages ADD COLUMN finish_reason TEXT")
+                    cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
+                    _fts_trigram_exists = True
                except sqlite3.OperationalError:
-                    pass  # Column already exists
-                cursor.execute("UPDATE schema_version SET version = 2")
-            if current_version < 3:
-                # v3: add title column to sessions
-                try:
-                    cursor.execute("ALTER TABLE sessions ADD COLUMN title TEXT")
-                except sqlite3.OperationalError:
-                    pass  # Column already exists
-                cursor.execute("UPDATE schema_version SET version = 3")
-            if current_version < 4:
-                # v4: add unique index on title (NULLs allowed, only non-NULL must be unique)
-                try:
+                    _fts_trigram_exists = False
+                if not _fts_trigram_exists:
+                    cursor.executescript(FTS_TRIGRAM_SQL)
                    cursor.execute(
-                        "CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique "
-                        "ON sessions(title) WHERE title IS NOT NULL"
+                        "INSERT INTO messages_fts_trigram(rowid, content) "
+                        "SELECT id, content FROM messages WHERE content IS NOT NULL"
                    )
-                except sqlite3.OperationalError:
-                    pass  # Index already exists
-                cursor.execute("UPDATE schema_version SET version = 4")
-            if current_version < 5:
-                new_columns = [
-                    ("cache_read_tokens", "INTEGER DEFAULT 0"),
-                    ("cache_write_tokens", "INTEGER DEFAULT 0"),
-                    ("reasoning_tokens", "INTEGER DEFAULT 0"),
-                    ("billing_provider", "TEXT"),
-                    ("billing_base_url", "TEXT"),
-                    ("billing_mode", "TEXT"),
-                    ("estimated_cost_usd", "REAL"),
-                    ("actual_cost_usd", "REAL"),
-                    ("cost_status", "TEXT"),
-                    ("cost_source", "TEXT"),
-                    ("pricing_version", "TEXT"),
-                ]
-                for name, column_type in new_columns:
+            if current_version < 11:
+                # v11: re-index FTS5 tables to cover tool_name + tool_calls and
+                # switch from external-content to inline mode. Existing DBs have
+                # old-schema FTS tables and triggers that IF NOT EXISTS won't
+                # overwrite, so we drop them explicitly, recreate them right
+                # here from FTS_SQL / FTS_TRIGRAM_SQL (inline mode), and then
+                # backfill every existing message row. Fixes #16751.
+                for _trig in (
+                    "messages_fts_insert",
+                    "messages_fts_delete",
+                    "messages_fts_update",
+                    "messages_fts_trigram_insert",
+                    "messages_fts_trigram_delete",
+                    "messages_fts_trigram_update",
+                ):
                    try:
-                        # name and column_type come from the hardcoded tuple above,
-                        # not user input. Double-quote identifier escaping is applied
-                        # as defense-in-depth; SQLite DDL cannot be parameterized.
-                        safe_name = name.replace('"', '""')
-                        cursor.execute(f'ALTER TABLE sessions ADD COLUMN "{safe_name}" {column_type}')
+                        cursor.execute(f"DROP TRIGGER IF EXISTS {_trig}")
                    except sqlite3.OperationalError:
                        pass
-                cursor.execute("UPDATE schema_version SET version = 5")
-            if current_version < 6:
-                # v6: add reasoning columns to messages table — preserves assistant
-                # reasoning text and structured reasoning_details across gateway
-                # session turns.  Without these, reasoning chains are lost on
-                # session reload, breaking multi-turn reasoning continuity for
-                # providers that replay reasoning (OpenRouter, OpenAI, Nous).
-                for col_name, col_type in [
-                    ("reasoning", "TEXT"),
-                    ("reasoning_details", "TEXT"),
-                    ("codex_reasoning_items", "TEXT"),
-                ]:
+                for _tbl in ("messages_fts", "messages_fts_trigram"):
                    try:
-                        safe = col_name.replace('"', '""')
-                        cursor.execute(
-                            f'ALTER TABLE messages ADD COLUMN "{safe}" {col_type}'
-                        )
+                        cursor.execute(f"DROP TABLE IF EXISTS {_tbl}")
                    except sqlite3.OperationalError:
-                        pass  # Column already exists
-                cursor.execute("UPDATE schema_version SET version = 6")
-            if current_version < 7:
-                # v7: preserve provider-native reasoning_content separately from
-                # normalized reasoning text. Kimi/Moonshot replay can require
-                # this field on assistant tool-call messages when thinking is on.
-                try:
-                    cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT')
-                except sqlite3.OperationalError:
-                    pass  # Column already exists
-                cursor.execute("UPDATE schema_version SET version = 7")
-            if current_version < 8:
-                # v8: add api_call_count column to sessions — tracks the number
-                # of individual LLM API calls made within a session (as opposed
-                # to the session count itself).
-                try:
-                    cursor.execute(
-                        'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
-                    )
-                except sqlite3.OperationalError:
-                    pass  # Column already exists
-                cursor.execute("UPDATE schema_version SET version = 8")
-            if current_version < 9:
-                # v9: preserve replayable Codex assistant message ids/phases so
-                # follow-up turns can rebuild Responses API message items instead
-                # of flattening everything to plain assistant text.
-                try:
-                    cursor.execute('ALTER TABLE messages ADD COLUMN "codex_message_items" TEXT')
-                except sqlite3.OperationalError:
-                    pass  # Column already exists
-                cursor.execute("UPDATE schema_version SET version = 9")
+                        pass
+                # Recreate virtual tables + triggers with the new inline-mode
+                # schema that indexes content || tool_name || tool_calls.
+                cursor.executescript(FTS_SQL)
+                cursor.executescript(FTS_TRIGRAM_SQL)
+                # Backfill both indexes from every existing messages row.
+                cursor.execute(
+                    "INSERT INTO messages_fts(rowid, content) "
+                    "SELECT id, "
+                    "COALESCE(content, '') || ' ' || "
+                    "COALESCE(tool_name, '') || ' ' || "
+                    "COALESCE(tool_calls, '') "
+                    "FROM messages"
+                )
+                cursor.execute(
+                    "INSERT INTO messages_fts_trigram(rowid, content) "
+                    "SELECT id, "
+                    "COALESCE(content, '') || ' ' || "
+                    "COALESCE(tool_name, '') || ' ' || "
+                    "COALESCE(tool_calls, '') "
+                    "FROM messages"
+                )
+            if current_version < SCHEMA_VERSION:
+                cursor.execute(
+                    "UPDATE schema_version SET version = ?",
+                    (SCHEMA_VERSION,),
+                )

-        # Unique title index — always ensure it exists (safe to run after migrations
-        # since the title column is guaranteed to exist at this point)
+        # Unique title index — always ensure it exists
        try:
            cursor.execute(
                "CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique "
@@ -383,6 +502,12 @@ class SessionDB:
        except sqlite3.OperationalError:
            cursor.executescript(FTS_SQL)

+        # Trigram FTS5 for CJK/substring search
+        try:
+            cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
+        except sqlite3.OperationalError:
+            cursor.executescript(FTS_TRIGRAM_SQL)
+
        self._conn.commit()

    # =========================================================================
@@ -1047,6 +1172,85 @@ class SessionDB:

        return self._execute_write(_do)

+    def replace_messages(self, session_id: str, messages: List[Dict[str, Any]]) -> None:
+        """Atomically replace every message for a session.
+
+        Used by transcript-rewrite flows such as /retry, /undo, and /compress.
+        The delete + reinsert sequence must commit as one transaction so a
+        mid-rewrite failure does not leave SQLite with a partial transcript.
+        """
+
+        def _do(conn):
+            conn.execute(
+                "DELETE FROM messages WHERE session_id = ?", (session_id,)
+            )
+            conn.execute(
+                "UPDATE sessions SET message_count = 0, tool_call_count = 0 WHERE id = ?",
+                (session_id,),
+            )
+
+            now_ts = time.time()
+            total_messages = 0
+            total_tool_calls = 0
+            for msg in messages:
+                role = msg.get("role", "unknown")
+                tool_calls = msg.get("tool_calls")
+                reasoning_details = msg.get("reasoning_details") if role == "assistant" else None
+                codex_reasoning_items = (
+                    msg.get("codex_reasoning_items") if role == "assistant" else None
+                )
+                codex_message_items = (
+                    msg.get("codex_message_items") if role == "assistant" else None
+                )
+
+                reasoning_details_json = (
+                    json.dumps(reasoning_details) if reasoning_details else None
+                )
+                codex_items_json = (
+                    json.dumps(codex_reasoning_items) if codex_reasoning_items else None
+                )
+                codex_message_items_json = (
+                    json.dumps(codex_message_items) if codex_message_items else None
+                )
+                tool_calls_json = json.dumps(tool_calls) if tool_calls else None
+
+                conn.execute(
+                    """INSERT INTO messages (session_id, role, content, tool_call_id,
+                       tool_calls, tool_name, timestamp, token_count, finish_reason,
+                       reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
+                       codex_message_items)
+                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                    (
+                        session_id,
+                        role,
+                        msg.get("content"),
+                        msg.get("tool_call_id"),
+                        tool_calls_json,
+                        msg.get("tool_name"),
+                        now_ts,
+                        msg.get("token_count"),
+                        msg.get("finish_reason"),
+                        msg.get("reasoning") if role == "assistant" else None,
+                        msg.get("reasoning_content") if role == "assistant" else None,
+                        reasoning_details_json,
+                        codex_items_json,
+                        codex_message_items_json,
+                    ),
+                )
+                total_messages += 1
+                if tool_calls is not None:
+                    total_tool_calls += (
+                        len(tool_calls) if isinstance(tool_calls, list) else 1
+                    )
+                now_ts += 1e-6
+
+            conn.execute(
+                "UPDATE sessions SET message_count = ?, tool_call_count = ? WHERE id = ?",
+                (total_messages, total_tool_calls, session_id),
+            )
+
+        self._execute_write(_do)
+
    def get_messages(self, session_id: str) -> List[Dict[str, Any]]:
        """Load all messages for a session, ordered by timestamp."""
        with self._lock:
@@ -1155,7 +1359,10 @@ class SessionDB:

        messages = []
        for row in rows:
-            msg = {"role": row["role"], "content": row["content"]}
+            content = row["content"]
+            if row["role"] in {"user", "assistant"} and isinstance(content, str):
+                content = sanitize_context(content).strip()
+            msg = {"role": row["role"], "content": content}
            if row["tool_call_id"]:
                msg["tool_call_id"] = row["tool_call_id"]
            if row["tool_name"]:
@@ -1280,9 +1487,9 @@ class SessionDB:
        # quotes.  FTS5's tokenizer splits on dots and hyphens, turning
        # ``chat-send`` into ``chat AND send`` and ``P2.2`` into ``p2 AND 2``.
        # Quoting preserves phrase semantics.  A single pass avoids the
-        # double-quoting bug that would occur if dotted and hyphenated
+        # double-quoting bug that would occur if dotted, hyphenated and underscored
        # patterns were applied sequentially (e.g. ``my-app.config``).
-        sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)
+        sanitized = re.sub(r"\b(\w+(?:[._-]\w+)+)\b", r'"\1"', sanitized)

        # Step 6: Restore preserved quoted phrases
        for i, quoted in enumerate(_quoted_parts):
@@ -1291,6 +1498,16 @@ class SessionDB:
        return sanitized.strip()


+    @staticmethod
+    def _is_cjk_codepoint(cp: int) -> bool:
+        """Return True when code point *cp* falls inside one of the CJK ranges.
+
+        Args:
+            cp: Unicode code point (e.g. the result of ``ord(ch)``).
+
+        Returns:
+            True if *cp* lies in any of the inclusive ranges listed below.
+        """
+        cjk_ranges = (
+            (0x4E00, 0x9FFF),    # CJK Unified Ideographs
+            (0x3400, 0x4DBF),    # CJK Extension A
+            (0x20000, 0x2A6DF),  # CJK Extension B
+            (0x3000, 0x303F),    # CJK Symbols
+            (0x3040, 0x309F),    # Hiragana
+            (0x30A0, 0x30FF),    # Katakana
+            (0xAC00, 0xD7AF),    # Hangul Syllables
+        )
+        return any(low <= cp <= high for low, high in cjk_ranges)
+
    @staticmethod
    def _contains_cjk(text: str) -> bool:
        """Check if text contains CJK (Chinese, Japanese, Korean) characters."""
@@ -1306,6 +1523,11 @@ class SessionDB:
                return True
        return False

+    @classmethod
+    def _count_cjk(cls, text: str) -> int:
+        """Return how many characters of *text* are CJK per ``_is_cjk_codepoint``."""
+        total = 0
+        for char in text:
+            if cls._is_cjk_codepoint(ord(char)):
+                total += 1
+        return total
+
    def search_messages(
        self,
        query: str,
@@ -1376,52 +1598,113 @@ class SessionDB:
            LIMIT ? OFFSET ?
        """

-        with self._lock:
-            try:
-                cursor = self._conn.execute(sql, params)
-            except sqlite3.OperationalError:
-                # FTS5 query syntax error despite sanitization — return empty
-                # unless query contains CJK (fall back to LIKE below)
-                if not self._contains_cjk(query):
-                    return []
-                matches = []
-            else:
-                matches = [dict(row) for row in cursor.fetchall()]
-
-        # LIKE fallback for CJK queries: FTS5 default tokenizer splits CJK
-        # characters individually, causing multi-character queries to fail.
-        if not matches and self._contains_cjk(query):
+        # CJK queries bypass the unicode61 FTS5 table.  The default tokenizer
+        # splits CJK characters into individual tokens, so "大别山项目" becomes
+        # "大 AND 别 AND 山 AND 项 AND 目" — producing false positives and
+        # missing exact phrase matches.
+        #
+        # For queries with 3+ CJK characters, we use the trigram FTS5 table
+        # (indexed substring matching with ranking and snippets).  For shorter
+        # CJK queries (1-2 chars), trigram can't match (it needs ≥9 UTF-8
+        # bytes = 3 CJK chars), so we fall back to LIKE.
+        is_cjk = self._contains_cjk(query)
+        if is_cjk:
            raw_query = query.strip('"').strip()
-            like_where = ["m.content LIKE ?"]
-            like_params: list = [f"%{raw_query}%"]
-            if source_filter is not None:
-                like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
-                like_params.extend(source_filter)
-            if exclude_sources is not None:
-                like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
-                like_params.extend(exclude_sources)
-            if role_filter:
-                like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
-                like_params.extend(role_filter)
-            like_sql = f"""
-                SELECT m.id, m.session_id, m.role,
-                       substr(m.content,
-                              max(1, instr(m.content, ?) - 40),
-                              120) AS snippet,
-                       m.content, m.timestamp, m.tool_name,
-                       s.source, s.model, s.started_at AS session_started
-                FROM messages m
-                JOIN sessions s ON s.id = m.session_id
-                WHERE {' AND '.join(like_where)}
-                ORDER BY m.timestamp DESC
-                LIMIT ? OFFSET ?
-            """
-            like_params.extend([limit, offset])
-            # instr() parameter goes first in the bound list
-            like_params = [raw_query] + like_params
+            cjk_count = self._count_cjk(raw_query)
+
+            if cjk_count >= 3:
+                # Trigram FTS5 path — quote each non-operator token to handle
+                # FTS5 special chars (%, *, etc.) while preserving boolean
+                # operators (AND, OR, NOT) for multi-term queries.
+                tokens = raw_query.split()
+                parts = []
+                for tok in tokens:
+                    if tok.upper() in ("AND", "OR", "NOT"):
+                        parts.append(tok)
+                    else:
+                        parts.append('"' + tok.replace('"', '""') + '"')
+                trigram_query = " ".join(parts)
+                tri_where = ["messages_fts_trigram MATCH ?"]
+                tri_params: list = [trigram_query]
+                if source_filter is not None:
+                    tri_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
+                    tri_params.extend(source_filter)
+                if exclude_sources is not None:
+                    tri_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
+                    tri_params.extend(exclude_sources)
+                if role_filter:
+                    tri_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
+                    tri_params.extend(role_filter)
+                tri_sql = f"""
+                    SELECT
+                        m.id,
+                        m.session_id,
+                        m.role,
+                        snippet(messages_fts_trigram, 0, '>>>', '<<<', '...', 40) AS snippet,
+                        m.content,
+                        m.timestamp,
+                        m.tool_name,
+                        s.source,
+                        s.model,
+                        s.started_at AS session_started
+                    FROM messages_fts_trigram
+                    JOIN messages m ON m.id = messages_fts_trigram.rowid
+                    JOIN sessions s ON s.id = m.session_id
+                    WHERE {' AND '.join(tri_where)}
+                    ORDER BY rank
+                    LIMIT ? OFFSET ?
+                """
+                tri_params.extend([limit, offset])
+                with self._lock:
+                    try:
+                        tri_cursor = self._conn.execute(tri_sql, tri_params)
+                    except sqlite3.OperationalError:
+                        matches = []
+                    else:
+                        matches = [dict(row) for row in tri_cursor.fetchall()]
+            else:
+                # Short CJK query (1-2 chars) — trigram needs ≥3 CJK chars.
+                # Fall back to LIKE substring search.
+                escaped = raw_query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+                like_where = ["(m.content LIKE ? ESCAPE '\\' OR m.tool_name LIKE ? ESCAPE '\\' OR m.tool_calls LIKE ? ESCAPE '\\')"]
+                like_params: list = [f"%{escaped}%", f"%{escaped}%", f"%{escaped}%"]
+                if source_filter is not None:
+                    like_where.append(f"s.source IN ({','.join('?' for _ in source_filter)})")
+                    like_params.extend(source_filter)
+                if exclude_sources is not None:
+                    like_where.append(f"s.source NOT IN ({','.join('?' for _ in exclude_sources)})")
+                    like_params.extend(exclude_sources)
+                if role_filter:
+                    like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
+                    like_params.extend(role_filter)
+                like_sql = f"""
+                    SELECT m.id, m.session_id, m.role,
+                           substr(m.content,
+                                  max(1, instr(m.content, ?) - 40),
+                                  120) AS snippet,
+                           m.content, m.timestamp, m.tool_name,
+                           s.source, s.model, s.started_at AS session_started
+                    FROM messages m
+                    JOIN sessions s ON s.id = m.session_id
+                    WHERE {' AND '.join(like_where)}
+                    ORDER BY m.timestamp DESC
+                    LIMIT ? OFFSET ?
+                """
+                like_params.extend([limit, offset])
+                # instr() parameter goes first in the bound list
+                like_params = [raw_query] + like_params
+                with self._lock:
+                    like_cursor = self._conn.execute(like_sql, like_params)
+                    matches = [dict(row) for row in like_cursor.fetchall()]
+        else:
            with self._lock:
-                like_cursor = self._conn.execute(like_sql, like_params)
-                matches = [dict(row) for row in like_cursor.fetchall()]
+                try:
+                    cursor = self._conn.execute(sql, params)
+                except sqlite3.OperationalError:
+                    # FTS5 query syntax error despite sanitization — return empty
+                    return []
+                else:
+                    matches = [dict(row) for row in cursor.fetchall()]

        # Add surrounding context (1 message before + after each match).
        # Done outside the lock so we don't hold it across N sequential queries.
@@ -1481,16 +1764,32 @@ class SessionDB:
        limit: int = 20,
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
-        """List sessions, optionally filtered by source."""
+        """List sessions, optionally filtered by source.
+
+        Returns rows enriched with a computed ``last_active`` column (latest
+        message timestamp for the session, falling back to ``started_at``),
+        ordered by most-recently-used first.
+        """
+        select_with_last_active = (
+            "SELECT s.*, COALESCE(m.last_active, s.started_at) AS last_active "
+            "FROM sessions s "
+            "LEFT JOIN ("
+            "SELECT session_id, MAX(timestamp) AS last_active "
+            "FROM messages GROUP BY session_id"
+            ") m ON m.session_id = s.id "
+        )
        with self._lock:
            if source:
                cursor = self._conn.execute(
-                    "SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
+                    f"{select_with_last_active}"
+                    "WHERE s.source = ? "
+                    "ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
                    (source, limit, offset),
                )
            else:
                cursor = self._conn.execute(
-                    "SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
+                    f"{select_with_last_active}"
+                    "ORDER BY last_active DESC, s.started_at DESC, s.id DESC LIMIT ? OFFSET ?",
                    (limit, offset),
                )
            return [dict(row) for row in cursor.fetchall()]
@@ -138,12 +138,18 @@ def _run_async(coro):

 discover_builtin_tools()

-# MCP tool discovery (external MCP servers from config)
-try:
-    from tools.mcp_tool import discover_mcp_tools
-    discover_mcp_tools()
-except Exception as e:
-    logger.debug("MCP tool discovery failed: %s", e)
+# MCP tool discovery (external MCP servers from config) used to run here as
+# a module-level side effect.  It was removed because discover_mcp_tools()
+# internally uses a blocking future.result(timeout=120) wait, and the
+# gateway lazy-imports this module from inside the asyncio event loop on
+# the first user message — freezing Discord/Telegram heartbeats for up to
+# 120s whenever any configured MCP server was slow or unreachable (#16856).
+#
+# Each entry point now runs discovery explicitly at its own startup:
+#   - gateway/run.py            -> start_gateway() uses run_in_executor
+#   - cli.py, hermes_cli/*      -> inline on startup (no event loop)
+#   - tui_gateway/server.py     -> inline on startup (no event loop)
+#   - acp_adapter/server.py     -> asyncio.to_thread on session init

 # Plugin tool discovery (user/project/pip plugins)
 try:
@@ -200,6 +206,27 @@ _LEGACY_TOOLSET_MAP = {
 # get_tool_definitions  (the main schema provider)
 # =============================================================================

+# Module-level memoization for get_tool_definitions(). Keyed on
+# (frozenset(enabled_toolsets), frozenset(disabled_toolsets),
+# registry._generation, config-file (mtime_ns, size) fingerprint).
+# Hot callers (gateway runner, AIAgent.__init__) invoke this on every turn
+# with quiet_mode=True; caching avoids ~7 ms of registry walking + schema
+# filtering + check_fn probing per call. Only active when quiet_mode=True
+# because quiet_mode=False has stdout side effects (tool-selection prints).
+#
+# Invalidation happens transparently via the registry's _generation counter,
+# which bumps on register() / deregister() / register_toolset_alias(). The
+# inner check_fn TTL cache in registry.py handles environment drift (Docker
+# daemon start/stop, env var changes, etc.) on a 30 s horizon.
+_tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {}
+
+
+def _clear_tool_defs_cache() -> None:
+    """Empty the memo table used by get_tool_definitions().
+
+    Intended for callers whose dynamic schema dependencies changed (e.g.
+    discord capability cache reset, execute_code sandbox reconfigured).
+    The dict is cleared in place, so existing references to it stay valid.
+    """
+    _tool_defs_cache.clear()
+
+
 def get_tool_definitions(
    enabled_toolsets: List[str] = None,
    disabled_toolsets: List[str] = None,
@@ -218,6 +245,50 @@ def get_tool_definitions(
    Returns:
        Filtered list of OpenAI-format tool definitions.
    """
+    # Declared up front so the cache-hit branch can refresh the module-level
+    # record of resolved tool names.
+    global _last_resolved_tool_names
+
+    # Fast path: memoized result when the caller doesn't need stdout prints.
+    # The cache key captures every argument-level input; the registry
+    # generation captures registry mutations (MCP refresh, plugin load).
+    # check_fn results are TTL-cached one level down, inside
+    # registry.get_definitions. The config-mtime fingerprint below captures
+    # user-visible config edits that affect dynamic schemas (execute_code
+    # mode, discord action allowlist, etc.) without needing an explicit
+    # invalidate hook on every config-writer.
+    if quiet_mode:
+        try:
+            from hermes_cli.config import get_config_path
+            cfg_path = get_config_path()
+            cfg_stat = cfg_path.stat()
+            cfg_fp = (cfg_stat.st_mtime_ns, cfg_stat.st_size)
+        except (OSError, ImportError):
+            # Config file missing/unreadable (FileNotFoundError is an
+            # OSError) or hermes_cli unavailable: key on "no fingerprint".
+            cfg_fp = None
+        cache_key = (
+            frozenset(enabled_toolsets) if enabled_toolsets is not None else None,
+            frozenset(disabled_toolsets) if disabled_toolsets else None,
+            registry._generation,
+            cfg_fp,
+        )
+        cached = _tool_defs_cache.get(cache_key)
+        if cached is not None:
+            # Update _last_resolved_tool_names so downstream callers see
+            # consistent state even on a cache hit.
+            _last_resolved_tool_names = [t["function"]["name"] for t in cached]
+            # Return a shallow copy of the list but share the dict references —
+            # schemas are treated as read-only by all known callers.
+            return list(cached)
+
+    result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)
+    if quiet_mode:
+        # Drop entries recorded under a different registry generation so a
+        # long-running process does not accumulate dead schema lists.
+        # NOTE(review): assumes registry._generation only moves forward, so
+        # such entries can never be hit again — confirm against registry.py.
+        for stale_key in [k for k in list(_tool_defs_cache) if k[2] != cache_key[2]]:
+            _tool_defs_cache.pop(stale_key, None)
+        # Store a private copy: the caller receives ``result`` itself, and
+        # appending to / removing from that list must not alter what later
+        # cache hits return (hits already hand out copies for the same reason).
+        _tool_defs_cache[cache_key] = list(result)
+    return result
+
+
+def _compute_tool_definitions(
+    enabled_toolsets: List[str] = None,
+    disabled_toolsets: List[str] = None,
+    quiet_mode: bool = False,
+) -> List[Dict[str, Any]]:
+    """Uncached implementation of :func:`get_tool_definitions`."""
    # Determine which tool names the caller wants
    tools_to_include: set = set()

@@ -409,24 +480,27 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
        if not prop_schema:
            continue
        expected = prop_schema.get("type")
-        if not expected:
+        if not expected and not _schema_allows_null(prop_schema):
            continue
-        coerced = _coerce_value(value, expected)
+        coerced = _coerce_value(value, expected, schema=prop_schema)
        if coerced is not value:
            args[key] = coerced

    return args


-def _coerce_value(value: str, expected_type):
+def _coerce_value(value: str, expected_type, schema: dict | None = None):
    """Attempt to coerce a string *value* to *expected_type*.

    Returns the original string when coercion is not applicable or fails.
    """
+    if _schema_allows_null(schema) and value.strip().lower() == "null":
+        return None
+
    if isinstance(expected_type, list):
        # Union type — try each in order, return first successful coercion
        for t in expected_type:
-            result = _coerce_value(value, t)
+            result = _coerce_value(value, t, schema=schema)
            if result is not value:
                return result
        return value
@@ -439,9 +513,35 @@ def _coerce_value(value: str, expected_type):
        return _coerce_json(value, list)
    if expected_type == "object":
        return _coerce_json(value, dict)
+    if expected_type == "null" and value.strip().lower() == "null":
+        return None
    return value


+def _schema_allows_null(schema: dict | None) -> bool:
+    """Return True when a JSON Schema fragment explicitly permits null.
+
+    Null is considered permitted when any of the following holds:
+
+    * ``type`` is ``"null"`` or a list containing ``"null"``;
+    * ``nullable`` is literally ``True`` (OpenAPI-style);
+    * any ``anyOf`` / ``oneOf`` variant itself permits null by these same
+      rules (checked recursively, so a variant such as
+      ``{"type": ["string", "null"]}`` or a nested union is recognised).
+
+    Args:
+        schema: A schema fragment; anything that is not a dict yields False.
+
+    Returns:
+        True if the fragment permits null, otherwise False.
+    """
+    if not isinstance(schema, dict):
+        return False
+
+    schema_type = schema.get("type")
+    if schema_type == "null":
+        return True
+    if isinstance(schema_type, list) and "null" in schema_type:
+        return True
+    if schema.get("nullable") is True:
+        return True
+
+    for union_key in ("anyOf", "oneOf"):
+        variants = schema.get(union_key)
+        if not isinstance(variants, list):
+            continue
+        # Recurse instead of only matching ``type == "null"`` so list-typed,
+        # nullable, or nested-union variants are honoured; non-dict variants
+        # fall out through the isinstance guard above.
+        if any(_schema_allows_null(variant) for variant in variants):
+            return True
+
+    return False
+
+
 def _coerce_json(value: str, expected_python_type: type):
    """Parse *value* as JSON when the schema expects an array or object.

@@ -7,9 +7,7 @@
  perSystem = { pkgs, system, lib, ... }:
    let
      hermes-agent = inputs.self.packages.${system}.default;
-      hermesVenv = pkgs.callPackage ./python.nix {
-        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
-      };
+      hermesVenv = hermes-agent.hermesVenv;

      configMergeScript = pkgs.callPackage ./configMergeScript.nix { };

@@ -193,6 +191,35 @@ json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
          echo "ok" > $out/result
        '';

+        # Verify extraPythonPackages PYTHONPATH injection
+        extra-python-packages = let
+          testPkg = pkgs.python312Packages.pyfiglet;
+          hermesWithExtra = hermes-agent.override {
+            extraPythonPackages = [ testPkg ];
+          };
+        in pkgs.runCommand "hermes-extra-python-packages" { } ''
+          set -e
+          echo "=== Checking extraPythonPackages PYTHONPATH injection ==="
+
+          grep -q "PYTHONPATH" ${hermesWithExtra}/bin/hermes || \
+            (echo "FAIL: PYTHONPATH not in wrapper"; exit 1)
+          echo "PASS: PYTHONPATH present in wrapper"
+
+          grep -q "${testPkg}" ${hermesWithExtra}/bin/hermes || \
+            (echo "FAIL: test package path not in PYTHONPATH"; exit 1)
+          echo "PASS: test package path found in wrapper"
+
+          echo "=== Checking base package has no PYTHONPATH ==="
+          if grep -q "PYTHONPATH" ${hermes-agent}/bin/hermes; then
+            echo "FAIL: base package should not have PYTHONPATH"; exit 1
+          fi
+          echo "PASS: base package clean"
+
+          echo "=== All extraPythonPackages checks passed ==="
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
        # ── Config merge + round-trip test ────────────────────────────────
        # Tests the merge script (Nix activation behavior) across 7
        # scenarios, then verifies Python's load_config() reads correctly.
@@ -0,0 +1,186 @@
+# nix/hermes-agent.nix — Overridable Hermes Agent package
+#
+# callPackage auto-wires nixpkgs args; flake inputs are passed explicitly.
+# Users override via: pkgs.hermes-agent.override { extraPythonPackages = [...]; }
+{
+  lib,
+  stdenv,
+  makeWrapper,
+  callPackage,
+  python312,
+  nodejs_22,
+  ripgrep,
+  git,
+  openssh,
+  ffmpeg,
+  tirith,
+  # Flake inputs — passed explicitly by packages.nix and overlays.nix
+  uv2nix,
+  pyproject-nix,
+  pyproject-build-systems,
+  npm-lockfile-fix,
+  # Overridable parameters
+  extraPythonPackages ? [ ],
+}:
+let
+  hermesVenv = callPackage ./python.nix {
+    inherit uv2nix pyproject-nix pyproject-build-systems;
+  };
+
+  hermesNpmLib = callPackage ./lib.nix {
+    inherit npm-lockfile-fix;
+  };
+
+  hermesTui = callPackage ./tui.nix {
+    inherit hermesNpmLib;
+  };
+
+  hermesWeb = callPackage ./web.nix {
+    inherit hermesNpmLib;
+  };
+
+  bundledSkills = lib.cleanSourceWith {
+    src = ../skills;
+    filter = path: _type: !(lib.hasInfix "/index-cache/" path);
+  };
+
+  runtimeDeps = [
+    nodejs_22
+    ripgrep
+    git
+    openssh
+    ffmpeg
+    tirith
+  ];
+
+  runtimePath = lib.makeBinPath runtimeDeps;
+
+  sitePackagesPath = python312.sitePackages;
+
+  # Walk propagatedBuildInputs to include transitive Python deps in PYTHONPATH.
+  # Without this, a plugin listing e.g. requests as a dep would fail at runtime
+  # if requests isn't already in the sealed uv2nix venv.
+  allExtraPythonPackages = python312.pkgs.requiredPythonModules extraPythonPackages;
+
+  pythonPath = lib.makeSearchPath sitePackagesPath allExtraPythonPackages;
+
+  pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
+  uvLockHash =
+    if builtins.pathExists ../uv.lock then
+      builtins.hashString "sha256" (builtins.readFile ../uv.lock)
+    else
+      "none";
+in
+stdenv.mkDerivation {
+  pname = "hermes-agent";
+  version = (builtins.fromTOML (builtins.readFile ../pyproject.toml)).project.version;
+
+  dontUnpack = true;
+  dontBuild = true;
+  nativeBuildInputs = [ makeWrapper ];
+
+  installPhase = ''
+    runHook preInstall
+
+    mkdir -p $out/share/hermes-agent $out/bin
+    cp -r ${bundledSkills} $out/share/hermes-agent/skills
+    cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
+
+    mkdir -p $out/ui-tui
+    cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
+
+    ${lib.concatMapStringsSep "\n"
+      (name: ''
+        makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
+          --suffix PATH : "${runtimePath}" \
+          --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
+          --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
+          --set HERMES_TUI_DIR $out/ui-tui \
+          --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
+          --set HERMES_NODE ${nodejs_22}/bin/node \
+          ${lib.optionalString (extraPythonPackages != [ ]) ''--suffix PYTHONPATH : "${pythonPath}"''}
+      '')
+      [
+        "hermes"
+        "hermes-agent"
+        "hermes-acp"
+      ]
+    }
+
+    ${lib.optionalString (extraPythonPackages != [ ]) ''
+      echo "=== Checking for plugin/core package collisions ==="
+      ${hermesVenv}/bin/python3 -c "
+import pathlib, sys, re
+
+def canonical(name):
+    return re.sub(r'[-_.]+', '-', name).lower()
+
+# Collect core venv package names
+core = set()
+venv_sp = pathlib.Path('${hermesVenv}/${sitePackagesPath}')
+for di in venv_sp.glob('*.dist-info'):
+    meta = di / 'METADATA'
+    if meta.exists():
+        for line in meta.read_text().splitlines():
+            if line.startswith('Name:'):
+                core.add(canonical(line.split(':', 1)[1].strip()))
+                break
+
+# Check each extra package for collisions
+extras_dirs = [${lib.concatMapStringsSep ", " (p: "'${toString p}'") allExtraPythonPackages}]
+for edir in extras_dirs:
+    sp = pathlib.Path(edir) / '${sitePackagesPath}'
+    if not sp.exists():
+        continue
+    for di in sp.glob('*.dist-info'):
+        meta = di / 'METADATA'
+        if not meta.exists():
+            continue
+        for line in meta.read_text().splitlines():
+            if line.startswith('Name:'):
+                pkg = canonical(line.split(':', 1)[1].strip())
+                if pkg in core:
+                    print(f'ERROR: plugin package \"{pkg}\" collides with a package in hermes sealed venv', file=sys.stderr)
+                    print(f'  from: {di}', file=sys.stderr)
+                    print(f'  Remove this dependency from extraPythonPackages.', file=sys.stderr)
+                    sys.exit(1)
+                break
+
+print('No collisions found.')
+      "
+      echo "=== No collisions ==="
+    ''}
+
+    runHook postInstall
+  '';
+
+  # Extra attributes exposed on the resulting derivation: the component
+  # derivations built in the let-block above plus a shell snippet for dev shells.
+  passthru = {
+    inherit hermesTui hermesWeb hermesNpmLib hermesVenv;
+
+    # Shell snippet that keeps a local .venv in sync with the project's
+    # dependency declarations (presumably sourced by the dev shell — confirm
+    # against the devShell definition).  The stamp file stores
+    # "<sha256 of pyproject.toml>:<sha256 of uv.lock or 'none'>":
+    #   - stamp missing or different: create .venv (failure ignored),
+    #     activate it, install the project editable with the "all" extra,
+    #     install ./mini-swe-agent and ./tinker-atropos when those
+    #     directories exist (failures ignored), then rewrite the stamp;
+    #   - stamp matches: just activate .venv and export HERMES_PYTHON.
+    # NOTE(review): HERMES_PYTHON is exported only on the stamp-matches path,
+    # so the first shell entry after a dependency change leaves it unset —
+    # confirm this is intended.
+    devShellHook = ''
+      STAMP=".nix-stamps/hermes-agent"
+      STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
+      if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+        echo "hermes-agent: installing Python dependencies..."
+        uv venv .venv --python ${python312}/bin/python3 2>/dev/null || true
+        source .venv/bin/activate
+        uv pip install -e ".[all]"
+        [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
+        [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
+        mkdir -p .nix-stamps
+        echo "$STAMP_VALUE" > "$STAMP"
+      else
+        source .venv/bin/activate
+        export HERMES_PYTHON=${hermesVenv}/bin/python3
+      fi
+    '';
+  };
+
+  meta = with lib; {
+    description = "AI agent with advanced tool-calling capabilities";
+    homepage = "https://github.com/NousResearch/hermes-agent";
+    mainProgram = "hermes";
+    license = licenses.mit;
+    platforms = platforms.unix;
+  };
+}
@@ -165,6 +165,17 @@

        NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
        if [ -z "$NEW_HASH" ]; then
+          # Magic-Nix-Cache occasionally returns HTTP 418 / cache-throttled
+          # mid-run; nix then prints "outputs … not valid, so checking is
+          # not possible" without a `got:` line.  That's an infrastructure
+          # blip, not a stale lockfile — warn + skip rather than failing
+          # the lint.  A real hash mismatch would still surface in the
+          # primary `.#$ATTR` build, which is a separate CI job.
+          if echo "$OUTPUT" | grep -qE "throttled|HTTP error 418|substituter .* is disabled|some outputs of .* are not valid"; then
+            echo "    skipped (transient cache failure — see primary nix build for real status)" >&2
+            echo "$OUTPUT" | tail -8 >&2
+            continue
+          fi
          echo "    build failed with no hash mismatch:" >&2
          echo "$OUTPUT" | tail -40 >&2
          exit 1
@@ -187,7 +198,10 @@

        if [ "$MODE" = "--apply" ]; then
          sed -i "s|hash = \"sha256-[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
-          nix build ".#$ATTR.npmDeps" --no-link --print-build-logs
+          if ! nix build ".#$ATTR.npmDeps" --no-link --print-build-logs; then
+            echo "    verification build failed after hash update" >&2
+            exit 1
+          fi
          FIXED=1
          echo "    fixed"
        fi
@@ -28,6 +28,8 @@

  let
    cfg = config.services.hermes-agent;
+    effectivePackage = if cfg.extraPythonPackages == [ ] then cfg.package
+      else cfg.package.override { inherit (cfg) extraPythonPackages; };
    hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;

    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
@@ -453,7 +455,61 @@
      extraPackages = mkOption {
        type = types.listOf types.package;
        default = [ ];
-        description = "Extra packages available on PATH.";
+        description = ''
+          Extra packages available to the agent — terminal commands, skills,
+          cron jobs, and the service process all see them.
+
+          Implemented via the hermes user's per-user profile
+          (`/etc/profiles/per-user/${cfg.user}/bin`), which NixOS includes
+          in PATH for login shells.  The packages are also added to the
+          systemd service PATH for direct process access.
+        '';
+      };
+
+      extraPlugins = mkOption {
+        type = types.listOf types.package;
+        default = [ ];
+        description = ''
+          Directory-based plugin packages to symlink into the hermes plugins
+          directory. Each package should contain a plugin.yaml and __init__.py
+          at its root. Hermes discovers these automatically on startup.
+        '';
+        example = literalExpression ''
+          [
+            (pkgs.fetchFromGitHub {
+              owner = "stephenschoettler";
+              repo = "hermes-lcm";
+              name = "hermes-lcm";
+              rev = "v0.7.0";
+              hash = "sha256-...";
+            })
+          ]
+        '';
+      };
+
+      extraPythonPackages = mkOption {
+        type = types.listOf types.package;
+        default = [ ];
+        description = ''
+          Python packages to add to PYTHONPATH for entry-point plugin discovery.
+          These are pip-packaged plugins that register via the
+          hermes_agent.plugins entry-point group. Each package must be built
+          with the same Python interpreter as hermes (python312).
+        '';
+        example = literalExpression ''
+          [
+            (pkgs.python312Packages.buildPythonPackage {
+              pname = "rtk-hermes";
+              version = "1.0.0";
+              src = pkgs.fetchFromGitHub {
+                owner = "ogallotti";
+                repo = "rtk-hermes";
+                rev = "main";
+                hash = "sha256-...";
+              };
+            })
+          ]
+        '';
      };

      restart = mkOption {
@@ -570,7 +626,7 @@
      # so interactive shells share state (sessions, skills, cron) with the
      # gateway service instead of creating a separate ~/.hermes/.
      (lib.mkIf cfg.addToSystemPackages {
-        environment.systemPackages = [ cfg.package ];
+        environment.systemPackages = [ effectivePackage ];
        environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes";
      })

@@ -581,7 +637,28 @@
        });
      })

+      # ── Assertions ─────────────────────────────────────────────────────
+      # Every extraPlugins entry is linked as nix-managed-<lib.getName pkg>
+      # with `ln -sfn`, so two entries sharing a name would silently replace
+      # each other's symlink.  Reject such configurations at evaluation time.
+      {
+        assertions = let
+          names = map lib.getName cfg.extraPlugins;
+        in [{
+          # Equal lengths before and after de-duplication means no repeats.
+          assertion = (lib.length names) == (lib.length (lib.unique names));
+          message = "services.hermes-agent.extraPlugins: duplicate plugin names detected: ${toString names}. If using fetchFromGitHub, set name = \"plugin-name\" to disambiguate.";
+        }];
+      }
+
      # ── Warnings ──────────────────────────────────────────────────────
+      # ── Per-user profile for extraPackages ───────────────────────────
+      # Wire extraPackages into the hermes user's per-user profile so the
+      # login-shell snapshot (which rebuilds PATH from NixOS profiles) sees
+      # them.  The systemd service PATH also includes them for direct access.
+      (lib.mkIf (cfg.extraPackages != []) {
+        # listOf options are merged by the NixOS module system — this appends to
+        # any packages the operator assigned to this user externally (e.g. when
+        # createUser = false and the user definition lives elsewhere in the config).
+        users.users.${cfg.user}.packages = cfg.extraPackages;
+      })
+
      (lib.mkIf (cfg.container.enable && !cfg.addToSystemPackages && cfg.container.hostUsers != []) {
        warnings = [
          ''
@@ -602,6 +679,7 @@
          "d ${cfg.stateDir}/.hermes/sessions 2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/.hermes/logs   2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/.hermes/memories 2770 ${cfg.user} ${cfg.group} - -"
+          "d ${cfg.stateDir}/.hermes/plugins 2770 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.stateDir}/home           0750 ${cfg.user} ${cfg.group} - -"
          "d ${cfg.workingDirectory}         2770 ${cfg.user} ${cfg.group} - -"
        ];
@@ -623,7 +701,7 @@
          find ${cfg.stateDir}/.hermes -maxdepth 1 \
            \( -name "*.db" -o -name "*.db-wal" -o -name "*.db-shm" -o -name "SOUL.md" \) \
            -exec chmod g+rw {} + 2>/dev/null || true
-          for _subdir in cron sessions logs memories; do
+          for _subdir in cron sessions logs memories plugins; do
            mkdir -p "${cfg.stateDir}/.hermes/$_subdir"
            chown ${cfg.user}:${cfg.group} "${cfg.stateDir}/.hermes/$_subdir"
            chmod 2770 "${cfg.stateDir}/.hermes/$_subdir"
@@ -732,6 +810,22 @@ HERMES_NIX_ENV_EOF
          ${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
            install -o ${cfg.user} -g ${cfg.group} -m 0640 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
          '') cfg.documents)}
+
+        # ── Declarative plugins ─────────────────────────────────────────
+        # Remove stale managed symlinks (plugins removed from config)
+        find ${cfg.stateDir}/.hermes/plugins -maxdepth 1 -type l -name 'nix-managed-*' -delete 2>/dev/null || true
+
+        ${lib.concatStringsSep "\n" (map (plugin:
+          let
+            name = lib.getName plugin;
+          in ''
+            if [ ! -f "${plugin}/plugin.yaml" ]; then
+              echo "ERROR: extraPlugins entry '${plugin}' has no plugin.yaml" >&2
+              exit 1
+            fi
+            ln -sfn ${plugin} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
+            chown -h ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/plugins/nix-managed-${name}
+          '') cfg.extraPlugins)}
        '';
      }

@@ -762,7 +856,7 @@ HERMES_NIX_ENV_EOF
            # reads them at Python startup — no systemd EnvironmentFile needed.

            ExecStart = lib.concatStringsSep " " ([
-              "${cfg.package}/bin/hermes"
+              "${effectivePackage}/bin/hermes"
              "gateway"
            ] ++ cfg.extraArgs);

@@ -785,7 +879,7 @@ HERMES_NIX_ENV_EOF
          };

          path = [
-            cfg.package
+            effectivePackage
            pkgs.bash
            pkgs.coreutils
            pkgs.git
@@ -810,11 +904,11 @@ HERMES_NIX_ENV_EOF

          preStart = ''
            # Stable symlinks — container references these, not store paths directly
-            ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
+            ln -sfn ${effectivePackage} ${cfg.stateDir}/current-package
            ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint

            # GC roots so nix-collect-garbage doesn't remove store paths in use
-            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
+            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${effectivePackage} 2>/dev/null || true
            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true

            # Check if container needs (re)creation
@@ -0,0 +1,10 @@
+# nix/overlays.nix — Expose pkgs.hermes-agent for external NixOS configs
+{ inputs, ... }:
+let
+  # Arguments hermes-agent.nix receives explicitly: the flake inputs uv2nix,
+  # pyproject-nix and pyproject-build-systems, plus the npm-lockfile-fix
+  # package selected for the host platform of the package set being overlaid.
+  hermesArgsFor = final: {
+    inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+    npm-lockfile-fix = inputs.npm-lockfile-fix.packages.${final.stdenv.hostPlatform.system}.default;
+  };
+in
+{
+  # The second overlay argument (the previous package set) is not needed.
+  flake.overlays.default = final: _prev: {
+    hermes-agent = final.callPackage ./hermes-agent.nix (hermesArgsFor final);
+  };
+}
@@ -4,120 +4,19 @@
  perSystem =
    { pkgs, inputs', ... }:
    let
-      hermesVenv = pkgs.callPackage ./python.nix {
+      hermesAgent = pkgs.callPackage ./hermes-agent.nix {
        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
-      };
-
-      hermesNpmLib = pkgs.callPackage ./lib.nix {
        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
      };
-
-      hermesTui = pkgs.callPackage ./tui.nix {
-        inherit hermesNpmLib;
-      };
-
-      # Import bundled skills, excluding runtime caches
-      bundledSkills = pkgs.lib.cleanSourceWith {
-        src = ../skills;
-        filter = path: _type: !(pkgs.lib.hasInfix "/index-cache/" path);
-      };
-
-      hermesWeb = pkgs.callPackage ./web.nix {
-        inherit hermesNpmLib;
-      };
-
-      runtimeDeps = with pkgs; [
-        nodejs_22
-        ripgrep
-        git
-        openssh
-        ffmpeg
-        tirith
-      ];
-
-      runtimePath = pkgs.lib.makeBinPath runtimeDeps;
-
-      # Lockfile hashes for dev shell stamps
-      pyprojectHash = builtins.hashString "sha256" (builtins.readFile ../pyproject.toml);
-      uvLockHash =
-        if builtins.pathExists ../uv.lock then
-          builtins.hashString "sha256" (builtins.readFile ../uv.lock)
-        else
-          "none";
    in
    {
      packages = {
-        default = pkgs.stdenv.mkDerivation {
-          pname = "hermes-agent";
-          version = (fromTOML (builtins.readFile ../pyproject.toml)).project.version;
+        default = hermesAgent;
+        tui = hermesAgent.hermesTui;
+        web = hermesAgent.hermesWeb;

-          dontUnpack = true;
-          dontBuild = true;
-          nativeBuildInputs = [ pkgs.makeWrapper ];
-
-          installPhase = ''
-            runHook preInstall
-
-            mkdir -p $out/share/hermes-agent $out/bin
-            cp -r ${bundledSkills} $out/share/hermes-agent/skills
-            cp -r ${hermesWeb} $out/share/hermes-agent/web_dist
-
-            # copy pre-built TUI (same layout as dev: ui-tui/dist/ + node_modules/)
-            mkdir -p $out/ui-tui
-            cp -r ${hermesTui}/lib/hermes-tui/* $out/ui-tui/
-
-            ${pkgs.lib.concatMapStringsSep "\n"
-              (name: ''
-                makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
-                  --suffix PATH : "${runtimePath}" \
-                  --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills \
-                  --set HERMES_WEB_DIST $out/share/hermes-agent/web_dist \
-                  --set HERMES_TUI_DIR $out/ui-tui \
-                  --set HERMES_PYTHON ${hermesVenv}/bin/python3 \
-                  --set HERMES_NODE ${pkgs.nodejs_22}/bin/node
-              '')
-              [
-                "hermes"
-                "hermes-agent"
-                "hermes-acp"
-              ]
-            }
-
-            runHook postInstall
-          '';
-
-          passthru.devShellHook = ''
-            STAMP=".nix-stamps/hermes-agent"
-            STAMP_VALUE="${pyprojectHash}:${uvLockHash}"
-            if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
-              echo "hermes-agent: installing Python dependencies..."
-              uv venv .venv --python ${pkgs.python312}/bin/python3 2>/dev/null || true
-              source .venv/bin/activate
-              uv pip install -e ".[all]"
-              [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true
-              [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true
-              mkdir -p .nix-stamps
-              echo "$STAMP_VALUE" > "$STAMP"
-            else
-              source .venv/bin/activate
-              export HERMES_PYTHON=${hermesVenv}/bin/python3
-            fi
-          '';
-
-          meta = with pkgs.lib; {
-            description = "AI agent with advanced tool-calling capabilities";
-            homepage = "https://github.com/NousResearch/hermes-agent";
-            mainProgram = "hermes";
-            license = licenses.mit;
-            platforms = platforms.unix;
-          };
-        };
-
-        tui = hermesTui;
-        web = hermesWeb;
-
-        fix-lockfiles = hermesNpmLib.mkFixLockfiles {
-          packages = [ hermesTui hermesWeb ];
+        fix-lockfiles = hermesAgent.hermesNpmLib.mkFixLockfiles {
+          packages = [ hermesAgent.hermesTui hermesAgent.hermesWeb ];
        };
      };
    };
@@ -7,6 +7,7 @@
  pyproject-nix,
  pyproject-build-systems,
  stdenv,
+  dependency-groups ? [ "all" ],
 }:
 let
  workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
@@ -96,5 +97,5 @@ let
      ]);
 in
 pythonSet.mkVirtualEnv "hermes-agent-env" {
-  hermes-agent = [ "all" ];
+  hermes-agent = dependency-groups;
 }
@@ -17,6 +17,7 @@ pkgs.buildNpmPackage (npm // {
  inherit src npmDeps version;

  doCheck = false;
+  npmFlags = [ "--legacy-peer-deps" ];

  installPhase = ''
    runHook preInstall
@@ -4,7 +4,7 @@ let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
+    hash = "sha256-+B2+Fe4djPzHHcUXRx+m0cuyaopAhW0PcHsMgYfV5VE=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
@@ -224,6 +224,24 @@ MIGRATION_PRESETS: Dict[str, set[str]] = {
 }


+# ───────────────────────────────────────────────────────────────────────
+# Item shape constants — kept stable for downstream consumers of report.json.
+# Inspired by OpenClaw's src/plugin-sdk/migration.ts so both sides speak the
+# same vocabulary.  Values intentionally match the strings already produced
+# by this script (migrated/archived/skipped/conflict/error) so the addition
+# is backward-compatible.
+# ───────────────────────────────────────────────────────────────────────
+STATUS_MIGRATED = "migrated"
+STATUS_ARCHIVED = "archived"
+STATUS_SKIPPED = "skipped"
+STATUS_CONFLICT = "conflict"
+STATUS_ERROR = "error"
+STATUS_PLANNED = "planned"
+
+REASON_TARGET_EXISTS = "Target exists and overwrite is disabled"
+REASON_BLOCKED_BY_APPLY_CONFLICT = "blocked by earlier apply conflict"
+
+
@dataclass
 class ItemResult:
    kind: str
@@ -232,6 +250,7 @@ class ItemResult:
    status: str
    reason: str = ""
    details: Dict[str, Any] = field(default_factory=dict)
+    sensitive: bool = False


 def parse_selection_values(values: Optional[Sequence[str]]) -> List[str]:
@@ -547,32 +566,128 @@ def relative_label(path: Path, root: Path) -> str:
        return str(path)


+# ───────────────────────────────────────────────────────────────────────
+# Secret redaction for migration reports.
+#
+# The report JSON persists to disk inside the migration output directory and
+# frequently ends up in bug reports or support channels.  Anything that looks
+# like a credential — by key name or by value shape — is replaced with
+# "[redacted]" before the report is written.
+#
+# Modelled on OpenClaw's src/plugin-sdk/migration.ts so both migration tools
+# redact consistently.  Pure function — safe to call on any plain-data dict.
+# ───────────────────────────────────────────────────────────────────────
+REDACTED_MIGRATION_VALUE = "[redacted]"
+
+# Substrings searched for in a normalized key name (lower-cased, with every
+# character other than a-z / 0-9 removed).  A key containing any of them has
+# its whole value replaced by REDACTED_MIGRATION_VALUE.
+_SECRET_KEY_MARKERS = (
+    "accesstoken",
+    "apikey",
+    "authorization",
+    "bearertoken",
+    "clientsecret",
+    "cookie",
+    "credential",
+    "password",
+    "privatekey",
+    "refreshtoken",
+    "secret",
+)
+
+# Value-shape patterns.  Every match inside a string is substituted with
+# REDACTED_MIGRATION_VALUE; the surrounding text is left intact.
+_SECRET_VALUE_PATTERNS = (
+    re.compile(r"\bBearer\s+[A-Za-z0-9._~+/=\-]+"),
+    re.compile(r"\bsk-[A-Za-z0-9_\-]{8,}\b"),
+    re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{16,}\b"),
+    re.compile(r"\bxox[abprs]-[A-Za-z0-9\-]{8,}\b"),
+    re.compile(r"\bAIza[0-9A-Za-z_\-]{12,}\b"),
+)
+
+
+def _normalize_secret_key(key: str) -> str:
+    """Lower-case ``key`` and strip every character outside ``a-z`` / ``0-9``.
+
+    Spellings such as ``api_key``, ``API-KEY`` and ``apiKey`` therefore all
+    collapse to the same string (``apikey``).
+    """
+    lowered = key.lower()
+    squashed = re.sub(r"[^a-z0-9]", "", lowered)
+    return squashed
+
+
+def _is_secret_key(key: str) -> bool:
+    """Decide whether a mapping key names a credential.
+
+    The key is normalized first.  It counts as secret when the normalized
+    form ends in ``token``, equals ``auth`` or ``authorization``, or contains
+    any entry of ``_SECRET_KEY_MARKERS``.
+    """
+    flattened = _normalize_secret_key(key)
+    # ``endswith`` also covers the bare key "token".
+    if flattened.endswith("token") or flattened in ("auth", "authorization"):
+        return True
+    for marker in _SECRET_KEY_MARKERS:
+        if marker in flattened:
+            return True
+    return False
+
+
+def _redact_string(value: str) -> str:
+    """Replace every secret-shaped substring of ``value`` with the placeholder.
+
+    The patterns in ``_SECRET_VALUE_PATTERNS`` are applied one after another,
+    each to the output of the previous one.
+    """
+    scrubbed = value
+    for matcher in _SECRET_VALUE_PATTERNS:
+        scrubbed = matcher.sub(REDACTED_MIGRATION_VALUE, scrubbed)
+    return scrubbed
+
+
+def redact_migration_value(value: Any) -> Any:
+    """Return a redacted copy of ``value``; the input is not modified.
+
+    Dicts and lists are rebuilt recursively (tuples come back as lists).
+    A value stored under a key that looks like a credential name is replaced
+    wholesale; every other string is scanned for common token shapes
+    (sk-..., ghp_..., xox*-, AIza*, Bearer ...) and only the matching
+    substrings are replaced.  Any other leaf value is returned as-is.
+    """
+    visited: set = set()
+    return _redact_internal(value, visited)
+
+
+def _redact_internal(value: Any, seen: set) -> Any:
+    """Recursive worker behind ``redact_migration_value``.
+
+    Args:
+        value: Any plain-data value (str, list, tuple, dict, or a leaf).
+        seen: ``id()``s of the containers on the *current* recursion path.
+            Used only to break reference cycles.
+
+    Returns:
+        A redacted copy: strings are pattern-scrubbed, lists/tuples become
+        new lists, dicts become new dicts with secret-named keys replaced by
+        the placeholder.  A container that contains itself (directly or
+        indirectly) is replaced by the placeholder at the point of recursion.
+        Other values are returned unchanged.
+    """
+    if isinstance(value, str):
+        return _redact_string(value)
+    if not isinstance(value, (list, tuple, dict)):
+        return value
+
+    obj_id = id(value)
+    if obj_id in seen:
+        # Genuine cycle: this container is already being processed higher up.
+        return REDACTED_MIGRATION_VALUE
+    seen.add(obj_id)
+    try:
+        if isinstance(value, dict):
+            out: Dict[Any, Any] = {}
+            for key, entry in value.items():
+                if isinstance(key, str) and _is_secret_key(key):
+                    out[key] = REDACTED_MIGRATION_VALUE
+                else:
+                    out[key] = _redact_internal(entry, seen)
+            return out
+        return [_redact_internal(entry, seen) for entry in value]
+    finally:
+        # Release the id once this subtree is done so that a container which
+        # is merely referenced twice (shared, not cyclic) is redacted normally
+        # on its second visit instead of being mistaken for a cycle.
+        seen.discard(obj_id)
+
+
 def write_report(output_dir: Path, report: Dict[str, Any]) -> None:
    output_dir.mkdir(parents=True, exist_ok=True)
+    # Always redact before persisting.  Callers who need the raw object
+    # (in-process) still get it back from build_report(); only the on-disk
+    # copy is redacted.
+    redacted = redact_migration_value(report)
    (output_dir / "report.json").write_text(
-        json.dumps(report, indent=2, ensure_ascii=False) + "\n",
+        json.dumps(redacted, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )

    grouped: Dict[str, List[Dict[str, Any]]] = {}
-    for item in report["items"]:
+    for item in redacted["items"]:
        grouped.setdefault(item["status"], []).append(item)

    lines = [
        "# OpenClaw -> Hermes Migration Report",
        "",
-        f"- Timestamp: {report['timestamp']}",
-        f"- Mode: {report['mode']}",
-        f"- Source: `{report['source_root']}`",
-        f"- Target: `{report['target_root']}`",
+        f"- Timestamp: {redacted['timestamp']}",
+        f"- Mode: {redacted['mode']}",
+        f"- Source: `{redacted['source_root']}`",
+        f"- Target: `{redacted['target_root']}`",
        "",
        "## Summary",
        "",
    ]

-    for key, value in report["summary"].items():
+    for key, value in redacted["summary"].items():
        lines.append(f"- {key}: {value}")

+    warnings = redacted.get("warnings") or []
+    if warnings:
+        lines.extend(["", "## Warnings", ""])
+        for warning in warnings:
+            lines.append(f"- {warning}")
+
    lines.extend(["", "## What Was Not Fully Brought Over", ""])
    skipped = grouped.get("skipped", []) + grouped.get("conflict", []) + grouped.get("error", [])
    if not skipped:
@@ -584,6 +699,12 @@ def write_report(output_dir: Path, report: Dict[str, Any]) -> None:
            reason = item["reason"] or item["status"]
            lines.append(f"- `{source}` -> `{dest}`: {reason}")

+    next_steps = redacted.get("next_steps") or []
+    if next_steps:
+        lines.extend(["", "## Next Steps", ""])
+        for step in next_steps:
+            lines.append(f"- {step}")
+
    (output_dir / "summary.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


@@ -618,6 +739,31 @@ class Migrator:
        self.backup_dir = self.output_dir / "backups" if self.output_dir else None
        self.overflow_dir = self.output_dir / "overflow" if self.output_dir else None
        self.items: List[ItemResult] = []
+        # Once a config.yaml write hits conflict/error mid-run, later
+        # config.yaml writes are deliberately short-circuited to avoid
+        # leaving config in a partially-written state.  Modelled on
+        # OpenClaw's extensions/migrate-hermes/apply.ts "blocked by earlier
+        # apply conflict" sequencing.
+        self._config_apply_blocked: bool = False
+
+        # Resolve the configured workspace directory from openclaw.json.
+        # Many users (especially those who started before the OpenClaw rebrand)
+        # have a custom workspace path (e.g. ~/clawd/) that differs from the
+        # default ~/.openclaw/workspace/.  Reading agents.defaults.workspace
+        # lets source_candidate() find files in the actual workspace.
+        self._custom_workspace: Optional[Path] = None
+        oc_config = self.load_openclaw_config()
+        ws = (oc_config.get("agents", {}).get("defaults", {}).get("workspace") or "").strip()
+        if ws:
+            ws_path = Path(ws).expanduser().resolve()
+            # Only use it if it exists and is outside the source_root tree
+            # (otherwise the standard relative-path logic already covers it).
+            if ws_path.is_dir():
+                try:
+                    ws_path.relative_to(self.source_root)
+                except ValueError:
+                    # ws_path is outside source_root — use it as custom workspace
+                    self._custom_workspace = ws_path

        config = load_yaml_file(self.target_root / "config.yaml")
        mem_cfg = config.get("memory", {}) if isinstance(config.get("memory"), dict) else {}
@@ -635,6 +781,32 @@ class Migrator:
    def is_selected(self, option_id: str) -> bool:
        return option_id in self.selected_options

+    # Option ids that mutate the Hermes config.yaml file.  Once any one of
+    # them records a conflict/error on config.yaml, subsequent ones are
+    # short-circuited to avoid partial writes.  Keep in sync with methods
+    # that call load_yaml_file(target_root / "config.yaml") + dump_yaml_file.
+    _CONFIG_MUTATING_OPTIONS = frozenset({
+        "model-config",
+        "tts-config",
+        "mcp-servers",
+        "plugins-config",
+        "cron-jobs",
+        "hooks-config",
+        "agent-config",
+        "gateway-config",
+        "session-config",
+        "full-providers",
+        "deep-channels",
+        "browser-config",
+        "tools-config",
+        "approvals-config",
+        "memory-backend",
+        "skills-config",
+        "ui-identity",
+        "logging-config",
+        "command-allowlist",
+    })
+
    def record(
        self,
        kind: str,
@@ -644,6 +816,7 @@ class Migrator:
        reason: str = "",
        **details: Any,
    ) -> None:
+        sensitive = bool(details.pop("sensitive", False))
        self.items.append(
            ItemResult(
                kind=kind,
@@ -652,8 +825,16 @@ class Migrator:
                status=status,
                reason=reason,
                details=details,
+                sensitive=sensitive,
            )
        )
+        # Flip the config-block flag when a conflict/error occurs on a
+        # config.yaml write.  Later config-mutating options will skip rather
+        # than attempting a partial write.
+        if status in (STATUS_CONFLICT, STATUS_ERROR) and destination is not None:
+            dest_str = str(destination)
+            if dest_str.endswith("config.yaml") or dest_str.endswith("config.yml"):
+                self._config_apply_blocked = True

    def source_candidate(self, *relative_paths: str) -> Optional[Path]:
        for rel in relative_paths:
@@ -673,6 +854,23 @@ class Migrator:
                alt = self.source_root / "workspace-main" / suffix
                if alt.exists():
                    return alt
+
+        # Final fallback: check the configured workspace directory from
+        # agents.defaults.workspace in openclaw.json.  Users who started
+        # before the OpenClaw rebrand (when the project was named clawd /
+        # clawdbot) often have a custom workspace path outside ~/.openclaw/.
+        if self._custom_workspace:
+            for rel in relative_paths:
+                # Strip the leading "workspace/" or "workspace.default/"
+                # prefix to get the bare filename/subpath.
+                for prefix in ("workspace/", "workspace.default/"):
+                    if rel.startswith(prefix):
+                        suffix = rel[len(prefix):]
+                        alt = self._custom_workspace / suffix
+                        if alt.exists():
+                            return alt
+                        break
+
        return None

    def resolve_skill_destination(self, destination: Path) -> Path:
@@ -762,11 +960,30 @@ class Migrator:
        return self.build_report()

    def run_if_selected(self, option_id: str, func) -> None:
+        """Call ``func`` for ``option_id`` unless the option has to be skipped.
+
+        A "skipped" item is recorded instead of calling ``func`` in two cases:
+        the option is not selected for this run, or (execute mode only)
+        ``_config_apply_blocked`` is set and ``option_id`` is listed in
+        ``_CONFIG_MUTATING_OPTIONS``.
+        """
-        if self.is_selected(option_id):
-            func()
+        if not self.is_selected(option_id):
+            meta = MIGRATION_OPTION_METADATA[option_id]
+            self.record(option_id, None, None, "skipped", "Not selected for this run", option_label=meta["label"])
            return
-        meta = MIGRATION_OPTION_METADATA[option_id]
-        self.record(option_id, None, None, "skipped", "Not selected for this run", option_label=meta["label"])
+        # If a previous config.yaml write hit a conflict/error during apply,
+        # skip remaining config-mutating options rather than risk a partial
+        # write.  Dry-run mode never blocks — the user needs the full preview
+        # to decide how to proceed (re-run with --overwrite, etc.).
+        if (
+            self.execute
+            and self._config_apply_blocked
+            and option_id in self._CONFIG_MUTATING_OPTIONS
+        ):
+            meta = MIGRATION_OPTION_METADATA[option_id]
+            self.record(
+                option_id,
+                None,
+                None,
+                STATUS_SKIPPED,
+                REASON_BLOCKED_BY_APPLY_CONFLICT,
+                option_label=meta["label"],
+            )
+            return
+        func()

    def build_report(self) -> Dict[str, Any]:
        summary: Dict[str, int] = {
@@ -804,6 +1021,8 @@ class Migrator:
            },
            "summary": summary,
            "items": [asdict(item) for item in self.items],
+            "warnings": self._build_warnings(summary),
+            "next_steps": self._build_next_steps(summary),
        }

        if self.output_dir:
@@ -811,6 +1030,67 @@ class Migrator:

        return report

+    def _build_warnings(self, summary: Dict[str, int]) -> List[str]:
+        """Collect the warning messages attached to the migration report.
+
+        Modelled on OpenClaw's extensions/migrate-hermes/plan.ts warnings[].
+        The same strings appear in summary.md and in the JSON report, so each
+        one tells the user what to do next.
+
+        Args:
+            summary: Per-status item counts; only ``conflict`` and ``error``
+                are read here.
+
+        Returns:
+            Zero or more warning strings, in a fixed order.
+        """
+        notes: List[str] = []
+
+        has_conflicts = summary.get("conflict", 0) > 0
+        if has_conflicts:
+            notes.append(
+                "Conflicts were found. Re-run with --overwrite to replace conflicting "
+                "targets after item-level backups."
+            )
+
+        has_errors = summary.get("error", 0) > 0
+        if has_errors:
+            notes.append(
+                "One or more items failed. Inspect the report and re-run after fixing "
+                "the underlying cause."
+            )
+
+        if self.execute and self._config_apply_blocked:
+            notes.append(
+                "A config.yaml write hit a conflict or error mid-apply; later config "
+                "items were skipped to avoid a partial write."
+            )
+
+        # Secrets that were left behind: only reported when secret migration
+        # is off and at least one provider-keys item ended up skipped.
+        if not self.migrate_secrets:
+            for item in self.items:
+                if item.kind == "provider-keys" and item.status == STATUS_SKIPPED:
+                    notes.append(
+                        "API keys and other credentials were detected but not imported. "
+                        "Re-run with --migrate-secrets to copy supported keys into the "
+                        "Hermes env file."
+                    )
+                    break
+
+        return notes
+
+    def _build_next_steps(self, summary: Dict[str, int]) -> List[str]:
+        """Return the follow-up guidance lines embedded in the report.
+
+        Dry runs always get the same two hints.  Execute runs get a review
+        pointer plus a restart hint when anything was migrated, and an
+        ``--overwrite`` hint when conflicts remain; the list may be empty.
+        """
+        if self.execute:
+            guidance: List[str] = []
+            if summary.get("migrated", 0) > 0:
+                # Point at summary.md only when an output directory is set.
+                if self.output_dir:
+                    review_hint = (
+                        "Review the migration report at "
+                        f"{self.output_dir}/summary.md"
+                    )
+                else:
+                    review_hint = "Review the migration report."
+                guidance.append(review_hint)
+                guidance.append(
+                    "Start a new Hermes session (or /reset) to pick up the imported config."
+                )
+            if summary.get("conflict", 0) > 0:
+                guidance.append(
+                    "Re-run with --overwrite to apply items that were blocked by conflicts."
+                )
+            return guidance
+
+        return [
+            "Re-run without --dry-run to apply the migration.",
+            "Pass --overwrite to resolve conflicts, or --migrate-secrets to "
+            "include API keys.",
+        ]
+
    def maybe_backup(self, path: Path) -> Optional[Path]:
        if not self.execute or not self.backup_dir or not path.exists():
            return None
@@ -1391,6 +1671,29 @@ class Migrator:

        model_str = model_str.strip()

+        # Resolve a model alias against the OpenClaw model catalog.
+        # OpenClaw stores agents.defaults.model as either a bare string or
+        # {"primary": "<value>"}, and that value can be either:
+        #   - a full provider/model API ID (e.g. "anthropic/claude-opus-4-6"), or
+        #   - a display alias (e.g. "Claude Opus 4.6") that maps to one.
+        # The catalog at agents.defaults.models is keyed by the full
+        # provider/model API ID with an "alias" field on the value, e.g.:
+        #   {"anthropic/claude-opus-4-6": {"alias": "Claude Opus 4.6"}}
+        # If model_str matches an alias in the catalog, rewrite it to the
+        # catalog key (the real API ID).  If it's already an API ID or has
+        # no catalog match, leave it alone and let downstream pass it through.
+        model_catalog = config.get("agents", {}).get("defaults", {}).get("models", {})
+        if isinstance(model_catalog, dict) and model_str not in model_catalog:
+            for api_id, entry in model_catalog.items():
+                if not isinstance(api_id, str):
+                    continue
+                if isinstance(entry, dict) and entry.get("alias") == model_str:
+                    model_str = api_id
+                    break
+                if isinstance(entry, str) and entry == model_str:
+                    model_str = api_id
+                    break
+
        if yaml is None:
            self.record("model-config", source_path, destination, "error", "PyYAML is not available")
            return
@@ -2695,6 +2998,13 @@ def parse_args() -> argparse.Namespace:
             f"Valid ids: {', '.join(sorted(MIGRATION_OPTION_METADATA))}",
    )
    parser.add_argument("--output-dir", help="Where to write report, backups, and archived docs")
+    parser.add_argument(
+        "--json",
+        action="store_true",
+        dest="json_output",
+        help="Print the migration report as JSON on stdout (redacted). "
+             "Combine with no --execute for a safe plan-only machine-readable preview.",
+    )
    return parser.parse_args()


@@ -2719,6 +3029,13 @@ def main() -> int:
    )
    report = migrator.migrate()

+    # ── Machine-readable JSON mode ────────────────────────────
+    # When --json is set, print the redacted report to stdout and skip the
+    # human-readable terminal recap.  Useful for CI and scripted wrappers.
+    if getattr(args, "json_output", False):
+        print(json.dumps(redact_migration_value(report), indent=2, ensure_ascii=False))
+        return 0
+
    # ── Human-readable terminal recap ─────────────────────────
    s = report["summary"]
    items = report["items"]
--- a/Show More
+++ b/Show More