docs(comfyui): add system detection + hardware-aware recommendations to onboarding

Instead of asking the user what they have, the agent now: (1) runs system detection commands (OS, GPU, VRAM, RAM, disk, Python); (2) checks whether ComfyUI is already installed/running; (3) recommends the best path based on its findings. Adds: Step 1, a detection script block (nvidia-smi, system_profiler, etc.); Step 2, a decision table mapping detected system → recommended path; a hardware requirements table (VRAM tiers, RAM, disk); and specific recommendations per platform (macOS → Desktop app, Linux+NVIDIA → comfy-cli, no GPU → Cloud, etc.).
Potential fix for pull request finding 'CodeQL / Incomplete URL substring sanitization'
2026-04-30 01:07:53 +05:30 · 2026-04-30 00:58:11 +05:30 · 2026-04-30 00:58:03 +05:30 · 2026-04-30 00:56:08 +05:30 · 2026-04-30 00:49:58 +05:30 · 2026-04-29 12:10:40 -07:00
807 changed files with 96794 additions and 6168 deletions
@@ -5,7 +5,9 @@

 # Dependencies
 node_modules
+**/node_modules
 .venv
+**/.venv

 # CI/CD
 .github
@@ -13,7 +13,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  check:
+  nix-lockfile-check:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
@@ -36,6 +36,12 @@ jobs:
          LINK_SHA: ${{ steps.sha.outputs.full }}
        run: nix run .#fix-lockfiles -- --check

+      - name: Fail if check crashed without reporting
+        if: steps.check.outputs.stale != 'true' && steps.check.outputs.stale != 'false'
+        run: |
+          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
+          exit 1
+
      - name: Post sticky PR comment (stale)
        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
@@ -1,6 +1,13 @@
 name: Nix Lockfile Fix

 on:
+  push:
+    branches: [main]
+    paths:
+      - 'ui-tui/package-lock.json'
+      - 'ui-tui/package.json'
+      - 'web/package-lock.json'
+      - 'web/package.json'
  workflow_dispatch:
    inputs:
      pr_number:
@@ -19,9 +26,103 @@ concurrency:
  cancel-in-progress: false

 jobs:
+  # ── Auto-fix on main ───────────────────────────────────────────────
+  # Fires when a push to main touches package.json or package-lock.json
+  # in ui-tui/ or web/. Runs fix-lockfiles --apply and pushes the hash
+  # update commit directly to main so Nix builds never stay broken.
+  #
+  # Safety invariants:
+  #   1. The fix commit only touches nix/*.nix files, which are NOT in
+  #      the paths filter above, so this cannot re-trigger itself.
+  #   2. An explicit file-whitelist check before commit aborts if
+  #      fix-lockfiles ever modifies unexpected files.
+  #   3. Job-level concurrency with cancel-in-progress: true ensures
+  #      back-to-back pushes collapse to the newest; ref: main checkout
+  #      always operates on the latest branch state.
+  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
+  #      triggers downstream nix.yml verification.
+  auto-fix-main:
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    concurrency:
+      group: auto-fix-main
+      cancel-in-progress: true
+    steps:
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
+        with:
+          app-id: ${{ secrets.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          ref: main
+          token: ${{ steps.app-token.outputs.token }}
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Ensure only nix files were modified — prevents accidental
+          # self-triggering if fix-lockfiles ever touches package files.
+          unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
+          if [ -n "$unexpected" ]; then
+            echo "::error::Unexpected modified files: $unexpected"
+            exit 1
+          fi
+
+          # Record the base SHA before committing — used to detect package
+          # file changes if we need to rebase after a non-fast-forward push.
+          BASE_SHA="$(git rev-parse HEAD)"
+
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
+            -m "Source: $GITHUB_SHA" \
+            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+
+          # Retry push with rebase in case main advanced with an unrelated
+          # commit during the nix build. Without this, a non-fast-forward
+          # rejection silently loses the fix. If package files changed during
+          # the rebase, abort — a fresh auto-fix run will handle the new state.
+          for attempt in 1 2 3; do
+            if git push origin HEAD:main; then
+              exit 0
+            fi
+            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
+            git fetch origin main
+
+            # If package files changed between our base and the new main,
+            # our computed hashes are stale. Abort and let the next triggered
+            # run recompute from the correct package-lock state.
+            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
+              'ui-tui/package-lock.json' 'ui-tui/package.json' \
+              'web/package-lock.json' 'web/package.json' || true)"
+            if [ -n "$pkg_changed" ]; then
+              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
+              exit 0
+            fi
+
+            git rebase origin/main
+          done
+          echo "::error::Failed to push after 3 rebase attempts"
+          exit 1
+
+  # ── PR fix (manual / checkbox) ─────────────────────────────────────
+  # Existing behavior: run on manual dispatch OR when a task-list
+  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
  fix:
-    # Run on manual dispatch OR when a task-list checkbox in the sticky
-    # lockfile-check comment flips from `[ ]` to `[x]`.
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment'
@@ -69,3 +69,4 @@ mini-swe-agent/
 .nix-stamps/
 result
 website/static/api/skills-index.json
+models-dev-upstream/
@@ -38,7 +38,7 @@ hermes-agent/
 │   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
 │   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
 │   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
-│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
+│   └── builtin_hooks/    # Extension point for always-registered gateway hooks (none shipped)
 ├── plugins/              # Plugin system (see "Plugins" section below)
 │   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
 │   ├── context_engine/   # Context-engine plugins
@@ -494,7 +494,7 @@ branding:
  agent_name: "My Agent"
  welcome: "Welcome message"
  response_label: " ⚔ Agent "
-  prompt_symbol: "⚔ ❯ "
+  prompt_symbol: "⚔"

 tool_prefix: "╎"             # Tool output line prefix
 ```
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
+    build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -30,18 +30,28 @@ WORKDIR /opt/hermes
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
 COPY web/package.json web/package-lock.json web/
+COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
+COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
    (cd web && npm install --prefer-offline --no-audit) && \
+    (cd ui-tui && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
 COPY --chown=hermes:hermes . .

-# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
-RUN cd web && npm run build
+# Build browser dashboard and terminal UI assets.
+RUN cd web && npm run build && \
+    cd ../ui-tui && npm run build && \
+    rm -rf node_modules/@hermes/ink && \
+    rm -rf packages/hermes-ink/node_modules && \
+    cp -R packages/hermes-ink node_modules/@hermes/ink && \
+    npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
+    rm -rf node_modules/@hermes/ink/node_modules/react && \
+    node --input-type=module -e "await import('@hermes/ink')"

 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
@@ -112,6 +112,17 @@ def main() -> None:
    import acp
    from .server import HermesACPAgent

+    # MCP tool discovery from config.yaml — run before asyncio.run() so
+    # it's safe to use blocking waits.  (ACP also registers per-session
+    # MCP servers dynamically via asyncio.to_thread inside the event
+    # loop; that path is unaffected.)  Moved from model_tools.py module
+    # scope to avoid freezing the gateway's loop on lazy import (#16856).
+    try:
+        from tools.mcp_tool import discover_mcp_tools
+        discover_mcp_tools()
+    except Exception:
+        logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
+
    agent = HermesACPAgent()
    try:
        asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
@@ -3,6 +3,7 @@
 from __future__ import annotations

 import asyncio
+import contextvars
 import logging
 import os
 from collections import defaultdict, deque
@@ -574,6 +575,22 @@ class HermesACPAgent(acp.Agent):

        def _run_agent() -> dict:
            nonlocal previous_approval_cb, previous_interactive
+            # Bind HERMES_SESSION_KEY for this session so per-session caches
+            # (e.g. the interactive sudo password cache in tools.terminal_tool)
+            # scope to the ACP session rather than leaking across sessions
+            # that land on the same reused executor thread. This call runs
+            # inside a contextvars.copy_context() below, so the ContextVar
+            # write is isolated from other concurrent ACP sessions.
+            try:
+                from gateway.session_context import (
+                    clear_session_vars,
+                    set_session_vars,
+                )
+                session_tokens = set_session_vars(session_key=session_id)
+            except Exception:
+                session_tokens = None
+                clear_session_vars = None  # type: ignore[assignment]
+                logger.debug("Could not set ACP session context", exc_info=True)
            if approval_cb:
                try:
                    from tools import terminal_tool as _terminal_tool
@@ -607,9 +624,19 @@ class HermesACPAgent(acp.Agent):
                        _terminal_tool.set_approval_callback(previous_approval_cb)
                    except Exception:
                        logger.debug("Could not restore approval callback", exc_info=True)
+                if session_tokens is not None and clear_session_vars is not None:
+                    try:
+                        clear_session_vars(session_tokens)
+                    except Exception:
+                        logger.debug("Could not clear ACP session context", exc_info=True)

        try:
-            result = await loop.run_in_executor(_executor, _run_agent)
+            # Wrap the executor call in a fresh copy of the current context so
+            # concurrent ACP sessions on the shared ThreadPoolExecutor don't
+            # stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
+            # particular — used by the interactive sudo password cache scope).
+            ctx = contextvars.copy_context()
+            result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
        except Exception:
            logger.exception("Executor error for session %s", session_id)
            return PromptResponse(stop_reason="end_turn")
@@ -20,12 +20,27 @@ from pathlib import Path

 from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple
-from utils import normalize_proxy_env_vars
+from utils import base_url_host_matches, normalize_proxy_env_vars

-try:
-    import anthropic as _anthropic_sdk
-except ImportError:
-    _anthropic_sdk = None  # type: ignore[assignment]
+# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
+# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
+# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
+# read_claude_code_credentials_from_keychain) are all on cold user-triggered
+# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
+# the module after the first call and returns None on ImportError.
+_anthropic_sdk: Any = ...  # sentinel — None means "tried and missing"
+
+
+def _get_anthropic_sdk():
+    """Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
+    global _anthropic_sdk
+    if _anthropic_sdk is ...:
+        try:
+            import anthropic as _sdk
+            _anthropic_sdk = _sdk
+        except ImportError:
+            _anthropic_sdk = None
+    return _anthropic_sdk

 logger = logging.getLogger(__name__)

@@ -202,19 +217,33 @@ def _forbids_sampling_params(model: str) -> bool:


 # Beta headers for enhanced features (sent with ALL auth types).
-# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
+# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
 # beta headers are still accepted (harmless no-op) but not required. Kept
 # here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
 # that still gate on the headers continue to get the enhanced features.
-# Migration guide: remove these if you no longer support ≤4.5 models.
+#
+# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
+# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
+# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
+# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
+# at 200K even though model_metadata.py advertises 1M. The header is a harmless
+# no-op on endpoints where 1M is GA.
+#
+# Migration guide: remove these if you no longer support ≤4.5 models or once
+# Bedrock/Azure promote 1M to GA.
 _COMMON_BETAS = [
    "interleaved-thinking-2025-05-14",
    "fine-grained-tool-streaming-2025-05-14",
+    "context-1m-2025-08-07",
 ]
 # MiniMax's Anthropic-compatible endpoints fail tool-use requests when
 # the fine-grained tool streaming beta is present.  Omit it so tool calls
 # fall back to the provider's default response path.
 _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
+# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
+# Bearer-auth (MiniMax) endpoints since they host their own models and
+# unknown Anthropic beta headers risk request rejection.
+_CONTEXT_1M_BETA = "context-1m-2025-08-07"

 # Fast mode beta — enables the ``speed: "fast"`` request parameter for
 # significantly higher output token throughput on Opus 4.6 (~2.5x).
@@ -336,6 +365,88 @@ def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")


+# Model-name prefixes that identify the Kimi / Moonshot family.  Covers
+# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
+# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
+# Matched case-insensitively against the last ``/``-separated segment of
+# the model name, so a caller's ``provider/vendor/model`` slug is handled
+# the same as a bare name.
+_KIMI_FAMILY_MODEL_PREFIXES = (
+    "kimi-", "kimi_",
+    "moonshot-", "moonshot_",
+    "k1.", "k1-",
+    "k2.", "k2-",
+    "k25", "k2.5",
+)
+
+
+def _model_name_is_kimi_family(model: str | None) -> bool:
+    if not isinstance(model, str):
+        return False
+    m = model.strip().lower()
+    if not m:
+        return False
+    # Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
+    if "/" in m:
+        m = m.rsplit("/", 1)[-1]
+    return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
+
+
+def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
+    """Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.
+
+    Broader than ``_is_kimi_coding_endpoint`` — matches:
+
+    - Kimi's official ``/coding`` URL (legacy check, preserved)
+    - Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host
+    - Custom or proxied endpoints whose *model* name is in the Kimi / Moonshot
+      family (``kimi-*``, ``moonshot-*``, ``k1.*``, ``k2.*``, …).  Users with
+      ``api_mode: anthropic_messages`` on a private gateway fronting Kimi
+      fall into this branch — the upstream still enforces Kimi's thinking
+      semantics (reasoning_content required on every replayed tool-call
+      message) regardless of the gateway's hostname.
+
+    Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
+    preserve unsigned reasoning_content-derived thinking blocks on replay.
+    See hermes-agent#13848, #17057.
+    """
+    if _is_kimi_coding_endpoint(base_url):
+        return True
+    for _domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn"):
+        if base_url_host_matches(base_url or "", _domain):
+            return True
+    if _model_name_is_kimi_family(model):
+        return True
+    return False
+
+
+def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
+    """Return True for DeepSeek's Anthropic-compatible endpoint.
+
+    DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
+    but, when thinking mode is enabled, requires the ``thinking`` blocks
+    from prior assistant turns to round-trip on subsequent requests — the
+    generic third-party path strips them and triggers HTTP 400::
+
+        The content[].thinking in the thinking mode must be passed back
+        to the API.
+
+    Per DeepSeek's published compatibility matrix the blocks are unsigned
+    (no Anthropic-proprietary signature, no ``redacted_thinking`` support),
+    so this endpoint is handled with the same strip-signed / keep-unsigned
+    policy used for Kimi's ``/coding`` endpoint.  The match is pinned to
+    the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
+    base URL (which never reaches this adapter) is not misclassified.
+    See hermes-agent#16748.
+    """
+    if not base_url_host_matches(base_url or "", "api.deepseek.com"):
+        return False
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return "/anthropic" in normalized.rstrip("/").lower()
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -357,9 +468,14 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    that include Anthropic's ``fine-grained-tool-streaming`` beta — every
    tool-use message triggers a connection error.  Strip that beta for
    Bearer-auth endpoints while keeping all other betas intact.
+
+    The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
+    endpoints — MiniMax hosts its own models, not Claude, so the header is
+    irrelevant at best and risks request rejection at worst.
    """
    if _requires_bearer_auth(base_url):
-        return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
+        _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
+        return [b for b in _COMMON_BETAS if b not in _stripped]
    return _COMMON_BETAS


@@ -374,6 +490,7 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =

    Returns an anthropic.Anthropic instance.
    """
+    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
@@ -390,7 +507,16 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
    }
    if normalized_base_url:
-        kwargs["base_url"] = normalized_base_url
+        # Azure Anthropic endpoints need an ``api-version`` query parameter; send
+        # it via default_query, since appending it to base_url yields malformed
+        # paths like /anthropic?api-version=.../v1/messages.  Match on the URL
+        # host (assumes the helper accepts subdomains — confirm), not substring.
+        _is_azure_endpoint = base_url_host_matches(normalized_base_url.lower(), "azure.com")
+        if _is_azure_endpoint and "api-version" not in normalized_base_url:
+            kwargs["base_url"] = normalized_base_url.rstrip("/")
+            kwargs["default_query"] = {"api-version": "2025-04-15"}
+        else:
+            kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

    if _is_kimi_coding_endpoint(base_url):
@@ -447,8 +573,16 @@ def build_anthropic_bedrock_client(region: str):
    Claude feature parity: prompt caching, thinking budgets, adaptive
    thinking, fast mode — features not available via the Converse API.

+    Attaches the common Anthropic beta headers as client-level defaults so
+    that Bedrock-hosted Claude models get the same enhanced features as
+    native Anthropic. The ``context-1m-2025-08-07`` beta in particular
+    unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
+    it, Bedrock caps these models at 200K even though the Anthropic API
+    serves them with 1M natively.
+
    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
    """
+    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
@@ -464,6 +598,7 @@ def build_anthropic_bedrock_client(region: str):
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
+        default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
    )


@@ -479,9 +614,6 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
-    import platform
-    import subprocess
-
    if platform.system() != "Darwin":
        return None

@@ -1026,9 +1158,12 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
        # These must not be converted to hyphens.  See issue #12295.
        if _is_bedrock_model_id(model):
            return model
-        # OpenRouter uses dots for version separators (claude-opus-4.6),
-        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
-        model = model.replace(".", "-")
+        # Only convert dots to hyphens for Anthropic/Claude models.
+        # Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
+        # as part of their canonical names.  See issue #17171.
+        _lower = model.lower()
+        if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
+            model = model.replace(".", "-")
    return model


@@ -1045,6 +1180,33 @@ def _sanitize_tool_id(tool_id: str) -> str:
    return sanitized or "tool_0"


+def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
+    """Normalize tool schemas before sending them to Anthropic.
+
+    Anthropic's tool schema validator rejects nullable unions such as
+    ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
+    commonly emits for optional fields. Tool optionality is represented by
+    the parent ``required`` array, so we delegate to the shared
+    ``strip_nullable_unions`` helper to collapse nullable unions to the
+    non-null branch while preserving metadata like description/default.
+
+    ``keep_nullable_hint=False`` because the Anthropic validator does not
+    recognize the OpenAPI-style ``nullable: true`` extension and strict
+    schema-to-grammar converters may reject unknown keywords.
+    """
+    if not schema:
+        return {"type": "object", "properties": {}}
+
+    from tools.schema_sanitizer import strip_nullable_unions
+
+    normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
+    if not isinstance(normalized, dict):
+        return {"type": "object", "properties": {}}
+    if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
+        normalized = {**normalized, "properties": {}}
+    return normalized
+
+
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
@@ -1055,7 +1217,9 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
        result.append({
            "name": fn.get("name", ""),
            "description": fn.get("description", ""),
-            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+            "input_schema": _normalize_tool_input_schema(
+                fn.get("parameters", {"type": "object", "properties": {}})
+            ),
        })
    return result

@@ -1186,6 +1350,7 @@ def _convert_content_to_anthropic(content: Any) -> Any:
 def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
+    model: str | None = None,
 ) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

@@ -1197,6 +1362,12 @@ def convert_messages_to_anthropic(
    endpoint, all thinking block signatures are stripped.  Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".
+
+    When *model* is provided and matches the Kimi / Moonshot family (or
+    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
+    synthesised from ``reasoning_content`` are preserved on replayed
+    assistant tool-call messages — Kimi requires the field to exist, even
+    if empty.
    """
    system = None
    result = []
@@ -1425,7 +1596,16 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    _is_kimi = _is_kimi_coding_endpoint(base_url)
+    # Kimi-family endpoints (Kimi / Moonshot host or model name) and DeepSeek
+    # /anthropic share a contract: both speak the Anthropic Messages protocol
+    # but require that thinking blocks synthesised from reasoning_content
+    # round-trip on later turns when thinking is enabled.  Signed Anthropic
+    # blocks are still stripped (neither upstream can validate the signatures);
+    # unsigned blocks are kept.  See hermes-agent#13848 (Kimi), #16748 (DeepSeek).
+    _preserve_unsigned_thinking = (
+        _is_kimi_family_endpoint(base_url, model)
+        or _is_deepseek_anthropic_endpoint(base_url)
+    )

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1437,22 +1617,22 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_kimi:
-            # Kimi's /coding endpoint enables thinking server-side and
-            # requires unsigned thinking blocks on replayed assistant
-            # tool-call messages.  Strip signed Anthropic blocks (Kimi
-            # can't validate signatures) but preserve the unsigned ones
-            # we synthesised from reasoning_content above.
+        if _preserve_unsigned_thinking:
+            # Kimi-family and DeepSeek /anthropic endpoints both enable
+            # thinking server-side and require unsigned thinking blocks on
+            # replayed assistant tool-call messages.  Strip signed Anthropic
+            # blocks (neither upstream can validate Anthropic signatures) but
+            # preserve the unsigned ones we synthesised from reasoning_content.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — Kimi can't validate, strip
+                    # Anthropic-signed block — upstream can't validate, strip
                    continue
                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: Kimi needs it for message-history validation.
+                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
@@ -1548,7 +1728,9 @@ def build_anthropic_kwargs(
    Currently only supported on native Anthropic endpoints (not third-party
    compatible ones).
    """
-    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
+    system, anthropic_messages = convert_messages_to_anthropic(
+        messages, base_url=base_url, model=model
+    )
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
@@ -1654,7 +1836,7 @@ def build_anthropic_kwargs(
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    _is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
@@ -41,9 +41,57 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
+from urllib.parse import urlparse, parse_qs, urlunparse

-from openai import OpenAI
+# NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
+# openai SDK pulls a large type tree (~240 ms cold, including responses/*,
+# graders/*). We expose `OpenAI` here as a thin proxy that imports the SDK on
+# first call and forwards, so:
+#   (a) the 15+ in-module `OpenAI(...)` construction sites work unchanged
+#       (Python's function-scope name lookup resolves `OpenAI` to the proxy
+#       object bound in module globals here, without triggering any import);
+#   (b) external code can still do `auxiliary_client.OpenAI` or
+#       `patch("agent.auxiliary_client.OpenAI", ...)` — tests see the proxy,
+#       and patch replaces the module attribute as usual;
+#   (c) `OpenAI` as a type annotation resolves at runtime to the proxy
+#       instance (which is harmless — annotations aren't type-checked at runtime).
+# See tests/agent/test_auxiliary_client.py for patch patterns this supports.
+if TYPE_CHECKING:
+    from openai import OpenAI  # noqa: F401 — type hints only
+
+_OPENAI_CLS_CACHE: Optional[type] = None
+
+
+def _load_openai_cls() -> type:
+    """Return the real ``openai.OpenAI`` class, importing the SDK on the first call only."""
+    global _OPENAI_CLS_CACHE
+    if _OPENAI_CLS_CACHE is None:
+        import openai  # deferred so that loading this module does not import the SDK
+        _OPENAI_CLS_CACHE = openai.OpenAI
+    return _OPENAI_CLS_CACHE
+
+
+class _OpenAIProxy:
+    """Lazy stand-in bound to the module-level name ``OpenAI``.
+
+    ``OpenAI(...)`` builds a real ``openai.OpenAI`` client and ``isinstance(x,
+    OpenAI)`` checks against the real class; the SDK loads on first such use.
+    """
+
+    __slots__ = ()
+
+    def __call__(self, *ctor_args, **ctor_kwargs):
+        return _load_openai_cls()(*ctor_args, **ctor_kwargs)
+
+    def __instancecheck__(self, candidate):
+        return isinstance(candidate, _load_openai_cls())
+
+    def __repr__(self):
+        return "<lazy openai.OpenAI proxy>"
+
+
+OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance

 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
@@ -52,6 +100,25 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_

 logger = logging.getLogger(__name__)

+
+def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
+    """``isinstance`` that returns False instead of raising ``TypeError`` (e.g. when a patched symbol is not a type)."""
+    try:
+        return isinstance(obj, maybe_type)
+    except TypeError:
+        return False
+
+
+def _extract_url_query_params(url: str):
+    """Split ``url`` into (URL with the query removed, {param: first value}); returns (url, None) if it has no query."""
+    parts = urlparse(url)
+    if not parts.query:
+        return url, None
+    first_values = {name: values[0] for name, values in parse_qs(parts.query).items()}
+    stripped_url = urlunparse(parts._replace(query=""))
+    return stripped_url, first_values
+
+
 # Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
 _stale_base_url_warned = False

@@ -70,6 +137,8 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
+    "gmi-cloud": "gmi",
+    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -80,6 +149,10 @@ _PROVIDER_ALIASES = {
    "github-models": "copilot",
    "github-copilot-acp": "copilot-acp",
    "copilot-acp-agent": "copilot-acp",
+    "tencent": "tencent-tokenhub",
+    "tokenhub": "tencent-tokenhub",
+    "tencent-cloud": "tencent-tokenhub",
+    "tencentmaas": "tencent-tokenhub",
 }


@@ -143,7 +216,9 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "kimi-coding": "kimi-k2-turbo-preview",
    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
+    "gmi": "google/gemini-3.1-flash-lite-preview",
    "minimax": "MiniMax-M2.7",
+    "minimax-oauth": "MiniMax-M2.7-highspeed",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
    "ai-gateway": "google/gemini-3-flash",
@@ -151,6 +226,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "opencode-go": "glm-5",
    "kilocode": "google/gemini-3-flash-preview",
    "ollama-cloud": "nemotron-3-nano:30b",
+    "tencent-tokenhub": "hy3-preview",
 }

 # Vision-specific model overrides for direct providers.
@@ -162,6 +238,21 @@ _PROVIDER_VISION_MODELS: Dict[str, str] = {
    "zai": "glm-5v-turbo",
 }

+# Providers whose endpoint does not accept image input, even though the
+# provider's broader ecosystem has vision models available elsewhere.  When
+# `auxiliary.vision.provider: auto` sees one of these as the main provider,
+# it must skip straight to the aggregator chain instead of returning a client
+# that will 404 on every vision request.
+#
+# kimi-coding / kimi-coding-cn: the Kimi Coding Plan routes through
+# api.kimi.com/coding (Anthropic Messages wire) which Kimi's own docs
+# describe as having no image_in capability. Vision lives on the separate
+# Kimi Platform (api.moonshot.ai, OpenAI-wire, pay-as-you-go).  See #17076.
+_PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
+    "kimi-coding",
+    "kimi-coding-cn",
+})
+
 # OpenRouter app attribution headers
 _OR_HEADERS = {
    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
@@ -390,6 +481,33 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

+        # Translate extra_body.reasoning (chat.completions shape) into the
+        # Responses API's top-level reasoning + include fields.  Mirrors
+        # agent/transports/codex.py::build_kwargs() so auxiliary callers
+        # that configure reasoning via auxiliary.<task>.extra_body get the
+        # same behavior as the main agent's Codex transport.
+        extra_body = kwargs.get("extra_body") or {}
+        if isinstance(extra_body, dict):
+            reasoning_cfg = extra_body.get("reasoning")
+            if isinstance(reasoning_cfg, dict):
+                if reasoning_cfg.get("enabled") is False:
+                    # Reasoning explicitly disabled — do not set reasoning
+                    # or include.  The Codex backend still thinks by
+                    # default, but we honor the caller's intent where the
+                    # API allows it.
+                    pass
+                else:
+                    effort = reasoning_cfg.get("effort", "medium")
+                    # Codex backend rejects "minimal"; clamp to "low" to
+                    # match the main-agent Codex transport behavior.
+                    if effort == "minimal":
+                        effort = "low"
+                    resp_kwargs["reasoning"] = {
+                        "effort": effort,
+                        "summary": "auto",
+                    }
+                    resp_kwargs["include"] = ["reasoning.encrypted_content"]
+
        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
        tools = kwargs.get("tools")
        if tools:
@@ -699,6 +817,116 @@ class AsyncAnthropicAuxiliaryClient:
        self.base_url = sync_wrapper.base_url


+def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
+    """True if the endpoint at ``base_url`` speaks the Anthropic Messages
+    protocol instead of OpenAI chat.completions.
+
+    Modelled on ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the
+    auxiliary client and the main agent agree on transport selection.
+    Any ``?query`` / ``#fragment`` is dropped before matching.  Covers:
+
+    - Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
+      Anthropic-compatible gateways).
+    - ``api.kimi.com/coding`` (Kimi Coding Plan — the /coding route only
+      speaks Claude-Code's native Anthropic shape; ``chat.completions``
+      returns 404 on Anthropic-only model aliases like ``kimi-for-coding``).
+    - ``api.anthropic.com`` (native Anthropic).
+    """
+    # Match on scheme/host/path only — callers may pass a URL that still has its query string.
+    bare = (base_url or "").split("#", 1)[0].split("?", 1)[0]
+    normalized = bare.strip().lower().rstrip("/")
+    if not normalized:
+        return False
+    if normalized.endswith("/anthropic"):
+        return True
+    hostname = base_url_hostname(normalized)
+    if hostname == "api.anthropic.com":
+        return True
+    return hostname == "api.kimi.com" and "/coding" in normalized
+
+
+def _maybe_wrap_anthropic(
+    client_obj: Any,
+    model: str,
+    api_key: str,
+    base_url: str,
+    api_mode: Optional[str] = None,
+) -> Any:
+    """Rewrap a plain OpenAI client in ``AnthropicAuxiliaryClient`` when
+    the endpoint actually speaks Anthropic Messages.
+
+    Shared chokepoint for aux-client transport correction: a new Anthropic
+    client is built from ``api_key`` + ``base_url`` and wrapped together with
+    ``model`` (``is_oauth=False``).  Wrapping happens when ``api_mode`` is
+    ``"anthropic_messages"`` or, with ``api_mode`` unset, when
+    ``_endpoint_speaks_anthropic_messages(base_url)`` is true.
+
+    Returns ``client_obj`` unchanged when:
+    - It's already an Anthropic/Codex/Gemini/CopilotACP wrapper.
+    - ``api_mode`` is explicitly set to a non-Anthropic transport.
+    - The endpoint is not detected as Anthropic-wire.
+    - ``agent.anthropic_adapter`` can't be imported, or building the
+      Anthropic client raises (a warning is logged in both cases).
+    """
+    # Already wrapped — don't double-wrap.
+    if _safe_isinstance(client_obj, AnthropicAuxiliaryClient):
+        return client_obj
+    # Other specialized adapters we should never re-dispatch.
+    if _safe_isinstance(client_obj, CodexAuxiliaryClient):
+        return client_obj
+    try:
+        from agent.gemini_native_adapter import GeminiNativeClient
+        if _safe_isinstance(client_obj, GeminiNativeClient):
+            return client_obj
+    except ImportError:
+        pass
+    try:
+        from agent.copilot_acp_client import CopilotACPClient
+        if _safe_isinstance(client_obj, CopilotACPClient):
+            return client_obj
+    except ImportError:
+        pass
+
+    # Explicit non-anthropic api_mode wins over URL heuristics.
+    if api_mode and api_mode != "anthropic_messages":
+        return client_obj
+
+    should_wrap = (
+        api_mode == "anthropic_messages"
+        or _endpoint_speaks_anthropic_messages(base_url)
+    )
+    if not should_wrap:
+        return client_obj
+
+    try:
+        from agent.anthropic_adapter import build_anthropic_client
+    except ImportError:
+        logger.warning(
+            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
+            "not installed — falling back to OpenAI-wire (will likely 404).",
+            base_url,
+        )
+        return client_obj
+
+    try:
+        real_client = build_anthropic_client(api_key, base_url)
+    except Exception as exc:
+        logger.warning(
+            "Failed to build Anthropic client for %s (%s) — falling back to "
+            "OpenAI-wire client.", base_url, exc,
+        )
+        return client_obj
+
+    logger.debug(
+        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
+        "(model=%s, base_url=%s, api_mode=%s)",
+        model, base_url[:60] if base_url else "", api_mode or "auto-detected",
+    )
+    return AnthropicAuxiliaryClient(
+        real_client, model, api_key, base_url, is_oauth=False,
+    )
+
+
 def _read_nous_auth() -> Optional[dict]:
    """Read and validate ~/.hermes/auth.json for an active Nous provider.

@@ -869,7 +1097,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
-            return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
+            _client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
+            return _client, model

        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
@@ -895,7 +1125,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
-        return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
+        _client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
+        return _client, model

    return None, None

@@ -1157,8 +1389,10 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
        return None, None
    model = _read_main_model() or "gpt-4o-mini"
    logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
+    _clean_base, _dq = _extract_url_query_params(custom_base)
+    _extra = {"default_query": _dq} if _dq else {}
    if custom_mode == "codex_responses":
-        real_client = OpenAI(api_key=custom_key, base_url=custom_base)
+        real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
        return CodexAuxiliaryClient(real_client, model), model
    if custom_mode == "anthropic_messages":
        # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -1172,12 +1406,18 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
                "Custom endpoint declares api_mode=anthropic_messages but the "
                "anthropic SDK is not installed — falling back to OpenAI-wire."
            )
-            return OpenAI(api_key=custom_key, base_url=custom_base), model
+            return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
        return (
            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
            model,
        )
-    return OpenAI(api_key=custom_key, base_url=custom_base), model
+    # URL-based anthropic detection for custom endpoints that didn't set
+    # api_mode explicitly (e.g. kimi.com/coding reached via custom config).
+    _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
+    _fallback_client = _maybe_wrap_anthropic(
+        _fallback_client, model, custom_key, custom_base, custom_mode,
+    )
+    return _fallback_client, model


 def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
@@ -1603,8 +1843,14 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
 # below — never look up auth env vars ad-hoc.


-def _to_async_client(sync_client, model: str):
-    """Convert a sync client to its async counterpart, preserving Codex routing."""
+def _to_async_client(sync_client, model: str, is_vision: bool = False):
+    """Convert a sync client to its async counterpart, preserving Codex routing.
+
+    When ``is_vision=True`` and the underlying base URL is Copilot, the
+    resulting async client carries the ``Copilot-Vision-Request: true``
+    header so the request is routed to Copilot's vision-capable
+    infrastructure (otherwise vision payloads silently time out).
+    """
    from openai import AsyncOpenAI

    if isinstance(sync_client, CodexAuxiliaryClient):
@@ -1633,9 +1879,11 @@ def _to_async_client(sync_client, model: str):
    if base_url_host_matches(sync_base_url, "openrouter.ai"):
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
-        from hermes_cli.models import copilot_default_headers
+        from hermes_cli.copilot_auth import copilot_request_headers

-        async_kwargs["default_headers"] = copilot_default_headers()
+        async_kwargs["default_headers"] = copilot_request_headers(
+            is_agent_turn=True, is_vision=is_vision
+        )
    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
    return AsyncOpenAI(**async_kwargs), model
@@ -1662,6 +1910,7 @@ def resolve_provider_client(
    explicit_api_key: str = None,
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
+    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.
@@ -1719,8 +1968,20 @@ def resolve_provider_client(
                return True
        return False

-    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
-        """Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
+    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "",
+                        api_key_str: str = ""):
+        """Wrap a plain OpenAI client in the correct transport adapter.
+
+        Handles two cases:
+        - ``CodexAuxiliaryClient`` when the endpoint needs the Responses API
+          (explicit ``api_mode=codex_responses`` or api.openai.com + codex
+          model name).
+        - ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic
+          Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic``
+          suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``).
+
+        Clients that are already specialized wrappers pass through unchanged.
+        """
        if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
            logger.debug(
                "resolve_provider_client: wrapping client in CodexAuxiliaryClient "
@@ -1728,7 +1989,11 @@ def resolve_provider_client(
                api_mode or "auto-detected", final_model_str,
                base_url_str[:60] if base_url_str else "")
            return CodexAuxiliaryClient(client_obj, final_model_str)
-        return client_obj
+        # Anthropic-wire endpoints: rewrap plain OpenAI clients so
+        # chat.completions.create() is translated to /v1/messages.
+        return _maybe_wrap_anthropic(
+            client_obj, final_model_str, api_key_str, base_url_str, api_mode,
+        )

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
@@ -1745,7 +2010,7 @@ def resolve_provider_client(
                "auxiliary provider (using %r instead)", model, resolved)
            model = None
        final_model = model or resolved
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    # ── OpenRouter ───────────────────────────────────────────────────
@@ -1758,7 +2023,7 @@ def resolve_provider_client(
            )
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
@@ -1775,7 +2040,7 @@ def resolve_provider_client(
                           "but Nous Portal not configured (run: hermes auth)")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
@@ -1802,13 +2067,13 @@ def resolve_provider_client(
                           "but no Codex OAuth token found (run: hermes model)")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        if explicit_base_url:
-            custom_base = explicit_base_url.strip()
+            custom_base = _to_openai_base_url(explicit_base_url).strip()
            custom_key = (
                (explicit_api_key or "").strip()
                or os.getenv("OPENAI_API_KEY", "").strip()
@@ -1821,18 +2086,23 @@ def resolve_provider_client(
                )
                return None, None
            final_model = _normalize_resolved_model(
-                model or _read_main_model() or "gpt-4o-mini",
+                model or (main_runtime.get("model") if main_runtime else None) or "gpt-4o-mini",
                provider,
            )
            extra = {}
+            _clean_base, _dq = _extract_url_query_params(custom_base)
+            if _dq:
+                extra["default_query"] = _dq
            if base_url_host_matches(custom_base, "api.kimi.com"):
                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
-                from hermes_cli.models import copilot_default_headers
-                extra["default_headers"] = copilot_default_headers()
-            client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
-            client = _wrap_if_needed(client, final_model, custom_base)
-            return (_to_async_client(client, final_model) if async_mode
+                from hermes_cli.copilot_auth import copilot_request_headers
+                extra["default_headers"] = copilot_request_headers(
+                    is_agent_turn=True, is_vision=is_vision
+                )
+            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
+            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
+            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
        # Try custom first, then codex, then API-key providers
        for try_fn in (_try_custom_endpoint, _try_codex,
@@ -1841,8 +2111,9 @@ def resolve_provider_client(
            if client is not None:
                final_model = _normalize_resolved_model(model or default, provider)
                _cbase = str(getattr(client, "base_url", "") or "")
-                client = _wrap_if_needed(client, final_model, _cbase)
-                return (_to_async_client(client, final_model) if async_mode
+                _ckey = str(getattr(client, "api_key", "") or "")
+                client = _wrap_if_needed(client, final_model, _cbase, _ckey)
+                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
        logger.warning("resolve_provider_client: custom/main requested "
                       "but no endpoint credentials found")
@@ -1864,9 +2135,23 @@ def resolve_provider_client(
            entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
            if custom_base:
                final_model = _normalize_resolved_model(
-                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
+                    model
+                    or custom_entry.get("model")
+                    or (main_runtime.get("model") if main_runtime else None)
+                    or _read_main_model()
+                    or "gpt-4o-mini",
                    provider,
                )
+                # anthropic_messages talks to the /anthropic surface directly;
+                # OpenAI-wire paths (chat_completions / codex_responses) need the
+                # /v1 equivalent.  Rewrite only on the OpenAI-wire path so the
+                # Anthropic fallback SDK still sees the original URL.
+                if entry_api_mode == "anthropic_messages":
+                    openai_base = custom_base
+                else:
+                    openai_base = _to_openai_base_url(custom_base)
+                _clean_base2, _dq2 = _extract_url_query_params(openai_base)
+                _extra2 = {"default_query": _dq2} if _dq2 else {}
                logger.debug(
                    "resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
                    provider, final_model, entry_api_mode or "chat_completions")
@@ -1884,8 +2169,13 @@ def resolve_provider_client(
                            "installed — falling back to OpenAI-wire.",
                            provider,
                        )
-                        client = OpenAI(api_key=custom_key, base_url=custom_base)
-                        return (_to_async_client(client, final_model) if async_mode
+                        # Fallback went OpenAI-wire after all — redo the query
+                        # extraction against the rewritten /v1 URL.
+                        _fallback_base = _to_openai_base_url(custom_base)
+                        _fb_clean, _fb_dq = _extract_url_query_params(_fallback_base)
+                        _fb_extra = {"default_query": _fb_dq} if _fb_dq else {}
+                        client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
+                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
                        real_client, final_model, custom_key, custom_base, is_oauth=False,
@@ -1893,7 +2183,7 @@ def resolve_provider_client(
                    if async_mode:
                        return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
                    return sync_anthropic, final_model
-                client = OpenAI(api_key=custom_key, base_url=custom_base)
+                client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
                # codex_responses or inherited auto-detect (via _wrap_if_needed).
                # _wrap_if_needed reads the closed-over `api_mode` (the task-level
                # override). Named-provider entry api_mode=codex_responses also
@@ -1903,8 +2193,8 @@ def resolve_provider_client(
                ):
                    client = CodexAuxiliaryClient(client, final_model)
                else:
-                    client = _wrap_if_needed(client, final_model, custom_base)
-                return (_to_async_client(client, final_model) if async_mode
+                    client = _wrap_if_needed(client, final_model, openai_base, custom_key)
+                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
            logger.warning(
                "resolve_provider_client: named custom provider %r has no base_url",
@@ -1936,7 +2226,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
                return None, None
            final_model = _normalize_resolved_model(model or default_model, provider)
-            return (_to_async_client(client, final_model) if async_mode else (client, final_model))
+            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model))

        creds = resolve_api_key_provider_credentials(provider)
        api_key = str(creds.get("api_key", "")).strip()
@@ -1962,7 +2252,7 @@ def resolve_provider_client(
            if is_native_gemini_base_url(base_url):
                client = GeminiNativeClient(api_key=api_key, base_url=base_url)
                logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-                return (_to_async_client(client, final_model) if async_mode
+                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))

        # Provider-specific headers
@@ -1970,9 +2260,11 @@ def resolve_provider_client(
        if base_url_host_matches(base_url, "api.kimi.com"):
            headers["User-Agent"] = "claude-code/0.1.0"
        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-            from hermes_cli.models import copilot_default_headers
+            from hermes_cli.copilot_auth import copilot_request_headers

-            headers.update(copilot_default_headers())
+            headers.update(copilot_request_headers(
+                is_agent_turn=True, is_vision=is_vision
+            ))
        client = OpenAI(api_key=api_key, base_url=base_url,
                        **({"default_headers": headers} if headers else {}))

@@ -1994,16 +2286,24 @@ def resolve_provider_client(

        # Honor api_mode for any API-key provider (e.g. direct OpenAI with
        # codex-family models).  The copilot-specific wrapping above handles
-        # copilot; this covers the general case (#6800).
-        client = _wrap_if_needed(client, final_model, base_url)
+        # copilot; this covers the general case (#6800).  Also rewraps
+        # Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
+        # /anthropic-suffixed gateways) so named providers like kimi-coding
+        # land on the right transport without needing per-provider branches.
+        client = _wrap_if_needed(client, final_model, base_url, api_key)

        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    if pconfig.auth_type == "external_process":
        creds = resolve_external_process_provider_credentials(provider)
-        final_model = _normalize_resolved_model(model or _read_main_model(), provider)
+        final_model = _normalize_resolved_model(
+            model
+            or (main_runtime.get("model") if main_runtime else None)
+            or _read_main_model(),
+            provider,
+        )
        if provider == "copilot-acp":
            api_key = str(creds.get("api_key", "")).strip()
            base_url = str(creds.get("base_url", "")).strip()
@@ -2030,7 +2330,7 @@ def resolve_provider_client(
                args=args,
            )
            logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-            return (_to_async_client(client, final_model) if async_mode
+            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
        logger.warning("resolve_provider_client: external-process provider %s not "
                       "directly supported", provider)
@@ -2066,7 +2366,7 @@ def resolve_provider_client(
            base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
        )
        logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
-        return (_to_async_client(client, final_model) if async_mode
+        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                else (client, final_model))

    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
@@ -2141,8 +2441,13 @@ def _normalize_vision_provider(provider: Optional[str]) -> str:
    return _normalize_aux_provider(provider)


-def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
+def _resolve_strict_vision_backend(
+    provider: str,
+    model: Optional[str] = None,
+) -> Tuple[Optional[Any], Optional[str]]:
    provider = _normalize_vision_provider(provider)
+    if provider == "copilot":
+        return resolve_provider_client("copilot", model, is_vision=True)
    if provider == "openrouter":
        return _try_openrouter()
    if provider == "nous":
@@ -2210,7 +2515,7 @@ def resolve_vision_provider_client(
            return resolved_provider, None, None
        final_model = resolved_model or default_model
        if async_mode:
-            async_client, async_model = _to_async_client(sync_client, final_model)
+            async_client, async_model = _to_async_client(sync_client, final_model, is_vision=True)
            return resolved_provider, async_client, async_model
        return resolved_provider, sync_client, final_model

@@ -2242,19 +2547,35 @@ def resolve_vision_provider_client(
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
+            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
            if main_provider == "nous":
-                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
+                sync_client, default_model = _resolve_strict_vision_backend(
+                    main_provider, vision_model
+                )
                if sync_client is not None:
                    logger.info(
                        "Vision auto-detect: using main provider %s (%s)",
                        main_provider, default_model or resolved_model or main_model,
                    )
                    return _finalize(main_provider, sync_client, default_model)
+            elif main_provider in _PROVIDERS_WITHOUT_VISION:
+                # Kimi Coding Plan's /coding endpoint (Anthropic Messages wire)
+                # does not accept image input — Kimi's own docs say "Current
+                # model does not support image input, switch to a model with
+                # image_in capability" and vision lives on the separate Kimi
+                # Platform (api.moonshot.ai). Skip the main provider and fall
+                # through to the aggregator chain instead of returning a
+                # client that will 404 on every vision request (#17076).
+                logger.debug(
+                    "Vision auto-detect: skipping main provider %s (no "
+                    "vision support) — falling through to aggregator chain",
+                    main_provider,
+                )
            else:
-                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
-                    api_mode=resolved_api_mode)
+                    api_mode=resolved_api_mode,
+                    is_vision=True)
                if rpc_client is not None:
                    logger.info(
                        "Vision auto-detect: using main provider %s (%s)",
@@ -2276,11 +2597,14 @@ def resolve_vision_provider_client(
        return None, None, None

    if requested in _VISION_AUTO_PROVIDER_ORDER:
-        sync_client, default_model = _resolve_strict_vision_backend(requested)
+        sync_client, default_model = _resolve_strict_vision_backend(
+            requested, resolved_model
+        )
        return _finalize(requested, sync_client, default_model)

    client, final_model = _get_cached_client(requested, resolved_model, async_mode,
-                                             api_mode=resolved_api_mode)
+                                             api_mode=resolved_api_mode,
+                                             is_vision=True)
    if client is None:
        return requested, None, None
    return requested, client, final_model
@@ -2344,10 +2668,11 @@ def _client_cache_key(
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
+    is_vision: bool = False,
 ) -> tuple:
    runtime = _normalize_main_runtime(main_runtime)
    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key, is_vision)


 def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
@@ -2373,6 +2698,7 @@ def _refresh_nous_auxiliary_client(
    api_key: Optional[str] = None,
    api_mode: Optional[str] = None,
    main_runtime: Optional[Dict[str, Any]] = None,
+    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
    runtime = _resolve_nous_runtime_api(force_refresh=True)
@@ -2390,7 +2716,7 @@ def _refresh_nous_auxiliary_client(
            current_loop = _aio.get_event_loop()
        except RuntimeError:
            pass
-        client, final_model = _to_async_client(sync_client, final_model or "")
+        client, final_model = _to_async_client(sync_client, final_model or "", is_vision=is_vision)
    else:
        client = sync_client

@@ -2401,6 +2727,7 @@ def _refresh_nous_auxiliary_client(
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
+        is_vision=is_vision,
    )
    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
    return client, final_model
@@ -2512,12 +2839,19 @@ def _is_openrouter_client(client: Any) -> bool:
    return False


+def _cached_client_accepts_slash_models(client: Any, cached_default: Optional[str]) -> bool:
+    """Best-effort guess at whether a cached client takes ``vendor/model`` IDs.
+
+    True for any client that ``_is_openrouter_client`` recognises.  For every
+    other client the answer is True only when the cached default model
+    itself contains a ``/``; a missing or empty cached default gives False.
+    """
+    if not _is_openrouter_client(client):
+        default_has_slash = "/" in cached_default if cached_default else False
+        return bool(default_has_slash)
+    return True
+
+
 def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
-    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
+    """Keep slash-bearing model IDs only for cached clients that support them.

    Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
    """
-    if model and "/" in model and not _is_openrouter_client(client):
+    if model and "/" in model and not _cached_client_accepts_slash_models(client, cached_default):
        return cached_default
    return model or cached_default

@@ -2530,6 +2864,7 @@ def _get_cached_client(
    api_key: str = None,
    api_mode: str = None,
    main_runtime: Optional[Dict[str, Any]] = None,
+    is_vision: bool = False,
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

@@ -2566,6 +2901,7 @@ def _get_cached_client(
        api_key=api_key,
        api_mode=api_mode,
        main_runtime=main_runtime,
+        is_vision=is_vision,
    )
    with _client_cache_lock:
        if cache_key in _client_cache:
@@ -2597,6 +2933,7 @@ def _get_cached_client(
        explicit_api_key=api_key,
        api_mode=api_mode,
        main_runtime=runtime,
+        is_vision=is_vision,
    )
    if client is not None:
        # For async clients, remember which loop they were created on so we
@@ -2715,7 +3052,7 @@ def _get_task_extra_body(task: str) -> Dict[str, Any]:

 # Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
 # Their image content blocks must use Anthropic format, not OpenAI format.
-_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
+_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-oauth", "minimax-cn"})


 def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
@@ -3060,6 +3397,7 @@ def call_llm(
                api_key=resolved_api_key,
                api_mode=resolved_api_mode,
                main_runtime=main_runtime,
+                is_vision=(task == "vision"),
            )
            if refreshed_client is not None:
                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
@@ -3350,6 +3688,7 @@ async def async_call_llm(
                base_url=resolved_base_url,
                api_key=resolved_api_key,
                api_mode=resolved_api_mode,
+                is_vision=(task == "vision"),
            )
            if refreshed_client is not None:
                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
@@ -3418,7 +3757,9 @@ async def async_call_llm(
                    extra_body=effective_extra_body,
                    base_url=str(getattr(fb_client, "base_url", "") or ""))
                # Convert sync fallback client to async
-                async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
+                async_fb, async_fb_model = _to_async_client(
+                    fb_client, fb_model or "", is_vision=(task == "vision")
+                )
                if async_fb_model and async_fb_model != fb_kwargs.get("model"):
                    fb_kwargs["model"] = async_fb_model
                return _validate_llm_response(
@@ -291,14 +291,52 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
 def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
    """Resolve the AWS region for Bedrock API calls.

-    Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
+    Priority:
+      1. AWS_REGION env var
+      2. AWS_DEFAULT_REGION env var
+      3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
+      4. us-east-1 (hard fallback)
+
+    The boto3 fallback is critical for EU/AP users who configure their region
+    in ~/.aws/config via a named profile rather than env vars — without it,
+    live model discovery would always return us.* profile IDs regardless of
+    the user's actual region.
    """
    env = env if env is not None else os.environ
-    return (
+    explicit = (
        env.get("AWS_REGION", "").strip()
        or env.get("AWS_DEFAULT_REGION", "").strip()
-        or "us-east-1"
    )
+    if explicit:
+        return explicit
+    try:
+        import botocore.session
+        region = botocore.session.get_session().get_config_variable("region")
+        if region:
+            return region
+    except Exception:
+        pass
+    return "us-east-1"
+
+
+def bedrock_model_ids_or_none() -> Optional[List[str]]:
+    """Return live-discovered Bedrock model IDs, or ``None`` if unavailable.
+
+    Looks up the active region with ``resolve_bedrock_region()``, passes it
+    to ``discover_bedrock_models()``, and collects the ``"id"`` of every
+    entry that comes back.
+
+    Returns:
+        The list of discovered model IDs, or ``None`` when discovery raises
+        or yields nothing.  ``None`` is the signal for callers to use their
+        static curated list instead.
+    """
+    model_ids: Optional[List[str]] = None
+    try:
+        found = discover_bedrock_models(resolve_bedrock_region())
+        if found:
+            model_ids = [entry["id"] for entry in found]
+    except Exception:
+        # Best-effort: any discovery failure is reported as "no live data".
+        model_ids = None
+    return model_ids


 # ---------------------------------------------------------------------------
@@ -227,6 +227,23 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
 # Message format conversion
 # ---------------------------------------------------------------------------

+_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
+
+
+def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
+    """Return a replay-safe status for a Responses assistant message.
+
+    A string input is trimmed, lower-cased, and has hyphens and spaces
+    turned into underscores; if the result is one of
+    ``_RESPONSE_MESSAGE_STATUSES`` it is returned, so an incomplete turn is
+    not silently relabelled as completed.
+
+    Args:
+        value: Raw status taken from a response or stored message item.
+        default: Status to use when ``value`` is not a string or does not
+            normalize to a recognised status.
+
+    Returns:
+        The canonical status string, or ``default``.
+    """
+    if not isinstance(value, str):
+        return default
+    canonical = value.strip().lower()
+    for separator in ("-", " "):
+        canonical = canonical.replace(separator, "_")
+    return canonical if canonical in _RESPONSE_MESSAGE_STATUSES else default
+
+
 def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert internal chat-style messages to Responses input items."""
    items: List[Dict[str, Any]] = []
@@ -272,7 +289,57 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                                seen_item_ids.add(item_id)
                            has_codex_reasoning = True

-                if content_parts:
+                # Replay exact assistant message items (with id/phase) from
+                # previous turns so the API can maintain prefix-cache hits.
+                # OpenAI docs: "preserve and resend phase on all assistant
+                # messages — dropping it can degrade performance."
+                codex_message_items = msg.get("codex_message_items")
+                replayed_message_items = 0
+                if isinstance(codex_message_items, list):
+                    for raw_item in codex_message_items:
+                        if not isinstance(raw_item, dict):
+                            continue
+                        if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
+                            continue
+                        raw_content_parts = raw_item.get("content")
+                        if not isinstance(raw_content_parts, list):
+                            continue
+
+                        normalized_content_parts = []
+                        for part in raw_content_parts:
+                            if not isinstance(part, dict):
+                                continue
+                            part_type = str(part.get("type") or "").strip()
+                            if part_type not in {"output_text", "text"}:
+                                continue
+                            text = part.get("text", "")
+                            if text is None:
+                                text = ""
+                            if not isinstance(text, str):
+                                text = str(text)
+                            normalized_content_parts.append({"type": "output_text", "text": text})
+
+                        if not normalized_content_parts:
+                            continue
+
+                        replay_item = {
+                            "type": "message",
+                            "role": "assistant",
+                            "status": _normalize_responses_message_status(raw_item.get("status")),
+                            "content": normalized_content_parts,
+                        }
+                        item_id = raw_item.get("id")
+                        if isinstance(item_id, str) and item_id.strip():
+                            replay_item["id"] = item_id.strip()
+                        phase = raw_item.get("phase")
+                        if isinstance(phase, str) and phase.strip():
+                            replay_item["phase"] = phase.strip()
+                        items.append(replay_item)
+                        replayed_message_items += 1
+
+                if replayed_message_items > 0:
+                    pass
+                elif content_parts:
                    items.append({"role": "assistant", "content": content_parts})
                elif content_text.strip():
                    items.append({"role": "assistant", "content": content_text})
@@ -432,6 +499,47 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                normalized.append(reasoning_item)
            continue

+        if item_type == "message":
+            role = item.get("role")
+            if role != "assistant":
+                raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
+            content = item.get("content")
+            if not isinstance(content, list):
+                raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
+            normalized_content = []
+            for part_idx, part in enumerate(content):
+                if not isinstance(part, dict):
+                    raise ValueError(
+                        f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
+                    )
+                part_type = part.get("type")
+                if part_type not in {"output_text", "text"}:
+                    raise ValueError(
+                        f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
+                    )
+                text = part.get("text", "")
+                if text is None:
+                    text = ""
+                if not isinstance(text, str):
+                    text = str(text)
+                normalized_content.append({"type": "output_text", "text": text})
+            if not normalized_content:
+                raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
+            normalized_item: Dict[str, Any] = {
+                "type": "message",
+                "role": "assistant",
+                "status": _normalize_responses_message_status(item.get("status")),
+                "content": normalized_content,
+            }
+            item_id = item.get("id")
+            if isinstance(item_id, str) and item_id.strip():
+                normalized_item["id"] = item_id.strip()
+            phase = item.get("phase")
+            if isinstance(phase, str) and phase.strip():
+                normalized_item["phase"] = phase.strip()
+            normalized.append(normalized_item)
+            continue
+
        role = item.get("role")
        if role in {"user", "assistant"}:
            content = item.get("content", "")
@@ -716,6 +824,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    content_parts: List[str] = []
    reasoning_parts: List[str] = []
    reasoning_items_raw: List[Dict[str, Any]] = []
+    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
    saw_commentary_phase = False
@@ -734,6 +843,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
+            normalized_phase = None
            if isinstance(item_phase, str):
                normalized_phase = item_phase.strip().lower()
                if normalized_phase in {"commentary", "analysis"}:
@@ -743,6 +853,18 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
            message_text = _extract_responses_message_text(item)
            if message_text:
                content_parts.append(message_text)
+                raw_message_item: Dict[str, Any] = {
+                    "type": "message",
+                    "role": "assistant",
+                    "status": _normalize_responses_message_status(item_status),
+                    "content": [{"type": "output_text", "text": message_text}],
+                }
+                item_id = getattr(item, "id", None)
+                if isinstance(item_id, str) and item_id:
+                    raw_message_item["id"] = item_id
+                if normalized_phase:
+                    raw_message_item["phase"] = normalized_phase
+                message_items_raw.append(raw_message_item)
        elif item_type == "reasoning":
            reasoning_text = _extract_responses_reasoning_text(item)
            if reasoning_text:
@@ -855,6 +977,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        reasoning_content=None,
        reasoning_details=None,
        codex_reasoning_items=reasoning_items_raw or None,
+        codex_message_items=message_items_raw or None,
    )

    if tool_calls:
@@ -61,9 +61,52 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"

 # Chars per token rough estimate
 _CHARS_PER_TOKEN = 4
+# Flat token cost per attached image part.  Real cost varies by provider and
+# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
+# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
+# that keeps compression budgeting honest for multi-image conversations.
+# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
+_IMAGE_TOKEN_ESTIMATE = 1600
+# Same figure expressed in the char-budget currency the rest of the
+# compressor speaks in.  Used when accumulating message "content length"
+# for tail-cut decisions.
+_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _content_length_for_budget(raw_content: Any) -> int:
+    """Return the effective char-length of a message's content for token budgeting.
+
+    Plain strings count as ``len(content)``.  Multimodal lists count the
+    ``text`` field of each dict part plus a flat ``_IMAGE_CHAR_EQUIVALENT``
+    per image part (``image_url`` / ``input_image`` / Anthropic-style
+    ``image``), so a turn with several attached images is not treated as
+    near-zero tokens just because its text is empty.
+
+    Args:
+        raw_content: A message ``content`` value — a string, a list of
+            parts (strings, dicts, or anything else), or any other object.
+
+    Returns:
+        The character count to feed into the chars-per-token estimate.
+        Non-list, non-string content is measured through ``str()``, with
+        falsy values counting as 0.
+    """
+    if isinstance(raw_content, str):
+        return len(raw_content)
+    if not isinstance(raw_content, list):
+        return len(str(raw_content or ""))
+
+    total = 0
+    for part in raw_content:
+        if isinstance(part, str):
+            total += len(part)
+            continue
+        if not isinstance(part, dict):
+            total += len(str(part))
+            continue
+        if part.get("type") in {"image_url", "input_image", "image"}:
+            # Flat cost per image; the raw payload inside the part is
+            # deliberately not measured.
+            total += _IMAGE_CHAR_EQUIVALENT
+            continue
+        # text / input_text / anything else carrying a ``text`` field.
+        text = part.get("text") or ""
+        if not isinstance(text, str):
+            # A non-string text value (e.g. a number) would make len()
+            # raise, and a list/dict would be counted by element rather
+            # than by character; measure the string form instead.
+            text = str(text)
+        total += len(text)
+    return total
+
+
 def _content_text_for_contains(content: Any) -> str:
    """Return a best-effort text view of message content.

@@ -295,6 +338,10 @@ class ContextCompressor(ContextEngine):
        self._context_probe_persistable = False
        self._previous_summary = None
        self._last_summary_error = None
+        self._last_summary_dropped_count = 0
+        self._last_summary_fallback_used = False
+        self._last_aux_model_failure_error = None
+        self._last_aux_model_failure_model = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -398,6 +445,17 @@ class ContextCompressor(ContextEngine):
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
        self._last_summary_error: Optional[str] = None
+        # When summary generation fails and a static fallback is inserted,
+        # record how many turns were unrecoverably dropped so callers
+        # (gateway hygiene, /compress) can surface a visible warning.
+        self._last_summary_dropped_count: int = 0
+        self._last_summary_fallback_used: bool = False
+        # When a user-configured summary model fails and we recover by
+        # retrying on the main model, record the failure so gateway /
+        # CLI callers can still warn the user even though compression
+        # succeeded.  Silent recovery would hide the broken config.
+        self._last_aux_model_failure_error: Optional[str] = None
+        self._last_aux_model_failure_model: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -484,7 +542,7 @@ class ContextCompressor(ContextEngine):
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
                raw_content = msg.get("content") or ""
-                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
+                content_len = _content_length_for_budget(raw_content)
                msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                for tc in msg.get("tool_calls") or []:
                    if isinstance(tc, dict):
@@ -857,10 +915,50 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    "Falling back to main model '%s' for compression.",
                    self.summary_model, e, self.model,
                )
+                # Record the aux-model failure so callers can warn the user
+                # even if the retry-on-main succeeds — a misconfigured aux
+                # model is something the user needs to fix.
+                _err_text = str(e).strip() or e.__class__.__name__
+                if len(_err_text) > 220:
+                    _err_text = _err_text[:217].rstrip() + "..."
+                self._last_aux_model_failure_error = _err_text
+                self._last_aux_model_failure_model = self.summary_model
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately

+            # Unknown-error best-effort retry on main model.  Losing N turns of
+            # context is almost always worse than one extra summary attempt, so
+            # if we haven't already fallen back and the summary model differs
+            # from the main model, try once more on main before entering
+            # cooldown.  Errors that DID match _is_model_not_found above are
+            # already handled by the fast-path retry; this branch catches
+            # everything else (400s, provider-specific "no route" strings,
+            # aggregator rejections, etc.) where auto-retry is still safer
+            # than dropping the turns.
+            if (
+                self.summary_model
+                and self.summary_model != self.model
+                and not getattr(self, "_summary_model_fallen_back", False)
+            ):
+                self._summary_model_fallen_back = True
+                logging.warning(
+                    "Summary model '%s' failed (%s). "
+                    "Retrying on main model '%s' before giving up.",
+                    self.summary_model, e, self.model,
+                )
+                # Record the aux-model failure (see 404 branch above) — user
+                # should know their configured model is broken even if main
+                # recovers the call.
+                _err_text = str(e).strip() or e.__class__.__name__
+                if len(_err_text) > 220:
+                    _err_text = _err_text[:217].rstrip() + "..."
+                self._last_aux_model_failure_error = _err_text
+                self._last_aux_model_failure_model = self.summary_model
+                self.summary_model = ""  # empty = use main model
+                self._summary_failure_cooldown_until = 0.0
+                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)
+
            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
@@ -1082,8 +1180,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        for i in range(n - 1, head_end - 1, -1):
            msg = messages[i]
-            content = msg.get("content") or ""
-            msg_tokens = len(content) // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
+            raw_content = msg.get("content") or ""
+            content_len = _content_length_for_budget(raw_content)
+            msg_tokens = content_len // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
            # Include tool call arguments in estimate
            for tc in msg.get("tool_calls") or []:
                if isinstance(tc, dict):
@@ -1152,6 +1251,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                related to this topic and be more aggressive about compressing
                everything else.  Inspired by Claude Code's ``/compact``.
        """
+        # Reset per-call summary failure state — callers inspect these fields
+        # after compress() returns to decide whether to surface a warning.
+        self._last_summary_dropped_count = 0
+        self._last_summary_fallback_used = False
+        self._last_summary_error = None
+        self._last_aux_model_failure_error = None
+        self._last_aux_model_failure_model = None
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
        _min_for_compress = self.protect_first_n + 3 + 1
@@ -1230,11 +1336,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            if not self.quiet_mode:
                logger.warning("Summary generation failed — inserting static fallback context marker")
            n_dropped = compress_end - compress_start
+            self._last_summary_dropped_count = n_dropped
+            self._last_summary_fallback_used = True
            summary = (
                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} conversation turns were "
+                f"Summary generation was unavailable. {n_dropped} message(s) were "
                f"removed to free context space but could not be summarized. The removed "
-                f"turns contained earlier work in this session. Continue based on the "
+                f"messages contained earlier work in this session. Continue based on the "
                f"recent messages below and the current state of any files or resources."
            )

@@ -7,13 +7,13 @@ import random
 import threading
 import time
 import uuid
-import os
 import re
 from dataclasses import dataclass, fields, replace
 from datetime import datetime
 from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
+from hermes_cli.config import get_env_value
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@@ -455,6 +455,70 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

+    def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
+        """Adopt fresher Codex tokens from auth.json for a device_code entry.
+
+        A Codex pool entry can sit in ``STATUS_EXHAUSTED`` behind a
+        ``last_error_reset_at`` that is hours away (expired OAuth token, or
+        the ChatGPT 5h/weekly quota).  If the user re-authenticates in the
+        meantime via ``hermes model`` / ``hermes auth``, new tokens land in
+        ``auth.json`` while the pool entry stays frozen and requests keep
+        failing with "no available entries (all exhausted or empty)".
+
+        This method compares the entry with the ``openai-codex`` provider
+        state on disk.  When the stored access token is non-empty and either
+        token differs, the entry is replaced with one carrying the stored
+        tokens and cleared status/error fields, the pool is persisted, and
+        the new entry is returned.  In every other case — wrong provider or
+        source, malformed state, identical tokens, or any exception — the
+        original ``entry`` object is returned unchanged.
+
+        Same approach as the Nous/Anthropic resync paths.  Env/API-key
+        entries are skipped because they have no auth.json shadow.
+        """
+        if not (self.provider == "openai-codex" and entry.source == "device_code"):
+            return entry
+        try:
+            # Hold the store lock only while reading from disk.
+            with _auth_store_lock():
+                state = _load_provider_state(_load_auth_store(), "openai-codex")
+            tokens = state.get("tokens") if isinstance(state, dict) else None
+            if not isinstance(tokens, dict):
+                return entry
+
+            disk_access = tokens.get("access_token", "")
+            disk_refresh = tokens.get("refresh_token", "")
+            if not disk_access:
+                return entry
+
+            # Codex refresh tokens are single-use: a different refresh token
+            # on disk means another process already consumed ours.
+            access_differs = disk_access != (entry.access_token or "")
+            refresh_differs = bool(disk_refresh) and disk_refresh != (entry.refresh_token or "")
+            if not (access_differs or refresh_differs):
+                return entry
+
+            logger.debug(
+                "Pool entry %s: syncing Codex tokens from auth.json "
+                "(refreshed by another process)",
+                entry.id,
+            )
+            # Status/error bookkeeping is wiped so the entry is selectable again.
+            cleared = dict.fromkeys(
+                (
+                    "last_status",
+                    "last_status_at",
+                    "last_error_code",
+                    "last_error_reason",
+                    "last_error_message",
+                    "last_error_reset_at",
+                ),
+                None,
+            )
+            changes: Dict[str, Any] = {
+                "access_token": disk_access,
+                "refresh_token": disk_refresh or entry.refresh_token,
+                **cleared,
+            }
+            refreshed_at = state.get("last_refresh")
+            if refreshed_at:
+                changes["last_refresh"] = refreshed_at
+            fresh = replace(entry, **changes)
+            self._replace_entry(entry, fresh)
+            self._persist()
+            return fresh
+        except Exception as exc:
+            logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
+        return entry
+
    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
        """Sync a Nous pool entry from auth.json if tokens differ.

@@ -787,6 +851,18 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
+            # For openai-codex entries, same pattern: the user may have
+            # re-authed via `hermes model` / `hermes auth` after a 429/401,
+            # leaving fresh tokens on disk while the pool entry is still
+            # frozen behind last_error_reset_at (can be hours in the
+            # future for ChatGPT weekly windows).
+            if (self.provider == "openai-codex"
+                    and entry.source == "device_code"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_codex_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -1223,6 +1299,48 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

+    elif provider == "minimax-oauth":
+        # MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
+        # Seed the pool so `/auth list` reflects the logged-in state and the
+        # standard `hermes auth remove minimax-oauth <N>` flow works.
+        # Read the raw auth state here instead of calling
+        # resolve_minimax_oauth_runtime_credentials: that helper always
+        # refreshes on expiry, and provider discovery must not trigger
+        # surprise network calls.
+        try:
+            from hermes_cli.auth import get_provider_auth_state
+            state = get_provider_auth_state("minimax-oauth")
+            if state and state.get("access_token"):
+                source_name = "oauth"
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    expires_at_ms = None
+                    try:
+                        from datetime import datetime as _dt
+                        raw = state.get("expires_at", "")
+                        if raw:
+                            expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
+                    except Exception:
+                        expires_at_ms = None
+                    base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_OAUTH,
+                            "access_token": state["access_token"],
+                            "refresh_token": state.get("refresh_token"),
+                            "expires_at_ms": expires_at_ms,
+                            "base_url": base_url,
+                            "label": state.get("label", "") or label_from_token(
+                                state.get("access_token", ""), source_name
+                            ),
+                        },
+                    )
+        except Exception as exc:
+            logger.debug("MiniMax OAuth token seed failed: %s", exc)
+
    elif provider == "openai-codex":
        # Respect user suppression — `hermes auth remove openai-codex` marks
        # the device_code source as suppressed so it won't be re-seeded from
@@ -1273,7 +1391,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
            return False
    if provider == "openrouter":
-        token = os.getenv("OPENROUTER_API_KEY", "").strip()
+        # Check both os.environ and ~/.hermes/.env file
+        token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
            if _is_source_suppressed(provider, source):
@@ -1299,7 +1418,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool

    env_url = ""
    if pconfig.base_url_env_var:
-        env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+        env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")

    env_vars = list(pconfig.api_key_env_vars)
    if provider == "anthropic":
@@ -1310,7 +1429,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        ]

    for env_var in env_vars:
-        token = os.getenv(env_var, "").strip()
+        # Check both os.environ and ~/.hermes/.env file
+        token = (get_env_value(env_var) or "").strip()
        if not token:
            continue
        source = f"env:{env_var}"
@@ -47,7 +47,6 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass, field
-from pathlib import Path
 from typing import Callable, List, Optional


@@ -253,6 +252,19 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
    return result


+def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
+    """Remove MiniMax OAuth state stored in auth.json (providers.minimax-oauth).
+
+    MiniMax, like Nous, keeps a single OAuth record (including refresh
+    tokens) in the auth store, so clearing that record is the only cleanup
+    done here.  The removal only sticks if the `oauth` source is also
+    suppressed; otherwise the pool reseed path (_seed_from_singletons) would
+    undo it.  NOTE(review): suppression is not performed in this function —
+    presumably the caller handles it; confirm.
+
+    ``removed`` is accepted to match the other removal steps' signature and
+    is not used.
+    """
+    outcome = RemovalResult()
+    store_cleared = _clear_auth_store_provider(provider)
+    if store_cleared:
+        outcome.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    return outcome
+
+
 def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.

@@ -390,6 +402,11 @@ def _register_all_sources() -> None:
        remove_fn=_remove_qwen_cli,
        description="~/.qwen/oauth_creds.json",
    ))
+    register(RemovalStep(
+        provider="minimax-oauth", source_id="oauth",
+        remove_fn=_remove_minimax_oauth,
+        description="auth.json providers.minimax-oauth",
+    ))
    register(RemovalStep(
        provider="*", source_id="config:",
        match_fn=lambda src: src.startswith("config:") or src == "model_config",
@@ -0,0 +1,869 @@
+"""Curator — background skill maintenance orchestrator.
+
+The curator is an auxiliary-model task that periodically reviews agent-created
+skills and maintains the collection. It runs inactivity-triggered (no cron
+daemon): when the agent is idle and the last curator run was longer than
+``interval_hours`` ago, ``maybe_run_curator()`` spawns a forked AIAgent to do
+the review.
+
+Responsibilities:
+  - Auto-transition lifecycle states based on last_used_at timestamps
+  - Spawn a background review agent that can pin / archive / consolidate /
+    patch agent-created skills via skill_manage
+  - Persist curator state (last_run_at, paused, etc.) in .curator_state
+
+Strict invariants:
+  - Only touches agent-created skills (see tools/skill_usage.is_agent_created)
+  - Never auto-deletes — only archives. Archive is recoverable.
+  - Pinned skills bypass all auto-transitions
+  - Uses the auxiliary client; never touches the main session's prompt cache
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import tempfile
+import threading
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Set
+
+from hermes_constants import get_hermes_home
+from tools import skill_usage
+
+logger = logging.getLogger(__name__)
+
+
+# Fallback values for the ``curator.*`` config keys; the ``get_*`` accessors
+# below return these when a key is absent or its value cannot be converted.
+DEFAULT_INTERVAL_HOURS = 24 * 7  # 7 days
+DEFAULT_MIN_IDLE_HOURS = 2
+DEFAULT_STALE_AFTER_DAYS = 30
+DEFAULT_ARCHIVE_AFTER_DAYS = 90
+
+
+# ---------------------------------------------------------------------------
+# .curator_state — persistent scheduler + status
+# ---------------------------------------------------------------------------
+
+def _state_file() -> Path:
+    """Location of the curator state file inside the Hermes skills directory."""
+    skills_dir = get_hermes_home() / "skills"
+    return skills_dir / ".curator_state"
+
+
+def _default_state() -> Dict[str, Any]:
+    """Build the state dict for a curator that has never run.
+
+    A new dict is created on every call, so callers may mutate the result.
+    """
+    state: Dict[str, Any] = dict.fromkeys(
+        ("last_run_at", "last_run_duration_seconds", "last_run_summary")
+    )
+    state["paused"] = False
+    state["run_count"] = 0
+    return state
+
+
+def load_state() -> Dict[str, Any]:
+    """Load the persisted curator state, falling back to defaults.
+
+    Returns a dict seeded from ``_default_state()`` and overlaid with those
+    entries of the on-disk JSON object whose key is either a known state
+    field or starts with ``_``; any other key is dropped.  A missing,
+    unreadable, undecodable, or non-object file yields the plain defaults.
+    """
+    path = _state_file()
+    if not path.exists():
+        return _default_state()
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError and also UnicodeDecodeError,
+        # which read_text() raises when the file holds bytes that are not
+        # valid UTF-8.  Catching only JSONDecodeError let that case escape.
+        logger.debug("Failed to read curator state: %s", e)
+        return _default_state()
+    if not isinstance(data, dict):
+        return _default_state()
+    base = _default_state()
+    base.update({k: v for k, v in data.items() if k in base or k.startswith("_")})
+    return base
+
+
+def save_state(data: Dict[str, Any]) -> None:
+    """Persist ``data`` as JSON to the curator state file, best-effort.
+
+    The JSON is written to a temporary file in the state file's directory,
+    flushed and fsynced, then moved over the state file with ``os.replace``.
+    Any ``Exception`` is logged at debug level and swallowed, so a failed
+    save does not propagate to the caller.
+    """
+    path = _state_file()
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        # The temp file is created next to the target so the final
+        # os.replace() happens within one directory.
+        fd, tmp = tempfile.mkstemp(dir=str(path.parent), prefix=".curator_state_", suffix=".tmp")
+        try:
+            with os.fdopen(fd, "w", encoding="utf-8") as f:
+                json.dump(data, f, indent=2, sort_keys=True, ensure_ascii=False)
+                f.flush()
+                os.fsync(f.fileno())
+            os.replace(tmp, path)
+        except BaseException:
+            # Remove the temp file on any failure (BaseException also covers
+            # KeyboardInterrupt), then re-raise.
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
+    except Exception as e:
+        logger.debug("Failed to save curator state: %s", e, exc_info=True)
+
+
+def set_paused(paused: bool) -> None:
+    """Store the paused flag (coerced to bool) in the persisted curator state."""
+    snapshot = load_state()
+    snapshot.update(paused=bool(paused))
+    save_state(snapshot)
+
+
+def is_paused() -> bool:
+    """Return True when the persisted curator state has a truthy ``paused`` value."""
+    flag = load_state().get("paused")
+    return bool(flag)
+
+
+# ---------------------------------------------------------------------------
+# Config access
+# ---------------------------------------------------------------------------
+
+def _load_config() -> Dict[str, Any]:
+    """Return the ``curator`` section of the Hermes config as a dict.
+
+    Yields an empty dict when the config cannot be loaded, is not a mapping,
+    or has a missing, empty, or non-mapping ``curator`` entry.
+    """
+    try:
+        from hermes_cli.config import load_config
+        full_config = load_config()
+    except Exception as e:
+        logger.debug("Failed to load config for curator: %s", e)
+        return {}
+    if not isinstance(full_config, dict):
+        return {}
+    section = full_config.get("curator") or {}
+    return section if isinstance(section, dict) else {}
+
+
+def is_enabled() -> bool:
+    """Whether the curator is enabled; a missing ``enabled`` key counts as on."""
+    setting = _load_config().get("enabled", True)
+    return bool(setting)
+
+
+def _config_number(key: str, default: Any, cast: Callable[[Any], Any]) -> Any:
+    """Read ``curator.<key>`` and convert it with ``cast``; fall back to ``default``.
+
+    When the key is absent, ``cast(default)`` is returned.  When the
+    conversion fails, ``default`` itself is returned.  OverflowError is
+    handled alongside TypeError/ValueError because ``int(float("inf"))`` and
+    ``float(<huge int>)`` raise it rather than ValueError.
+    """
+    raw = _load_config().get(key, default)
+    try:
+        return cast(raw)
+    except (TypeError, ValueError, OverflowError):
+        return default
+
+
+def get_interval_hours() -> int:
+    """Configured ``curator.interval_hours`` (default ``DEFAULT_INTERVAL_HOURS``)."""
+    return _config_number("interval_hours", DEFAULT_INTERVAL_HOURS, int)
+
+
+def get_min_idle_hours() -> float:
+    """Configured ``curator.min_idle_hours`` (default ``DEFAULT_MIN_IDLE_HOURS``)."""
+    return _config_number("min_idle_hours", DEFAULT_MIN_IDLE_HOURS, float)
+
+
+def get_stale_after_days() -> int:
+    """Configured ``curator.stale_after_days`` (default ``DEFAULT_STALE_AFTER_DAYS``)."""
+    return _config_number("stale_after_days", DEFAULT_STALE_AFTER_DAYS, int)
+
+
+def get_archive_after_days() -> int:
+    """Configured ``curator.archive_after_days`` (default ``DEFAULT_ARCHIVE_AFTER_DAYS``)."""
+    return _config_number("archive_after_days", DEFAULT_ARCHIVE_AFTER_DAYS, int)
+
+
+# ---------------------------------------------------------------------------
+# Idle / interval check
+# ---------------------------------------------------------------------------
+
+def _parse_iso(ts: Optional[str]) -> Optional[datetime]:
+    """Parse an ISO-8601 timestamp string, returning None when it is unusable.
+
+    Args:
+        ts: Timestamp text, or None.
+
+    Returns:
+        The parsed datetime (naive or aware, as written), or None when
+        ``ts`` is empty, not a string, or not parseable.
+    """
+    if not ts:
+        return None
+    try:
+        return datetime.fromisoformat(ts)
+    except (TypeError, ValueError):
+        pass
+    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
+    # on; retry with the equivalent explicit UTC offset so such timestamps
+    # are not mistaken for "never ran" on older interpreters.
+    if isinstance(ts, str) and ts.endswith("Z"):
+        try:
+            return datetime.fromisoformat(ts[:-1] + "+00:00")
+        except ValueError:
+            return None
+    return None
+
+
+def should_run_now(now: Optional[datetime] = None) -> bool:
+    """Return True if the curator should run immediately.
+
+    Gates:
+      - curator.enabled == True
+      - not paused
+      - last_run_at missing, OR older than interval_hours
+
+    The idle check (min_idle_hours) is applied at the call site where we know
+    whether an agent is actively running — here we only enforce the static
+    gates.
+
+    Args:
+        now: Reference time; defaults to the current UTC time.  A naive
+            value is interpreted as UTC, the same way a naive
+            ``last_run_at`` is.
+    """
+    if not is_enabled():
+        return False
+    if is_paused():
+        return False
+
+    state = load_state()
+    last = _parse_iso(state.get("last_run_at"))
+    if last is None:
+        # Missing or unparseable last_run_at counts as "never ran".
+        return True
+
+    if now is None:
+        now = datetime.now(timezone.utc)
+    elif now.tzinfo is None:
+        # ``last`` is always made aware below; subtracting an aware datetime
+        # from a naive one raises TypeError.
+        now = now.replace(tzinfo=timezone.utc)
+    if last.tzinfo is None:
+        last = last.replace(tzinfo=timezone.utc)
+    interval = timedelta(hours=get_interval_hours())
+    return (now - last) >= interval
+
+
+# ---------------------------------------------------------------------------
+# Automatic state transitions (pure function, no LLM)
+# ---------------------------------------------------------------------------
+
+def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
+    """Walk every agent-created skill and move active/stale/archived based on
+    last_used_at. Pinned skills are never touched.
+
+    Each skill's age anchor is ``last_used_at``, falling back to
+    ``created_at`` and then to ``now``; naive timestamps are read as UTC.
+
+    Args:
+        now: Reference time; defaults to the current UTC time.  A naive
+            value is interpreted as UTC.
+
+    Returns:
+        Counter dict with keys ``checked`` (every row seen, pinned ones
+        included), ``marked_stale``, ``archived`` (only archives that
+        reported success) and ``reactivated``.
+    """
+    from tools import skill_usage as _u
+
+    if now is None:
+        now = datetime.now(timezone.utc)
+    elif now.tzinfo is None:
+        # Anchors below are always made timezone-aware; cutoffs derived from
+        # a naive ``now`` would make the comparisons raise TypeError.
+        now = now.replace(tzinfo=timezone.utc)
+    stale_cutoff = now - timedelta(days=get_stale_after_days())
+    archive_cutoff = now - timedelta(days=get_archive_after_days())
+
+    counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0}
+
+    for row in _u.agent_created_report():
+        counts["checked"] += 1
+        name = row["name"]
+        if row.get("pinned"):
+            continue
+
+        last_used = _parse_iso(row.get("last_used_at"))
+        # If never used, treat as using created_at as the anchor so new skills
+        # don't immediately archive themselves.
+        anchor = last_used or _parse_iso(row.get("created_at")) or now
+        if anchor.tzinfo is None:
+            anchor = anchor.replace(tzinfo=timezone.utc)
+
+        current = row.get("state", _u.STATE_ACTIVE)
+
+        if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
+            ok, _msg = _u.archive_skill(name)
+            if ok:
+                counts["archived"] += 1
+        elif anchor <= stale_cutoff and current == _u.STATE_ACTIVE:
+            _u.set_state(name, _u.STATE_STALE)
+            counts["marked_stale"] += 1
+        elif anchor > stale_cutoff and current == _u.STATE_STALE:
+            # Skill got used again after being marked stale — reactivate.
+            _u.set_state(name, _u.STATE_ACTIVE)
+            counts["reactivated"] += 1
+
+    return counts
+
+
+# ---------------------------------------------------------------------------
+# Review prompt for the forked agent
+# ---------------------------------------------------------------------------
+
+CURATOR_REVIEW_PROMPT = (
+    "You are running as Hermes' background skill CURATOR. This is an "
+    "UMBRELLA-BUILDING consolidation pass, not a passive audit and not a "
+    "duplicate-finder.\n\n"
+    "The goal of the skill collection is a LIBRARY OF CLASS-LEVEL "
+    "INSTRUCTIONS AND EXPERIENTIAL KNOWLEDGE. A collection of hundreds of "
+    "narrow skills where each one captures one session's specific bug is "
+    "a FAILURE of the library — not a feature. An agent searching skills "
+    "matches on descriptions, not on exact names; one broad umbrella "
+    "skill with labeled subsections beats five narrow siblings for "
+    "discoverability, not the other way around.\n\n"
+    "The right target shape is CLASS-LEVEL skills with rich SKILL.md "
+    "bodies + `references/`, `templates/`, and `scripts/` subfiles for "
+    "session-specific detail — not one-session-one-skill micro-entries.\n\n"
+    "Hard rules — do not violate:\n"
+    "1. DO NOT touch bundled or hub-installed skills. The candidate list "
+    "below is already filtered to agent-created skills only.\n"
+    "2. DO NOT delete any skill. Archiving (moving the skill's directory "
+    "into ~/.hermes/skills/.archive/) is the maximum destructive action. "
+    "Archives are recoverable; deletion is not.\n"
+    "3. DO NOT touch skills shown as pinned=yes. Skip them entirely.\n"
+    "4. DO NOT use usage counters as a reason to skip consolidation. The "
+    "counters are new and often mostly zero. Judge overlap on CONTENT, "
+    "not on use_count. 'use=0' is not evidence a skill is valuable; it's "
+    "absence of evidence either way.\n"
+    "5. DO NOT reject consolidation on the grounds that 'each skill has "
+    "a distinct trigger'. Pairwise distinctness is the wrong bar. The "
+    "right bar is: 'would a human maintainer write this as N separate "
+    "skills, or as one skill with N labeled subsections?' When the "
+    "answer is the latter, merge.\n\n"
+    "How to work — not optional:\n"
+    "1. Scan the full candidate list. Identify PREFIX CLUSTERS (skills "
+    "sharing a first word or domain keyword). Examples you are likely "
+    "to find: hermes-config-*, hermes-dashboard-*, gateway-*, codex-*, "
+    "ollama-*, anthropic-*, gemini-*, mcp-*, salvage-*, pr-*, "
+    "competitor-*, python-*, security-*, etc. Expect 10-25 clusters.\n"
+    "2. For each cluster with 2+ members, do NOT ask 'are these pairs "
+    "overlapping?' — ask 'what is the UMBRELLA CLASS these skills all "
+    "serve? Would a maintainer name that class and write one skill for "
+    "it?' If yes, pick (or create) the umbrella and absorb the siblings "
+    "into it.\n"
+    "3. Three ways to consolidate — use the right one per cluster:\n"
+    "   a. MERGE INTO EXISTING UMBRELLA — one skill in the cluster is "
+    "already broad enough to be the umbrella (example: `pr-triage-"
+    "salvage` for the PR review cluster). Patch it to add a labeled "
+    "section for each sibling's unique insight, then archive the "
+    "siblings.\n"
+    "   b. CREATE A NEW UMBRELLA SKILL.md — no existing member is broad "
+    "enough. Use skill_manage action=create to write a new class-level "
+    "skill whose SKILL.md covers the shared workflow and has short "
+    "labeled subsections. Archive the now-absorbed narrow siblings.\n"
+    "   c. DEMOTE TO REFERENCES/TEMPLATES/SCRIPTS — a sibling has "
+    "narrow-but-valuable session-specific content. Move it into the "
+    "umbrella's appropriate support directory:\n"
+    "      • `references/<topic>.md` for session-specific detail OR "
+    "condensed knowledge banks (quoted research, API docs excerpts, "
+    "domain notes, provider quirks, reproduction recipes)\n"
+    "      • `templates/<name>.<ext>` for starter files meant to be "
+    "copied and modified\n"
+    "      • `scripts/<name>.<ext>` for statically re-runnable actions "
+    "(verification scripts, fixture generators, probes)\n"
+    "      Then archive the old sibling. Use `terminal` with `mkdir -p "
+    "~/.hermes/skills/<umbrella>/references/ && mv ... <umbrella>/"
+    "references/<topic>.md` (or templates/ / scripts/).\n"
+    "4. Also flag skills whose NAME is too narrow (contains a PR number, "
+    "a feature codename, a specific error string, an 'audit' / "
+    "'diagnosis' / 'salvage' session artifact). These almost always "
+    "belong as a subsection or support file under a class-level umbrella.\n"
+    "5. Iterate. After one consolidation round, scan the remaining set "
+    "and look for the NEXT umbrella opportunity. Don't stop after 3 "
+    "merges.\n\n"
+    "Your toolset:\n"
+    "  - skills_list, skill_view        — read the current landscape\n"
+    "  - skill_manage action=patch      — add sections to the umbrella\n"
+    "  - skill_manage action=create     — create a new umbrella SKILL.md\n"
+    "  - skill_manage action=write_file — add a references/, templates/, "
+    "or scripts/ file under an existing skill (the skill must already "
+    "exist)\n"
+    "  - terminal                       — mv a sibling into the archive "
+    "OR move its content into a support subfile\n\n"
+    "'keep' is a legitimate decision ONLY when the skill is already a "
+    "class-level umbrella and none of the proposed merges would improve "
+    "discoverability. 'This is narrow but distinct from its siblings' "
+    "is NOT a reason to keep — it's a reason to move it under an "
+    "umbrella as a subsection or support file.\n\n"
+    "Expected output: real umbrella-ification. Process every obvious "
+    "cluster. If you end the pass with fewer than 10 archives, you "
+    "stopped too early — go back and look at the clusters you left "
+    "alone.\n\n"
+    "When done, write a summary with: clusters processed, skills "
+    "patched/absorbed, skills demoted to references/templates/scripts, "
+    "skills archived, new umbrellas created, and clusters you "
+    "deliberately left alone with one line each."
+)
+
+
+# ---------------------------------------------------------------------------
+# Per-run reports — {YYYYMMDD-HHMMSS}/run.json + REPORT.md under logs/curator/
+# ---------------------------------------------------------------------------
+
+def _reports_root() -> Path:
+    """Return the directory that holds curator run reports.
+
+    Reports go under the Hermes home's ``logs/curator`` directory
+    (``~/.hermes/logs/curator/``), next to operational logs such as
+    ``agent.log`` and ``gateway.log``, rather than among the user's authored
+    skill data in ``~/.hermes/skills/``.
+    """
+    logs_dir = get_hermes_home() / "logs"
+    return logs_dir / "curator"
+
+
+def _write_run_report(
+    *,
+    started_at: datetime,
+    elapsed_seconds: float,
+    auto_counts: Dict[str, int],
+    auto_summary: str,
+    before_report: List[Dict[str, Any]],
+    before_names: Set[str],
+    after_report: List[Dict[str, Any]],
+    llm_meta: Dict[str, Any],
+) -> Optional[Path]:
+    """Write run.json + REPORT.md under logs/curator/{YYYYMMDD-HHMMSS}/.
+
+    The report diffs the skill set before and after the run: names only in
+    ``before_names`` are listed as archived, names only in ``after_report``
+    as added, and names present in both whose ``state`` changed as
+    transitions.  Tool calls from ``llm_meta["tool_calls"]`` are tallied by
+    name.  ``auto_summary`` is accepted but not written to the report.
+
+    Returns the report directory path, or None if the report directory
+    couldn't be created (caller logs and continues — reporting is
+    best-effort).  A failure writing either individual file is logged at
+    debug level and does not change the return value.
+    """
+    root = _reports_root()
+    try:
+        root.mkdir(parents=True, exist_ok=True)
+    except Exception as e:
+        logger.debug("Curator report dir create failed: %s", e)
+        return None
+
+    stamp = started_at.strftime("%Y%m%d-%H%M%S")
+    run_dir = root / stamp
+    # If we crash-reran within the same second, append a disambiguator
+    # (the first collision becomes "<stamp>-2").
+    suffix = 1
+    while run_dir.exists():
+        suffix += 1
+        run_dir = root / f"{stamp}-{suffix}"
+    try:
+        run_dir.mkdir(parents=True, exist_ok=False)
+    except Exception as e:
+        logger.debug("Curator run dir create failed: %s", e)
+        return None
+
+    # Diff before/after
+    after_by_name = {r.get("name"): r for r in after_report if isinstance(r, dict)}
+    after_names = set(after_by_name.keys())
+    removed = sorted(before_names - after_names)   # archived during this run
+    added = sorted(after_names - before_names)     # new skills this run
+    before_by_name = {r.get("name"): r for r in before_report if isinstance(r, dict)}
+
+    # State transitions between the two snapshots (e.g. active -> stale)
+    transitions: List[Dict[str, str]] = []
+    for name in sorted(after_names & before_names):
+        s_before = (before_by_name.get(name) or {}).get("state")
+        s_after = (after_by_name.get(name) or {}).get("state")
+        if s_before and s_after and s_before != s_after:
+            transitions.append({"name": name, "from": s_before, "to": s_after})
+
+    # Classify LLM tool calls
+    tc_counts: Dict[str, int] = {}
+    for tc in llm_meta.get("tool_calls", []) or []:
+        # Tolerate malformed entries the same way report rows are filtered
+        # above: one non-dict tool-call record must not abort the report.
+        name = tc.get("name", "unknown") if isinstance(tc, dict) else "unknown"
+        tc_counts[name] = tc_counts.get(name, 0) + 1
+
+    payload = {
+        "started_at": started_at.isoformat(),
+        "duration_seconds": round(elapsed_seconds, 2),
+        "model": llm_meta.get("model", ""),
+        "provider": llm_meta.get("provider", ""),
+        "auto_transitions": auto_counts,
+        "counts": {
+            "before": len(before_names),
+            "after": len(after_names),
+            "delta": len(after_names) - len(before_names),
+            "archived_this_run": len(removed),
+            "added_this_run": len(added),
+            "state_transitions": len(transitions),
+            "tool_calls_total": sum(tc_counts.values()),
+        },
+        "tool_call_counts": tc_counts,
+        "archived": removed,
+        "added": added,
+        "state_transitions": transitions,
+        "llm_final": llm_meta.get("final", ""),
+        "llm_summary": llm_meta.get("summary", ""),
+        "llm_error": llm_meta.get("error"),
+        "tool_calls": llm_meta.get("tool_calls", []),
+    }
+
+    # run.json — machine-readable, full fidelity
+    try:
+        (run_dir / "run.json").write_text(
+            json.dumps(payload, indent=2, ensure_ascii=False) + "\n",
+            encoding="utf-8",
+        )
+    except Exception as e:
+        logger.debug("Curator run.json write failed: %s", e)
+
+    # REPORT.md — human-readable
+    try:
+        md = _render_report_markdown(payload)
+        (run_dir / "REPORT.md").write_text(md, encoding="utf-8")
+    except Exception as e:
+        logger.debug("Curator REPORT.md write failed: %s", e)
+
+    return run_dir
+
+
+def _render_report_markdown(p: Dict[str, Any]) -> str:
+    """Build the REPORT.md text for one curator run.
+
+    ``p`` is the run payload.  Keys read here: ``started_at``,
+    ``duration_seconds``, ``model``, ``provider``, ``counts``, ``llm_error``,
+    ``auto_transitions``, ``tool_call_counts``, ``archived``, ``added``,
+    ``state_transitions``, ``llm_final`` and ``llm_summary``.  Missing keys
+    fall back to empty or zero values.  Returns the markdown as a single
+    newline-joined string.
+    """
+    out: List[str] = []
+
+    # Header: title plus a one-line overview.
+    started = p.get("started_at", "")
+    total_seconds = int(p.get("duration_seconds", 0) or 0)
+    minutes, seconds = divmod(total_seconds, 60)
+    if minutes:
+        dur_label = f"{minutes}m {seconds}s"
+    else:
+        dur_label = f"{seconds}s"
+    out.append(f"# Curator run — {started}\n")
+
+    model = p.get("model") or "(not resolved)"
+    prov = p.get("provider") or "(not resolved)"
+    counts = p.get("counts") or {}
+    before_n = counts.get("before", 0)
+    after_n = counts.get("after", 0)
+    delta = counts.get("delta", 0)
+    out.append(
+        f"Model: `{model}` via `{prov}`  ·  Duration: {dur_label}  ·  "
+        f"Agent-created skills: {before_n} → {after_n} "
+        f"({delta:+d})\n"
+    )
+
+    error = p.get("llm_error")
+    if error:
+        out.append(f"> ⚠ LLM pass error: `{error}`\n")
+
+    # Counters from the rule-based transition pass.
+    auto = p.get("auto_transitions") or {}
+    out.append("## Auto-transitions (pure, no LLM)\n")
+    for label, key in (
+        ("checked", "checked"),
+        ("marked stale", "marked_stale"),
+        ("archived", "archived"),
+        ("reactivated", "reactivated"),
+    ):
+        out.append(f"- {label}: {auto.get(key, 0)}")
+    out.append("")
+
+    # Counters from the LLM pass.
+    tc_counts = p.get("tool_call_counts") or {}
+    by_name = ", ".join(f"{tool}={n}" for tool, n in sorted(tc_counts.items())) or "none"
+    out.extend([
+        "## LLM consolidation pass\n",
+        f"- tool calls: **{counts.get('tool_calls_total', 0)}** (by name: {by_name})",
+        f"- archived this run: **{counts.get('archived_this_run', 0)}**",
+        f"- new skills this run: **{counts.get('added_this_run', 0)}**",
+        f"- state transitions (active ↔ stale ↔ archived): **{counts.get('state_transitions', 0)}**",
+        "",
+    ])
+
+    # Archived skills: at most 50 are listed inline, the rest are summarised.
+    archived = p.get("archived") or []
+    if archived:
+        shown_limit = 50
+        out.append(f"### Skills archived ({len(archived)})\n")
+        out.append("_Archived skills are at `~/.hermes/skills/.archive/`. "
+                   "Restore any via `hermes curator restore <name>`._\n")
+        out.extend(f"- `{skill}`" for skill in archived[:shown_limit])
+        hidden = len(archived) - shown_limit
+        if hidden > 0:
+            out.append(f"- … and {hidden} more (see `run.json` for the full list)")
+        out.append("")
+
+    # Skills that exist only after the run.
+    added = p.get("added") or []
+    if added:
+        out.append(f"### New skills this run ({len(added)})\n")
+        out.append("_Usually these are new class-level umbrellas created via `skill_manage action=create`._\n")
+        out.extend(f"- `{skill}`" for skill in added)
+        out.append("")
+
+    # Per-skill state changes.
+    trans = p.get("state_transitions") or []
+    if trans:
+        out.append(f"### State transitions ({len(trans)})\n")
+        out.extend(
+            f"- `{t.get('name')}`: {t.get('from')} → {t.get('to')}" for t in trans
+        )
+        out.append("")
+
+    # Prefer the full final response; use the short summary only when there
+    # is neither a final response nor an error.
+    final = (p.get("llm_final") or "").strip()
+    llm_sum = p.get("llm_summary") or ""
+    if final:
+        out.extend(["## LLM final summary\n", final, ""])
+    elif not error and llm_sum:
+        out.extend(["## LLM summary\n", llm_sum, ""])
+
+    # Recovery footer
+    out.extend([
+        "## Recovery\n",
+        "- Restore an archived skill: `hermes curator restore <name>`",
+        "- All archives live under `~/.hermes/skills/.archive/` and are recoverable by `mv`",
+        "- See `run.json` in this directory for the full machine-readable record.",
+        "",
+    ])
+
+    return "\n".join(out)
+
+
+# ---------------------------------------------------------------------------
+# Orchestrator — spawn a forked AIAgent for the LLM review pass
+# ---------------------------------------------------------------------------
+
+def _render_candidate_list() -> str:
+    """Human/agent-readable list of agent-created skills with usage stats."""
+    rows = skill_usage.agent_created_report()
+    if not rows:
+        return "No agent-created skills to review."
+    lines = [f"Agent-created skills ({len(rows)}):\n"]
+    for r in rows:
+        lines.append(
+            f"- {r['name']}  "
+            f"state={r['state']}  "
+            f"pinned={'yes' if r.get('pinned') else 'no'}  "
+            f"use={r.get('use_count', 0)}  "
+            f"view={r.get('view_count', 0)}  "
+            f"patches={r.get('patch_count', 0)}  "
+            f"last_used={r.get('last_used_at') or 'never'}"
+        )
+    return "\n".join(lines)
+
+
+def run_curator_review(
+    on_summary: Optional[Callable[[str], None]] = None,
+    synchronous: bool = False,
+) -> Dict[str, Any]:
+    """Execute a single curator review pass.
+
+    Steps:
+      1. Apply automatic state transitions (pure, no LLM).
+      2. If there are agent-created skills, spawn a forked AIAgent that runs
+         the LLM review prompt against the current candidate list.
+      3. Update .curator_state with last_run_at and a one-line summary.
+      4. Invoke *on_summary* with a user-visible description.
+
+    If *synchronous* is True, the LLM review runs in the calling thread; the
+    default is to spawn a daemon thread so the caller returns immediately.
+
+    Args:
+      on_summary: optional callback invoked once, after the LLM pass, with
+        ``"curator: <final summary>"``. Exceptions it raises are swallowed.
+      synchronous: run the LLM pass inline instead of on a daemon thread.
+
+    Returns:
+      A dict with ``started_at`` (ISO timestamp), ``auto_transitions`` (the
+      counts from the automatic pass) and ``summary_so_far`` (the automatic
+      summary only — the LLM result is never included in the return value,
+      even in synchronous mode).
+    """
+    start = datetime.now(timezone.utc)
+    counts = apply_automatic_transitions(now=start)
+
+    # Build the human-readable summary of the automatic pass, listing only
+    # the non-zero counters.
+    auto_summary_parts = []
+    if counts["marked_stale"]:
+        auto_summary_parts.append(f"{counts['marked_stale']} marked stale")
+    if counts["archived"]:
+        auto_summary_parts.append(f"{counts['archived']} archived")
+    if counts["reactivated"]:
+        auto_summary_parts.append(f"{counts['reactivated']} reactivated")
+    auto_summary = ", ".join(auto_summary_parts) if auto_summary_parts else "no changes"
+
+    # Persist state before the LLM pass so a crash mid-review still records
+    # the run and doesn't immediately re-trigger.
+    state = load_state()
+    state["last_run_at"] = start.isoformat()
+    state["run_count"] = int(state.get("run_count", 0)) + 1
+    state["last_run_summary"] = f"auto: {auto_summary}"
+    save_state(state)
+
+    def _llm_pass():
+        # auto_summary is only read in this closure, never reassigned.
+        nonlocal auto_summary
+        # Snapshot skill state BEFORE the LLM pass so the report can diff.
+        try:
+            before_report = skill_usage.agent_created_report()
+        except Exception:
+            before_report = []
+        before_names = {r.get("name") for r in before_report if isinstance(r, dict)}
+
+        llm_meta: Dict[str, Any] = {}
+        try:
+            candidate_list = _render_candidate_list()
+            # The empty-report sentinel text from _render_candidate_list()
+            # means there is nothing for the LLM to review.
+            if "No agent-created skills" in candidate_list:
+                final_summary = f"auto: {auto_summary}; llm: skipped (no candidates)"
+                llm_meta = {
+                    "final": "",
+                    "summary": "skipped (no candidates)",
+                    "model": "",
+                    "provider": "",
+                    "tool_calls": [],
+                    "error": None,
+                }
+            else:
+                prompt = f"{CURATOR_REVIEW_PROMPT}\n\n{candidate_list}"
+                llm_meta = _run_llm_review(prompt)
+                final_summary = (
+                    f"auto: {auto_summary}; llm: {llm_meta.get('summary', 'no change')}"
+                )
+        except Exception as e:
+            # Any failure is folded into the summary and into an llm_meta
+            # record of the same shape as the success path.
+            logger.debug("Curator LLM pass failed: %s", e, exc_info=True)
+            final_summary = f"auto: {auto_summary}; llm: error ({e})"
+            llm_meta = {
+                "final": "",
+                "summary": f"error ({e})",
+                "model": "",
+                "provider": "",
+                "tool_calls": [],
+                "error": str(e),
+            }
+
+        # Reload state rather than reusing the outer ``state`` dict, then
+        # record the duration and the combined summary.
+        elapsed = (datetime.now(timezone.utc) - start).total_seconds()
+        state2 = load_state()
+        state2["last_run_duration_seconds"] = elapsed
+        state2["last_run_summary"] = final_summary
+
+        # Write the per-run report. Runs in a best-effort try so a
+        # reporting bug never breaks the curator itself. Report path is
+        # recorded in state so `hermes curator status` can point at it.
+        try:
+            after_report = skill_usage.agent_created_report()
+        except Exception:
+            after_report = []
+        try:
+            report_path = _write_run_report(
+                started_at=start,
+                elapsed_seconds=elapsed,
+                auto_counts=counts,
+                auto_summary=auto_summary,
+                before_report=before_report,
+                before_names=before_names,
+                after_report=after_report,
+                llm_meta=llm_meta,
+            )
+            if report_path is not None:
+                state2["last_report_path"] = str(report_path)
+        except Exception as e:
+            logger.debug("Curator report write failed: %s", e, exc_info=True)
+
+        save_state(state2)
+
+        # Notify the caller last; a failing callback must not affect the run.
+        if on_summary:
+            try:
+                on_summary(f"curator: {final_summary}")
+            except Exception:
+                pass
+
+    if synchronous:
+        _llm_pass()
+    else:
+        # Daemon thread: it does not keep the process alive on exit.
+        t = threading.Thread(target=_llm_pass, daemon=True, name="curator-review")
+        t.start()
+
+    return {
+        "started_at": start.isoformat(),
+        "auto_transitions": counts,
+        "summary_so_far": auto_summary,
+    }
+
+
+def _run_llm_review(prompt: str) -> Dict[str, Any]:
+    """Spawn an AIAgent fork to run the curator review prompt.
+
+    Args:
+      prompt: the full user message handed to the forked agent.
+
+    Returns a dict with:
+      - final: full (untruncated) final response from the reviewer
+      - summary: short summary suitable for state file (240-char cap plus an
+        ellipsis); ``"no change"`` when the reviewer returned no text
+      - model, provider: what the fork was configured to run on
+      - tool_calls: list of {name, arguments} for every tool call made during
+        the pass (string arguments longer than 400 chars are truncated)
+      - error: set if the pass failed; ``summary`` then carries the same
+        error text and ``final`` may be empty
+
+    Never raises; callers get a structured failure instead.
+    """
+    import contextlib
+    result_meta: Dict[str, Any] = {
+        "final": "",
+        "summary": "",
+        "model": "",
+        "provider": "",
+        "tool_calls": [],
+        "error": None,
+    }
+    try:
+        from run_agent import AIAgent
+    except Exception as e:
+        result_meta["error"] = f"AIAgent import failed: {e}"
+        result_meta["summary"] = result_meta["error"]
+        return result_meta
+
+    # Resolve provider + model the same way the CLI does, so the curator
+    # fork inherits the user's active main config rather than falling
+    # through to an empty provider/model pair (which sends HTTP 400
+    # "No models provided"). AIAgent() without explicit provider/model
+    # arguments hits an auto-resolution path that fails for OAuth-only
+    # providers and for pool-backed credentials.
+    _api_key = None
+    _base_url = None
+    _api_mode = None
+    _resolved_provider = None
+    _model_name = ""
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+        _cfg = load_config()
+        _m = _cfg.get("model", {}) if isinstance(_cfg.get("model"), dict) else {}
+        _provider = _m.get("provider") or "auto"
+        _model_name = _m.get("default") or _m.get("model") or ""
+        _rp = resolve_runtime_provider(
+            requested=_provider, target_model=_model_name
+        )
+        _api_key = _rp.get("api_key")
+        _base_url = _rp.get("base_url")
+        _api_mode = _rp.get("api_mode")
+        _resolved_provider = _rp.get("provider") or _provider
+    except Exception as e:
+        # Resolution failure is not fatal here: the agent is still built
+        # below with whatever values were resolved before the failure.
+        logger.debug("Curator provider resolution failed: %s", e, exc_info=True)
+
+    result_meta["model"] = _model_name
+    result_meta["provider"] = _resolved_provider or ""
+
+    review_agent = None
+    try:
+        review_agent = AIAgent(
+            model=_model_name,
+            provider=_resolved_provider,
+            api_key=_api_key,
+            base_url=_base_url,
+            api_mode=_api_mode,
+            # Umbrella-building over a large skill collection is worth a
+            # high iteration ceiling — the pass typically takes 50-100
+            # API calls against hundreds of candidate skills. The
+            # single-session review path caps itself at a much smaller
+            # number because it's not doing a curation sweep.
+            max_iterations=9999,
+            quiet_mode=True,
+            platform="curator",
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        # Disable recursive nudges — the curator must never spawn its own review.
+        review_agent._memory_nudge_interval = 0
+        review_agent._skill_nudge_interval = 0
+
+        # Redirect the forked agent's stdout/stderr to /dev/null while it
+        # runs so its tool-call chatter doesn't pollute the foreground
+        # terminal. The background-thread runner also hides it; this
+        # belt-and-suspenders path matters when a caller invokes
+        # run_curator_review(synchronous=True) from the CLI.
+        with open(os.devnull, "w") as _devnull, \
+             contextlib.redirect_stdout(_devnull), \
+             contextlib.redirect_stderr(_devnull):
+            conv_result = review_agent.run_conversation(user_message=prompt)
+
+        # Only a dict result is inspected; anything else yields an empty
+        # final response and therefore the "no change" summary.
+        final = ""
+        if isinstance(conv_result, dict):
+            final = str(conv_result.get("final_response") or "").strip()
+        result_meta["final"] = final
+        result_meta["summary"] = (final[:240] + "…") if len(final) > 240 else (final or "no change")
+
+        # Collect tool calls for the report. Walk the forked agent's
+        # session messages and extract every tool_call made during the
+        # pass. Truncate argument payloads so a giant skill_manage create
+        # doesn't blow up the report.
+        _calls: List[Dict[str, Any]] = []
+        for msg in getattr(review_agent, "_session_messages", []) or []:
+            if not isinstance(msg, dict):
+                continue
+            tcs = msg.get("tool_calls") or []
+            for tc in tcs:
+                if not isinstance(tc, dict):
+                    continue
+                fn = tc.get("function") or {}
+                name = fn.get("name") or ""
+                args_raw = fn.get("arguments") or ""
+                # Only string payloads are truncated; other types pass through.
+                if isinstance(args_raw, str) and len(args_raw) > 400:
+                    args_raw = args_raw[:400] + "…"
+                _calls.append({"name": name, "arguments": args_raw})
+        result_meta["tool_calls"] = _calls
+    except Exception as e:
+        result_meta["error"] = f"error: {e}"
+        result_meta["summary"] = result_meta["error"]
+    finally:
+        # Always release the forked agent; a failing close() is ignored.
+        if review_agent is not None:
+            try:
+                review_agent.close()
+            except Exception:
+                pass
+    return result_meta
+
+
+# ---------------------------------------------------------------------------
+# Public entrypoint for the session-start hook
+# ---------------------------------------------------------------------------
+
+def maybe_run_curator(
+    *,
+    idle_for_seconds: Optional[float] = None,
+    on_summary: Optional[Callable[[str], None]] = None,
+) -> Optional[Dict[str, Any]]:
+    """Best-effort: run a curator pass if all gates pass. Returns the result
+    dict if a pass was started, else None. Never raises."""
+    try:
+        if not should_run_now():
+            return None
+        # Idle gating: only enforce when the caller provided a measurement.
+        if idle_for_seconds is not None:
+            min_idle_s = get_min_idle_hours() * 3600.0
+            if idle_for_seconds < min_idle_s:
+                return None
+        return run_curator_review(on_summary=on_summary)
+    except Exception as e:
+        logger.debug("maybe_run_curator failed: %s", e, exc_info=True)
+        return None
@@ -42,6 +42,7 @@ class FailoverReason(enum.Enum):
    # Context / payload
    context_overflow = "context_overflow"  # Context too large — compress, not failover
    payload_too_large = "payload_too_large"  # 413 — compress payload
+    image_too_large = "image_too_large"   # Native image part exceeds provider's per-image limit — shrink and retry

    # Model
    model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
@@ -90,6 +91,7 @@ class ClassifiedError:
 _BILLING_PATTERNS = [
    "insufficient credits",
    "insufficient_quota",
+    "insufficient balance",
    "credit balance",
    "credits have been exhausted",
    "top up your credits",
@@ -147,6 +149,20 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
    "error code: 413",
 ]

+# Image-size patterns.  Matched against 400 bodies (not 413) because most
+# providers return a 400 with a specific image-too-big message before the
+# whole request hits the 413 size limit.  Anthropic's wording is the most
+# important here (hard 5 MB per image, returned as
+# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
+_IMAGE_TOO_LARGE_PATTERNS = [
+    "image exceeds",        # Anthropic: "image exceeds 5 MB maximum"
+    "image too large",      # generic
+    "image_too_large",      # error_code variant
+    "image size exceeds",   # variant
+    # "request_too_large" on a request known to contain an image → image is
+    # the likely culprit; we still try the shrink path before giving up.
+]
+
 # Context overflow patterns
 _CONTEXT_OVERFLOW_PATTERNS = [
    "context length",
@@ -671,6 +687,15 @@ def _classify_400(
 ) -> ClassifiedError:
    """Classify 400 Bad Request — context overflow, format error, or generic."""

+    # Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
+    # Must be checked BEFORE context_overflow because messages can trip both
+    # patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
+    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
+        return result_fn(
+            FailoverReason.image_too_large,
+            retryable=True,
+        )
+
    # Context overflow from 400
    if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
        return result_fn(
@@ -798,6 +823,13 @@ def _classify_by_message(
            should_compress=True,
        )

+    # Image-too-large patterns (from message text when no status_code)
+    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
+        return result_fn(
+            FailoverReason.image_too_large,
+            retryable=True,
+        )
+
    # Usage-limit patterns need the same disambiguation as 402: some providers
    # surface "usage limit" errors without an HTTP status code.  A transient
    # signal ("try again", "resets at", …) means it's a periodic quota, not
@@ -30,7 +30,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import uuid
 from types import SimpleNamespace
@@ -42,7 +41,6 @@ from agent import google_oauth
 from agent.gemini_schema import sanitize_gemini_tool_parameters
 from agent.google_code_assist import (
    CODE_ASSIST_ENDPOINT,
-    FREE_TIER_ID,
    CodeAssistError,
    ProjectContext,
    resolve_project_context,
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import Any, Dict, List
+from typing import Any, Dict

 # Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
 # object, which is only a subset of OpenAPI 3.0 / JSON Schema.  Strip fields
@@ -29,7 +29,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import urllib.error
 import urllib.parse
@@ -49,14 +49,13 @@ import json
 import logging
 import os
 import secrets
-import socket
 import stat
 import threading
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple

@@ -98,6 +97,7 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"

 # Regex patterns for fallback scraping from an installed gemini-cli.
 import re as _re
+from utils import atomic_replace
 _CLIENT_ID_PATTERN = _re.compile(
    r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
 )
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
                fh.flush()
                os.fsync(fh.fileno())
            os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
-            os.replace(tmp_path, path)
+            atomic_replace(tmp_path, path)
        finally:
            try:
                if tmp_path.exists():
@@ -0,0 +1,236 @@
+"""Routing helpers for inbound user-attached images.
+
+Two modes:
+
+  native  — attach images as OpenAI-style ``image_url`` content parts on the
+            user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
+            OpenAI chat.completions) already translate these into their
+            vendor-specific multimodal formats.
+
+  text    — run ``vision_analyze`` on each image up-front and prepend the
+            description to the user's text. The model never sees the pixels;
+            it only sees a lossy text summary. This is the pre-existing
+            behaviour and still the right choice for non-vision models.
+
+The decision is made once per message turn by :func:`decide_image_input_mode`.
+It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
+| ``text``, default ``auto``) and the active model's capability metadata.
+
+In ``auto`` mode:
+  - If the user has explicitly configured an auxiliary vision backend
+    (``auxiliary.vision.provider`` set to anything other than ``auto`` or
+    empty, or ``auxiliary.vision.model`` / ``auxiliary.vision.base_url``
+    set), we assume they want the text pipeline regardless of the main
+    model — they've opted in to a specific vision backend for a reason
+    (cost, quality, local-only, etc.).
+  - Otherwise, if the active model reports ``supports_vision=True`` in its
+    models.dev metadata, we attach natively.
+  - Otherwise (non-vision model, no explicit override), we fall back to text.
+
+This keeps ``vision_analyze`` surfaced as a tool in every session — skills
+and agent flows that chain it (browser screenshots, deeper inspection of
+URL-referenced images, style-gating loops) keep working. The routing only
+affects *how user-attached images on the current turn* are presented to the
+main model.
+"""
+
+from __future__ import annotations
+
+import base64
+import logging
+import mimetypes
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+_VALID_MODES = frozenset({"auto", "native", "text"})
+
+
+def _coerce_mode(raw: Any) -> str:
+    """Normalize a config value into one of the valid modes."""
+    if not isinstance(raw, str):
+        return "auto"
+    val = raw.strip().lower()
+    if val in _VALID_MODES:
+        return val
+    return "auto"
+
+
+def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
+    """True when the user configured a specific auxiliary vision backend.
+
+    An explicit override means the user *wants* the text pipeline (they're
+    paying for a dedicated vision model), so we don't silently bypass it.
+    """
+    if not isinstance(cfg, dict):
+        return False
+    aux = cfg.get("auxiliary") or {}
+    if not isinstance(aux, dict):
+        return False
+    vision = aux.get("vision") or {}
+    if not isinstance(vision, dict):
+        return False
+
+    provider = str(vision.get("provider") or "").strip().lower()
+    model = str(vision.get("model") or "").strip()
+    base_url = str(vision.get("base_url") or "").strip()
+
+    # "auto" / "" / blank = not explicit
+    if provider in ("", "auto") and not model and not base_url:
+        return False
+    return True
+
+
+def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
+    """Return True/False if we can resolve caps, None if unknown."""
+    if not provider or not model:
+        return None
+    try:
+        from agent.models_dev import get_model_capabilities
+        caps = get_model_capabilities(provider, model)
+    except Exception as exc:  # pragma: no cover - defensive
+        logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
+        return None
+    if caps is None:
+        return None
+    return bool(caps.supports_vision)
+
+
+def decide_image_input_mode(
+    provider: str,
+    model: str,
+    cfg: Optional[Dict[str, Any]],
+) -> str:
+    """Return ``"native"`` or ``"text"`` for the given turn.
+
+    Args:
+      provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
+      model:    active model slug as it would be sent to the provider.
+      cfg:      loaded config.yaml dict, or None. When None, behaves as auto.
+    """
+    mode_cfg = "auto"
+    if isinstance(cfg, dict):
+        agent_cfg = cfg.get("agent") or {}
+        if isinstance(agent_cfg, dict):
+            mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
+
+    if mode_cfg == "native":
+        return "native"
+    if mode_cfg == "text":
+        return "text"
+
+    # auto
+    if _explicit_aux_vision_override(cfg):
+        return "text"
+
+    supports = _lookup_supports_vision(provider, model)
+    if supports is True:
+        return "native"
+    return "text"
+
+
+# Image size handling is REACTIVE rather than proactive: we attempt native
+# attachment at full size regardless of provider, and rely on
+# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
+# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
+# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
+#
+# Why reactive: our knowledge of provider ceilings is partial and evolving
+# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
+# A proactive per-provider table would be stale the moment a provider raises
+# or lowers its limit, and silently degrading quality for users on providers
+# that would have accepted the full image is the worse failure mode.
+# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
+# it fires, which is cheaper than permanent quality loss.
+
+
+def _guess_mime(path: Path) -> str:
+    mime, _ = mimetypes.guess_type(str(path))
+    if mime and mime.startswith("image/"):
+        return mime
+    # mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
+    # the suffix looks imagey.
+    suffix = path.suffix.lower()
+    return {
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".png": "image/png",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+    }.get(suffix, "image/jpeg")
+
+
+def _file_to_data_url(path: Path) -> Optional[str]:
+    """Encode a local image as a base64 data URL at its native size.
+
+    Size limits are NOT enforced here — the agent retry loop
+    (``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
+    provider's first rejection. Keeping this simple means providers that
+    accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
+    quality tax just because one other provider is stricter.
+
+    Returns None only if the file can't be read (missing, permission
+    denied, etc.); the caller reports those paths in ``skipped``.
+    """
+    try:
+        raw = path.read_bytes()
+    except Exception as exc:
+        logger.warning("image_routing: failed to read %s — %s", path, exc)
+        return None
+    mime = _guess_mime(path)
+    b64 = base64.b64encode(raw).decode("ascii")
+    return f"data:{mime};base64,{b64}"
+
+
+def build_native_content_parts(
+    user_text: str,
+    image_paths: List[str],
+) -> Tuple[List[Dict[str, Any]], List[str]]:
+    """Build an OpenAI-style ``content`` list for a user turn.
+
+    Shape:
+      [{"type": "text", "text": "..."},
+       {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
+       ...]
+
+    Images are attached at their native size. If a provider rejects the
+    request because an image is too large (e.g. Anthropic's 5 MB per-image
+    ceiling), the agent's retry loop transparently shrinks and retries
+    once — see ``run_agent._try_shrink_image_parts_in_messages``.
+
+    Returns (content_parts, skipped_paths). Skipped paths are files that
+    couldn't be read from disk.
+    """
+    parts: List[Dict[str, Any]] = []
+    skipped: List[str] = []
+
+    text = (user_text or "").strip()
+    if text:
+        parts.append({"type": "text", "text": text})
+
+    for raw_path in image_paths:
+        p = Path(raw_path)
+        if not p.exists() or not p.is_file():
+            skipped.append(str(raw_path))
+            continue
+        data_url = _file_to_data_url(p)
+        if not data_url:
+            skipped.append(str(raw_path))
+            continue
+        parts.append({
+            "type": "image_url",
+            "image_url": {"url": data_url},
+        })
+
+    # If the text was empty, add a neutral prompt so the turn isn't just images.
+    if not text and any(p.get("type") == "image_url" for p in parts):
+        parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
+
+    return parts, skipped
+
+
+__all__ = [
+    "decide_image_input_mode",
+    "build_native_content_parts",
+]
@@ -0,0 +1,48 @@
+"""LM Studio reasoning-effort resolution shared by the chat-completions
+transport and run_agent's iteration-limit summary path.
+
+LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
+``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
+graduated models). We map the user's ``reasoning_config`` onto LM Studio's
+OpenAI-compatible vocabulary, then clamp against the model's allowed set so
+the server doesn't 400 on an unsupported effort.
+"""
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+# LM Studio accepts these top-level reasoning_effort values via its
+# OpenAI-compatible chat.completions endpoint.
+_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
+
+# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
+# Map them onto the OpenAI-compatible request vocabulary.
+_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
+
+
+def resolve_lmstudio_effort(
+    reasoning_config: Optional[dict],
+    allowed_options: Optional[List[str]],
+) -> Optional[str]:
+    """Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
+
+    ``None`` means "omit the field": the user picked a level the model can't
+    honor, so let LM Studio fall back to the model's declared default rather
+    than silently substituting a different effort. When ``allowed_options`` is
+    falsy (probe failed), skip clamping and send the resolved effort anyway.
+    """
+    effort = "medium"
+    if reasoning_config and isinstance(reasoning_config, dict):
+        if reasoning_config.get("enabled") is False:
+            effort = "none"
+        else:
+            raw = (reasoning_config.get("effort") or "").strip().lower()
+            raw = _LM_EFFORT_ALIASES.get(raw, raw)
+            if raw in _LM_VALID_EFFORTS:
+                effort = raw
+    if allowed_options:
+        allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
+        if effort not in allowed:
+            return None
+    return effort
@@ -28,7 +28,6 @@ Usage in run_agent.py:

 from __future__ import annotations

-import json
 import logging
 import re
 import inspect
@@ -63,15 +62,124 @@ def sanitize_context(text: str) -> str:
    return text


-def build_memory_context_block(raw_context: str) -> str:
-    """Wrap prefetched memory in a fenced block with system note.
+class StreamingContextScrubber:
+    """Stateful scrubber for streaming text that may contain split memory-context spans.

-    The fence prevents the model from treating recalled context as user
-    discourse.  Injected at API-call time only — never persisted.
+    The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
+    a ``<memory-context>`` opened in one delta and closed in a later delta
+    leaks its payload to the UI because the non-greedy block regex needs
+    both tags in one string.  This scrubber runs a small state machine
+    across deltas, holding back partial-tag tails and discarding
+    everything inside a span (including the system-note line).
+
+    Usage::
+
+        scrubber = StreamingContextScrubber()
+        for delta in stream:
+            visible = scrubber.feed(delta)
+            if visible:
+                emit(visible)
+        trailing = scrubber.flush()  # at end of stream
+        if trailing:
+            emit(trailing)
+
+    The scrubber is re-entrant per agent instance.  Callers building new
+    top-level responses (new turn) should create a fresh scrubber or call
+    ``reset()``.
    """
+
+    _OPEN_TAG = "<memory-context>"
+    _CLOSE_TAG = "</memory-context>"
+
+    def __init__(self) -> None:
+        self._in_span: bool = False
+        self._buf: str = ""
+
+    def reset(self) -> None:
+        self._in_span = False
+        self._buf = ""
+
+    def feed(self, text: str) -> str:
+        """Return the visible portion of ``text`` after scrubbing.
+
+        Any trailing fragment that could be the start of an open/close tag
+        is held back in the internal buffer and surfaced on the next
+        ``feed()`` call or discarded/emitted by ``flush()``.
+        """
+        if not text:
+            return ""
+        buf = self._buf + text
+        self._buf = ""
+        out: list[str] = []
+
+        while buf:
+            if self._in_span:
+                idx = buf.lower().find(self._CLOSE_TAG)
+                if idx == -1:
+                    # Hold back a potential partial close tag; drop the rest
+                    held = self._max_partial_suffix(buf, self._CLOSE_TAG)
+                    self._buf = buf[-held:] if held else ""
+                    return "".join(out)
+                # Found close — skip span content + tag, continue
+                buf = buf[idx + len(self._CLOSE_TAG):]
+                self._in_span = False
+            else:
+                idx = buf.lower().find(self._OPEN_TAG)
+                if idx == -1:
+                    # No open tag — hold back a potential partial open tag
+                    held = self._max_partial_suffix(buf, self._OPEN_TAG)
+                    if held:
+                        out.append(buf[:-held])
+                        self._buf = buf[-held:]
+                    else:
+                        out.append(buf)
+                    return "".join(out)
+                # Emit text before the tag, enter span
+                if idx > 0:
+                    out.append(buf[:idx])
+                buf = buf[idx + len(self._OPEN_TAG):]
+                self._in_span = True
+
+        return "".join(out)
+
+    def flush(self) -> str:
+        """Emit any held-back buffer at end-of-stream.
+
+        If we're still inside an unterminated span the remaining content is
+        discarded (safer: leaking partial memory context is worse than a
+        truncated answer).  Otherwise the held-back partial-tag tail is
+        emitted verbatim (it turned out not to be a real tag).
+        """
+        if self._in_span:
+            self._buf = ""
+            self._in_span = False
+            return ""
+        tail = self._buf
+        self._buf = ""
+        return tail
+
+    @staticmethod
+    def _max_partial_suffix(buf: str, tag: str) -> int:
+        """Return the length of the longest buf-suffix that is a tag-prefix.
+
+        Case-insensitive.  Returns 0 if no suffix could start the tag.
+        """
+        tag_lower = tag.lower()
+        buf_lower = buf.lower()
+        max_check = min(len(buf_lower), len(tag_lower) - 1)
+        for i in range(max_check, 0, -1):
+            if tag_lower.startswith(buf_lower[-i:]):
+                return i
+        return 0
+
+
+def build_memory_context_block(raw_context: str) -> str:
+    """Wrap prefetched memory in a fenced block with system note."""
    if not raw_context or not raw_context.strip():
        return ""
    clean = sanitize_context(raw_context)
+    if clean != raw_context:
+        logger.warning("memory provider returned pre-wrapped context; stripped")
    return (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, "
@@ -294,6 +402,41 @@ class MemoryManager:
                    provider.name, e,
                )

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Fan a session_id rotation out to every registered provider.
+
+        Invoked for ``/resume``, ``/branch``, ``/reset``, ``/new``, and
+        context compression, i.e. whenever ``AIAgent.session_id`` is
+        reassigned while the providers stay alive.
+
+        Providers are not restarted; each is expected to refresh cached
+        per-session state so later writes land in the right session's
+        record (contract: ``MemoryProvider.on_session_switch``).  An empty
+        ``new_session_id`` is ignored; a provider that raises is logged at
+        debug level and does not stop the remaining providers.
+        """
+        if new_session_id:
+            forwarded = {
+                "parent_session_id": parent_session_id,
+                "reset": reset,
+                **kwargs,
+            }
+            for provider in self._providers:
+                try:
+                    provider.on_session_switch(new_session_id, **forwarded)
+                except Exception as exc:
+                    logger.debug(
+                        "Memory provider '%s' on_session_switch failed: %s",
+                        provider.name, exc,
+                    )
+
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Notify all providers before context compression.

@@ -25,6 +25,7 @@ Lifecycle (called by MemoryManager, wired in run_agent.py):
 Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
+  on_session_switch(new_session_id, **kwargs) — mid-process session_id rotation
  on_pre_compress(messages) -> str       — extract before context compression
  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
@@ -160,6 +161,45 @@ class MemoryProvider(ABC):
        (CLI exit, /reset, gateway session expiry).
        """

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Called when the agent switches session_id mid-process.
+
+        Fires on ``/resume``, ``/branch``, ``/reset``, ``/new`` (CLI), the
+        gateway equivalents, and context compression — any path that
+        reassigns ``AIAgent.session_id`` without tearing the provider down.
+
+        Providers that cache per-session state in ``initialize()``
+        (``_session_id``, ``_document_id``, accumulated turn buffers,
+        counters) should update or reset that state here so subsequent
+        writes land in the correct session's record.
+
+        Parameters
+        ----------
+        new_session_id:
+            The session_id the agent just switched to.
+        parent_session_id:
+            The previous session_id when lineage applies: ``/branch`` (fork),
+            context compression (continuation), and ``/resume`` (the
+            session being left). Empty string otherwise.
+        reset:
+            ``True`` for a genuinely new conversation (``/reset`` / ``/new``),
+            not a resumption. Providers should then flush accumulated
+            per-session buffers (``_session_turns``, ``_turn_counter``,
+            etc.). ``False`` for ``/resume`` / ``/branch`` / compression,
+            where the logical conversation continues under the new id.
+        **kwargs:
+            Extra keyword arguments; ``MemoryManager.on_session_switch``
+            forwards its own ``**kwargs`` here unchanged.
+
+        Default is no-op for backward compatibility.
+        """
+
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Called before context compression discards old messages.

@@ -46,11 +46,13 @@ def _resolve_requests_verify() -> bool | str:
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
    "arcee",
+    "gmi",
+    "tencent-tokenhub",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -59,7 +61,9 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "ollama",
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
+    "tencent", "tokenhub", "tencent-cloud", "tencentmaas",
    "arcee-ai", "arceeai",
+    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
    "nvidia", "nim", "nvidia-nim", "nemotron",
    "qwen-portal",
@@ -106,9 +110,11 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

 # Descending tiers for context length probing when the model is unknown.
-# We start at 128K (a safe default for most modern models) and step down
-# on context-length errors until one works.
+# We start at 256K (covers GPT-5.x, many current large-context models) and
+# step down on context-length errors until one works.  Tier[0] is also the
+# default fallback when no detection method succeeds.
 CONTEXT_PROBE_TIERS = [
+    256_000,
    128_000,
    64_000,
    32_000,
@@ -143,10 +149,11 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
-    # GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
-    # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
-    # Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
-    "gpt-5.5": 400000,
+    # GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
+    # ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
+    # provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
+    # This hardcoded value is only reached when every probe misses.
+    "gpt-5.5": 1050000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
@@ -162,7 +169,17 @@ DEFAULT_CONTEXT_LENGTHS = {
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
-    # DeepSeek
+    # DeepSeek — V4 family ships with a 1M context window. The legacy
+    # aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
+    # mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
+    # and inherit the same 1M window. The ``deepseek`` substring entry
+    # below remains as a 128K fallback for older / unknown DeepSeek model
+    # ids (e.g. via custom endpoints).
+    # https://api-docs.deepseek.com/zh-cn/quick_start/pricing
+    "deepseek-v4-pro": 1_000_000,
+    "deepseek-v4-flash": 1_000_000,
+    "deepseek-chat": 1_000_000,
+    "deepseek-reasoner": 1_000_000,
    "deepseek": 128000,
    # Meta
    "llama": 131072,
@@ -193,6 +210,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "grok": 131072,             # catch-all (grok-beta, unknown grok-*)
    # Kimi
    "kimi": 262144,
+    # Tencent — Hy3 Preview (Hunyuan) with 256K context window
+    "hy3-preview": 256000,
    # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
    "nemotron": 131072,
    # Arcee
@@ -294,6 +313,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
+    "api.gmi-serving.com": "gmi",
+    "tokenhub.tencentmaas.com": "tencent-tokenhub",
    "ollama.com": "ollama-cloud",
 }

@@ -604,8 +625,6 @@ def fetch_endpoint_model_metadata(
                        if isinstance(ctx, int) and ctx > 0:
                            context_length = ctx
                            break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
                    if context_length is not None:
                        entry["context_length"] = context_length

@@ -689,6 +708,29 @@ def fetch_endpoint_model_metadata(
    return {}


+def _resolve_endpoint_context_length(
+    model: str,
+    base_url: str,
+    api_key: str = "",
+) -> Optional[int]:
+    """Look up ``model``'s context length in the endpoint's ``/models`` data.
+
+    Tries the exact id, then the only listed model, then the first id that
+    contains or is contained in ``model``; ``None`` if no integer is found.
+    """
+    catalog = fetch_endpoint_model_metadata(base_url, api_key=api_key)
+    entry = catalog.get(model)
+    if not entry and len(catalog) == 1:
+        entry = next(iter(catalog.values()))
+    elif not entry:
+        entry = next(
+            (meta for mid, meta in catalog.items() if model in mid or mid in model),
+            None,
+        )
+    ctx = entry.get("context_length") if entry else None
+    return ctx if isinstance(ctx, int) else None
+
+
 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
    from hermes_constants import get_hermes_home
@@ -972,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
                                ctx = cfg.get("context_length")
                                if ctx and isinstance(ctx, (int, float)):
                                    return int(ctx)
-                            # Fall back to max_context_length (theoretical model max)
-                            ctx = m.get("max_context_length") or m.get("context_length")
-                            if ctx and isinstance(ctx, (int, float)):
-                                return int(ctx)
+                            break

            # LM Studio / vLLM / llama.cpp: try /v1/models/{model}
            resp = client.get(f"{server_url}/v1/models/{model}")
@@ -1193,6 +1232,7 @@ def get_model_context_length(
    api_key: str = "",
    config_context_length: int | None = None,
    provider: str = "",
+    custom_providers: list | None = None,
 ) -> int:
    """Get the context length for a model.

@@ -1213,13 +1253,33 @@ def get_model_context_length(
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
        return config_context_length

+    # 0b. custom_providers per-model override — check before any probe.
+    # This closes the gap where /model switch and display paths used to fall
+    # back to 128K despite the user having a per-model context_length set.
+    # See #15779.
+    if custom_providers and base_url and model:
+        try:
+            from hermes_cli.config import get_custom_provider_context_length
+            cp_ctx = get_custom_provider_context_length(
+                model=model,
+                base_url=base_url,
+                custom_providers=custom_providers,
+            )
+            if cp_ctx:
+                return cp_ctx
+        except Exception:
+            pass  # fall through to probing
+
    # Normalise provider-prefixed model names (e.g. "local:model-name" →
    # "model-name") so cache lookups and server queries use the bare ID that
    # local servers actually know about.  Ollama "model:tag" colons are preserved.
    model = _strip_provider_prefix(model)

    # 1. Check persistent cache (model+provider)
-    if base_url:
+    # LM Studio is excluded — its loaded context length is transient (the
+    # user can reload the model with a different context_length at any time
+    # via /api/v1/models/load), so a stale cached value would mask reloads.
+    if base_url and provider != "lmstudio":
        cached = get_cached_context_length(model, base_url)
        if cached is not None:
            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
@@ -1264,28 +1324,16 @@ def get_model_context_length(
    # returns 128k) instead of the model's full context (400k).  models.dev
    # has the correct per-provider values and is checked at step 5+.
    if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
-        endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
-        matched = endpoint_metadata.get(model)
-        if not matched:
-            # Single-model servers: if only one model is loaded, use it
-            if len(endpoint_metadata) == 1:
-                matched = next(iter(endpoint_metadata.values()))
-            else:
-                # Fuzzy match: substring in either direction
-                for key, entry in endpoint_metadata.items():
-                    if model in key or key in model:
-                        matched = entry
-                        break
-        if matched:
-            context_length = matched.get("context_length")
-            if isinstance(context_length, int):
-                return context_length
+        context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
+        if context_length is not None:
+            return context_length
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
-                    save_context_length(model, base_url, local_ctx)
+                    if provider != "lmstudio":
+                        save_context_length(model, base_url, local_ctx)
                    return local_ctx
            logger.info(
                "Could not detect context length for model %r at %s — "
@@ -1343,6 +1391,12 @@ def get_model_context_length(
            if base_url:
                save_context_length(model, base_url, codex_ctx)
            return codex_ctx
+    if effective_provider == "gmi" and base_url:
+        # GMI exposes authoritative context_length via /models, but it is not
+        # in models.dev yet. Preserve that higher-fidelity endpoint lookup.
+        ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
+        if ctx is not None:
+            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
@@ -1352,7 +1406,7 @@ def get_model_context_length(
    # 6. OpenRouter live API metadata (provider-unaware fallback)
    metadata = fetch_model_metadata()
    if model in metadata:
-        return metadata[model].get("context_length", 128000)
+        return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)

    # 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
    # Only check `default_model in model` (is the key a substring of the input).
@@ -1369,7 +1423,8 @@ def get_model_context_length(
    if base_url and is_local_endpoint(base_url):
        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
-            save_context_length(model, base_url, local_ctx)
+            if provider != "lmstudio":
+                save_context_length(model, base_url, local_ctx)
            return local_ctx

    # 10. Default fallback — 128K
@@ -149,6 +149,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
+    "minimax-oauth": "minimax",
    "minimax-cn": "minimax-cn",
    "deepseek": "deepseek",
    "alibaba": "alibaba",
@@ -18,6 +18,7 @@ import os
 import tempfile
 import time
 from typing import Any, Mapping, Optional
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -118,7 +119,7 @@ def record_nous_rate_limit(
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(state, f)
-            os.replace(tmp_path, path)
+            atomic_replace(tmp_path, path)
        except Exception:
            # Clean up temp file on failure
            try:
@@ -180,3 +181,145 @@ def format_remaining(seconds: float) -> str:
    h, remainder = divmod(s, 3600)
    m = remainder // 60
    return f"{h}h {m}m" if m else f"{h}h"
+
+
+# Buckets with reset windows shorter than this are treated as transient
+# (upstream jitter, secondary throttling) rather than a genuine quota
+# exhaustion worth a cross-session breaker trip.
+_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
+
+
+def is_genuine_nous_rate_limit(
+    *,
+    headers: Optional[Mapping[str, str]] = None,
+    last_known_state: Optional[Any] = None,
+) -> bool:
+    """Decide whether a 429 from Nous Portal is a real account rate limit.
+
+    Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
+    MiMo, Hermes, ...) behind one endpoint.  A 429 can mean either:
+
+      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
+          exhausted — a genuine rate limit that will last until the
+          bucket resets.
+      (b) The upstream provider is out of capacity for a specific model
+          — transient, clears in seconds, and has nothing to do with
+          the caller's quota on Nous.
+
+    Tripping the cross-session breaker on (b) blocks ALL Nous requests
+    (and all models, since Nous is one provider key) for minutes even
+    though the caller's account is healthy and a different model would
+    have worked.  That's the bug users hit when DeepSeek V4 Pro 429s
+    trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
+
+    We tell the two apart by looking at:
+
+      1. The 429 response's own ``x-ratelimit-*`` headers.  Nous emits
+         the full suite on every response including 429s.  An exhausted
+         bucket (``remaining == 0`` with a reset window >= 60s) is
+         proof of (a).
+      2. The last-known-good rate-limit state captured by
+         ``_capture_rate_limits()`` on the previous successful
+         response.  If any bucket there was already near-exhausted with
+         a substantial reset window, the current 429 is almost
+         certainly (a) continuing from that condition.
+
+    If neither signal fires, we treat the 429 as (b): fail the single
+    request, let the retry loop or model-switch proceed, and do NOT
+    write the cross-session breaker file.
+
+    Returns True when the evidence points at (a).
+    """
+    # Signal 1: current 429 response headers.
+    state = _parse_buckets_from_headers(headers)
+    if _has_exhausted_bucket(state):
+        return True
+
+    # Signal 2: last-known-good state from a recent successful response.
+    # Buckets are read as attributes (RateLimitState-style); a plain dict
+    # exposes none of them and therefore never counts as evidence.
+    if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
+        return True
+
+    return False
+
+
+def _parse_buckets_from_headers(
+    headers: Optional[Mapping[str, str]],
+) -> dict[str, tuple[Optional[int], Optional[float]]]:
+    """Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
+
+    Header names match case-insensitively and a value that cannot be parsed
+    becomes ``None``.  Returns empty dict when no rate-limit headers are present.
+    """
+    if not headers:
+        return {}
+    lowered = {k.lower(): v for k, v in headers.items()}
+    if not any(k.startswith("x-ratelimit-") for k in lowered):
+        return {}
+
+    def _maybe_int(raw: Optional[str]) -> Optional[int]:
+        if raw is None:
+            return None
+        try:
+            return int(float(raw))
+        except (TypeError, ValueError, OverflowError):  # "inf" overflows int()
+            return None
+
+    def _maybe_float(raw: Optional[str]) -> Optional[float]:
+        if raw is None:
+            return None
+        try:
+            return float(raw)
+        except (TypeError, ValueError):
+            return None
+
+    result: dict[str, tuple[Optional[int], Optional[float]]] = {}
+    for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
+        remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
+        reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
+        if remaining is not None or reset is not None:
+            result[tag] = (remaining, reset)
+    return result
+
+
+def _has_exhausted_bucket(
+    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
+) -> bool:
+    """Report whether any bucket is spent with a long-enough reset window.
+
+    Buckets whose remaining count or reset time is unknown (None) are ignored.
+    """
+    return any(
+        not (left is None or left > 0)
+        and wait is not None and wait >= _MIN_RESET_FOR_BREAKER_SECONDS
+        for left, wait in buckets.values()
+    )
+
+
+def _has_exhausted_bucket_in_object(state: Any) -> bool:
+    """Scan a RateLimitState-like object for a bucket that is out of quota.
+
+    Buckets are read by attribute name (``requests_min``,
+    ``requests_hour``, ``tokens_min``, ``tokens_hour``), the layout of the
+    dataclass in ``agent.rate_limit_tracker``.  Missing or ``None`` buckets
+    are skipped, so an object without those attributes yields False.  A
+    bucket counts only with a positive limit, nothing remaining, and a
+    reset of at least ``_MIN_RESET_FOR_BREAKER_SECONDS``.
+    """
+    bucket_names = ("requests_min", "requests_hour", "tokens_min", "tokens_hour")
+    for name in bucket_names:
+        snapshot = getattr(state, name, None)
+        if snapshot is None:
+            continue
+        cap = getattr(snapshot, "limit", 0) or 0
+        left = getattr(snapshot, "remaining", 0) or 0
+        # The time-adjusted "remaining_seconds_now" wins when available;
+        # the raw reset_seconds reading is only the fallback.
+        wait = getattr(snapshot, "remaining_seconds_now", None)
+        if wait is None:
+            wait = getattr(snapshot, "reset_seconds", 0.0) or 0.0
+        spent = not (cap <= 0 or left > 0)
+        if spent and wait >= _MIN_RESET_FOR_BREAKER_SECONDS:
+            return True
+    return False
@@ -0,0 +1,193 @@
+"""
+Contextual first-touch onboarding hints.
+
+Instead of blocking first-run questionnaires, show a one-time hint the *first*
+time a user hits a behavior fork — message-while-running, first long-running
+tool, etc.  Each hint is shown once per install (tracked in ``config.yaml`` under
+``onboarding.seen.<flag>``) and then never again.
+
+Keep this module tiny and dependency-free so both the CLI and gateway can import
+it without pulling in heavy modules.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any, Mapping, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# -------------------------------------------------------------------------
+# Flag names (stable — used as config.yaml keys under onboarding.seen)
+# -------------------------------------------------------------------------
+
+BUSY_INPUT_FLAG = "busy_input_prompt"
+TOOL_PROGRESS_FLAG = "tool_progress_prompt"
+OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
+
+
+# -------------------------------------------------------------------------
+# Hint content
+# -------------------------------------------------------------------------
+
+def busy_input_hint_gateway(mode: str) -> str:
+    """Gateway tip for the first message a user sends while the agent is busy.
+
+    ``mode`` is the busy_input_mode that was actually applied, so the text
+    describes what happened; anything but "queue"/"steer" means interrupt.
+    """
+    if mode not in ("queue", "steer"):
+        return (
+            "💡 First-time tip — I just interrupted my current task to answer you. "
+            "Send `/busy queue` to queue follow-ups for after the current task instead, "
+            "`/busy steer` to inject them mid-run without interrupting, or "
+            "`/busy status` to check. This notice won't appear again."
+        )
+    if mode == "queue":
+        return (
+            "💡 First-time tip — I queued your message instead of interrupting. "
+            "Send `/busy interrupt` to make new messages stop the current task "
+            "immediately, or `/busy status` to check. This notice won't appear again."
+        )
+    return (
+        "💡 First-time tip — I steered your message into the current run; "
+        "it will arrive after the next tool call instead of interrupting. "
+        "Send `/busy interrupt` or `/busy queue` to change this, or "
+        "`/busy status` to check. This notice won't appear again."
+    )
+
+
+def busy_input_hint_cli(mode: str) -> str:
+    """Plain-text (no markdown) CLI wording of the one-time busy-input tip."""
+    if mode not in ("queue", "steer"):
+        return (
+            "(tip) Your message interrupted the current run. "
+            "Use /busy queue to queue messages for the next turn instead, "
+            "or /busy steer to inject mid-run. This tip only shows once."
+        )
+    if mode == "queue":
+        return (
+            "(tip) Your message was queued for the next turn. "
+            "Use /busy interrupt to make Enter stop the current run instead, "
+            "or /busy steer to inject mid-run. This tip only shows once."
+        )
+    return (
+        "(tip) Your message was steered into the current run; it arrives "
+        "after the next tool call. Use /busy interrupt or /busy queue to "
+        "change this. This tip only shows once."
+    )
+
+
+def tool_progress_hint_gateway() -> str:
+    """Gateway wording of the one-time long-running-tool tip."""
+    tip = "💡 First-time tip — that tool took a while and I'm streaming every step. "
+    tip += "If the progress messages feel noisy, send `/verbose` to cycle modes "
+    tip += "(all → new → off). This notice won't appear again."
+    return tip
+
+
+def tool_progress_hint_cli() -> str:
+    """Plain-text CLI wording of the one-time long-running-tool tip."""
+    tip = "(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
+    tip += "display modes (all -> new -> off -> verbose). This tip only shows once."
+    return tip
+
+
+def openclaw_residue_hint_cli() -> str:
+    """One-time banner for when Hermes starts and finds ``~/.openclaw/``.
+
+    Leads with ``hermes claw migrate`` (non-destructive port of config,
+    memory, and skills); ``hermes claw cleanup`` follows for users who have
+    already migrated, with a warning that archiving breaks OpenClaw.
+    """
+    banner = (
+        "A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
+        "To port your config, memory, and skills over to Hermes, run "
+        "`hermes claw migrate`.\n"
+        "If you've already migrated and want to archive the old directory, "
+        "run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
+        "OpenClaw will stop working after this).\n"
+        "This tip only shows once."
+    )
+    return banner
+
+
+def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
+    """Return True if ``<home>/.openclaw`` exists as a directory.
+
+    Read-only check; ``home`` defaults to ``Path.home()`` (override for tests).
+    An unresolvable home directory or an OS error is reported as False.
+    """
+    try:
+        return ((home or Path.home()) / ".openclaw").is_dir()
+    except (OSError, RuntimeError):  # Path.home() raises RuntimeError if unresolved
+        return False
+
+
+# -------------------------------------------------------------------------
+# State read / write
+# -------------------------------------------------------------------------
+
+def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
+    """Return ``config["onboarding"]["seen"]``, or ``{}`` if that path is absent."""
+    # Every level must be a Mapping; anything else degrades to an empty dict.
+    section = config.get("onboarding") if isinstance(config, Mapping) else None
+    flags = section.get("seen") if isinstance(section, Mapping) else None
+    return flags if isinstance(flags, Mapping) else {}
+
+
+def is_seen(config: Mapping[str, Any], flag: str) -> bool:
+    """Return True if ``onboarding.seen.<flag>`` holds a truthy value in ``config``."""
+    return bool(_get_seen_dict(config).get(flag))
+
+
+def mark_seen(config_path: Path, flag: str) -> bool:
+    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
+
+    Writes through ``atomic_yaml_write`` so concurrent readers should never
+    see a partial file.  A missing config file is treated as an empty config,
+    not an error.  Returns True on success or if already set, False on error.
+    """
+    try:
+        import yaml
+        from utils import atomic_yaml_write
+    except Exception as e:  # pragma: no cover — dependency issue
+        logger.debug("onboarding: failed to import yaml/utils: %s", e)
+        return False
+
+    try:
+        cfg: dict = {}
+        if config_path.exists():
+            with open(config_path, encoding="utf-8") as f:
+                cfg = yaml.safe_load(f) or {}
+        if not isinstance(cfg.get("onboarding"), dict):
+            cfg["onboarding"] = {}
+        seen = cfg["onboarding"].get("seen")
+        if not isinstance(seen, dict):
+            seen = {}
+            cfg["onboarding"]["seen"] = seen
+        if seen.get(flag) is True:
+            return True  # already marked — nothing to do
+        seen[flag] = True
+        atomic_yaml_write(config_path, cfg)
+        return True
+    except Exception as e:
+        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
+        return False
+
+
+__all__ = [
+    "BUSY_INPUT_FLAG",
+    "TOOL_PROGRESS_FLAG",
+    "OPENCLAW_RESIDUE_FLAG",
+    "busy_input_hint_gateway",
+    "busy_input_hint_cli",
+    "tool_progress_hint_gateway",
+    "tool_progress_hint_cli",
+    "openclaw_residue_hint_cli",
+    "detect_openclaw_residue",
+    "is_seen",
+    "mark_seen",
+]
@@ -141,6 +141,12 @@ DEFAULT_AGENT_IDENTITY = (
    "Be targeted and efficient in your exploration and investigations."
 )

+HERMES_AGENT_HELP_GUIDANCE = (
+    "If the user asks about configuring, setting up, or using Hermes Agent "
+    "itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
+    "before answering. Docs: https://hermes-agent.nousresearch.com/docs"
+)
+
 MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
@@ -304,6 +310,10 @@ PLATFORM_HINTS = {
        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
+        "Telegram has NO table syntax — prefer bullet lists or labeled "
+        "key: value pairs over pipe tables (any tables you do emit are "
+        "auto-rewritten into row-group bullets, which you can produce "
+        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -422,6 +432,29 @@ PLATFORM_HINTS = {
        "your response. Images are sent as native photos, and other files arrive as downloadable "
        "documents."
    ),
+    "yuanbao": (
+        "You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
+        "Markdown formatting is supported (code blocks, tables, bold/italic). "
+        "You CAN send media files natively — to deliver a file to the user, include "
+        "MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
+        "Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
+        "and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
+        "(max 50 MB). You can also include image URLs in markdown format ![alt](url) and "
+        "they will be downloaded and sent as native photos. "
+        "Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
+        "whenever a file delivery is appropriate.\n\n"
+        "Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
+        "When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
+        "you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
+        "  1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
+        "     '捂脸', '合十') to discover matching sticker_ids.\n"
+        "  2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
+        "     TIMFaceElem that renders as a native sticker in the chat.\n"
+        "DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
+        "them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
+        "image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
+        "— when a sticker is the right response, use yb_send_sticker."
+    ),
 }

 # ---------------------------------------------------------------------------
@@ -825,6 +858,11 @@ def build_skills_system_prompt(
            "Skills also encode the user's preferred approach, conventions, and quality standards "
            "for tasks like code review, planning, and testing — load them even for tasks you "
            "already know how to do, because the skill defines how it should be done here.\n"
+            "Whenever the user asks you to configure, set up, install, enable, disable, modify, "
+            "or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
+            "skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
+            "first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
+            "`hermes setup`) so you don't have to guess or invent workarounds.\n"
            "If a skill has issues, fix it with skill_manage(action='patch').\n"
            "After difficult/iterative tasks, offer to save as a skill. "
            "If a skill you loaded was missing steps, had wrong commands, or needed "
@@ -56,8 +56,12 @@ _SENSITIVE_BODY_KEYS = frozenset({
 })

 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
-# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
-_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
+# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
+# mid-session.  OFF by default — user must opt in via
+# `security.redact_secrets: true` in config.yaml (bridged to this env var
+# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
+# in ~/.hermes/.env.
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")

 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
@@ -180,11 +184,59 @@ _PREFIX_RE = re.compile(
 )


+def mask_secret(
+    value: str,
+    *,
+    head: int = 4,
+    tail: int = 4,
+    floor: int = 12,
+    placeholder: str = "***",
+    empty: str = "",
+) -> str:
+    """Mask a secret for display, preserving ``head`` and ``tail`` characters.
+
+    Canonical helper for display-time redaction across Hermes — used by
+    ``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
+    a secret needs to be shown truncated for debuggability while still
+    keeping the bulk hidden.
+
+    Args:
+        value:       The secret to mask. ``None``/empty returns ``empty``.
+        head:        Leading characters to preserve. Default 4. Zero or a
+                     negative value preserves no leading characters.
+        tail:        Trailing characters to preserve. Default 4. Zero or a
+                     negative value preserves no trailing characters.
+        floor:       Values with fewer than ``floor`` characters are fully
+                     masked (returns ``placeholder``). Default 12 —
+                     matches the existing config/status/dump convention.
+        placeholder: Value returned for too-short inputs. Default ``"***"``.
+        empty:       Value returned when ``value`` is falsy (None, ""). The
+                     caller can override this to e.g. ``color("(not set)",
+                     Colors.DIM)`` for user-facing display.
+
+    Returns:
+        ``empty``, ``placeholder``, or ``"<head chars>...<tail chars>"``.
+
+    Examples:
+        >>> mask_secret("sk-proj-abcdef1234567890")
+        'sk-p...7890'
+        >>> mask_secret("short")                         # fully masked
+        '***'
+        >>> mask_secret("")                              # empty default
+        ''
+        >>> mask_secret("", empty="(not set)")           # empty override
+        '(not set)'
+        >>> mask_secret("long-token", head=6, tail=4, floor=18)
+        '***'
+        >>> mask_secret("sk-proj-abcdef1234567890", tail=0)
+        'sk-p...'
+    """
+    if not value:
+        return empty
+    if len(value) < floor:
+        return placeholder
+    # Guard the slices: ``value[-0:]`` is the WHOLE string, and a negative
+    # ``head``/``tail`` would slice from the wrong end — either case would
+    # print most or all of the secret instead of hiding it.
+    prefix = value[:head] if head > 0 else ""
+    suffix = value[-tail:] if tail > 0 else ""
+    return f"{prefix}...{suffix}"
+
+
 def _mask_token(token: str) -> str:
-    """Mask a token, preserving prefix for long tokens."""
-    if len(token) < 18:
+    """Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
+    # Empty input: historically this returned "***" rather than "". Preserve.
+    if not token:
        return "***"
-    return f"{token[:6]}...{token[-4:]}"
+    return mask_secret(token, head=6, tail=4, floor=18)


 def _redact_query_string(query: str) -> str:
@@ -257,7 +309,7 @@ def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
-    Disabled when security.redact_secrets is false in config.yaml.
+    Disabled by default — enable via security.redact_secrets: true in config.yaml.
    """
    if text is None:
        return None
@@ -76,6 +76,7 @@ except ImportError:  # pragma: no cover
    fcntl = None  # type: ignore[assignment]

 from hermes_constants import get_hermes_home
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -568,7 +569,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
        try:
            with os.fdopen(fd, "w") as fh:
                fh.write(json.dumps(data, indent=2, sort_keys=True))
-            os.replace(tmp_path, p)
+            atomic_replace(tmp_path, p)
        except Exception:
            try:
                os.unlink(tmp_path)
@@ -754,7 +755,11 @@ def _resolve_effective_accept(
    if env in ("1", "true", "yes", "on"):
        return True
    cfg_val = cfg.get("hooks_auto_accept", False)
-    return bool(cfg_val)
+    if isinstance(cfg_val, bool):
+        return cfg_val
+    if isinstance(cfg_val, str):
+        return cfg_val.strip().lower() in ("1", "true", "yes", "on")
+    return False


 # ---------------------------------------------------------------------------
@@ -329,7 +329,7 @@ def build_skill_invocation_message(

    loaded_skill, skill_dir, skill_name = loaded
    activation_note = (
-        f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
+        f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
        "you to follow its instructions. The full skill content is loaded below.]"
    )
    return _build_skill_message(
@@ -368,7 +368,7 @@ def build_preloaded_skills_prompt(

        loaded_skill, skill_dir, skill_name = loaded
        activation_note = (
-            f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
+            f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
            "preloaded. Treat its instructions as active guidance for the duration of this "
            "session unless the user overrides them.]"
        )
@@ -200,6 +200,9 @@ def get_external_skills_dirs() -> List[Path]:
    if not isinstance(raw_dirs, list):
        return []

+    from hermes_constants import get_hermes_home
+
+    hermes_home = get_hermes_home()
    local_skills = get_skills_dir().resolve()
    seen: Set[Path] = set()
    result: List[Path] = []
@@ -210,7 +213,12 @@ def get_external_skills_dirs() -> List[Path]:
            continue
        # Expand ~ and environment variables
        expanded = os.path.expanduser(os.path.expandvars(entry))
-        p = Path(expanded).resolve()
+        p = Path(expanded)
+        # Resolve relative paths against HERMES_HOME, not cwd
+        if not p.is_absolute():
+            p = (hermes_home / p).resolve()
+        else:
+            p = p.resolve()
        if p == local_skills:
            continue
        if p in seen:
@@ -6,12 +6,18 @@ adds latency to the user-facing reply.

 import logging
 import threading
-from typing import Optional
+from typing import Callable, Optional

 from agent.auxiliary_client import call_llm

 logger = logging.getLogger(__name__)

+# Callback signature: (task_name, exception) -> None. Used to surface
+# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
+# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
+# become visible instead of piling up as NULL session titles.
+FailureCallback = Callable[[str, BaseException], None]
+
 _TITLE_PROMPT = (
    "Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
    "following exchange. The title should capture the main topic or intent. "
@@ -19,11 +25,23 @@ _TITLE_PROMPT = (
 )


-def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
+def generate_title(
+    user_message: str,
+    assistant_response: str,
+    timeout: float = 30.0,
+    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
+) -> Optional[str]:
    """Generate a session title from the first exchange.

-    Uses the auxiliary LLM client (cheapest/fastest available model).
+    Uses the main runtime's model when available, falling back to the
+    auxiliary LLM client (cheapest/fastest available model).
    Returns the title string or None on failure.
+
+    ``failure_callback`` is invoked with ``(task, exception)`` when the
+    auxiliary call raises — the caller typically wires this to
+    ``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
+    of silently accumulating untitled sessions.
    """
    # Truncate long messages to keep the request small
    user_snippet = user_message[:500] if user_message else ""
@@ -41,6 +59,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
+            main_runtime=main_runtime,
        )
        title = (response.choices[0].message.content or "").strip()
        # Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
@@ -52,7 +71,15 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
            title = title[:77] + "..."
        return title if title else None
    except Exception as e:
-        logger.debug("Title generation failed: %s", e)
+        # Log at WARNING so this shows up in agent.log without debug mode.
+        # Full detail at debug level for operators who need the stack.
+        logger.warning("Title generation failed: %s", e)
+        logger.debug("Title generation traceback", exc_info=True)
+        if failure_callback is not None:
+            try:
+                failure_callback("title generation", e)
+            except Exception:
+                logger.debug("Title generation failure_callback raised", exc_info=True)
        return None


@@ -61,6 +88,8 @@ def auto_title_session(
    session_id: str,
    user_message: str,
    assistant_response: str,
+    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -81,7 +110,9 @@ def auto_title_session(
    except Exception:
        return

-    title = generate_title(user_message, assistant_response)
+    title = generate_title(
+        user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
+    )
    if not title:
        return

@@ -98,6 +129,8 @@ def maybe_auto_title(
    user_message: str,
    assistant_response: str,
    conversation_history: list,
+    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -119,6 +152,7 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
+        kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
        daemon=True,
        name="auto-title",
    )
@@ -23,9 +23,14 @@ def get_transport(api_mode: str):
    This allows gradual migration — call sites can check for None
    and fall back to the legacy code path.
    """
-    if not _REGISTRY:
-        _discover_transports()
    cls = _REGISTRY.get(api_mode)
+    if cls is None:
+        # The registry can be partially populated when a specific transport
+        # module was imported directly (for example chat_completions before
+        # codex).  Discover on misses, not only when the registry is empty, so
+        # test/order-dependent imports do not make valid api_modes unavailable.
+        _discover_transports()
+        cls = _REGISTRY.get(api_mode)
    if cls is None:
        return None
    return cls()
@@ -12,12 +12,84 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

+from agent.lmstudio_reasoning import resolve_lmstudio_effort
 from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage


+def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
+    """Map a Hermes/OpenRouter-style reasoning config onto Gemini's thinkingConfig.
+
+    Returns ``None`` when ``reasoning_config`` is not a dict, and
+    ``{"includeThoughts": False}`` when reasoning is switched off (``enabled``
+    is ``False`` or ``effort`` is ``"none"``).  Otherwise thoughts are
+    included and, for Gemini 3 Flash/Pro models only, a ``thinkingLevel``
+    derived from the requested effort is added.
+    """
+    if not isinstance(reasoning_config, dict):
+        return None
+
+    # Gemini can hide thought parts even when internal thinking still
+    # happens; thinkingLevel is left out here to avoid model-specific
+    # validation quirks.
+    if reasoning_config.get("enabled") is False:
+        return {"includeThoughts": False}
+    requested = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
+    if requested == "none":
+        return {"includeThoughts": False}
+
+    model_id = (model or "").strip().lower()
+    if model_id.startswith("google/"):
+        model_id = model_id[len("google/"):]
+
+    result: Dict[str, Any] = {"includeThoughts": True}
+
+    # Only the Gemini 3 family gets a thinkingLevel.  Gemini 2.5 accepts
+    # thinkingBudget instead, and no budget is guessed from Hermes' coarse
+    # effort levels — ``includeThoughts`` alone surfaces thought parts
+    # without risking request validation errors.
+    if not model_id.startswith("gemini-3"):
+        return result
+
+    # Gemini 3 Flash documents low/medium/high; Gemini 3 Pro is stricter
+    # (low/high).  Hermes' wider effort set is clamped to what each family
+    # accepts; unrecognised efforts behave like "medium".
+    wants_high = requested in {"high", "xhigh"}
+    if "flash" in model_id:
+        if wants_high:
+            level = "high"
+        elif requested in {"minimal", "low"}:
+            level = "low"
+        else:
+            level = "medium"
+        result["thinkingLevel"] = level
+    elif "pro" in model_id:
+        result["thinkingLevel"] = "high" if wants_high else "low"
+
+    return result
+
+
+def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
+    """Rename camelCase Gemini thinking keys to the OpenAI-compat snake_case names.
+
+    Only well-typed values are carried over: a bool ``includeThoughts``, a
+    non-blank string ``thinkingLevel`` (stripped and lowercased) and a numeric
+    ``thinkingBudget`` (truncated to ``int``).  Returns ``None`` when the
+    input is not a non-empty dict or nothing survives the filtering.
+    """
+    if not (isinstance(config, dict) and config):
+        return None
+
+    include_thoughts = config.get("includeThoughts")
+    level = config.get("thinkingLevel")
+    budget = config.get("thinkingBudget")
+
+    out: Dict[str, Any] = {}
+    if isinstance(include_thoughts, bool):
+        out["include_thoughts"] = include_thoughts
+    if isinstance(level, str):
+        level = level.strip()
+        if level:
+            out["thinking_level"] = level.lower()
+    if isinstance(budget, (int, float)):
+        out["thinking_budget"] = int(budget)
+
+    return out if out else None
+
+
+def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
+    """Return True only for Google's Gemini OpenAI-compatible endpoint.
+
+    The URL qualifies when its parsed hostname is exactly
+    ``generativelanguage.googleapis.com`` and, after trimming surrounding
+    whitespace and trailing slashes, it ends in ``/openai``.  Scheme-less
+    values such as ``generativelanguage.googleapis.com/v1beta/openai`` are
+    still accepted.  Empty, ``None`` or unparsable input yields False.
+    """
+    # Local import keeps this helper self-contained within the module.
+    from urllib.parse import urlsplit
+
+    normalized = str(base_url or "").strip().rstrip("/").lower()
+    if not normalized:
+        return False
+    # Compare the parsed host rather than searching the whole URL: a substring
+    # test also accepts look-alikes such as
+    # https://evil.test/generativelanguage.googleapis.com/openai or
+    # https://generativelanguage.googleapis.com.evil.test/openai.
+    try:
+        # urlsplit only recognises the authority after "//", so add it for
+        # scheme-less input.
+        parts = urlsplit(normalized if "://" in normalized else "//" + normalized)
+    except ValueError:  # malformed authority, e.g. an unbalanced IPv6 bracket
+        return False
+    host = (parts.hostname or "").rstrip(".")  # tolerate a trailing-dot FQDN
+    if host != "generativelanguage.googleapis.com":
+        return False
+    return normalized.endswith("/openai")
+
+
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@@ -31,15 +103,15 @@ class ChatCompletionsTransport(ProviderTransport):
    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
        """Messages are already in OpenAI format — sanitize Codex leaks only.

-        Strips Codex Responses API fields (``codex_reasoning_items`` on the
-        message, ``call_id``/``response_item_id`` on tool_calls) that strict
-        chat-completions providers reject with 400/422.
+        Strips Codex Responses API fields (``codex_reasoning_items`` /
+        ``codex_message_items`` on the message, ``call_id``/``response_item_id``
+        on tool_calls) that strict chat-completions providers reject with 400/422.
        """
        needs_sanitize = False
        for msg in messages:
            if not isinstance(msg, dict):
                continue
-            if "codex_reasoning_items" in msg:
+            if "codex_reasoning_items" in msg or "codex_message_items" in msg:
                needs_sanitize = True
                break
            tool_calls = msg.get("tool_calls")
@@ -59,6 +131,7 @@ class ChatCompletionsTransport(ProviderTransport):
            if not isinstance(msg, dict):
                continue
            msg.pop("codex_reasoning_items", None)
+            msg.pop("codex_message_items", None)
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
@@ -100,6 +173,7 @@ class ChatCompletionsTransport(ProviderTransport):
            is_github_models: bool
            is_nvidia_nim: bool
            is_kimi: bool
+            is_lmstudio: bool
            is_custom_provider: bool
            ollama_num_ctx: int | None
            # Provider routing
@@ -113,6 +187,7 @@ class ChatCompletionsTransport(ProviderTransport):
            # Reasoning
            supports_reasoning: bool
            github_reasoning_extra: dict | None
+            lmstudio_reasoning_options: list[str] | None  # raw allowed_options from /api/v1/models
            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
            # Extra
@@ -187,6 +262,7 @@ class ChatCompletionsTransport(ProviderTransport):
        anthropic_max_out = params.get("anthropic_max_output")
        is_nvidia_nim = params.get("is_nvidia_nim", False)
        is_kimi = params.get("is_kimi", False)
+        is_tokenhub = params.get("is_tokenhub", False)
        reasoning_config = params.get("reasoning_config")

        if ephemeral is not None and max_tokens_fn:
@@ -218,12 +294,41 @@ class ChatCompletionsTransport(ProviderTransport):
                        _kimi_effort = _e
                api_kwargs["reasoning_effort"] = _kimi_effort

+        # Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
+        if is_tokenhub:
+            _tokenhub_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _tokenhub_thinking_off:
+                _tokenhub_effort = "high"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _tokenhub_effort = _e
+                api_kwargs["reasoning_effort"] = _tokenhub_effort
+
+        # LM Studio: top-level reasoning_effort. Only emit when the model
+        # declares reasoning support via /api/v1/models capabilities (gated
+        # upstream by params["supports_reasoning"]). resolve_lmstudio_effort
+        # is shared with run_agent's summary path so both stay in sync.
+        if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
+            _lm_effort = resolve_lmstudio_effort(
+                reasoning_config,
+                params.get("lmstudio_reasoning_options"),
+            )
+            if _lm_effort is not None:
+                api_kwargs["reasoning_effort"] = _lm_effort
+
        # extra_body assembly
        extra_body: Dict[str, Any] = {}

        is_openrouter = params.get("is_openrouter", False)
        is_nous = params.get("is_nous", False)
        is_github_models = params.get("is_github_models", False)
+        provider_name = str(params.get("provider_name") or "").strip().lower()
+        base_url = params.get("base_url")

        provider_prefs = params.get("provider_preferences")
        if provider_prefs and is_openrouter:
@@ -239,8 +344,9 @@ class ChatCompletionsTransport(ProviderTransport):
                "type": "enabled" if _kimi_thinking_enabled else "disabled",
            }

-        # Reasoning
-        if params.get("supports_reasoning", False):
+        # Reasoning. LM Studio is handled above via top-level reasoning_effort,
+        # so skip emitting extra_body.reasoning for it.
+        if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
            if is_github_models:
                gh_reasoning = params.get("github_reasoning_extra")
                if gh_reasoning is not None:
@@ -276,6 +382,23 @@ class ChatCompletionsTransport(ProviderTransport):
        if is_qwen:
            extra_body["vl_high_resolution_images"] = True

+        if provider_name == "gemini":
+            raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if _is_gemini_openai_compat_base_url(base_url):
+                thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
+                if thinking_config:
+                    openai_compat_extra = extra_body.get("extra_body", {})
+                    google_extra = openai_compat_extra.get("google", {})
+                    google_extra["thinking_config"] = thinking_config
+                    openai_compat_extra["google"] = google_extra
+                    extra_body["extra_body"] = openai_compat_extra
+            elif raw_thinking_config:
+                extra_body["thinking_config"] = raw_thinking_config
+        elif provider_name == "google-gemini-cli":
+            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if thinking_config:
+                extra_body["thinking_config"] = thinking_config
+
        # Merge any pre-built extra_body additions
        additions = params.get("extra_body_additions")
        if additions:
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
 from typing import Any, Dict, List, Optional

 from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
+from agent.transports.types import NormalizedResponse, ToolCall


 class ResponsesApiTransport(ProviderTransport):
@@ -120,6 +120,24 @@ class ResponsesApiTransport(ProviderTransport):
        if request_overrides:
            kwargs.update(request_overrides)

+        if is_codex_backend:
+            prompt_cache_key = kwargs.get("prompt_cache_key")
+            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
+            if cache_scope_id:
+                existing_extra_headers = kwargs.get("extra_headers")
+                merged_extra_headers: Dict[str, str] = {}
+                if isinstance(existing_extra_headers, dict):
+                    merged_extra_headers.update(
+                        {
+                            str(key): str(value)
+                            for key, value in existing_extra_headers.items()
+                            if key and value is not None
+                        }
+                    )
+                merged_extra_headers["session_id"] = cache_scope_id
+                merged_extra_headers["x-client-request-id"] = cache_scope_id
+                kwargs["extra_headers"] = merged_extra_headers
+
        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
            kwargs["max_output_tokens"] = max_tokens
@@ -133,8 +151,6 @@ class ResponsesApiTransport(ProviderTransport):
        """Normalize Codex Responses API response to NormalizedResponse."""
        from agent.codex_responses_adapter import (
            _normalize_codex_response,
-            _extract_responses_message_text,
-            _extract_responses_reasoning_text,
        )

        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
@@ -160,6 +176,8 @@ class ResponsesApiTransport(ProviderTransport):
        provider_data = {}
        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
+        if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
+            provider_data["codex_message_items"] = msg.codex_message_items
        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
            provider_data["reasoning_details"] = msg.reasoning_details

@@ -97,7 +97,7 @@ class NormalizedResponse:
    Response-level ``provider_data`` examples:

    * Anthropic: ``{"reasoning_details": [...]}``
-    * Codex: ``{"codex_reasoning_items": [...]}``
+    * Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
    * Others: ``None``
    """

@@ -126,6 +126,11 @@ class NormalizedResponse:
        pd = self.provider_data or {}
        return pd.get("codex_reasoning_items")

+    @property
+    def codex_message_items(self):
+        """Return ``provider_data["codex_message_items"]``, or ``None`` when absent."""
+        pd = self.provider_data or {}
+        return pd.get("codex_message_items")
+

 # ---------------------------------------------------------------------------
 # Factory helpers
@@ -359,6 +359,25 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://aws.amazon.com/bedrock/pricing/",
        pricing_version="bedrock-pricing-2026-04",
    ),
+    # MiniMax
+    (
+        "minimax",
+        "minimax-m2.7",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.30"),
+        output_cost_per_million=Decimal("1.20"),
+        source="official_docs_snapshot",
+        pricing_version="minimax-pricing-2026-04",
+    ),
+    (
+        "minimax-cn",
+        "minimax-m2.7",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.30"),
+        output_cost_per_million=Decimal("1.20"),
+        source="official_docs_snapshot",
+        pricing_version="minimax-pricing-2026-04",
+    ),
 }


@@ -400,6 +419,8 @@ def resolve_billing_route(
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name == "openai":
        return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
+    if provider_name in {"minimax", "minimax-cn"}:
+        return BillingRoute(provider=provider_name, model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name in {"custom", "local"} or (base and "localhost" in base):
        return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
    return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
@@ -30,14 +30,13 @@ model:
  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
+  #   "lmstudio"     - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
  #
  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
-  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
-  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
-  #   Example for LM Studio:
-  #     provider: "lmstudio"
-  #     base_url: "http://localhost:1234/v1"
-  #   No API key needed — local servers typically ignore auth.
+  #   "custom"       - Any other OpenAI-compatible endpoint. Set base_url below.
+  #   Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
+  #   LM Studio is first-class and uses provider: "lmstudio".
+  #   It works with both no-auth and auth-enabled server modes.
  #
  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  provider: "auto"
@@ -181,6 +180,11 @@ terminal:
 #   lifetime_seconds: 300
 #   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 #   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
+#   # Optional: run the container as your host user's uid:gid so files written
+#   # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
+#   # caps too since no gosu privilege drop is needed. Leave off if your
+#   # chosen docker_image expects to start as root.
+#   docker_run_as_host_user: true
 #   # Optional: explicitly forward selected env vars into Docker.
 #   # These values come from your current shell first, then ~/.hermes/.env.
 #   # Warning: anything forwarded here is visible to commands run in the container.
@@ -606,6 +610,7 @@ platform_toolsets:
  signal: [hermes-signal]
  homeassistant: [hermes-homeassistant]
  qqbot: [hermes-qqbot]
+  yuanbao: [hermes-yuanbao]

 # =============================================================================
 # Gateway Platform Settings
@@ -824,7 +829,9 @@ delegation:
 # Display
 # =============================================================================
 display:
-  # Use compact banner mode
+  # Use compact banner mode (hides the ASCII-art banner, shows a single line).
+  #   true:  Compact single-line banner
+  #   false: Full ASCII banner with tool/skill summary (default)
  compact: false

  # Tool progress display level (CLI and gateway)
@@ -838,12 +845,19 @@ display:
  # Gateway-only natural mid-turn assistant updates.
  # When true, completed assistant status messages are sent as separate chat
  # messages. This is independent of tool_progress and gateway streaming.
+  #   true:  Send mid-turn assistant updates as separate messages (default)
+  #   false: Only send the final response
  interim_assistant_messages: true

-  # What Enter does when Hermes is already busy in the CLI.
+  # What Enter does when Hermes is already busy (CLI and gateway platforms).
  #   interrupt: Interrupt the current run and redirect Hermes (default)
  #   queue:     Queue your message for the next turn
-  # Ctrl+C always interrupts regardless of this setting.
+  #   steer:     Inject your message mid-run via /steer, arriving at the agent
+  #              after the next tool call — no interrupt, no role violation.
+  #              Falls back to 'queue' if the agent isn't running yet or if
+  #              images are attached (steer only carries text).
+  # Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
+  # Toggle at runtime with /busy <interrupt|queue|steer>.
  busy_input_mode: interrupt

  # Background process notifications (gateway/messaging only).
@@ -859,17 +873,22 @@ display:
  # Play terminal bell when agent finishes a response.
  # Useful for long-running tasks — your terminal will ding when the agent is done.
  # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
+  #   true:  Ring the terminal bell on each response
+  #   false: Silent (default)
  bell_on_complete: false

  # Show model reasoning/thinking before each response.
  # When enabled, a dim box shows the model's thought process above the response.
  # Toggle at runtime with /reasoning show or /reasoning hide.
+  #   true:  Show the reasoning box
+  #   false: Hide reasoning (default)
  show_reasoning: false

  # Stream tokens to the terminal as they arrive instead of waiting for the
  # full response. The response box opens on first token and text appears
  # line-by-line. Tool calls are still captured silently.
-  # Stream tokens to the terminal in real-time. Disable to wait for full responses.
+  #   true:  Stream tokens as they arrive (default)
+  #   false: Wait for the full response before rendering
  streaming: true

  # ───────────────────────────────────────────────────────────────────────────
@@ -879,10 +898,15 @@ display:
  # response box label, and branding text. Change at runtime with /skin <name>.
  #
  # Built-in skins:
-  #   default  — Classic Hermes gold/kawaii
-  #   ares     — Crimson/bronze war-god theme with spinner wings
-  #   mono     — Clean grayscale monochrome
-  #   slate    — Cool blue developer-focused
+  #   default        — Classic Hermes gold/kawaii
+  #   ares           — Crimson/bronze war-god theme with spinner wings
+  #   mono           — Clean grayscale monochrome
+  #   slate          — Cool blue developer-focused
+  #   daylight       — Bright light-mode theme
+  #   warm-lightmode — Warm paper-tone light-mode theme
+  #   poseidon       — Sea-green/teal Olympian theme
+  #   sisyphus       — Earthy stone-and-moss theme
+  #   charizard      — Fiery orange dragon theme
  #
  # Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
  # Schema (all fields optional, missing values inherit from default):
@@ -908,7 +932,7 @@ display:
  #     agent_name: "My Agent"               # Banner title and branding
  #     welcome: "Welcome message"           # Shown at CLI startup
  #     response_label: " ⚔ Agent "         # Response box header label
-  #     prompt_symbol: "⚔ ❯ "              # Prompt symbol
+  #     prompt_symbol: "⚔"                  # Prompt symbol (bare token; renderers add trailing space)
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default
@@ -21,6 +21,7 @@ from typing import Optional, Dict, List, Any, Union
 logger = logging.getLogger(__name__)

 from hermes_time import now as _hermes_now
+from utils import atomic_replace

 try:
    from croniter import croniter
@@ -311,8 +312,22 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None

    elif schedule["kind"] == "cron":
        if not HAS_CRONITER:
+            logger.warning(
+                "Cannot compute next run for cron schedule %r: 'croniter' is "
+                "not installed. croniter is a core dependency as of v0.9.x; "
+                "reinstall hermes-agent or run 'pip install croniter' in your "
+                "runtime env.",
+                schedule.get("expr"),
+            )
            return None
-        cron = croniter(schedule["expr"], now)
+        # Use last_run_at as the croniter base when available, consistent
+        # with interval jobs.  This ensures that after a crash/restart,
+        # the next run is anchored to the actual last execution time
+        # rather than to an arbitrary restart time.
+        base_time = now
+        if last_run_at:
+            base_time = _ensure_aware(datetime.fromisoformat(last_run_at))
+        cron = croniter(schedule["expr"], base_time)
        next_run = cron.get_next(datetime)
        return next_run.isoformat()

@@ -361,7 +376,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
            json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, JOBS_FILE)
+        atomic_replace(tmp_path, JOBS_FILE)
        _secure_file(JOBS_FILE)
    except BaseException:
        try:
@@ -698,10 +713,32 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
                # Compute next run
                job["next_run_at"] = compute_next_run(job["schedule"], now)

-                # If no next run (one-shot completed), disable
+                # If no next run, decide whether this is terminal completion
+                # (one-shot) or a transient failure (recurring schedule couldn't
+                # compute — e.g. 'croniter' missing from the runtime env).
+                # Recurring jobs must NEVER be silently disabled: that turns a
+                # missing runtime dep into "job completed" and the user's
+                # schedule quietly goes off. See issue #16265.
                if job["next_run_at"] is None:
-                    job["enabled"] = False
-                    job["state"] = "completed"
+                    kind = job.get("schedule", {}).get("kind")
+                    if kind in ("cron", "interval"):
+                        job["state"] = "error"
+                        if not job.get("last_error"):
+                            job["last_error"] = (
+                                "Failed to compute next run for recurring "
+                                "schedule (is the 'croniter' package "
+                                "installed in the gateway's Python env?)"
+                            )
+                        logger.error(
+                            "Job '%s' (%s) could not compute next_run_at; "
+                            "leaving enabled and marking state=error so the "
+                            "job is not silently disabled.",
+                            job.get("name", job["id"]),
+                            kind,
+                        )
+                    else:
+                        job["enabled"] = False
+                        job["state"] = "completed"
                elif job.get("state") != "paused":
                    job["state"] = "scheduled"

@@ -835,7 +872,7 @@ def save_job_output(job_id: str, output: str):
            f.write(output)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, output_file)
+        atomic_replace(tmp_path, output_file)
        _secure_file(output_file)
    except BaseException:
        try:
@@ -77,7 +77,7 @@ _KNOWN_DELIVERY_PLATFORMS = frozenset({
    "telegram", "discord", "slack", "whatsapp", "signal",
    "matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
    "wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
-    "qqbot",
+    "qqbot", "yuanbao",
 })

 # Platforms that support a configured cron/notification home target, mapped to
@@ -198,7 +198,9 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
            if resolved:
                parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
                if resolved_is_explicit:
-                    chat_id, thread_id = parsed_chat_id, parsed_thread_id
+                    chat_id = parsed_chat_id
+                    if parsed_thread_id is not None:
+                        thread_id = parsed_thread_id
                else:
                    chat_id = resolved
        except Exception:
@@ -231,12 +233,32 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
    }


+def _normalize_deliver_value(deliver) -> str:
+    """Normalize a stored/submitted ``deliver`` value to its canonical string form.
+
+    The contract is that ``deliver`` is a string (``"local"``, ``"origin"``,
+    ``"telegram"``, ``"telegram:-1001:17"``, or comma-separated combinations).
+    Historically some callers — MCP clients passing an array, direct edits of
+    ``jobs.json``, or stale code paths — have stored a list/tuple like
+    ``["telegram"]``, and ``str(["telegram"])`` yields ``"['telegram']"``, which
+    is not a known platform and fails resolution silently.  Lists/tuples are
+    flattened to a comma-separated string; blank and ``None`` (JSON ``null``)
+    items are dropped.  ``None``, ``""`` and item-less lists yield ``"local"``.
+    """
+    if deliver is None or deliver == "":
+        return "local"
+    if isinstance(deliver, (list, tuple)):
+        parts = [str(p).strip() for p in deliver if p is not None and str(p).strip()]
+        return ",".join(parts) if parts else "local"
+    return str(deliver)
+
+
 def _resolve_delivery_targets(job: dict) -> List[dict]:
    """Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
-    deliver = job.get("deliver", "local")
+    deliver = _normalize_deliver_value(job.get("deliver", "local"))
    if deliver == "local":
        return []
-    parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
+    parts = [p.strip() for p in deliver.split(",") if p.strip()]
    seen = set()
    targets = []
    for part in parts:
@@ -337,6 +359,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
        "sms": Platform.SMS,
        "bluebubbles": Platform.BLUEBUBBLES,
        "qqbot": Platform.QQBOT,
+        "yuanbao": Platform.YUANBAO,
    }

    # Optionally wrap the content with a header/footer so the user knows this
@@ -715,7 +738,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
-        "[SYSTEM: You are running as a scheduled cron job. "
+        "[IMPORTANT: You are running as a scheduled cron job. "
        "DELIVERY: Your final response will be automatically delivered "
        "to the user — do NOT use send_message or try to deliver "
        "the output yourself. Just produce your report/output as your "
@@ -751,7 +774,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
            parts.append("")
        parts.extend(
            [
-                f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
+                f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
                "",
                content,
            ]
@@ -759,7 +782,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:

    if skipped:
        notice = (
-            f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
+            f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
            f"and were skipped: {', '.join(skipped)}. "
            f"Start your response with a brief notice so the user is aware, e.g.: "
            f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
@@ -821,6 +844,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
    logger.info("Prompt: %s", prompt[:100])

+    agent = None
+
    # Mark this as a cron session so the approval system can apply cron_mode.
    # This env var is process-wide and persists for the lifetime of the
    # scheduler process — every job this process runs is a cron job.
@@ -1008,10 +1033,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            # When a workdir is configured, inject AGENTS.md / CLAUDE.md /
-            # .cursorrules from that directory; otherwise preserve the old
-            # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
+            # Cron jobs should always inherit the user's SOUL.md identity from
+            # HERMES_HOME. When a workdir is configured, also inject project
+            # context files (AGENTS.md / CLAUDE.md / .cursorrules) from there.
+            # Without a workdir, keep cwd context discovery disabled.
            skip_context_files=not bool(_job_workdir),
+            load_soul_identity=True,
            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
@@ -1026,7 +1053,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        #
        # Uses the agent's built-in activity tracker (updated by
        # _touch_activity() on every tool call, API call, and stream delta).
-        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
+        _raw_cron_timeout = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
+        if _raw_cron_timeout:
+            try:
+                _cron_timeout = float(_raw_cron_timeout)
+            except (ValueError, TypeError):
+                logger.warning(
+                    "Invalid HERMES_CRON_TIMEOUT=%r; using default 600s",
+                    _raw_cron_timeout,
+                )
+                _cron_timeout = 600.0
+        else:
+            _cron_timeout = 600.0
        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
        _POLL_INTERVAL = 5.0
        _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
@@ -1169,6 +1207,24 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                _session_db.close()
            except (Exception, KeyboardInterrupt) as e:
                logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
+        # Release subprocesses, terminal sandboxes, browser daemons, and the
+        # main OpenAI/httpx client held by this ephemeral cron agent. Without
+        # this, a gateway that ticks cron every N minutes leaks fds per job
+        # until it hits EMFILE (#10200 / "too many open files").
+        try:
+            if agent is not None:
+                agent.close()
+        except (Exception, KeyboardInterrupt) as e:
+            logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
+        # Each cron run spins up a short-lived worker thread whose event loop
+        # dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
+        # httpx clients cached under that loop are now unusable — reap them
+        # so their transports don't accumulate in the process-global cache.
+        try:
+            from agent.auxiliary_client import cleanup_stale_async_clients
+            cleanup_stale_async_clients()
+        except Exception as e:
+            logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)


 def tick(verbose: bool = True, adapters=None, loop=None) -> int:
@@ -1308,6 +1364,17 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                    _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
                _results.extend(f.result() for f in _futures)

+        # Best-effort sweep of MCP stdio subprocesses that survived their
+        # session teardown during this tick.  Runs AFTER every job has
+        # finished so active sessions (including live user chats) are
+        # never touched — only PIDs explicitly detected as orphans in
+        # tools.mcp_tool._run_stdio's finally block are reaped.
+        try:
+            from tools.mcp_tool import _kill_orphaned_mcp_children
+            _kill_orphaned_mcp_children()
+        except Exception as _e:
+            logger.debug("Post-tick MCP orphan cleanup failed: %s", _e)
+
        return sum(_results)
    finally:
        if fcntl:
@@ -41,6 +41,15 @@ if [ "$(id -u)" = "0" ]; then
            echo "Warning: chown failed (rootless container?) — continuing anyway"
    fi

+    # Ensure config.yaml is readable by the hermes runtime user even if it was
+    # edited on the host after initial ownership setup. Must run here (as root)
+    # rather than after the gosu drop, otherwise a non-root caller like
+    # `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
+    if [ -f "$HERMES_HOME/config.yaml" ]; then
+        chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
+        chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
+    fi
+
    echo "Dropping root privileges"
    exec gosu hermes "$0" "$@"
 fi
@@ -67,13 +76,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

-# Ensure the main config file remains accessible to the hermes runtime user
-# even if it was edited on the host after initial ownership setup.
-if [ -f "$HERMES_HOME/config.yaml" ]; then
-    chown hermes:hermes "$HERMES_HOME/config.yaml"
-    chmod 640 "$HERMES_HOME/config.yaml"
-fi
-
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -36,6 +36,7 @@

      imports = [
        ./nix/packages.nix
+        ./nix/overlays.nix
        ./nix/nixosModules.nix
        ./nix/checks.nix
        ./nix/devShell.nix
@@ -1,85 +0,0 @@
-"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
-
-This hook is always registered. It silently skips if no BOOT.md exists.
-To activate, create ``~/.hermes/BOOT.md`` with instructions for the
-agent to execute on every gateway restart.
-
-Example BOOT.md::
-
-    # Startup Checklist
-
-    1. Check if any cron jobs failed overnight
-    2. Send a status update to Discord #general
-    3. If there are errors in /opt/app/deploy.log, summarize them
-
-The agent runs in a background thread so it doesn't block gateway
-startup. If nothing needs attention, it replies with [SILENT] to
-suppress delivery.
-"""
-
-import logging
-import threading
-
-logger = logging.getLogger("hooks.boot-md")
-
-from hermes_constants import get_hermes_home
-HERMES_HOME = get_hermes_home()
-BOOT_FILE = HERMES_HOME / "BOOT.md"
-
-
-def _build_boot_prompt(content: str) -> str:
-    """Wrap BOOT.md content in a system-level instruction."""
-    return (
-        "You are running a startup boot checklist. Follow the BOOT.md "
-        "instructions below exactly.\n\n"
-        "---\n"
-        f"{content}\n"
-        "---\n\n"
-        "Execute each instruction. If you need to send a message to a "
-        "platform, use the send_message tool.\n"
-        "If nothing needs attention and there is nothing to report, "
-        "reply with ONLY: [SILENT]"
-    )
-
-
-def _run_boot_agent(content: str) -> None:
-    """Spawn a one-shot agent session to execute the boot instructions."""
-    try:
-        from run_agent import AIAgent
-
-        prompt = _build_boot_prompt(content)
-        agent = AIAgent(
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-            max_iterations=20,
-        )
-        result = agent.run_conversation(prompt)
-        response = result.get("final_response", "")
-        if response and "[SILENT]" not in response:
-            logger.info("boot-md completed: %s", response[:200])
-        else:
-            logger.info("boot-md completed (nothing to report)")
-    except Exception as e:
-        logger.error("boot-md agent failed: %s", e)
-
-
-async def handle(event_type: str, context: dict) -> None:
-    """Gateway startup handler — run BOOT.md if it exists."""
-    if not BOOT_FILE.exists():
-        return
-
-    content = BOOT_FILE.read_text(encoding="utf-8").strip()
-    if not content:
-        return
-
-    logger.info("Running BOOT.md (%d chars)", len(content))
-
-    # Run in a background thread so we don't block gateway startup.
-    thread = threading.Thread(
-        target=_run_boot_agent,
-        args=(content,),
-        name="boot-md",
-        daemon=True,
-    )
-    thread.start()
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
 # Build / refresh
 # ---------------------------------------------------------------------------

-def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
+async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
    """
    Build a channel directory from connected platform adapters and session data.

@@ -72,7 +72,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
            if platform == Platform.DISCORD:
                platforms["discord"] = _build_discord(adapter)
            elif platform == Platform.SLACK:
-                platforms["slack"] = _build_slack(adapter)
+                platforms["slack"] = await _build_slack(adapter)
        except Exception as e:
            logger.warning("Channel directory: failed to build %s: %s", platform.value, e)

@@ -136,21 +136,66 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
    return channels


-def _build_slack(adapter) -> List[Dict[str, str]]:
-    """List Slack channels the bot has joined."""
-    # Slack adapter may expose a web client
-    client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
-    if not client:
+async def _build_slack(adapter) -> List[Dict[str, Any]]:
+    """List Slack channels the bot has joined across all workspaces.
+
+    Uses ``users.conversations`` against each workspace's web client. Pulls
+    public + private channels the bot is a member of, then merges in DMs
+    discovered from session history (IMs aren't useful to enumerate
+    proactively).
+    """
+    team_clients = getattr(adapter, "_team_clients", None) or {}
+    if not team_clients:  # no workspace clients: use session history only
         return _build_from_sessions("slack")

-    try:
-        from tools.send_message_tool import _send_slack  # noqa: F401
-        # Use the Slack Web API directly if available
-    except Exception:
-        pass
+    channels: List[Dict[str, Any]] = []
+    seen_ids: set = set()  # channel IDs already emitted, for de-duplication

-    # Fallback to session data
-    return _build_from_sessions("slack")
+    for team_id, client in team_clients.items():
+        try:
+            cursor: Optional[str] = None
+            for _page in range(20):  # safety cap: at most 20 pages of 200 per workspace
+                response = await client.users_conversations(
+                    types="public_channel,private_channel",
+                    exclude_archived=True,
+                    limit=200,
+                    cursor=cursor,
+                )
+                if not response.get("ok"):
+                    logger.warning(
+                        "Channel directory: users.conversations not ok for team %s: %s",
+                        team_id,
+                        response.get("error", "unknown"),
+                    )
+                    break
+                for ch in response.get("channels", []):
+                    cid = ch.get("id")
+                    name = ch.get("name")
+                    if not cid or not name or cid in seen_ids:  # incomplete or duplicate entry
+                        continue
+                    seen_ids.add(cid)
+                    channels.append({
+                        "id": cid,
+                        "name": name,
+                        "type": "private" if ch.get("is_private") else "channel",
+                    })
+                cursor = (response.get("response_metadata") or {}).get("next_cursor")  # empty/missing => last page
+                if not cursor:
+                    break
+        except Exception as e:
+            logger.warning(
+                "Channel directory: failed to list Slack channels for team %s: %s",
+                team_id, e,
+            )
+            continue  # move on to the next workspace
+
+    # Merge in DM/group entries discovered from session history.
+    for entry in _build_from_sessions("slack"):
+        if entry.get("id") not in seen_ids:
+            channels.append(entry)
+            seen_ids.add(entry.get("id"))
+
+    return channels


 def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
@@ -223,6 +268,14 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
    if not channels:
        return None

+    # 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
+    # raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
+    # in _parse_target_ref hasn't recognized them as explicit.
+    raw = name.strip()
+    for ch in channels:
+        if ch.get("id") == raw:
+            return ch["id"]
+
    query = _normalize_channel_query(name)

    # 1. Exact name match, including the display labels shown by send_message(action="list")
@@ -67,6 +67,7 @@ class Platform(Enum):
    WEIXIN = "weixin"
    BLUEBUBBLES = "bluebubbles"
    QQBOT = "qqbot"
+    YUANBAO = "yuanbao"


@dataclass
@@ -195,6 +196,14 @@ class StreamingConfig:
    edit_interval: float = 1.0    # Seconds between message edits (Telegram rate-limits at ~1/s)
    buffer_threshold: int = 40    # Chars before forcing an edit
    cursor: str = " ▉"           # Cursor shown during streaming
+    # Ported from openclaw/openclaw#72038.  When >0, the final edit for
+    # a long-running streamed response is delivered as a fresh message
+    # if the original preview has been visible for at least this many
+    # seconds, so the platform's visible timestamp reflects completion
+    # time instead of the preview creation time.  Currently applied to
+    # Telegram only (other platforms ignore the setting).  Default 60s
+    # matches the OpenClaw rollout.  Set to 0 to disable.
+    fresh_final_after_seconds: float = 60.0

    def to_dict(self) -> Dict[str, Any]:
        return {
@@ -203,6 +212,7 @@ class StreamingConfig:
            "edit_interval": self.edit_interval,
            "buffer_threshold": self.buffer_threshold,
            "cursor": self.cursor,
+            "fresh_final_after_seconds": self.fresh_final_after_seconds,
        }

    @classmethod
@@ -215,6 +225,9 @@ class StreamingConfig:
            edit_interval=float(data.get("edit_interval", 1.0)),
            buffer_threshold=int(data.get("buffer_threshold", 40)),
            cursor=data.get("cursor", " ▉"),
+            fresh_final_after_seconds=float(
+                data.get("fresh_final_after_seconds", 60.0)
+            ),
        )


@@ -314,6 +327,9 @@ class GatewayConfig:
            # QQBot uses extra dict for app credentials
            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
                connected.append(platform)
+            # Yuanbao uses extra dict for app credentials
+            elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
+                connected.append(platform)
            # DingTalk uses client_id/client_secret from config.extra or env vars
            elif platform == Platform.DINGTALK and (
                config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
@@ -550,6 +566,8 @@ def load_gateway_config() -> GatewayConfig:
                        existing = {}
                    # Deep-merge extra dicts so gateway.json defaults survive
                    merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
+                    if plat_name == Platform.SLACK.value and "enabled" in plat_block:
+                        merged_extra["_enabled_explicit"] = True
                    merged = {**existing, **plat_block}
                    if merged_extra:
                        merged["extra"] = merged_extra
@@ -570,6 +588,8 @@ def load_gateway_config() -> GatewayConfig:
                    )
                if "reply_prefix" in platform_cfg:
                    bridged["reply_prefix"] = platform_cfg["reply_prefix"]
+                if "reply_in_thread" in platform_cfg:
+                    bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
                if "require_mention" in platform_cfg:
                    bridged["require_mention"] = platform_cfg["require_mention"]
                if "free_response_channels" in platform_cfg:
@@ -584,7 +604,7 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["group_policy"] = platform_cfg["group_policy"]
                if "group_allow_from" in platform_cfg:
                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
-                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
+                if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
                    channel_prompts = platform_cfg["channel_prompts"]
@@ -592,16 +612,21 @@ def load_gateway_config() -> GatewayConfig:
                        bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
                    else:
                        bridged["channel_prompts"] = channel_prompts
-                if not bridged:
+                enabled_was_explicit = "enabled" in platform_cfg
+                if not bridged and not enabled_was_explicit:
                    continue
                plat_data = platforms_data.setdefault(plat.value, {})
                if not isinstance(plat_data, dict):
                    plat_data = {}
                    platforms_data[plat.value] = plat_data
+                if enabled_was_explicit:
+                    plat_data["enabled"] = platform_cfg["enabled"]
                extra = plat_data.setdefault("extra", {})
                if not isinstance(extra, dict):
                    extra = {}
                    plat_data["extra"] = extra
+                if plat == Platform.SLACK and enabled_was_explicit:
+                    extra["_enabled_explicit"] = True
                extra.update(bridged)

            # Slack settings → env vars (env vars take precedence)
@@ -609,6 +634,8 @@ def load_gateway_config() -> GatewayConfig:
            if isinstance(slack_cfg, dict):
                if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
                    os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
+                if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
+                    os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
                if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
                    os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
                frc = slack_cfg.get("free_response_channels")
@@ -918,8 +945,20 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
    slack_token = os.getenv("SLACK_BOT_TOKEN")
    if slack_token:
        if Platform.SLACK not in config.platforms:
+            # No yaml config for Slack — env-only setup, enable it
            config.platforms[Platform.SLACK] = PlatformConfig()
-        config.platforms[Platform.SLACK].enabled = True
+            config.platforms[Platform.SLACK].enabled = True
+        else:
+            slack_config = config.platforms[Platform.SLACK]
+            enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
+            if not slack_config.enabled and not enabled_was_explicit:
+                # Top-level Slack settings such as channel prompts should not
+                # turn an env-token setup into a disabled platform. Only an
+                # explicit slack.enabled/platforms.slack.enabled false should.
+                slack_config.enabled = True
+        # Only an explicit yaml ``enabled: false`` is left untouched above. The
+        # token is still stored so skills that send Slack messages can use it
+        # without activating the gateway adapter.
        config.platforms[Platform.SLACK].token = slack_token
    slack_home = os.getenv("SLACK_HOME_CHANNEL")
    if slack_home and Platform.SLACK in config.platforms:
@@ -1276,6 +1315,48 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
            )

+    # Yuanbao — YUANBAO_APP_ID preferred
+    yuanbao_app_id = os.getenv("YUANBAO_APP_ID") or os.getenv("YUANBAO_APP_KEY")
+    yuanbao_app_secret = os.getenv("YUANBAO_APP_SECRET")
+    if yuanbao_app_id and yuanbao_app_secret:
+        if Platform.YUANBAO not in config.platforms:
+            config.platforms[Platform.YUANBAO] = PlatformConfig()
+        config.platforms[Platform.YUANBAO].enabled = True
+        extra = config.platforms[Platform.YUANBAO].extra
+        extra["app_id"] = yuanbao_app_id
+        extra["app_secret"] = yuanbao_app_secret
+        yuanbao_bot_id = os.getenv("YUANBAO_BOT_ID")
+        if yuanbao_bot_id:
+            extra["bot_id"] = yuanbao_bot_id
+        yuanbao_ws_url = os.getenv("YUANBAO_WS_URL")
+        if yuanbao_ws_url:
+            extra["ws_url"] = yuanbao_ws_url
+        yuanbao_api_domain = os.getenv("YUANBAO_API_DOMAIN")
+        if yuanbao_api_domain:
+            extra["api_domain"] = yuanbao_api_domain
+        yuanbao_route_env = os.getenv("YUANBAO_ROUTE_ENV")
+        if yuanbao_route_env:
+            extra["route_env"] = yuanbao_route_env
+        yuanbao_home = os.getenv("YUANBAO_HOME_CHANNEL")
+        if yuanbao_home:
+            config.platforms[Platform.YUANBAO].home_channel = HomeChannel(
+                platform=Platform.YUANBAO,
+                chat_id=yuanbao_home,
+                name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
+            )
+        yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
+        if yuanbao_dm_policy:
+            extra["dm_policy"] = yuanbao_dm_policy.strip().lower()
+        yuanbao_dm_allow_from = os.getenv("YUANBAO_DM_ALLOW_FROM")
+        if yuanbao_dm_allow_from:
+            extra["dm_allow_from"] = yuanbao_dm_allow_from
+        yuanbao_group_policy = os.getenv("YUANBAO_GROUP_POLICY")
+        if yuanbao_group_policy:
+            extra["group_policy"] = yuanbao_group_policy.strip().lower()
+        yuanbao_group_allow_from = os.getenv("YUANBAO_GROUP_ALLOW_FROM")
+        if yuanbao_group_allow_from:
+            extra["group_allow_from"] = yuanbao_group_allow_from
+
    # Session settings
    idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
    if idle_minutes:
@@ -79,7 +79,9 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
    "discord":     _TIER_HIGH,

    # Tier 2 — edit support, often customer/workspace channels
-    "slack":           _TIER_MEDIUM,
+    # Slack: tool_progress off by default — Bolt posts cannot be edited like CLI;
+    # "new"/"all" spam permanent lines in channels (hermes-agent#14663).
+    "slack":           {**_TIER_MEDIUM, "tool_progress": "off"},
    "mattermost":      _TIER_MEDIUM,
    "matrix":          _TIER_MEDIUM,
    "feishu":          _TIER_MEDIUM,
@@ -52,19 +52,13 @@ class HookRegistry:
        return list(self._loaded_hooks)

    def _register_builtin_hooks(self) -> None:
-        """Register built-in hooks that are always active."""
-        try:
-            from gateway.builtin_hooks.boot_md import handle as boot_md_handle
+        """Register built-in hooks that are always active.

-            self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
-            self._loaded_hooks.append({
-                "name": "boot-md",
-                "description": "Run ~/.hermes/BOOT.md on gateway startup",
-                "events": ["gateway:startup"],
-                "path": "(builtin)",
-            })
-        except Exception as e:
-            print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
+        Currently empty — no shipped built-in hooks. Kept as the extension
+        point for future always-on gateway hooks so they drop in without
+        re-plumbing discover_and_load().
+        """
+        return

    def discover_and_load(self) -> None:
        """
@@ -28,6 +28,7 @@ def mirror_to_session(
    message_text: str,
    source_label: str = "cli",
    thread_id: Optional[str] = None,
+    user_id: Optional[str] = None,
 ) -> bool:
    """
    Append a delivery-mirror message to the target session's transcript.
@@ -39,9 +40,20 @@ def mirror_to_session(
    All errors are caught -- this is never fatal.
    """
    try:
-        session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
+        session_id = _find_session_id(
+            platform,
+            str(chat_id),
+            thread_id=thread_id,
+            user_id=user_id,
+        )
        if not session_id:
-            logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
+            logger.debug(
+                "Mirror: no session found for %s:%s:%s:%s",
+                platform,
+                chat_id,
+                thread_id,
+                user_id,
+            )
            return False

        mirror_msg = {
@@ -59,17 +71,33 @@ def mirror_to_session(
        return True

    except Exception as e:
-        logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
+        logger.debug(
+            "Mirror failed for %s:%s:%s:%s: %s",
+            platform,
+            chat_id,
+            thread_id,
+            user_id,
+            e,
+        )
        return False


-def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
+def _find_session_id(
+    platform: str,
+    chat_id: str,
+    thread_id: Optional[str] = None,
+    user_id: Optional[str] = None,
+) -> Optional[str]:
    """
    Find the active session_id for a platform + chat_id pair.

    Scans sessions.json entries and matches where origin.chat_id == chat_id
    on the right platform.  DM session keys don't embed the chat_id
    (e.g. "agent:main:telegram:dm"), so we check the origin dict.
+
+    When *user_id* is provided, prefer exact sender matches. If multiple
+    same-chat candidates exist and none matches the user, return None instead
+    of guessing and contaminating another participant's session.
    """
    if not _SESSIONS_INDEX.exists():
        return None
@@ -81,8 +109,7 @@ def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = Non
        return None

    platform_lower = platform.lower()
-    best_match = None
-    best_updated = ""
+    candidates = []

    for _key, entry in data.items():
        origin = entry.get("origin") or {}
@@ -96,12 +123,31 @@ def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = Non
            origin_thread_id = origin.get("thread_id")
            if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
                continue
-            updated = entry.get("updated_at", "")
-            if updated > best_updated:
-                best_updated = updated
-                best_match = entry.get("session_id")
+            candidates.append(entry)

-    return best_match
+    if not candidates:
+        return None
+
+    if user_id:
+        exact_user_matches = [
+            entry for entry in candidates
+            if str((entry.get("origin") or {}).get("user_id") or "") == str(user_id)
+        ]
+        if exact_user_matches:
+            candidates = exact_user_matches
+        elif len(candidates) > 1:
+            return None
+    elif len(candidates) > 1:
+        distinct_user_ids = {
+            str((entry.get("origin") or {}).get("user_id") or "").strip()
+            for entry in candidates
+            if str((entry.get("origin") or {}).get("user_id") or "").strip()
+        }
+        if len(distinct_user_ids) > 1:
+            return None
+
+    best_entry = max(candidates, key=lambda entry: entry.get("updated_at", ""))
+    return best_entry.get("session_id")


 def _append_to_jsonl(session_id: str, message: dict) -> None:
@@ -28,6 +28,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_dir
+from utils import atomic_replace


 # Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
@@ -59,7 +60,7 @@ def _secure_write(path: Path, data: str) -> None:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, str(path))
+        atomic_replace(tmp_path, path)
        try:
            os.chmod(path, 0o600)
        except OSError:
@@ -10,10 +10,12 @@ Each adapter handles:

 from .base import BasePlatformAdapter, MessageEvent, SendResult
 from .qqbot import QQAdapter
+from .yuanbao import YuanbaoAdapter

 __all__ = [
    "BasePlatformAdapter",
    "MessageEvent",
    "SendResult",
    "QQAdapter",
+    "YuanbaoAdapter",
 ]
@@ -7,8 +7,11 @@ Exposes an HTTP server with endpoints:
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
 - GET  /v1/models                  — lists hermes-agent as an available model
+- GET  /v1/capabilities            — machine-readable API capabilities for external UIs
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
+- GET  /v1/runs/{run_id}           — retrieve current run status
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
+- POST /v1/runs/{run_id}/stop      — interrupt a running agent
 - GET  /health                     — health check
 - GET  /health/detailed            — rich status for cross-container dashboard probing

@@ -586,6 +589,11 @@ class APIServerAdapter(BasePlatformAdapter):
        self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
        # Creation timestamps for orphaned-run TTL sweep
        self._run_streams_created: Dict[str, float] = {}
+        # Active run agent/task references for stop support
+        self._active_run_agents: Dict[str, Any] = {}
+        self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
+        # Pollable run status for dashboards and external control-plane UIs.
+        self._run_statuses: Dict[str, Dict[str, Any]] = {}
        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
@@ -804,6 +812,51 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

+    async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
+        """GET /v1/capabilities — machine-readable description of this API server.
+
+        Reports the model name, whether a bearer key is required, a set of
+        feature flags, and the method/path of each advertised endpoint, so
+        clients can discover the surface instead of assuming it.
+        """
+        denied = self._check_auth(request)
+        if denied:
+            return denied
+
+        # (name, method, path) — expanded into {"method": ..., "path": ...} below.
+        routes = (
+            ("health", "GET", "/health"),
+            ("health_detailed", "GET", "/health/detailed"),
+            ("models", "GET", "/v1/models"),
+            ("chat_completions", "POST", "/v1/chat/completions"),
+            ("responses", "POST", "/v1/responses"),
+            ("runs", "POST", "/v1/runs"),
+            ("run_status", "GET", "/v1/runs/{run_id}"),
+            ("run_events", "GET", "/v1/runs/{run_id}/events"),
+            ("run_stop", "POST", "/v1/runs/{run_id}/stop"),
+        )
+        features = {
+            "chat_completions": True,
+            "chat_completions_streaming": True,
+            "responses_api": True,
+            "responses_streaming": True,
+            "run_submission": True,
+            "run_status": True,
+            "run_events_sse": True,
+            "run_stop": True,
+            "tool_progress_events": True,
+            "session_continuity_header": "X-Hermes-Session-Id",
+            "cors": bool(self._cors_origins),
+        }
+        return web.json_response({
+            "object": "hermes.api_server.capabilities",
+            "platform": "hermes-agent",
+            "model": self._model_name,
+            "auth": {"type": "bearer", "required": bool(self._api_key)},
+            "features": features,
+            "endpoints": {name: {"method": verb, "path": path} for name, verb, path in routes},
+        })
+
    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
@@ -928,39 +981,62 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is not None:
                    _stream_q.put(delta)

-            def _on_tool_progress(event_type, name, preview, args, **kwargs):
-                """Send tool progress as a separate SSE event.
+            # Track which tool_call_ids we've emitted a "running" lifecycle
+            # event for, so a "completed" event without a matching "running"
+            # (e.g. internal/filtered tools) is silently dropped instead of
+            # producing an orphaned event clients can't correlate.
+            _started_tool_call_ids: set[str] = set()

-                Previously, progress markers like ``⏰ list`` were injected
-                directly into ``delta.content``.  OpenAI-compatible frontends
-                (Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
-                the assistant message and send it back on subsequent requests.
-                After enough turns the model learns to *emit* the markers as
-                plain text instead of issuing real tool calls — silently
-                hallucinating tool results.  See #6972.
+            def _on_tool_start(tool_call_id, function_name, function_args):
+                """Emit ``hermes.tool.progress`` with ``status: running``.

-                The fix: push a tagged tuple ``("__tool_progress__", payload)``
-                onto the stream queue.  The SSE writer emits it as a custom
-                ``event: hermes.tool.progress`` line that compliant frontends
-                can render for UX but will *not* persist into conversation
-                history.  Clients that don't understand the custom event type
-                silently ignore it per the SSE specification.
+                Replaces the old ``tool_progress_callback("tool.started",
+                ...)`` emit so SSE consumers receive a single event per
+                tool start, carrying both the legacy ``tool``/``emoji``/
+                ``label`` payload (for #6972 frontends) and the new
+                ``toolCallId``/``status`` correlation fields (#16588).
+
+                Skips tools whose names start with ``_`` so internal
+                events (``_thinking``, …) stay off the wire — matching
+                the prior ``_on_tool_progress`` filter exactly.
                """
-                if event_type != "tool.started":
+                if not tool_call_id or function_name.startswith("_"):
                    return
-                if name.startswith("_"):
-                    return
-                from agent.display import get_tool_emoji
-                emoji = get_tool_emoji(name)
-                label = preview or name
+                _started_tool_call_ids.add(tool_call_id)
+                from agent.display import build_tool_preview, get_tool_emoji
+                label = build_tool_preview(function_name, function_args) or function_name
                _stream_q.put(("__tool_progress__", {
-                    "tool": name,
-                    "emoji": emoji,
+                    "tool": function_name,
+                    "emoji": get_tool_emoji(function_name),
                    "label": label,
+                    "toolCallId": tool_call_id,
+                    "status": "running",
+                }))
+
+            def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
+                """Emit the matching ``status: completed`` event.
+
+                Dropped if the start was filtered (internal tool, missing
+                id, or never seen) so clients never get an orphaned
+                ``completed`` they can't correlate to a prior ``running``.
+                """
+                if not tool_call_id or tool_call_id not in _started_tool_call_ids:
+                    return
+                _started_tool_call_ids.discard(tool_call_id)
+                _stream_q.put(("__tool_progress__", {
+                    "tool": function_name,
+                    "toolCallId": tool_call_id,
+                    "status": "completed",
                }))

            # Start agent in background.  agent_ref is a mutable container
            # so the SSE writer can interrupt the agent on client disconnect.
+            #
+            # ``tool_progress_callback`` is intentionally not wired here:
+            # it would duplicate every emit because ``run_agent`` fires it
+            # side-by-side with ``tool_start_callback``/``tool_complete_callback``.
+            # The structured callbacks are strictly richer (they carry the
+            # tool_call id), so they own the chat-completions SSE channel.
            agent_ref = [None]
            agent_task = asyncio.ensure_future(self._run_agent(
                user_message=user_message,
@@ -968,7 +1044,8 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
                stream_delta_callback=_on_delta,
-                tool_progress_callback=_on_tool_progress,
+                tool_start_callback=_on_tool_start,
+                tool_complete_callback=_on_tool_complete,
                agent_ref=agent_ref,
            ))

@@ -1083,7 +1160,8 @@ class APIServerAdapter(BasePlatformAdapter):
                Tagged tuples ``("__tool_progress__", payload)`` are sent
                as a custom ``event: hermes.tool.progress`` SSE event so
                frontends can display them without storing the markers in
-                conversation history.  See #6972.
+                conversation history.  See #6972 for the original event,
+                #16588 for the ``toolCallId``/``status`` lifecycle fields.
                """
                if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
                    event_data = json.dumps(item[1])
@@ -2293,10 +2371,31 @@ class APIServerAdapter(BasePlatformAdapter):

    _MAX_CONCURRENT_RUNS = 10  # Prevent unbounded resource allocation
    _RUN_STREAM_TTL = 300  # seconds before orphaned runs are swept
+    _RUN_STATUS_TTL = 3600  # seconds to retain terminal run status for polling
+
+    def _set_run_status(self, run_id: str, status: str, **fields: Any) -> Dict[str, Any]:
+        """Merge ``status`` and ``fields`` into the record for ``run_id`` and return it.
+
+        ``created_at`` is only written once; ``updated_at`` is refreshed every call.
+        """
+        stamp = time.time()
+        # Pop before the merge below so a caller-supplied created_at can
+        # never overwrite the value stored by the first call.
+        first_seen = fields.pop("created_at", stamp)
+        record = self._run_statuses.setdefault(run_id, {})
+        record.update(object="hermes.run", run_id=run_id, status=status, updated_at=stamp)
+        record.setdefault("created_at", first_seen)
+        record.update(fields)
+        return record

    def _make_run_event_callback(self, run_id: str, loop: "asyncio.AbstractEventLoop"):
        """Return a tool_progress_callback that pushes structured events to the run's SSE queue."""
        def _push(event: Dict[str, Any]) -> None:
+            self._set_run_status(
+                run_id,
+                self._run_statuses.get(run_id, {}).get("status", "running"),
+                last_event=event.get("event"),
+            )
            q = self._run_streams.get(run_id)
            if q is None:
                return
@@ -2361,28 +2460,6 @@ class APIServerAdapter(BasePlatformAdapter):
        if not user_message:
            return web.json_response(_openai_error("No user message found in input"), status=400)

-        run_id = f"run_{uuid.uuid4().hex}"
-        loop = asyncio.get_running_loop()
-        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
-        self._run_streams[run_id] = q
-        self._run_streams_created[run_id] = time.time()
-
-        event_cb = self._make_run_event_callback(run_id, loop)
-
-        # Also wire stream_delta_callback so message.delta events flow through
-        def _text_cb(delta: Optional[str]) -> None:
-            if delta is None:
-                return
-            try:
-                loop.call_soon_threadsafe(q.put_nowait, {
-                    "event": "message.delta",
-                    "run_id": run_id,
-                    "timestamp": time.time(),
-                    "delta": delta,
-                })
-            except Exception:
-                pass
-
        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")

@@ -2430,17 +2507,49 @@ class APIServerAdapter(BasePlatformAdapter):
                        )
                    conversation_history.append({"role": msg["role"], "content": str(content)})

+        run_id = f"run_{uuid.uuid4().hex}"
        session_id = body.get("session_id") or stored_session_id or run_id
        ephemeral_system_prompt = instructions
+        loop = asyncio.get_running_loop()
+        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
+        created_at = time.time()
+        self._run_streams[run_id] = q
+        self._run_streams_created[run_id] = created_at
+
+        event_cb = self._make_run_event_callback(run_id, loop)
+
+        # Also wire stream_delta_callback so message.delta events flow through.
+        def _text_cb(delta: Optional[str]) -> None:
+            if delta is None:
+                return
+            try:
+                loop.call_soon_threadsafe(q.put_nowait, {
+                    "event": "message.delta",
+                    "run_id": run_id,
+                    "timestamp": time.time(),
+                    "delta": delta,
+                })
+            except Exception:
+                pass
+
+        self._set_run_status(
+            run_id,
+            "queued",
+            created_at=created_at,
+            session_id=session_id,
+            model=body.get("model", self._model_name),
+        )

        async def _run_and_close():
            try:
+                self._set_run_status(run_id, "running")
                agent = self._create_agent(
                    ephemeral_system_prompt=ephemeral_system_prompt,
                    session_id=session_id,
                    stream_delta_callback=_text_cb,
                    tool_progress_callback=event_cb,
                )
+                self._active_run_agents[run_id] = agent
                def _run_sync():
                    r = agent.run_conversation(
                        user_message=user_message,
@@ -2463,8 +2572,36 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output": final_response,
                    "usage": usage,
                })
+                self._set_run_status(
+                    run_id,
+                    "completed",
+                    output=final_response,
+                    usage=usage,
+                    last_event="run.completed",
+                )
+            except asyncio.CancelledError:
+                self._set_run_status(
+                    run_id,
+                    "cancelled",
+                    last_event="run.cancelled",
+                )
+                try:
+                    q.put_nowait({
+                        "event": "run.cancelled",
+                        "run_id": run_id,
+                        "timestamp": time.time(),
+                    })
+                except Exception:
+                    pass
+                raise
            except Exception as exc:
                logger.exception("[api_server] run %s failed", run_id)
+                self._set_run_status(
+                    run_id,
+                    "failed",
+                    error=str(exc),
+                    last_event="run.failed",
+                )
                try:
                    q.put_nowait({
                        "event": "run.failed",
@@ -2480,8 +2617,11 @@ class APIServerAdapter(BasePlatformAdapter):
                    q.put_nowait(None)
                except Exception:
                    pass
+                self._active_run_agents.pop(run_id, None)
+                self._active_run_tasks.pop(run_id, None)

        task = asyncio.create_task(_run_and_close())
+        self._active_run_tasks[run_id] = task
        try:
            self._background_tasks.add(task)
        except TypeError:
@@ -2491,6 +2631,21 @@ class APIServerAdapter(BasePlatformAdapter):

        return web.json_response({"run_id": run_id, "status": "started"}, status=202)

+    async def _handle_get_run(self, request: "web.Request") -> "web.Response":
+        """GET /v1/runs/{run_id} — serve the stored status record for one run.
+
+        Unknown ids get a 404 carrying the ``run_not_found`` error code.
+        """
+        denied = self._check_auth(request)
+        if denied:
+            return denied
+
+        run_id = request.match_info["run_id"]
+        if (record := self._run_statuses.get(run_id)) is not None:
+            return web.json_response(record)
+        not_found = _openai_error(f"Run not found: {run_id}", code="run_not_found")
+        return web.json_response(not_found, status=404)
+
    async def _handle_run_events(self, request: "web.Request") -> "web.StreamResponse":
        """GET /v1/runs/{run_id}/events — SSE stream of structured agent lifecycle events."""
        auth_err = self._check_auth(request)
@@ -2540,6 +2695,46 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

+    async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
+        """POST /v1/runs/{run_id}/stop — interrupt a running agent.
+
+        Returns 404 if no agent/task is registered for the run; otherwise
+        signals the stop, waits up to 5s, and answers with status "stopping".
+        """
+        auth_err = self._check_auth(request)
+        if auth_err:
+            return auth_err
+
+        run_id = request.match_info["run_id"]
+        agent = self._active_run_agents.get(run_id)
+        task = self._active_run_tasks.get(run_id)
+        if agent is None and task is None:
+            return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
+
+        self._set_run_status(run_id, "stopping", last_event="run.stopping")
+        if agent is not None:
+            try:
+                agent.interrupt("Stop requested via API")
+            except Exception:
+                logger.warning("[api_server] interrupt for run %s failed", run_id, exc_info=True)
+
+        if task is not None and not task.done():
+            task.cancel()
+            # task.cancel() cannot preempt run_conversation() in its executor thread;
+            # agent.interrupt() above does that. Bound the wait so we never hang here.
+            try:
+                await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
+            except asyncio.TimeoutError:
+                logger.warning("[api_server] stop for run %s timed out after 5s; "
+                               "agent may still be finishing the current step", run_id)
+            except asyncio.CancelledError:
+                if not task.done():
+                    raise  # this handler was cancelled, not just the run task
+            except Exception:
+                pass  # best-effort: the stop has already been signalled
+
+        return web.json_response({"run_id": run_id, "status": "stopping"})
+
    async def _sweep_orphaned_runs(self) -> None:
        """Periodically clean up run streams that were never consumed."""
        while True:
@@ -2554,6 +2749,17 @@ class APIServerAdapter(BasePlatformAdapter):
                logger.debug("[api_server] sweeping orphaned run %s", run_id)
                self._run_streams.pop(run_id, None)
                self._run_streams_created.pop(run_id, None)
+                self._active_run_agents.pop(run_id, None)
+                self._active_run_tasks.pop(run_id, None)
+
+            stale_statuses = [
+                run_id
+                for run_id, status in list(self._run_statuses.items())
+                if status.get("status") in {"completed", "failed", "cancelled"}
+                and now - float(status.get("updated_at", 0) or 0) > self._RUN_STATUS_TTL
+            ]
+            for run_id in stale_statuses:
+                self._run_statuses.pop(run_id, None)

    # ------------------------------------------------------------------
    # BasePlatformAdapter interface
@@ -2573,6 +2779,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_get("/health/detailed", self._handle_health_detailed)
            self._app.router.add_get("/v1/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
+            self._app.router.add_get("/v1/capabilities", self._handle_capabilities)
            self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
            self._app.router.add_post("/v1/responses", self._handle_responses)
            self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response)
@@ -2588,7 +2795,9 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_post("/api/jobs/{job_id}/run", self._handle_run_job)
            # Structured event streaming
            self._app.router.add_post("/v1/runs", self._handle_runs)
+            self._app.router.add_get("/v1/runs/{run_id}", self._handle_get_run)
            self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
+            self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
            # Start background sweep to clean up orphaned (unconsumed) run streams
            sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
            try:
@@ -307,9 +307,14 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.

    Returns ``(session_kwargs, request_kwargs)`` where:
-      - SOCKS → ``({"connector": ProxyConnector(...)}, {})``
-      - HTTP  → ``({}, {"proxy": url})``
-      - None  → ``({}, {})``
+      - With aiohttp-socks → ``({"connector": ProxyConnector(...)}, {})``
+        for *all* proxy schemes (SOCKS **and** HTTP/HTTPS).
+      - HTTP without aiohttp-socks → ``({}, {"proxy": url})``.
+      - None → ``({}, {})``.
+
+    Prefer the connector path: it works transparently with libraries
+    (like mautrix) that call ``session.request()`` without forwarding
+    per-request ``proxy=`` kwargs.

    Usage::

@@ -320,20 +325,53 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """
    if not proxy_url:
        return {}, {}
-    if proxy_url.lower().startswith("socks"):
-        try:
-            from aiohttp_socks import ProxyConnector
+    try:
+        from aiohttp_socks import ProxyConnector

-            connector = ProxyConnector.from_url(proxy_url, rdns=True)
-            return {"connector": connector}, {}
-        except ImportError:
+        connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return {"connector": connector}, {}
+    except ImportError:
+        if proxy_url.lower().startswith("socks"):
            logger.warning(
                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
                "Run: pip install aiohttp-socks",
                proxy_url,
            )
            return {}, {}
-    return {}, {"proxy": proxy_url}
+        return {}, {"proxy": proxy_url}
+
+
+def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
+    """Return True when ``hostname`` matches a ``NO_PROXY`` entry.
+
+    Supports comma- or whitespace-separated entries with optional leading dots
+    and ``*.`` wildcards, which match both the apex domain and subdomains.
+    A bare ``*`` matches every host; entries empty after normalization are skipped.
+    """
+    raw = no_proxy_value
+    if raw is None:
+        raw = os.environ.get("NO_PROXY") or os.environ.get("no_proxy") or ""
+
+    raw = raw.strip()
+    if not raw:
+        return False
+
+    lower_hostname = hostname.lower()
+    for entry in re.split(r"[\s,]+", raw):
+        normalized = entry.strip().lower()
+        if normalized == "*":
+            return True
+        if normalized.startswith("*."):
+            normalized = normalized[2:]
+        elif normalized.startswith("."):
+            normalized = normalized[1:]
+        if not normalized:
+            continue  # a stray "." / "*." must not match "" or a "host." name
+
+        if lower_hostname == normalized or lower_hostname.endswith(f".{normalized}"):
+            return True
+
+    return False


 from dataclasses import dataclass, field
@@ -693,7 +731,15 @@ SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
    ".md": "text/markdown",
    ".txt": "text/plain",
+    ".csv": "text/csv",
    ".log": "text/plain",
+    ".json": "application/json",
+    ".xml": "application/xml",
+    ".yaml": "application/yaml",
+    ".yml": "application/yaml",
+    ".toml": "application/toml",
+    ".ini": "text/plain",
+    ".cfg": "text/plain",
    ".zip": "application/zip",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
@@ -861,6 +907,41 @@ class MessageEvent:
        return args


+_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
+    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$", re.IGNORECASE),
+    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$", re.IGNORECASE),
+    re.compile(r"^(?:please\s+)?restart\s+hermes[.!?\s]*$", re.IGNORECASE),
+)  # anchored whole-message patterns, case-insensitive; trailing ".", "!", "?" or whitespace allowed
+
+
+def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
+    """Map a handful of exact DM restart phrases onto the ``/restart`` command.
+
+    Phrases such as ``restart gateway`` are high-impact: if they reach the
+    LLM/tool path the running agent may restart the gateway from inside
+    itself, leaving it stuck in ``draining`` while it waits for that same
+    agent to finish. Rewriting ``event.text`` in place avoids that path.
+
+    Only DM text messages that fully match a restart pattern are rewritten;
+    group chats keep natural-language semantics. Any error is swallowed.
+    """
+    try:
+        if event is None or event.message_type != MessageType.TEXT:
+            return
+        stripped = (event.text or "").strip()
+        if not stripped or stripped.startswith("/"):
+            return
+        # Scope: direct messages only.
+        chat_type = getattr(getattr(event, "source", None), "chat_type", None)
+        if chat_type != "dm":
+            return
+        if any(rx.match(stripped) for rx in _PLAINTEXT_GATEWAY_RESTART_PATTERNS):
+            event.text = "/restart"
+    except Exception:
+        # Best-effort: coercion must never break message handling.
+        return
+
+
 @dataclass
 class SendResult:
    """Result of sending a message."""
@@ -982,6 +1063,61 @@ def resolve_channel_prompt(
    return None


+def resolve_channel_skills(
+    config_extra: dict,
+    channel_id: str,
+    parent_id: str | None = None,
+) -> list[str] | None:
+    """Resolve auto-loaded skill(s) for a channel/thread from platform config.
+
+    Looks up ``channel_skill_bindings`` in the adapter's ``config.extra`` dict.
+
+    Config format::
+
+        channel_skill_bindings:
+          - id: "C0123"          # Slack channel ID or Discord channel/forum ID
+            skills: ["skill-a", "skill-b"]
+          - id: "D0ABCDE"
+            skill: "solo-skill"  # single string also accepted
+
+    Prefers an exact match on *channel_id*; falls back to *parent_id*
+    (useful for forum threads / Slack threads inheriting the parent channel's
+    binding), regardless of the order in which the bindings are listed.
+
+    Returns a deduplicated list of skill names (order preserved), or None if
+    no match is found.
+    """
+    bindings = config_extra.get("channel_skill_bindings") or []
+    if not isinstance(bindings, list) or not bindings:
+        return None
+    # Ordered by priority: the channel's own binding beats the parent's.
+    ids_to_check: list[str] = []
+    if channel_id:
+        ids_to_check.append(str(channel_id))
+    if parent_id:
+        ids_to_check.append(str(parent_id))
+    if not ids_to_check:
+        return None
+    for wanted_id in ids_to_check:
+        for entry in bindings:
+            if not isinstance(entry, dict) or str(entry.get("id", "")) != wanted_id:
+                continue
+            skills = entry.get("skills") or entry.get("skill")
+            if isinstance(skills, str):
+                s = skills.strip()
+                return [s] if s else None
+            if isinstance(skills, list) and skills:
+                seen: list[str] = []
+                for name in skills:
+                    if not isinstance(name, str):
+                        continue
+                    nm = name.strip()
+                    if nm and nm not in seen:
+                        seen.append(nm)
+                return seen or None
+    return None
+
+
 class BasePlatformAdapter(ABC):
    """
    Base class for platform adapters.
@@ -1025,7 +1161,20 @@ class BasePlatformAdapter(ABC):
        self._post_delivery_callbacks: Dict[str, Any] = {}
        self._expected_cancelled_tasks: set[asyncio.Task] = set()
        self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
-        # Chats where auto-TTS on voice input is disabled (set by /voice off)
+        # Auto-TTS on voice input: ``_auto_tts_default`` is the global default
+        # (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
+        # Per-chat overrides live in two sets populated from ``_voice_mode``:
+        #   - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
+        #     or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
+        #     the global default is False.
+        #   - ``_auto_tts_disabled_chats``: chat explicitly opted out via
+        #     ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
+        #     global default is True.
+        # The gate in _process_message() is:
+        #   fire if chat in _auto_tts_enabled_chats
+        #     OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
+        self._auto_tts_default: bool = False
+        self._auto_tts_enabled_chats: set = set()
        self._auto_tts_disabled_chats: set = set()
        # Chats where typing indicator is paused (e.g. during approval waits).
        # _keep_typing skips send_typing when the chat_id is in this set.
@@ -1047,6 +1196,21 @@ class BasePlatformAdapter(ABC):
    def fatal_error_retryable(self) -> bool:
        return self._fatal_error_retryable

+    def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
+        """Decide whether a voice message in ``chat_id`` gets an auto-TTS reply.
+
+        Precedence (Issue #16007):
+          1. Chat opted in (``/voice on`` / ``/voice tts``) → True, even when
+             ``voice.auto_tts`` is False.
+          2. Chat opted out (``/voice off``) → False.
+          3. Otherwise the global ``voice.auto_tts`` default applies.
+        """
+        # Explicit per-chat choices override the global default.
+        opted_in = chat_id in self._auto_tts_enabled_chats
+        if not opted_in and chat_id in self._auto_tts_disabled_chats:
+            return False
+        return opted_in or bool(self._auto_tts_default)
+
    def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
        self._fatal_error_handler = handler

@@ -1230,6 +1394,27 @@ class BasePlatformAdapter(ABC):
        """
        return SendResult(success=False, error="Not supported")

+    async def delete_message(
+        self,
+        chat_id: str,
+        message_id: str,
+    ) -> bool:
+        """
+        Delete a previously sent message (optional capability).
+
+        This base implementation deletes nothing and returns ``False``, so
+        callers fall back to leaving the message in place.  Subclasses should
+        override for platforms with a deletion API (e.g. Telegram ``deleteMessage``).
+
+        Used by the stream consumer's fresh-final cleanup path (see
+        openclaw/openclaw#72038) to remove long-lived preview messages
+        after sending the completed reply as a fresh message so the
+        platform's visible timestamp reflects completion time.
+
+        Returns ``True`` on successful deletion, ``False`` otherwise.
+        """
+        return False
+
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
@@ -1557,13 +1742,41 @@ class BasePlatformAdapter(ABC):
        the agent is waiting for dangerous-command approval).  This is critical
        for Slack's Assistant API where ``assistant_threads_setStatus`` disables
        the compose box — pausing lets the user type ``/approve`` or ``/deny``.
+
+        Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
+        network round-trip can't stall the refresh cadence.  Telegram- and
+        Discord-side typing expire after ~5s; if any individual send_typing
+        takes longer than the refresh interval, the bubble would die and
+        stay dead until that call returns.  Abandoning the slow call lets
+        the next tick fire a fresh send_typing on schedule — as long as
+        one of them succeeds within the 5s platform-side window, the bubble
+        stays visible across provider stalls / upstream API timeouts.
        """
+        # Bound each send_typing round-trip so the refresh cadence isn't
+        # gated on network health.  Must stay below ``interval`` so a slow
+        # call gets abandoned before the next scheduled tick.
+        _send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
        try:
            while True:
                if stop_event is not None and stop_event.is_set():
                    return
                if chat_id not in self._typing_paused:
-                    await self.send_typing(chat_id, metadata=metadata)
+                    try:
+                        await asyncio.wait_for(
+                            self.send_typing(chat_id, metadata=metadata),
+                            timeout=_send_typing_timeout,
+                        )
+                    except asyncio.TimeoutError:
+                        # Slow network — abandon this tick, keep the loop
+                        # on schedule so the next send_typing fires fresh.
+                        pass
+                    except asyncio.CancelledError:
+                        raise
+                    except Exception as typing_err:
+                        logger.debug(
+                            "[%s] send_typing error (non-fatal): %s",
+                            self.name, typing_err,
+                        )
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
@@ -2015,6 +2228,8 @@ class BasePlatformAdapter(ABC):
        """
        if not self._message_handler:
            return
+
+        coerce_plaintext_gateway_command(event)
        
        session_key = build_session_key(
            event.source,
@@ -2214,12 +2429,14 @@ class BasePlatformAdapter(ABC):
                    logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
                
                # Auto-TTS: if voice message, generate audio FIRST (before sending text)
-                # Skipped when the chat has voice mode disabled (/voice off)
+                # Gated via ``_should_auto_tts_for_chat``: fires when the chat has
+                # an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
+                # True globally and no ``/voice off`` has been issued.
                _tts_path = None
-                if (event.message_type == MessageType.VOICE
+                if (self._should_auto_tts_for_chat(event.source.chat_id)
+                        and event.message_type == MessageType.VOICE
                        and text_content
-                        and not media_files
-                        and event.source.chat_id not in self._auto_tts_disabled_chats):
+                        and not media_files):
                    try:
                        from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                        if check_tts_requirements():
@@ -305,7 +305,7 @@ class VoiceReceiver:
        encrypted = bytes(payload_with_nonce[:-4])

        try:
-            import nacl.secret  # noqa: delayed import – only in voice path
+            import nacl.secret  # noqa: E402 — delayed import, only in voice path
            box = nacl.secret.Aead(self._secret_key)
            decrypted = box.decrypt(encrypted, header, bytes(nonce))
        except Exception as e:
@@ -813,7 +813,14 @@ class DiscordAdapter(BasePlatformAdapter):
                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
                return

-            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
+            # Discord's per-app command-management bucket is ~5 writes / 20 s,
+            # so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
+            # desired = 107 writes) takes several minutes of forced waits.
+            # A flat 30 s budget blew up reliably under bucket pressure and
+            # left slash commands broken for ~60 min until the bucket fully
+            # recovered. Use a wide ceiling; the cap still guards against a
+            # true hang. (#16713)
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
            logger.info(
                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
                self.name,
@@ -825,7 +832,11 @@ class DiscordAdapter(BasePlatformAdapter):
                summary["deleted"],
            )
        except asyncio.TimeoutError:
-            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
+            logger.warning(
+                "[%s] Slash command sync timed out — Discord rate-limit bucket "
+                "may be saturated; will retry on next reconnect",
+                self.name,
+            )
        except asyncio.CancelledError:
            raise
        except Exception as e:  # pragma: no cover - defensive logging
@@ -2315,11 +2326,6 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_background(interaction: discord.Interaction, prompt: str):
            await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")

-        @tree.command(name="btw", description="Ephemeral side question using session context")
-        @discord.app_commands.describe(question="Your side question (no tools, not persisted)")
-        async def slash_btw(interaction: discord.Interaction, question: str):
-            await self._run_simple_slash(interaction, f"/btw {question}")
-
        # ── Auto-register any gateway-available commands not yet on the tree ──
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
@@ -2684,21 +2690,8 @@ class DiscordAdapter(BasePlatformAdapter):
                skills: ["skill-a", "skill-b"]
        Also checks parent_id so forum threads inherit the forum's bindings.
        """
-        bindings = self.config.extra.get("channel_skill_bindings", [])
-        if not bindings:
-            return None
-        ids_to_check = {channel_id}
-        if parent_id:
-            ids_to_check.add(parent_id)
-        for entry in bindings:
-            entry_id = str(entry.get("id", ""))
-            if entry_id in ids_to_check:
-                skills = entry.get("skills") or entry.get("skill")
-                if isinstance(skills, str):
-                    return [skills]
-                if isinstance(skills, list) and skills:
-                    return list(dict.fromkeys(skills))  # dedup, preserve order
-        return None
+        from gateway.platforms.base import resolve_channel_skills
+        return resolve_channel_skills(self.config.extra, channel_id, parent_id)

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
        """Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
@@ -3312,6 +3305,7 @@ class DiscordAdapter(BasePlatformAdapter):
        chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)

        # Build source
+        guild = getattr(message, "guild", None)
        source = self.build_source(
            chat_id=str(effective_channel.id),
            chat_name=chat_name,
@@ -3321,7 +3315,7 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
-            guild_id=str(message.guild.id) if message.guild else None,
+            guild_id=str(guild.id) if guild else None,
            parent_chat_id=parent_channel_id,
            message_id=str(message.id),
        )
@@ -28,6 +28,7 @@ from email.header import decode_header
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.mime.base import MIMEBase
+from email.utils import formatdate
 from email import encoders
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -504,6 +505,7 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

+        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -586,6 +588,7 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

+        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -974,7 +974,6 @@ def build_whole_comment_prompt(

 def _resolve_model_and_runtime() -> Tuple[str, dict]:
    """Resolve model and provider credentials, same as gateway message handling."""
-    import os
    from gateway.run import _load_gateway_config, _resolve_gateway_model

    user_config = _load_gateway_config()
@@ -11,10 +11,10 @@ import logging
 import re
 import time
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, Optional
+from typing import TYPE_CHECKING, Dict

 if TYPE_CHECKING:
-    from gateway.platforms.base import BasePlatformAdapter, MessageEvent
+    from gateway.platforms.base import MessageEvent

 logger = logging.getLogger(__name__)

@@ -57,6 +57,15 @@ class MessageDeduplicator:
        if len(self._seen) > self._max_size:
            cutoff = now - self._ttl
            self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
+            if len(self._seen) > self._max_size:
+                # TTL pruning alone does not cap the cache when every entry is
+                # still fresh. Keep the newest entries so the helper's
+                # max_size bound is enforced under sustained traffic.
+                newest = sorted(
+                    self._seen.items(),
+                    key=lambda item: item[1],
+                )[-self._max_size:]
+                self._seen = dict(newest)
        return False

    def clear(self):
@@ -11,6 +11,7 @@ Environment variables:
    MATRIX_PASSWORD             Password (alternative to access token)
    MATRIX_ENCRYPTION           Set "true" to enable E2EE
    MATRIX_DEVICE_ID            Stable device ID for E2EE persistence across restarts
+    MATRIX_PROXY                HTTP(S) or SOCKS proxy URL for Matrix traffic
    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
    MATRIX_REACTIONS        Set "false" to disable processing lifecycle reactions
@@ -18,6 +19,7 @@ Environment variables:
    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
+    MATRIX_DM_AUTO_THREAD       Auto-create threads for DM messages (default: false)
    MATRIX_RECOVERY_KEY         Recovery key for cross-signing verification after device key rotation
    MATRIX_DM_MENTION_THREADS   Create a thread when bot is @mentioned in a DM (default: false)
 """
@@ -30,6 +32,8 @@ import mimetypes
 import os
 import re
 import time
+from dataclasses import dataclass, field
+
 from html import escape as _html_escape
 from pathlib import Path
 from typing import Any, Dict, Optional, Set
@@ -95,11 +99,25 @@ from gateway.platforms.base import (
    MessageType,
    ProcessingOutcome,
    SendResult,
+    resolve_proxy_url,
+    proxy_kwargs_for_aiohttp,
 )
 from gateway.platforms.helpers import ThreadParticipationTracker

 logger = logging.getLogger(__name__)

+
+@dataclass
+class _MatrixApprovalPrompt:
+    """Tracks a pending Matrix reaction-based exec approval prompt."""
+
+    session_key: str
+    chat_id: str
+    message_id: str
+    resolved: bool = False
+    # emoji -> event_id; not a constructor argument, each prompt gets its own dict
+    bot_reaction_events: dict[str, str] = field(default_factory=dict, init=False)
+
 # Matrix message size limit (4000 chars practical, spec has no hard limit
 # but clients render poorly above this).
 MAX_MESSAGE_LENGTH = 4000
@@ -114,11 +132,85 @@ _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5

+_OUTBOUND_MENTION_RE = re.compile(
+    r"(?<![\w/])(@[0-9A-Za-z._=/-]+:[0-9A-Za-z.-]+(?::\d+)?)"
+)  # captures "@name:host[:port]"-shaped tokens not preceded by a word character or "/"

 _E2EE_INSTALL_HINT = (
    "Install with: pip install 'mautrix[encryption]'  (requires libolm C library)"
 )

+_MATRIX_IMAGE_FILENAME_EXTS = frozenset({  # lowercase suffixes, each including the leading dot
+    ".jpg",
+    ".jpeg",
+    ".png",
+    ".gif",
+    ".webp",
+    ".bmp",
+    ".svg",
+    ".heic",
+    ".heif",
+    ".avif",
+})
+
+
+def _looks_like_matrix_image_filename(text: str) -> bool:
+    """Heuristically detect an ``m.image`` body that is only the upload's filename.
+
+    When no caption is given, Matrix clients commonly put the uploaded file's
+    name into ``content.body``. Passing that on as user-authored text confuses
+    downstream vision enrichment, so callers can use this to spot it.
+    """
+    candidate = str(text or "").strip()
+    # Multi-line text or anything shaped like a directory is not a bare filename.
+    if not candidate or "\n" in candidate or candidate.endswith("/"):
+        return False
+
+    # A bare filename has no directory component and must carry an extension.
+    as_path = Path(candidate)
+    if not as_path.name or as_path.name != candidate:
+        return False
+    ext = as_path.suffix.lower()
+    if not ext:
+        return False
+
+    mime, _encoding = mimetypes.guess_type(as_path.name)
+    by_mime = bool(mime) and mime.startswith("image/")
+    return by_mime or ext in _MATRIX_IMAGE_FILENAME_EXTS
+
+
+def _create_matrix_session(proxy_url: str | None):
+    """Build the ``aiohttp.ClientSession`` used for Matrix HTTP traffic.
+
+    mautrix's ``HTTPAPI._send()`` calls ``session.request()`` without forwarding
+    per-request ``proxy=`` kwargs, so the proxy has to live on the session:
+
+    - no proxy → ``trust_env=True`` so ``HTTP_PROXY`` / ``HTTPS_PROXY`` apply
+    - SOCKS    → ``aiohttp_socks.ProxyConnector`` (connector-level); without
+      aiohttp-socks a warning is logged and the ``trust_env`` session is used
+    - HTTP(S)  → aiohttp's session-wide ``proxy=`` default
+    """
+    import aiohttp
+
+    if not proxy_url:
+        return aiohttp.ClientSession(trust_env=True)
+
+    scheme = proxy_url.split("://")[0].lower()
+    if not scheme.startswith("socks"):
+        return aiohttp.ClientSession(proxy=proxy_url)
+
+    try:
+        from aiohttp_socks import ProxyConnector
+        socks_connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return aiohttp.ClientSession(connector=socks_connector)
+    except ImportError:
+        logger.warning(
+            "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+            "Run: pip install aiohttp-socks",
+            proxy_url,
+        )
+        return aiohttp.ClientSession(trust_env=True)
+

 def _check_e2ee_deps() -> bool:
    """Return True if mautrix E2EE dependencies (python-olm) are available."""
@@ -260,6 +352,9 @@ class MatrixAdapter(BasePlatformAdapter):
            "1",
            "yes",
        )
+        self._dm_auto_thread: bool = os.getenv(
+            "MATRIX_DM_AUTO_THREAD", "false"
+        ).lower() in ("true", "1", "yes")
        self._dm_mention_threads: bool = os.getenv(
            "MATRIX_DM_MENTION_THREADS", "false"
        ).lower() in ("true", "1", "yes")
@@ -270,6 +365,11 @@ class MatrixAdapter(BasePlatformAdapter):
        ).lower() not in ("false", "0", "no")
        self._pending_reactions: dict[tuple[str, str], str] = {}

+        # Proxy support — resolve once at init, reuse for all HTTP traffic.
+        self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY")
+        if self._proxy_url:
+            logger.info("Matrix: proxy configured — %s", self._proxy_url)
+
        # Text batching: merge rapid successive messages (Telegram-style).
        # Matrix clients split long messages around 4000 chars.
        self._text_batch_delay_seconds = float(
@@ -281,6 +381,18 @@ class MatrixAdapter(BasePlatformAdapter):
        self._pending_text_batches: Dict[str, MessageEvent] = {}
        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}

+        # Matrix reaction-based dangerous command approvals.
+        self._approval_reaction_map = {
+            "✅": "once",
+            "❎": "deny",
+        }
+        self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {}
+        self._approval_prompt_by_session: Dict[str, str] = {}
+        allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "")
+        self._allowed_user_ids: Set[str] = {
+            u.strip() for u in allowed_users_raw.split(",") if u.strip()
+        }
+
    def _is_duplicate_event(self, event_id) -> bool:
        """Return True if this event was already processed. Tracks the ID otherwise."""
        if not event_id:
@@ -326,7 +438,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    )
                    return False
        except Exception as exc:
-            logger.error("Matrix: post-upload key verification failed: %s", exc)
+            logger.error("Matrix: post-upload key verification failed: %s", exc, exc_info=True)
            return False
        return True

@@ -342,6 +454,7 @@ class MatrixAdapter(BasePlatformAdapter):
            logger.error(
                "Matrix: cannot verify device keys on server: %s — refusing E2EE",
                exc,
+                exc_info=True,
            )
            return False

@@ -356,7 +469,7 @@ class MatrixAdapter(BasePlatformAdapter):
            try:
                await olm.share_keys()
            except Exception as exc:
-                logger.error("Matrix: failed to re-upload device keys: %s", exc)
+                logger.error("Matrix: failed to re-upload device keys: %s", exc, exc_info=True)
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)

@@ -396,6 +509,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    "Try generating a new access token to get a fresh device.",
                    client.device_id,
                    exc,
+                    exc_info=True,
                )
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)
@@ -420,9 +534,11 @@ class MatrixAdapter(BasePlatformAdapter):
        _STORE_DIR.mkdir(parents=True, exist_ok=True)

        # Create the HTTP API layer.
+        client_session = _create_matrix_session(self._proxy_url)
        api = HTTPAPI(
            base_url=self._homeserver,
            token=self._access_token or "",
+            client_session=client_session,
        )

        # Create the client.
@@ -465,6 +581,7 @@ class MatrixAdapter(BasePlatformAdapter):
                logger.error(
                    "Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER: %s",
                    exc,
+                    exc_info=True,
                )
                await api.session.close()
                return False
@@ -607,6 +724,44 @@ class MatrixAdapter(BasePlatformAdapter):
                        logger.warning(
                            "Matrix: recovery key verification failed: %s", exc
                        )
+                else:
+                    # No recovery key — bootstrap cross-signing if the bot
+                    # has none yet. Without this, Element shows "Encrypted
+                    # by a device not verified by its owner" on every
+                    # message from this bot, indefinitely. mautrix's
+                    # generate_recovery_key does the full flow: generates
+                    # MSK/SSK/USK, uploads private keys to SSSS, publishes
+                    # public keys to the homeserver, and signs the current
+                    # device with the new SSK. Some homeservers require UIA
+                    # for /keys/device_signing/upload — those will need an
+                    # alternate path; Continuwuity and Synapse-with-shared-
+                    # secret accept the unauthenticated upload.
+                    try:
+                        own_xsign = await olm.get_own_cross_signing_public_keys()
+                    except Exception as exc:
+                        own_xsign = None
+                        logger.warning(
+                            "Matrix: cross-signing key lookup failed: %s", exc
+                        )
+                    if own_xsign is None:
+                        try:
+                            new_recovery_key = await olm.generate_recovery_key()
+                            logger.warning(
+                                "Matrix: bootstrapped cross-signing for %s. "
+                                "SAVE THIS RECOVERY KEY — set "
+                                "MATRIX_RECOVERY_KEY for future restarts so "
+                                "the bot can re-sign its device after key "
+                                "rotation: %s",
+                                client.mxid,
+                                new_recovery_key,
+                            )
+                        except Exception as exc:
+                            logger.warning(
+                                "Matrix: cross-signing bootstrap failed "
+                                "(non-fatal — Element will show 'not "
+                                "verified by its owner'): %s",
+                                exc,
+                            )

                client.crypto = olm
                logger.info(
@@ -664,6 +819,7 @@ class MatrixAdapter(BasePlatformAdapter):
                        await asyncio.gather(*tasks)
                except Exception as exc:
                    logger.warning("Matrix: initial sync event dispatch error: %s", exc)
+                await self._join_pending_invites(sync_data)
            else:
                logger.warning(
                    "Matrix: initial sync returned unexpected type %s",
@@ -727,17 +883,8 @@ class MatrixAdapter(BasePlatformAdapter):
        chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)

        last_event_id = None
-        for chunk in chunks:
-            msg_content: Dict[str, Any] = {
-                "msgtype": "m.text",
-                "body": chunk,
-            }
-
-            # Convert markdown to HTML for rich rendering.
-            html = self._markdown_to_html(chunk)
-            if html and html != chunk:
-                msg_content["format"] = "org.matrix.custom.html"
-                msg_content["formatted_body"] = html
+        for i, chunk in enumerate(chunks):
+            msg_content = self._build_text_message_content(chunk)

            # Reply-to support.
            if reply_to:
@@ -844,25 +991,21 @@ class MatrixAdapter(BasePlatformAdapter):
        """Edit an existing message (via m.replace)."""

        formatted = self.format_message(content)
+        new_content = self._build_text_message_content(formatted)
        msg_content: Dict[str, Any] = {
            "msgtype": "m.text",
            "body": f"* {formatted}",
-            "m.new_content": {
-                "msgtype": "m.text",
-                "body": formatted,
-            },
-            "m.relates_to": {
-                "rel_type": "m.replace",
-                "event_id": message_id,
-            },
+            "m.new_content": new_content,
        }
-
-        html = self._markdown_to_html(formatted)
-        if html and html != formatted:
-            msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
-            msg_content["m.new_content"]["formatted_body"] = html
+        if "m.mentions" in new_content:
+            msg_content["m.mentions"] = new_content["m.mentions"]
+        if "formatted_body" in new_content:
            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = f"* {html}"
+            msg_content["formatted_body"] = f'* {new_content["formatted_body"]}'
+        msg_content["m.relates_to"] = {
+            "rel_type": "m.replace",
+            "event_id": message_id,
+        }

        try:
            event_id = await self._client.send_message_event(
@@ -895,10 +1038,12 @@ class MatrixAdapter(BasePlatformAdapter):
            # Try aiohttp first (always available), fall back to httpx
            try:
                import aiohttp as _aiohttp
-
-                async with _aiohttp.ClientSession(trust_env=True) as http:
+                _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url)
+                async with _aiohttp.ClientSession(**_sess_kw) as http:
                    async with http.get(
-                        image_url, timeout=_aiohttp.ClientTimeout(total=30)
+                        image_url,
+                        timeout=_aiohttp.ClientTimeout(total=30),
+                        **_req_kw,
                    ) as resp:
                        resp.raise_for_status()
                        data = await resp.read()
@@ -908,8 +1053,10 @@ class MatrixAdapter(BasePlatformAdapter):
                        )
            except ImportError:
                import httpx
-
-                async with httpx.AsyncClient() as http:
+                _httpx_kw: dict = {}
+                if self._proxy_url:
+                    _httpx_kw["proxy"] = self._proxy_url
+                async with httpx.AsyncClient(**_httpx_kw) as http:
                    resp = await http.get(image_url, follow_redirects=True, timeout=30)
                    resp.raise_for_status()
                    data = resp.content
@@ -984,6 +1131,56 @@ class MatrixAdapter(BasePlatformAdapter):
            chat_id, video_path, "m.video", caption, reply_to, metadata=metadata
        )

+    async def send_exec_approval(
+        self,
+        chat_id: str,
+        command: str,
+        session_key: str,
+        description: str = "dangerous command",
+        metadata: Optional[dict] = None,
+    ) -> SendResult:
+        """Post an exec-approval prompt to a Matrix room and seed its reactions.
+
+        The prompt shows a preview of ``command`` (capped at 2000 characters
+        plus ``...``), ``description`` as the reason, and the text commands
+        and reactions that answer it.  Once sent, the prompt is registered
+        under both its event ID and ``session_key`` (replacing any earlier
+        prompt recorded for that session), and the bot adds ✅ and ❎
+        reactions to the message.
+
+        Returns:
+            The ``SendResult`` of the prompt message, or a failed result with
+            ``"Not connected"`` when there is no client.  Failures while
+            adding the seed reactions are logged at debug level only.
+        """
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+
+        if len(command) > 2000:
+            cmd_preview = command[:2000] + "..."
+        else:
+            cmd_preview = command
+        text = (
+            "⚠️ **Dangerous command requires approval**\n"
+            f"```\n{cmd_preview}\n```\n"
+            f"Reason: {description}\n\n"
+            "Reply `/approve` to execute, `/approve session` to approve this pattern for the session, "
+            "`/approve always` to approve permanently, or `/deny` to cancel.\n\n"
+            "You can also click the reaction to approve:\n"
+            "✅ = /approve\n"
+            "❎ = /deny"
+        )
+
+        sent = await self.send(chat_id, text, metadata=metadata)
+        if not (sent.success and sent.message_id):
+            # Nothing to attach reactions to; hand the send outcome back as-is.
+            return sent
+
+        prompt = _MatrixApprovalPrompt(
+            session_key=session_key,
+            chat_id=chat_id,
+            message_id=sent.message_id,
+        )
+        # Drop the event-ID entry of any prompt previously recorded for this
+        # session before registering the new one.
+        superseded_event = self._approval_prompt_by_session.get(session_key)
+        if superseded_event:
+            self._approval_prompts_by_event.pop(superseded_event, None)
+        self._approval_prompts_by_event[sent.message_id] = prompt
+        self._approval_prompt_by_session[session_key] = sent.message_id
+
+        for emoji in ("✅", "❎"):
+            try:
+                seeded = await self._send_reaction(chat_id, sent.message_id, emoji)
+                # Remember the bot's own reaction event so it can be redacted
+                # once the prompt has been answered.
+                if seeded:
+                    prompt.bot_reaction_events[emoji] = str(seeded)
+            except Exception as exc:
+                logger.debug("Matrix: failed to add approval reaction %s: %s", emoji, exc)
+
+        return sent
+
    def format_message(self, content: str) -> str:
        """Pass-through — Matrix supports standard Markdown natively."""
        # Strip image markdown; media is uploaded separately.
@@ -1115,9 +1312,15 @@ class MatrixAdapter(BasePlatformAdapter):
        next_batch = await client.sync_store.get_next_batch()
        while not self._closing:
            try:
-                sync_data = await client.sync(
-                    since=next_batch,
-                    timeout=30000,
+                # Wrap in asyncio.wait_for to guard against TCP-level hangs
+                # that the Matrix long-poll timeout cannot catch. Long-poll
+                # is 30s, so 45s gives 15s slack for network drain.
+                sync_data = await asyncio.wait_for(
+                    client.sync(
+                        since=next_batch,
+                        timeout=30000,
+                    ),
+                    timeout=45.0,
                )

                # nio returns SyncError objects (not exceptions) for auth
@@ -1153,6 +1356,7 @@ class MatrixAdapter(BasePlatformAdapter):
                            await asyncio.gather(*tasks)
                    except Exception as exc:
                        logger.warning("Matrix: sync event dispatch error: %s", exc)
+                    await self._join_pending_invites(sync_data)

            except asyncio.CancelledError:
                return
@@ -1178,13 +1382,92 @@ class MatrixAdapter(BasePlatformAdapter):
    # Event callbacks
    # ------------------------------------------------------------------

+    def _is_self_sender(self, sender: str) -> bool:
+        """Tell whether ``sender`` is the bot's own Matrix account.
+
+        Both IDs are stripped of surrounding whitespace and lowercased
+        before comparison — some homeservers normalize the localpart case
+        differently at different API surfaces, and the reply-loop tail of
+        the "hall of mirrors" bug (#15763) has been observed with the bot's
+        own account bypassing a case-sensitive equality check.
+
+        If ``self._user_id`` is empty (whoami hasn't resolved yet, or login
+        failed) there is no way to prove a sender is NOT us, so the answer
+        is True: an unidentified bot dropping its own events is always
+        preferable to falling into an echo loop.
+        """
+        normalized_own = (self._user_id or "").strip().lower()
+        # Short-circuits to True before touching ``sender`` when our own ID
+        # is unknown.
+        return (not normalized_own) or sender.strip().lower() == normalized_own
+
+    @staticmethod
+    def _is_system_or_bridge_sender(sender: str) -> bool:
+        """Tell whether ``sender`` looks like a system / bridge / appservice
+        identity rather than a real user.
+
+        Appservice namespaces on Matrix conventionally prefix bot / puppet
+        user IDs with an underscore (e.g. ``@_telegram_12345:server``,
+        ``@_discord_999:server``, ``@_slack_...:server``).  Server-notices
+        bots and bridge-controller bots on many homeservers use the same
+        pattern.
+
+        Such identities must never be offered a pairing code: an operator
+        approving the code would hand the bridge itself permanent
+        authorization, and every outbound message relayed by the bridge
+        would then loop back into the agent as an "authorized user
+        message" — the root of issue #15763.
+
+        Returns True for:
+            ``@_something:server``   — appservice namespace convention
+            ``@:server``             — malformed / empty localpart
+            ``:server``              — malformed, no leading ``@``
+            empty / whitespace-only  — no sender at all
+        """
+        candidate = (sender or "").strip()
+        # Drop the leading sigil, if any, so the localpart starts the string.
+        if candidate.startswith("@"):
+            candidate = candidate[1:]
+        # partition() yields the whole string when there is no ':' at all.
+        localpart = candidate.partition(":")[0]
+        return (not localpart) or localpart.startswith("_")
+
    async def _on_room_message(self, event: Any) -> None:
        """Handle incoming room message events (text, media)."""
        room_id = str(getattr(event, "room_id", ""))
        sender = str(getattr(event, "sender", ""))

-        # Ignore own messages.
-        if sender == self._user_id:
+        # Diagnostic: confirm the callback is firing at all when DEBUG is on.
+        # Helps users troubleshoot silent inbound issues like #5819, #7914, #12614.
+        logger.debug(
+            "Matrix: callback fired — event %s from %s in %s",
+            getattr(event, "event_id", "?"),
+            sender,
+            room_id,
+        )
+
+        # Ignore own messages (case-insensitive; also drops when our own
+        # user_id hasn't been resolved yet — see _is_self_sender docstring
+        # and issue #15763).
+        if self._is_self_sender(sender):
+            return
+
+        # Ignore appservice / bridge / system identities so they never
+        # trigger the pairing flow.  Once a bridge user is paired, every
+        # outbound message it relays would loop back as an authorized
+        # user message (the "hall of mirrors" in #15763).
+        if self._is_system_or_bridge_sender(sender):
+            logger.debug(
+                "Matrix: ignoring system/bridge sender %s in %s",
+                sender,
+                room_id,
+            )
            return

        # Deduplicate by event ID.
@@ -1280,6 +1563,12 @@ class MatrixAdapter(BasePlatformAdapter):
            in_bot_thread = bool(thread_id and thread_id in self._threads)
            if self._require_mention and not is_free_room and not in_bot_thread:
                if not is_mentioned:
+                    logger.debug(
+                        "Matrix: ignoring message %s in %s — no @mention "
+                        "(set MATRIX_REQUIRE_MENTION=false to disable)",
+                        event_id,
+                        room_id,
+                    )
                    return None

        # DM mention-thread.
@@ -1292,7 +1581,7 @@ class MatrixAdapter(BasePlatformAdapter):
            body = self._strip_mention(body)

        # Auto-thread.
-        if not is_dm and not thread_id and self._auto_thread:
+        if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)):
            thread_id = event_id
            self._threads.mark(thread_id)

@@ -1534,6 +1823,9 @@ class MatrixAdapter(BasePlatformAdapter):
            return
        body, is_dm, chat_type, thread_id, display_name, source = ctx

+        if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
+            body = ""
+
        allow_http_fallback = bool(http_url) and not is_encrypted_media
        media_urls = (
            [cached_path]
@@ -1563,13 +1855,35 @@ class MatrixAdapter(BasePlatformAdapter):
            "Matrix: invited to %s — joining",
            room_id,
        )
+        await self._join_room_by_id(room_id)
+
+    async def _join_room_by_id(self, room_id: str) -> bool:
+        """Join a room by ID and refresh local caches on success.
+
+        Returns True when the room is already in ``_joined_rooms`` or the
+        join and the DM-cache refresh both complete; returns False for an
+        empty ``room_id`` or when either step raises (the error is logged
+        as a warning rather than propagated).
+        """
+        if not room_id:
+            return False
+        if room_id in self._joined_rooms:
+            return True
         try:
             await self._client.join_room(RoomID(room_id))
             self._joined_rooms.add(room_id)
             logger.info("Matrix: joined %s", room_id)
             await self._refresh_dm_cache()
+            return True
         except Exception as exc:
             logger.warning("Matrix: error joining %s: %s", room_id, exc)
+            return False
+
+    async def _join_pending_invites(self, sync_data: Dict[str, Any]) -> None:
+        """Join rooms still present in rooms.invite after sync processing.
+
+        Args:
+            sync_data: The sync response.  Anything that is not a dict, or
+                whose ``rooms`` / ``rooms.invite`` entries are missing,
+                ``None``, or not dicts, is treated as "no pending invites"
+                so a malformed response can never raise from here.
+        """
+        if not isinstance(sync_data, dict):
+            return
+        # ``.get("rooms", {})`` would still hand back None for an explicit
+        # null value, so type-check each level instead of relying on defaults.
+        rooms = sync_data.get("rooms")
+        if not isinstance(rooms, dict):
+            return
+        invites = rooms.get("invite")
+        if not isinstance(invites, dict):
+            return
+        for room_id in invites:
+            if room_id in self._joined_rooms:
+                continue
+            logger.info("Matrix: reconciling pending invite for %s", room_id)
+            await self._join_room_by_id(str(room_id))

    # ------------------------------------------------------------------
    # Reactions (send, receive, processing lifecycle)
@@ -1654,7 +1968,7 @@ class MatrixAdapter(BasePlatformAdapter):
    async def _on_reaction(self, event: Any) -> None:
        """Handle incoming reaction events."""
        sender = str(getattr(event, "sender", ""))
-        if sender == self._user_id:
+        if self._is_self_sender(sender):
            return
        event_id = str(getattr(event, "event_id", ""))
        if self._is_duplicate_event(event_id):
@@ -1684,6 +1998,51 @@ class MatrixAdapter(BasePlatformAdapter):
                room_id,
            )

+            # Check if this reaction resolves a pending approval prompt.
+            prompt = self._approval_prompts_by_event.get(reacts_to)
+            if prompt and not prompt.resolved:
+                if room_id != prompt.chat_id:
+                    return
+                if self._allowed_user_ids and sender not in self._allowed_user_ids:
+                    logger.info(
+                        "Matrix: ignoring approval reaction from unauthorized user %s on %s",
+                        sender, reacts_to,
+                    )
+                    return
+                choice = self._approval_reaction_map.get(key)
+                if not choice:
+                    return
+                try:
+                    from tools.approval import resolve_gateway_approval
+
+                    count = resolve_gateway_approval(prompt.session_key, choice)
+                    if count:
+                        prompt.resolved = True
+                        self._approval_prompts_by_event.pop(reacts_to, None)
+                        self._approval_prompt_by_session.pop(prompt.session_key, None)
+                        logger.info(
+                            "Matrix reaction resolved %d approval(s) for session %s "
+                            "(choice=%s, user=%s)",
+                            count, prompt.session_key, choice, sender,
+                        )
+                        # Redact bot's seed reactions, leaving only the user's
+                        await self._redact_bot_approval_reactions(room_id, prompt)
+                except Exception as exc:
+                    logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc)
+
+    async def _redact_bot_approval_reactions(
+        self,
+        room_id: str,
+        prompt: "_MatrixApprovalPrompt",
+    ) -> None:
+        """Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction.
+
+        Each redaction is attempted independently; a failure is logged at
+        debug level and does not stop the remaining redactions.
+        """
+        # Iterate over a snapshot: each redaction awaits, and the task that
+        # seeds the reactions may still be inserting into this dict, which
+        # would otherwise raise "dictionary changed size during iteration".
+        for emoji, evt_id in list(prompt.bot_reaction_events.items()):
+            try:
+                await self.redact_message(room_id, evt_id, "approval resolved")
+                logger.debug("Matrix: redacted bot reaction %s (%s)", emoji, evt_id)
+            except Exception as exc:
+                logger.debug("Matrix: failed to redact bot reaction %s: %s", emoji, exc)
+
    # ------------------------------------------------------------------
    # Text message aggregation (handles Matrix client-side splits)
    # ------------------------------------------------------------------
@@ -1909,11 +2268,7 @@ class MatrixAdapter(BasePlatformAdapter):
        if not self._client or not text:
            return SendResult(success=False, error="No client or empty text")

-        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
-        html = self._markdown_to_html(text)
-        if html and html != text:
-            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = html
+        msg_content = self._build_text_message_content(text, msgtype=msgtype)

        try:
            event_id = await self._client.send_message_event(
@@ -1976,6 +2331,77 @@ class MatrixAdapter(BasePlatformAdapter):
    # Mention detection helpers
    # ------------------------------------------------------------------

+    def _build_text_message_content(self, text: str, msgtype: str = "m.text") -> Dict[str, Any]:
+        """Assemble the content dict for a Matrix text event.
+
+        Always carries ``msgtype`` and the plain ``body``.  ``m.mentions`` is
+        added when the text mentions any Matrix user IDs, and ``format`` /
+        ``formatted_body`` are added when the HTML rendering (with mentions
+        turned into links) is non-empty and differs from the plain text.
+        """
+        content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
+
+        mentioned = self._extract_outbound_mentions(text)
+        if mentioned:
+            content["m.mentions"] = {"user_ids": mentioned}
+
+        rendered = self._markdown_to_html(self._inject_outbound_mention_links(text))
+        if rendered and rendered != text:
+            content.update(
+                {"format": "org.matrix.custom.html", "formatted_body": rendered}
+            )
+
+        return content
+
+    def _extract_outbound_mentions(self, text: str) -> list[str]:
+        """Collect the Matrix user IDs mentioned in outbound text.
+
+        Mentions inside protected markdown regions are ignored.  Each ID
+        appears once, in order of first occurrence.
+        """
+        masked, _ = self._protect_outbound_mention_regions(text)
+        # dict.fromkeys de-duplicates while keeping first-seen order.
+        return list(
+            dict.fromkeys(
+                found.group(1) for found in _OUTBOUND_MENTION_RE.finditer(masked)
+            )
+        )
+
+    def _inject_outbound_mention_links(self, text: str) -> str:
+        """Wrap outbound Matrix mentions in markdown links outside code spans.
+
+        Protected regions (as masked by ``_protect_outbound_mention_regions``)
+        are swapped for placeholders first, mentions in the remaining text
+        become ``[user_id](https://matrix.to/#/user_id)`` links, and the
+        protected regions are then put back verbatim.
+
+        Returns the input unchanged when it is empty.
+        """
+        if not text:
+            return text
+
+        protected, placeholders = self._protect_outbound_mention_regions(text)
+
+        linked = _OUTBOUND_MENTION_RE.sub(
+            lambda match: f"[{match.group(1)}](https://matrix.to/#/{match.group(1)})",
+            protected,
+        )
+
+        # Restore newest-first: a later placeholder can wrap an earlier one
+        # (e.g. a markdown link whose label is an inline code span), so the
+        # outer fragment has to be expanded before its inner token becomes
+        # visible.  Ascending order would leave that inner token in the output.
+        for idx in range(len(placeholders) - 1, -1, -1):
+            linked = linked.replace(
+                f"\x00MENTION_PROTECTED{idx}\x00", placeholders[idx]
+            )
+
+        return linked
+
+    def _protect_outbound_mention_regions(self, text: str) -> tuple[str, list[str]]:
+        """Mask markdown regions in which mentions must remain literal.
+
+        Fenced code blocks, inline code spans and markdown links are replaced
+        — in that order — by ``\\x00MENTION_PROTECTED<n>\\x00`` tokens.
+
+        Returns ``(masked_text, fragments)`` where ``fragments[n]`` is the
+        original text behind token ``n``.  ``None`` or empty input gives
+        ``("", [])``.
+        """
+        stash: list[str] = []
+
+        def _swap(found) -> str:
+            stash.append(found.group(0))
+            return f"\x00MENTION_PROTECTED{len(stash) - 1}\x00"
+
+        masked = text or ""
+        # Order matters: fences first, then inline code, then links.
+        for region_pattern in (
+            r"```[\s\S]*?```",
+            r"`[^`\n]+`",
+            r"\[[^\]]+\]\([^)]+\)",
+        ):
+            masked = re.sub(region_pattern, _swap, masked)
+
+        return masked, stash
+
    def _is_bot_mentioned(
        self,
        body: str,
@@ -2010,13 +2436,33 @@ class MatrixAdapter(BasePlatformAdapter):
        return False

     def _strip_mention(self, body: str) -> str:
-        """Strip the bot's full MXID (``@user:server``) from *body*.
+        """Remove explicit bot mentions from message body.
 
-        The bare localpart is intentionally *not* stripped — it would
-        mangle file paths like ``/home/hermes/media/file.png``.
+        Important: only strip explicit mention tokens (``@user:server`` or
+        ``@localpart``). Do NOT strip bare words matching the bot localpart,
+        otherwise normal phrases like "Hermes Agent" become "Agent".
+
+        Returns the body with those tokens removed, runs of spaces/tabs
+        collapsed to one space, whitespace before punctuation dropped, and
+        the ends trimmed.  An empty or ``None`` body yields ``""``.
         """
+        if not body:
+            return ""
+
+        # Strip explicit full MXID mentions.
         if self._user_id:
             body = body.replace(self._user_id, "")
+
+        # Strip explicit @localpart mentions only (not bare localpart words).
+        if self._user_id and ":" in self._user_id:
+            localpart = self._user_id.split(":")[0].lstrip("@")
+            if localpart:
+                # The lookbehind skips an "@" glued to a preceding word
+                # character (e.g. ``user@localpart``); the match is
+                # case-insensitive.
+                body = re.sub(
+                    r'(?<![\w])@' + re.escape(localpart) + r'\b',
+                    '',
+                    body,
+                    flags=re.IGNORECASE,
+                )
+
+        # Normalize spacing after mention removal.
+        body = re.sub(r'[ \t]{2,}', ' ', body)
+        # Drop whitespace left directly in front of punctuation.
+        body = re.sub(r'\s+([,.;:!?])', r'\1', body)
         return body.strip()

    async def _get_display_name(self, room_id: str, user_id: str) -> str:
@@ -412,7 +412,6 @@ class MattermostAdapter(BasePlatformAdapter):

        import aiohttp

-        last_exc = None
        file_data = None
        ct = "application/octet-stream"
        fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
@@ -1957,7 +1957,7 @@ class QQAdapter(BasePlatformAdapter):
            self, openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a C2C user via REST API."""
-        msg_seq = self._next_msg_seq(reply_to or openid)
+        self._next_msg_seq(reply_to or openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -1970,7 +1970,7 @@ class QQAdapter(BasePlatformAdapter):
            self, group_openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a group via REST API."""
-        msg_seq = self._next_msg_seq(reply_to or group_openid)
+        self._next_msg_seq(reply_to or group_openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -2135,11 +2135,6 @@ class QQAdapter(BasePlatformAdapter):

            # Route
            chat_type = self._guess_chat_type(chat_id)
-            target_path = (
-                f"/v2/users/{chat_id}/files"
-                if chat_type == "c2c"
-                else f"/v2/groups/{chat_id}/files"
-            )

            if chat_type == "guild":
                # Guild channels don't support native media upload in the same way
@@ -31,6 +31,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
@@ -162,6 +163,10 @@ class SignalAdapter(BasePlatformAdapter):
    """Signal messenger adapter using signal-cli HTTP daemon."""

    platform = Platform.SIGNAL
+    # Signal has no real edit API for already-sent messages. Mark it explicitly
+    # so streaming suppresses the visible cursor instead of leaving a stale tofu
+    # square behind in chat clients when edit attempts fail.
+    SUPPORTS_MESSAGE_EDITING = False

    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.SIGNAL)
@@ -488,6 +493,11 @@ class SignalAdapter(BasePlatformAdapter):
        if text and mentions:
            text = _render_mentions(text, mentions)

+        # Extract quote (reply-to) context from Signal dataMessage
+        quote_data = data_message.get("quote") or {}
+        reply_to_id = str(quote_data.get("id")) if quote_data.get("id") else None
+        reply_to_text = quote_data.get("text")
+
        # Process attachments
        attachments_data = data_message.get("attachments", [])
        media_urls = []
@@ -541,7 +551,9 @@ class SignalAdapter(BasePlatformAdapter):
        else:
            timestamp = datetime.now(tz=timezone.utc)

-        # Build and dispatch event
+        # Build and dispatch event.
+        # Store raw envelope data in raw_message so on_processing_start/complete
+        # can extract targetAuthor + targetTimestamp for sendReaction.
        event = MessageEvent(
            source=source,
            text=text or "",
@@ -549,6 +561,9 @@ class SignalAdapter(BasePlatformAdapter):
            media_urls=media_urls,
            media_types=media_types,
            timestamp=timestamp,
+            raw_message={"sender": sender, "timestamp_ms": ts_ms},
+            reply_to_message_id=reply_to_id,
+            reply_to_text=reply_to_text,
        )

        logger.debug("Signal: message from %s in %s: %s",
@@ -707,6 +722,159 @@ class SignalAdapter(BasePlatformAdapter):
                logger.debug("Signal RPC %s failed: %s", method, e)
            return None

+    # ------------------------------------------------------------------
+    # Formatting — markdown → Signal body ranges
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _markdown_to_signal(text: str) -> tuple:
+        """Convert markdown to plain text + Signal textStyles list.
+
+        Signal doesn't render markdown.  Instead it uses ``bodyRanges``
+        (exposed by signal-cli as ``textStyle`` / ``textStyles`` params)
+        with the format ``start:length:STYLE``.
+
+        Positions are measured in **UTF-16 code units** (not Python code
+        points) because that's what the Signal protocol uses.
+
+        Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.
+        (Signal's SPOILER style is not currently mapped — no standard
+        markdown syntax for it; would need ``||spoiler||`` parsing.)
+
+        Markers are stripped in three phases (fenced code, headings, inline
+        patterns).  After each phase every span recorded so far is remapped
+        onto the shortened text, so offsets cannot drift between phases.
+        Code is literal: heading and inline markers that fall inside fenced
+        or inline code are left untouched.
+
+        Args:
+            text: Markdown source.
+
+        Returns ``(plain_text, styles_list)`` where *styles_list* may be
+        empty if there's nothing to format.
+        """
+        import re
+
+        def _utf16_len(s: str) -> int:
+            """Length of *s* in UTF-16 code units."""
+            return len(s.encode("utf-16-le")) // 2
+
+        def _shift(pos: int, removals: list) -> int:
+            """Map *pos* from pre-removal to post-removal coordinates.
+
+            *removals* is a sorted list of ``(position, length)`` cuts.
+            """
+            shift = 0
+            for rp, rl in removals:
+                if rp >= pos:
+                    break
+                shift += min(rl, pos - rp)
+            return pos - shift
+
+        def _remap(spans: list, removals: list) -> list:
+            """Remap ``(start, length, style)`` spans; drop emptied ones."""
+            remapped = []
+            for s, l, st in spans:
+                ns = _shift(s, removals)
+                ne = _shift(s + l, removals)
+                if ne > ns:
+                    remapped.append((ns, ne - ns, st))
+            return remapped
+
+        def _cut(src: str, removals: list) -> str:
+            """Delete sorted, non-overlapping ``(position, length)`` cuts."""
+            kept = []
+            cursor = 0
+            for rp, rl in removals:
+                kept.append(src[cursor:rp])
+                cursor = rp + rl
+            kept.append(src[cursor:])
+            return "".join(kept)
+
+        # Pre-process: normalize whitespace before any position tracking
+        # so later operations don't invalidate recorded offsets.
+        text = re.sub(r"\n{3,}", "\n\n", text)
+        text = text.strip()
+
+        # --- Phase 1: fenced code blocks  ```...``` → MONOSPACE ---
+        _CB = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL)
+        code_spans: list = []
+        pieces: list = []
+        cursor = 0
+        out_len = 0
+        for m in _CB.finditer(text):
+            inner = m.group(1).rstrip("\n")
+            pieces.append(text[cursor : m.start()])
+            out_len += m.start() - cursor
+            code_spans.append((out_len, len(inner), "MONOSPACE"))
+            pieces.append(inner)
+            out_len += len(inner)
+            cursor = m.end()
+        pieces.append(text[cursor:])
+        text = "".join(pieces)
+
+        # --- Phase 2: heading markers  # Foo → Foo (BOLD) ---
+        # A "# ..." line inside a code block is code (e.g. a shell comment),
+        # not a heading, so matches starting inside a code span are skipped.
+        _HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
+        heading_cuts: list = []
+        heading_spans: list = []
+        for m in _HEADING.finditer(text):
+            if any(cs <= m.start() < cs + cl for cs, cl, _ in code_spans):
+                continue
+            eol = text.find("\n", m.end())
+            if eol == -1:
+                eol = len(text)
+            heading_cuts.append((m.start(), m.end() - m.start()))
+            heading_spans.append((m.end(), eol - m.end(), "BOLD"))
+        # Removing the markers shifts everything after them, including the
+        # code spans from Phase 1 — remap both span lists before cutting.
+        code_spans = _remap(code_spans, heading_cuts)
+        heading_spans = _remap(heading_spans, heading_cuts)
+        text = _cut(text, heading_cuts)
+
+        # --- Phase 3: inline patterns (single-pass to avoid offset drift) ---
+        # Collect ALL non-overlapping matches first, then strip every marker
+        # in one pass so positions are computed against the final text.
+        # Inline code goes first so that markers inside it (``__init__``,
+        # ``a*b*c``) stay literal instead of being claimed by bold/italic.
+        _PATTERNS = [
+            (re.compile(r"`(.+?)`"), "MONOSPACE"),
+            (re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
+            (re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
+            (re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
+            (re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
+            (re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
+        ]
+
+        # Intervals already claimed; seeded with the fenced code spans so no
+        # inline pattern can rewrite code content.  Earlier patterns win ties.
+        occupied: list = [(cs, cs + cl) for cs, cl, _ in code_spans]
+        all_matches: list = []  # (start, end, g1_start, g1_end, style)
+        for pat, style in _PATTERNS:
+            for m in pat.finditer(text):
+                ms, me = m.start(), m.end()
+                if any(ms < oe and me > ob for ob, oe in occupied):
+                    continue
+                all_matches.append((ms, me, m.start(1), m.end(1), style))
+                occupied.append((ms, me))
+        all_matches.sort()
+
+        # Each match removes its prefix markers (start..g1_start) and its
+        # suffix markers (g1_end..end); the inner text carries the style.
+        inline_cuts: list = []
+        inline_spans: list = []
+        for ms, me, g1s, g1e, style in all_matches:
+            inline_cuts.append((ms, g1s - ms))
+            inline_cuts.append((g1e, me - g1e))
+            inline_spans.append((g1s, g1e - g1s, style))
+
+        styles = _remap(code_spans + heading_spans + inline_spans, inline_cuts)
+        text = _cut(text, inline_cuts)
+
+        # Convert code-point offsets → UTF-16 code-unit offsets
+        style_strings = []
+        for cp_start, cp_len, stype in sorted(styles):
+            # Safety: skip any out-of-bounds styles
+            if cp_start < 0 or cp_start + cp_len > len(text):
+                continue
+            u16_start = _utf16_len(text[:cp_start])
+            u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
+            style_strings.append(f"{u16_start}:{u16_len}:{stype}")
+
+        return text, style_strings
+
+    def format_message(self, content: str) -> str:
+        """Return *content* unchanged (no markdown is stripped here).
+
+        The actual rich formatting happens in send() via _markdown_to_signal().
+        """
+        # This is only called if someone uses the base-class send path.
+        # Our send() override bypasses this entirely.
+        return content
+
    # ------------------------------------------------------------------
    # Sending
    # ------------------------------------------------------------------
@@ -718,14 +886,22 @@ class SignalAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        """Send a text message."""
+        """Send a text message with native Signal formatting."""
        await self._stop_typing_indicator(chat_id)

+        plain_text, text_styles = self._markdown_to_signal(content)
+
        params: Dict[str, Any] = {
            "account": self.account,
-            "message": content,
+            "message": plain_text,
        }

+        if text_styles:
+            if len(text_styles) == 1:
+                params["textStyle"] = text_styles[0]
+            else:
+                params["textStyles"] = text_styles
+
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
@@ -735,11 +911,10 @@ class SignalAdapter(BasePlatformAdapter):

        if result is not None:
            self._track_sent_timestamp(result)
-            # Use the timestamp from the RPC result as a pseudo message_id.
-            # Signal doesn't have real message IDs, but the stream consumer
-            # needs a truthy value to follow its edit→fallback path correctly.
-            _msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
-            return SendResult(success=True, message_id=_msg_id or None)
+            # Signal has no editable message identifier. Returning None keeps the
+            # stream consumer on the non-edit fallback path instead of pretending
+            # future edits can remove an in-progress cursor from the chat thread.
+            return SendResult(success=True, message_id=None)
        return SendResult(success=False, error="RPC send failed")

    def _track_sent_timestamp(self, rpc_result) -> None:
@@ -963,6 +1138,110 @@ class SignalAdapter(BasePlatformAdapter):
        _keep_typing finally block to clean up platform-level typing tasks."""
        await self._stop_typing_indicator(chat_id)

+    # ------------------------------------------------------------------
+    # Reactions
+    # ------------------------------------------------------------------
+
+    async def send_reaction(
+        self,
+        chat_id: str,
+        emoji: str,
+        target_author: str,
+        target_timestamp: int,
+    ) -> bool:
+        """Send a reaction emoji to a specific message via signal-cli RPC.
+
+        Args:
+            chat_id: The chat (phone number or "group:<id>")
+            emoji: Reaction emoji string (e.g. "👀", "✅")
+            target_author: Phone number / UUID of the message author
+            target_timestamp: Signal timestamp (ms) of the message to react to
+        """
+        params: Dict[str, Any] = {
+            "account": self.account,
+            "emoji": emoji,
+            "targetAuthor": target_author,
+            "targetTimestamp": target_timestamp,
+        }
+
+        if chat_id.startswith("group:"):
+            params["groupId"] = chat_id[6:]  # strip the "group:" prefix
+        else:
+            params["recipient"] = [chat_id]  # direct chats take a recipient list
+
+        result = await self._rpc("sendReaction", params)
+        if result is not None:  # any non-None RPC result counts as success
+            return True
+        logger.debug("Signal: sendReaction failed (chat=%s, emoji=%s)", chat_id[:20], emoji)
+        return False
+
+    async def remove_reaction(
+        self,
+        chat_id: str,
+        target_author: str,
+        target_timestamp: int,
+    ) -> bool:
+        """Remove a reaction via sendReaction (empty emoji, remove=True); True if the RPC returned a result."""
+        params: Dict[str, Any] = {
+            "account": self.account,
+            "emoji": "",
+            "targetAuthor": target_author,
+            "targetTimestamp": target_timestamp,
+            "remove": True,
+        }
+
+        if chat_id.startswith("group:"):
+            params["groupId"] = chat_id[6:]  # strip the "group:" prefix
+        else:
+            params["recipient"] = [chat_id]  # direct chats take a recipient list
+
+        result = await self._rpc("sendReaction", params)
+        return result is not None
+
+    # ------------------------------------------------------------------
+    # Processing Lifecycle Hooks (reactions as progress indicators)
+    # ------------------------------------------------------------------
+
+    def _extract_reaction_target(self, event: MessageEvent) -> Optional[tuple]:
+        """Extract (target_author, target_timestamp) from a MessageEvent.
+
+        Returns None if the event doesn't carry the raw Signal envelope data
+        needed for sendReaction.
+        """
+        raw = event.raw_message
+        if not isinstance(raw, dict):  # only dict-shaped raw messages are usable
+            return None
+        author = raw.get("sender")
+        ts = raw.get("timestamp_ms")
+        if not author or not ts:  # falsy values (missing, empty, 0) count as absent
+            return None
+        return (author, ts)
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """React with 👀 when processing begins (no-op without a reaction target)."""
+        target = self._extract_reaction_target(event)
+        if target:  # skipped when the event lacks sender/timestamp data
+            await self.send_reaction(event.source.chat_id, "👀", *target)
+
+    async def on_processing_complete(self, event: MessageEvent, outcome: "ProcessingOutcome") -> None:
+        """Swap the 👀 reaction for ✅ (success) or ❌ (failure).
+
+        On CANCELLED we leave the 👀 in place — no terminal outcome means
+        the reaction should keep reflecting "in progress" (matches Telegram).
+        """
+        if outcome == ProcessingOutcome.CANCELLED:
+            return
+        target = self._extract_reaction_target(event)
+        if not target:  # nothing to react to without sender/timestamp data
+            return
+        chat_id = event.source.chat_id
+        # Remove the in-progress reaction, then add the final one
+        await self.remove_reaction(chat_id, *target)  # return value is not checked
+        if outcome == ProcessingOutcome.SUCCESS:
+            await self.send_reaction(chat_id, "✅", *target)
+        elif outcome == ProcessingOutcome.FAILURE:  # other outcomes add no reaction
+            await self.send_reaction(chat_id, "❌", *target)
+
    # ------------------------------------------------------------------
    # Chat Info
    # ------------------------------------------------------------------
@@ -84,6 +84,7 @@ from gateway.platforms.telegram_network import (
    discover_fallback_ips,
    parse_fallback_ip_env,
 )
+from utils import atomic_replace


 def check_telegram_requirements() -> bool:
@@ -122,12 +123,12 @@ def _strip_mdv2(text: str) -> str:


 # ---------------------------------------------------------------------------
-# Markdown table → code block conversion
+# Markdown table → Telegram-friendly row groups
 # ---------------------------------------------------------------------------
 # Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
 # so pipe tables render as noisy backslash-pipe text with no alignment.
-# Wrapping the table in a fenced code block makes Telegram render it as
-# monospace preformatted text with columns intact.
+# Reformatting each row into a bold heading plus bullet list keeps the content
+# readable on mobile clients while preserving the source data.

 # Matches a GFM table delimiter row: optional outer pipes, cells containing
 # only dashes (with optional leading/trailing colons for alignment) separated
@@ -144,13 +145,49 @@ def _is_table_row(line: str) -> bool:
    return bool(stripped) and '|' in stripped


+def _split_markdown_table_row(line: str) -> list[str]:
+    """Split a simple GFM table row into stripped cell values (every '|' is a delimiter)."""
+    stripped = line.strip()
+    if stripped.startswith("|"):  # drop the optional outer leading pipe
+        stripped = stripped[1:]
+    if stripped.endswith("|"):  # drop the optional outer trailing pipe
+        stripped = stripped[:-1]
+    return [cell.strip() for cell in stripped.split("|")]  # escaped pipes are split too
+
+
+def _render_table_block_for_telegram(table_block: list[str]) -> str:
+    """Render a detected GFM table as a bold heading plus "• header: value" bullets per body row."""
+    if len(table_block) < 3:  # fewer than header + delimiter + one body row
+        return "\n".join(table_block)  # emit the lines unchanged
+
+    headers = _split_markdown_table_row(table_block[0])
+    if len(headers) < 2:  # single-column input is also emitted unchanged
+        return "\n".join(table_block)
+
+    rendered_rows: list[str] = []
+    for index, row in enumerate(table_block[2:], start=1):  # skip header and delimiter rows
+        cells = _split_markdown_table_row(row)
+        if len(cells) < len(headers):
+            cells.extend([""] * (len(headers) - len(cells)))  # pad short rows with empty cells
+        elif len(cells) > len(headers):
+            cells = cells[: len(headers)]  # drop cells beyond the header count
+
+        heading = next((cell for cell in cells if cell), f"Row {index}")  # first non-empty cell
+        rendered_rows.append(f"**{heading}**")
+        rendered_rows.extend(
+            f"• {header}: {value}" for header, value in zip(headers, cells)
+        )
+
+    return "\n\n".join(rendered_rows)  # NOTE(review): blank line between every entry, bullets included — confirm
+
+
 def _wrap_markdown_tables(text: str) -> str:
-    """Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
+    """Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.

    Detected by a row containing '|' immediately followed by a delimiter
    row matching :data:`_TABLE_SEPARATOR_RE`.  Subsequent pipe-containing
-    non-blank lines are consumed as the table body and included in the
-    wrapped block.  Tables inside existing fenced code blocks are left
+    non-blank lines are consumed as the table body and rewritten as
+    per-row bullet groups. Tables inside existing fenced code blocks are left
    alone.
    """
    if '|' not in text or '-' not in text:
@@ -187,9 +224,7 @@ def _wrap_markdown_tables(text: str) -> str:
            while j < len(lines) and _is_table_row(lines[j]):
                table_block.append(lines[j])
                j += 1
-            out.append('```')
-            out.extend(table_block)
-            out.append('```')
+            out.append(_render_table_block_for_telegram(table_block))
            i = j
            continue

@@ -334,6 +369,49 @@ class TelegramAdapter(BasePlatformAdapter):
            return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
        return {"disable_web_page_preview": True}

+    async def _drain_polling_connections(self) -> None:
+        """Reset the httpx connection pool used for getUpdates polling.
+
+        Network errors (especially through proxies like sing-box) can leave
+        httpx connections in a half-closed state that still occupy pool slots.
+        After enough reconnect cycles the pool fills up entirely, causing
+        ``Pool timeout: All connections in the connection pool are occupied.``
+
+        We reset ONLY ``_request[0]`` (the getUpdates request) — the general
+        request (``_request[1]``) is left untouched so concurrent
+        ``send_message`` / ``edit_message`` calls are never interrupted.
+
+        Implementation note: accesses ``Bot._request[0]`` which is the
+        get-updates ``BaseRequest`` in the PTB 22.x internal tuple
+        ``(get_updates_request, general_request)``.  There is no public
+        accessor for the polling request; review if upgrading to PTB 23+.
+        """
+        if not (self._app and self._app.bot):  # nothing to reset before the app/bot exist
+            return
+        try:
+            # PTB 22.x: _request is a (get_updates, general) tuple;
+            # no public accessor exists for the polling request.
+            polling_req = self._app.bot._request[0]  # noqa: SLF001
+        except Exception:  # private attribute missing or shaped differently — give up quietly
+            return
+        try:
+            await polling_req.shutdown()
+        except Exception:  # a failed shutdown does not prevent the initialize() attempt below
+            logger.debug(
+                "[%s] Polling request shutdown failed (non-fatal)",
+                self.name, exc_info=True,
+            )
+        try:
+            await polling_req.initialize()
+            logger.debug(
+                "[%s] Polling request pool drained before reconnect", self.name
+            )
+        except Exception:  # logged at debug level only; the caller proceeds regardless
+            logger.debug(
+                "[%s] Polling request re-initialize failed (non-fatal)",
+                self.name, exc_info=True,
+            )
+
    async def _handle_polling_network_error(self, error: Exception) -> None:
        """Reconnect polling after a transient network interruption.

@@ -379,6 +457,8 @@ class TelegramAdapter(BasePlatformAdapter):
        except Exception:
            pass

+        await self._drain_polling_connections()
+
        try:
            await self._app.updater.start_polling(
                allowed_updates=Update.ALL_TYPES,
@@ -426,6 +506,7 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception:
                pass
            await asyncio.sleep(RETRY_DELAY)
+            await self._drain_polling_connections()
            try:
                await self._app.updater.start_polling(
                    allowed_updates=Update.ALL_TYPES,
@@ -554,7 +635,7 @@ class TelegramAdapter(BasePlatformAdapter):
                        _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
                        f.flush()
                        os.fsync(f.fileno())
-                    os.replace(tmp_path, config_path)
+                    atomic_replace(tmp_path, config_path)
                except BaseException:
                    try:
                        os.unlink(tmp_path)
@@ -1209,6 +1290,31 @@ class TelegramAdapter(BasePlatformAdapter):
            )
            return SendResult(success=False, error=str(e))

+    async def delete_message(self, chat_id: str, message_id: str) -> bool:
+        """Delete a previously sent Telegram message.
+
+        Used by the stream consumer's fresh-final cleanup path (ported
+        from openclaw/openclaw#72038) to remove long-lived preview
+        messages after sending the completed reply as a fresh message.
+        Telegram's Bot API ``deleteMessage`` works for bot-posted
+        messages in the last 48 hours.  Failures are non-fatal — the
+        caller leaves the preview in place and logs at debug level.
+        """
+        if not self._bot:  # no bot client available
+            return False
+        try:
+            await self._bot.delete_message(
+                chat_id=int(chat_id),  # a non-numeric id raises here and is reported as False below
+                message_id=int(message_id),
+            )
+            return True
+        except Exception as e:  # every failure is swallowed and logged at debug level
+            logger.debug(
+                "[%s] Failed to delete Telegram message %s: %s",
+                self.name, message_id, e,
+            )
+            return False
+
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
@@ -2055,10 +2161,8 @@ class TelegramAdapter(BasePlatformAdapter):

        text = content

-        # 0) Pre-wrap GFM-style pipe tables in ``` fences.  Telegram can't
-        #    render tables natively, but fenced code blocks render as
-        #    monospace preformatted text with columns intact.  The wrapped
-        #    tables then flow through step (1) below as protected regions.
+        # 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
+        #    before the normal MarkdownV2 conversions run.
        text = _wrap_markdown_tables(text)

        # 1) Protect fenced code blocks (``` ... ```)
@@ -2328,6 +2432,26 @@ class TelegramAdapter(BasePlatformAdapter):
                    user = getattr(entity, "user", None)
                    if user and getattr(user, "id", None) == bot_id:
                        return True
+                elif entity_type == "bot_command" and expected:
+                    # Telegram's official group-disambiguation form for slash
+                    # commands (``/cmd@botname``) is emitted as a single
+                    # ``bot_command`` entity covering the whole span — there
+                    # is no accompanying ``mention`` entity. Treat it as a
+                    # direct address to this bot when the ``@botname`` suffix
+                    # matches. This is the form Telegram's own command menu
+                    # autocomplete produces in groups, so dropping it at the
+                    # mention gate would break /new, /reset, /help, ... for
+                    # every group that has ``require_mention`` enabled (#15415).
+                    offset = int(getattr(entity, "offset", -1))
+                    length = int(getattr(entity, "length", 0))
+                    if offset < 0 or length <= 0:
+                        continue
+                    command_text = source_text[offset:offset + length]
+                    at_index = command_text.find("@")
+                    if at_index < 0:
+                        continue
+                    if command_text[at_index:].strip().lower() == expected:
+                        return True
        return False

    def _message_matches_mention_patterns(self, message: Message) -> bool:
@@ -89,8 +89,21 @@ MAX_CONSECUTIVE_FAILURES = 3
 RETRY_DELAY_SECONDS = 2
 BACKOFF_DELAY_SECONDS = 30
 SESSION_EXPIRED_ERRCODE = -14
+RATE_LIMIT_ERRCODE = -2  # iLink frequency limit — backoff and retry
 MESSAGE_DEDUP_TTL_SECONDS = 300

+
+def _is_stale_session_ret(
+    ret: "Optional[int]", errcode: "Optional[int]", errmsg: "Optional[str]",
+) -> bool:
+    """True when iLink returns ret=-2 / errcode=-2 with 'unknown error',
+    which is a stale-session signal (same as errcode=-14) rather than
+    a genuine rate limit."""
+    if ret != RATE_LIMIT_ERRCODE and errcode != RATE_LIMIT_ERRCODE:
+        return False  # neither field carries the -2 code
+    return (errmsg or "").lower() == "unknown error"  # exact, case-insensitive match; None → ""
+
+
 MEDIA_IMAGE = 1
 MEDIA_VIDEO = 2
 MEDIA_FILE = 3
@@ -1113,7 +1126,7 @@ async def qr_login(
 class WeixinAdapter(BasePlatformAdapter):
    """Native Hermes adapter for Weixin personal accounts."""

-    MAX_MESSAGE_LENGTH = 4000
+    MAX_MESSAGE_LENGTH = 2000

    # WeChat does not support editing sent messages — streaming must use the
    # fallback "send-final-only" path so the cursor (▉) is never left visible.
@@ -1138,10 +1151,10 @@ class WeixinAdapter(BasePlatformAdapter):
            extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
        ).strip().rstrip("/")
        self._send_chunk_delay_seconds = float(
-            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
+            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
        )
        self._send_chunk_retries = int(
-            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
+            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
        )
        self._send_chunk_retry_delay_seconds = float(
            extra.get("send_chunk_retry_delay_seconds")
@@ -1209,6 +1222,17 @@ class WeixinAdapter(BasePlatformAdapter):
        self._mark_connected()
        _LIVE_ADAPTERS[self._token] = self
        logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
+        if self._group_policy != "disabled":
+            logger.warning(
+                "[%s] WEIXIN_GROUP_POLICY=%s is set, but QR-login connects an iLink bot "
+                "identity (e.g. ...@im.bot) which typically cannot be invited into ordinary "
+                "WeChat groups. iLink usually does not deliver ordinary-group events for "
+                "these accounts, so group messages may never reach Hermes regardless of this "
+                "policy. If group delivery doesn't work, the limitation is on the iLink side, "
+                "not in Hermes.",
+                self.name,
+                self._group_policy,
+            )
        return True

    async def disconnect(self) -> None:
@@ -1253,7 +1277,8 @@ class WeixinAdapter(BasePlatformAdapter):
                ret = response.get("ret", 0)
                errcode = response.get("errcode", 0)
                if ret not in (0, None) or errcode not in (0, None):
-                    if ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE:
+                    if (ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE
+                            or _is_stale_session_ret(ret, errcode, response.get("errmsg"))):
                        logger.error("[%s] Session expired; pausing for 10 minutes", self.name)
                        await asyncio.sleep(600)
                        consecutive_failures = 0
@@ -1518,6 +1543,7 @@ class WeixinAdapter(BasePlatformAdapter):
                        is_session_expired = (
                            ret == SESSION_EXPIRED_ERRCODE
                            or errcode == SESSION_EXPIRED_ERRCODE
+                            or _is_stale_session_ret(ret, errcode, resp.get("errmsg"))
                        )
                        # Session expired — strip token and retry once
                        if is_session_expired and not retried_without_token and context_token:
@@ -1531,6 +1557,28 @@ class WeixinAdapter(BasePlatformAdapter):
                                self.name, _safe_id(chat_id),
                            )
                            continue
+                        # Rate limit (-2) — backoff and retry
+                        is_rate_limited = (
+                            ret == RATE_LIMIT_ERRCODE
+                            or errcode == RATE_LIMIT_ERRCODE
+                        )
+                        if is_rate_limited:
+                            errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
+                            # Record the error so we raise a descriptive
+                            # RuntimeError (instead of AssertionError) if the
+                            # loop exhausts with the server still rate-limiting.
+                            last_error = RuntimeError(
+                                f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
+                            )
+                            if attempt >= self._send_chunk_retries:
+                                break
+                            wait = self._send_chunk_retry_delay_seconds * 3  # 3x backoff for rate limit
+                            logger.warning(
+                                "[%s] rate limited for %s; backing off %.1fs before retry",
+                                self.name, _safe_id(chat_id), wait,
+                            )
+                            await asyncio.sleep(wait)
+                            continue
                        errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
                        raise RuntimeError(
                            f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
@@ -0,0 +1,645 @@
+"""
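+yuanbao_media.py — Media handling module for the Yuanbao platform
+
+Provides COS upload, file download, and TIM media-message construction.
+Ported from the TypeScript media.ts (yuanbao-openclaw-plugin); uses httpx
+instead of cos-nodejs-sdk-v5 to avoid pulling in an extra SDK dependency.
+
+COS upload flow:
+  1. Call genUploadInfo to obtain temporary credentials (tmpSecretId/tmpSecretKey/sessionToken)
+  2. Build the Authorization header with an HMAC-SHA1 signature from those credentials
+  3. Upload to COS via HTTP PUT
+
+TIM message body construction:
+  - buildImageMsgBody() → TIMImageElem
+  - buildFileMsgBody()  → TIMFileElem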
+"""
+
+from __future__ import annotations
+
+import hashlib
+import hmac
+import logging
+import os
+import secrets
+import struct
+import time
+import urllib.parse
+from typing import Optional, Any
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# ============ Constants ============
+
+UPLOAD_INFO_PATH = "/api/resource/genUploadInfo"
+DEFAULT_API_DOMAIN = "yuanbao.tencent.com"
+DEFAULT_MAX_SIZE_MB = 50
+
+# COS accelerate-domain switch (prefer global acceleration)
+COS_USE_ACCELERATE = True
+
+# ============ Type mappings ============
+
+# MIME → image_format number (TIM protocol field)
+_MIME_TO_IMAGE_FORMAT: dict[str, int] = {
+    "image/jpeg": 1,
+    "image/jpg": 1,
+    "image/gif": 2,
+    "image/png": 3,
+    "image/bmp": 4,
+    "image/webp": 255,
+    "image/heic": 255,
+    "image/tiff": 255,
+}
+
+# File extension → MIME
+_EXT_TO_MIME: dict[str, str] = {
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".png": "image/png",
+    ".gif": "image/gif",
+    ".webp": "image/webp",
+    ".bmp": "image/bmp",
+    ".heic": "image/heic",
+    ".tiff": "image/tiff",
+    ".ico": "image/x-icon",
+    ".pdf": "application/pdf",
+    ".doc": "application/msword",
+    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    ".xls": "application/vnd.ms-excel",
+    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    ".ppt": "application/vnd.ms-powerpoint",
+    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    ".txt": "text/plain",
+    ".zip": "application/zip",
+    ".tar": "application/x-tar",
+    ".gz": "application/gzip",
+    ".mp3": "audio/mpeg",
+    ".mp4": "video/mp4",
+    ".wav": "audio/wav",
+    ".ogg": "audio/ogg",
+    ".webm": "video/webm",
+}
+
+
+# ============ Utility functions ============
+
+def guess_mime_type(filename: str) -> str:
+    """Guess the MIME type from the file extension (default: application/octet-stream)."""
+    ext = os.path.splitext(filename)[-1].lower()
+    return _EXT_TO_MIME.get(ext, "application/octet-stream")
+
+
+def is_image(filename: str, mime_type: str = "") -> bool:
+    """Return True for an image/* MIME type or a known image file extension."""
+    if mime_type.startswith("image/"):  # an explicit image MIME type wins outright
+        return True
+    ext = os.path.splitext(filename)[-1].lower()
+    return ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".heic", ".tiff", ".ico"}
+
+
+def get_image_format(mime_type: str) -> int:
+    """Return the TIM image-format number for a MIME type (255 when unmapped)."""
+    return _MIME_TO_IMAGE_FORMAT.get(mime_type.lower(), 255)
+
+
+def md5_hex(data: bytes) -> str:
+    """Return the MD5 digest of ``data`` as a hexadecimal string."""
+    return hashlib.md5(data).hexdigest()
+
+
+def generate_file_id() -> str:
+    """Generate a random file ID (32 hex characters)."""
+    return secrets.token_hex(16)
+
+
+
+# ============ Image size parsing (pure Python, no Pillow required) ============
+
+def parse_image_size(data: bytes) -> Optional[dict[str, int]]:
+    """
+    Parse image width/height (JPEG/PNG/GIF/WebP) without third-party dependencies.
+    Returns {"width": w, "height": h}, or None when the format is not recognized.
+    """
+    return (
+        _parse_png_size(data)
+        or _parse_jpeg_size(data)
+        or _parse_gif_size(data)
+        or _parse_webp_size(data)
+    )
+
+
+def _parse_png_size(buf: bytes) -> Optional[dict[str, int]]:
+    if len(buf) < 24:  # both dimensions live in bytes 16..23
+        return None
+    if buf[:4] != b"\x89PNG":  # only the first 4 signature bytes are checked
+        return None
+    w = struct.unpack(">I", buf[16:20])[0]  # big-endian uint32 at offset 16
+    h = struct.unpack(">I", buf[20:24])[0]  # big-endian uint32 at offset 20
+    return {"width": w, "height": h}
+
+
+def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
+    if len(buf) < 4 or buf[0] != 0xFF or buf[1] != 0xD8:  # must start with FF D8
+        return None
+    i = 2
+    while i < len(buf) - 9:  # keeps the i+5..i+9 reads below in bounds
+        if buf[i] != 0xFF:  # resync: advance one byte at a time to the next 0xFF
+            i += 1
+            continue
+        marker = buf[i + 1]
+        if marker in (0xC0, 0xC2):  # only these two marker codes yield a size
+            h = struct.unpack(">H", buf[i + 5: i + 7])[0]  # big-endian uint16
+            w = struct.unpack(">H", buf[i + 7: i + 9])[0]  # big-endian uint16
+            return {"width": w, "height": h}
+        if i + 3 < len(buf):
+            i += 2 + struct.unpack(">H", buf[i + 2: i + 4])[0]  # skip marker + segment length
+        else:
+            break
+    return None
+
+
+def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
+    if len(buf) < 10:  # 6-byte signature + two 2-byte dimensions
+        return None
+    sig = buf[:6].decode("ascii", errors="replace")  # non-ASCII bytes cannot match below
+    if sig not in ("GIF87a", "GIF89a"):
+        return None
+    w = struct.unpack("<H", buf[6:8])[0]  # little-endian uint16 at offset 6
+    h = struct.unpack("<H", buf[8:10])[0]  # little-endian uint16 at offset 8
+    return {"width": w, "height": h}
+
+
+def _parse_webp_size(buf: bytes) -> Optional[dict[str, int]]:
+    if len(buf) < 16:  # RIFF header (12 bytes) + 4-byte chunk tag
+        return None
+    if buf[:4] != b"RIFF" or buf[8:12] != b"WEBP":
+        return None
+    chunk = buf[12:16].decode("ascii", errors="replace")  # tag of the first chunk
+    if chunk == "VP8 ":
+        if len(buf) >= 30 and buf[23] == 0x9D and buf[24] == 0x01 and buf[25] == 0x2A:  # 9D 01 2A start code
+            w = struct.unpack("<H", buf[26:28])[0] & 0x3FFF  # low 14 bits, little-endian
+            h = struct.unpack("<H", buf[28:30])[0] & 0x3FFF  # low 14 bits, little-endian
+            return {"width": w, "height": h}
+    elif chunk == "VP8L":
+        if len(buf) >= 25 and buf[20] == 0x2F:  # 0x2F signature byte at offset 20
+            bits = struct.unpack("<I", buf[21:25])[0]
+            w = (bits & 0x3FFF) + 1  # bits 0..13 store width - 1
+            h = ((bits >> 14) & 0x3FFF) + 1  # bits 14..27 store height - 1
+            return {"width": w, "height": h}
+    elif chunk == "VP8X":
+        if len(buf) >= 30:
+            w = (buf[24] | (buf[25] << 8) | (buf[26] << 16)) + 1  # 24-bit little-endian, stored minus 1
+            h = (buf[27] | (buf[28] << 8) | (buf[29] << 16)) + 1  # 24-bit little-endian, stored minus 1
+            return {"width": w, "height": h}
+    return None  # unknown chunk tag or failed sanity check
+
+
+# ============ URL download ============
+
+async def download_url(
+    url: str,
+    max_size_mb: int = DEFAULT_MAX_SIZE_MB,
+) -> tuple[bytes, str]:
+    """
+    Download the content of a URL and return (bytes, content_type).
+
+    Args:
+        url:          HTTP(S) URL
+        max_size_mb:  Maximum allowed size (MB); an exception is raised when exceeded
+
+    Returns:
+        (data_bytes, content_type_string)
+
+    Raises:
+        ValueError:  Content exceeds the size limit
+        httpx.HTTPError: Network/HTTP error during the GET download
+    """
+    max_bytes = max_size_mb * 1024 * 1024
+    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+        # Probe the size with HEAD first (best-effort)
+        try:
+            head = await client.head(url)
+            content_length = int(head.headers.get("content-length", 0) or 0)
+            if content_length > 0 and content_length > max_bytes:
+                raise ValueError(
+                    f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
+                )
+        except httpx.HTTPError:
+            pass  # some servers reject or drop HEAD; fall through to the size-capped GET
+
+        # GET download (streamed so the limit is enforced while reading)
+        async with client.stream("GET", url) as resp:
+            resp.raise_for_status()
+
+            content_type = resp.headers.get("content-type", "").split(";")[0].strip()
+
+            chunks: list[bytes] = []
+            downloaded = 0
+            async for chunk in resp.aiter_bytes(65536):
+                downloaded += len(chunk)
+                if downloaded > max_bytes:  # abort as soon as the running total exceeds the cap
+                    raise ValueError(
+                        f"文件过大: 已超过 {max_size_mb} MB 限制"
+                    )
+                chunks.append(chunk)
+
+        data = b"".join(chunks)
+        return data, content_type
+
+
+# ============ COS authentication (HMAC-SHA1) ============
+
+def _cos_sign(
+    method: str,
+    path: str,
+    params: dict[str, str],
+    headers: dict[str, str],
+    secret_id: str,
+    secret_key: str,
+    start_time: Optional[int] = None,
+    expire_seconds: int = 3600,
+) -> str:
+    """
+    Build a COS request signature (q-sign-algorithm=sha1 scheme).
+    Reference: https://cloud.tencent.com/document/product/436/7778
+
+    Args:
+        method:         HTTP method (lowercase, e.g. "put")
+        path:           URL path (URL-encoded, lowercase)
+        params:         URL query parameters dict (included in the signature)
+        headers:        Request headers dict to sign (keys should be lowercase)
+        secret_id:      Temporary SecretId (tmpSecretId)
+        secret_key:     Temporary SecretKey (tmpSecretKey)
+        start_time:     Signature start Unix timestamp (defaults to now)
+        expire_seconds: Signature validity in seconds (default 3600)
+
+    Returns:
+        Authorization header value (the complete string)
+    """
+    now = int(time.time())
+    q_sign_time = f"{start_time or now};{(start_time or now) + expire_seconds}"
+
+    # Step 1: SignKey = HMAC-SHA1(SecretKey, q-sign-time)
+    sign_key = hmac.new(
+        secret_key.encode("utf-8"),
+        q_sign_time.encode("utf-8"),
+        hashlib.sha1,
+    ).hexdigest()
+
+    # Step 2: HttpString
+    # Parameters and headers must be sorted lexicographically, with lowercase keys
+    sorted_params = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in params.items())
+    sorted_headers = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in headers.items())
+
+    url_param_list = ";".join(k for k, _ in sorted_params)
+    url_params = "&".join(f"{k}={v}" for k, v in sorted_params)
+    header_list = ";".join(k for k, _ in sorted_headers)
+    header_str = "&".join(f"{k}={v}" for k, v in sorted_headers)
+
+    http_string = "\n".join([
+        method.lower(),
+        path,
+        url_params,
+        header_str,
+        "",
+    ])
+
+    # Step 3: StringToSign = sha1 hash of HttpString
+    sha1_of_http = hashlib.sha1(http_string.encode("utf-8")).hexdigest()
+    string_to_sign = "\n".join([
+        "sha1",
+        q_sign_time,
+        sha1_of_http,
+        "",
+    ])
+
+    # Step 4: Signature = HMAC-SHA1(SignKey, StringToSign)
+    signature = hmac.new(
+        sign_key.encode("utf-8"),
+        string_to_sign.encode("utf-8"),
+        hashlib.sha1,
+    ).hexdigest()
+
+    return (
+        f"q-sign-algorithm=sha1"
+        f"&q-ak={secret_id}"
+        f"&q-sign-time={q_sign_time}"
+        f"&q-key-time={q_sign_time}"
+        f"&q-header-list={header_list}"
+        f"&q-url-param-list={url_param_list}"
+        f"&q-signature={signature}"
+    )
+
+
+# ============ 主要公开 API ============
+
+async def get_cos_credentials(
+    app_key: str,
+    api_domain: str,
+    token: str,
+    filename: str = "file",
+    file_id: Optional[str] = None,
+    bot_id: str = "",
+    route_env: str = "",
+) -> dict:
+    """
+    Call the genUploadInfo endpoint to obtain temporary COS credentials and
+    the upload configuration for one file.
+
+    Args:
+        app_key:    Application key; sent as the X-ID header when bot_id is empty.
+        api_domain: API base URL (e.g. https://bot.yuanbao.tencent.com); a
+                    trailing slash is stripped before the path is appended.
+        token:      Currently valid sign token, sent as the X-Token header.
+        filename:   Name of the file to upload (including extension).
+        file_id:    Client-generated unique file ID; generate_file_id() is
+                    used when omitted.
+        bot_id:     Bot account ID; preferred over app_key for the X-ID header.
+        route_env:  Optional routing environment; sent as the X-Route-Env
+                    header only when non-empty.
+
+    Returns:
+        The COS upload configuration dict: the response's "data" object, or
+        the whole response when "data" is absent or empty. Only bucketName and
+        location are validated here; per the upstream API the dict is expected
+        to carry:
+            bucketName          (str) - COS bucket name
+            region              (str) - COS region
+            location            (str) - upload key (object path)
+            encryptTmpSecretId  (str) - temporary SecretId
+            encryptTmpSecretKey (str) - temporary SecretKey
+            encryptToken        (str) - session token
+            startTime           (int) - credential start time (Unix)
+            expiredTime         (int) - credential expiry time (Unix)
+            resourceUrl         (str) - public URL of the uploaded object
+            resourceID          (str) - resource ID (optional)
+
+    Raises:
+        httpx.HTTPStatusError: the endpoint answered with a non-2xx status.
+        RuntimeError: the response carries a non-zero code, is not a JSON
+                      object, or lacks a required field.
+    """
+    if file_id is None:
+        file_id = generate_file_id()
+
+    upload_url = f"{api_domain.rstrip('/')}{UPLOAD_INFO_PATH}"
+
+    headers = {
+        "Content-Type": "application/json",
+        "X-Token": token,
+        "X-ID": bot_id or app_key,
+        "X-Source": "web",
+    }
+    if route_env:
+        headers["X-Route-Env"] = route_env
+    body = {
+        "fileName": filename,
+        "fileId": file_id,
+        "docFrom": "localDoc",
+        "docOpenId": "",
+    }
+
+    async with httpx.AsyncClient(timeout=15.0) as client:
+        resp = await client.post(upload_url, json=body, headers=headers)
+        resp.raise_for_status()
+        result: Any = resp.json()
+
+    # A JSON array/scalar would otherwise fail below with AttributeError
+    # instead of the documented RuntimeError.
+    if not isinstance(result, dict):
+        raise RuntimeError(
+            f"genUploadInfo 返回格式异常: 期望 JSON 对象, 实际为 {type(result).__name__}"
+        )
+
+    # A missing "code" is treated as success; only an explicit non-zero code fails.
+    code = result.get("code")
+    if code != 0 and code is not None:
+        raise RuntimeError(
+            f"genUploadInfo 失败: code={code}, msg={result.get('msg', '')}"
+        )
+
+    # Some responses put the fields at the top level instead of under "data".
+    data = result.get("data") or result
+    if not isinstance(data, dict):
+        raise RuntimeError(
+            f"genUploadInfo 返回格式异常: data 应为对象, 实际为 {type(data).__name__}"
+        )
+
+    required_fields = ["bucketName", "location"]
+    missing = [f for f in required_fields if not data.get(f)]
+    if missing:
+        raise RuntimeError(
+            f"genUploadInfo 返回字段不完整: 缺少字段 {missing}"
+        )
+
+    return data
+
+
+async def upload_to_cos(
+    file_bytes: bytes,
+    filename: str,
+    content_type: str,
+    credentials: dict,
+    bucket: str,
+    region: str,
+) -> dict:
+    """
+    Upload a file to COS with an httpx PUT request, signed with an HMAC-SHA1
+    signature built from temporary credentials.
+
+    Args:
+        file_bytes:   Raw file content.
+        filename:     File name; used to guess the MIME type when content_type
+                      is empty or "application/octet-stream".
+        content_type: MIME type (e.g. "image/jpeg").
+        credentials:  Dict returned by get_cos_credentials(); keys read here:
+                        encryptTmpSecretId  -> temporary SecretId
+                        encryptTmpSecretKey -> temporary SecretKey
+                        encryptToken        -> session token
+                        location            -> COS key (object path)
+                        resourceUrl         -> public URL after upload
+                        startTime           -> credential start time (Unix)
+                        expiredTime         -> credential expiry time (Unix)
+        bucket:       COS bucket name (e.g. chatbot-1234567890).
+        region:       COS region (e.g. ap-guangzhou); unused for the host when
+                      COS_USE_ACCELERATE is set.
+
+    Returns:
+        Upload result dict:
+            url    (str)           - resourceUrl if provided, else the PUT URL
+            uuid   (str)           - md5_hex() of the file content
+            size   (int)           - file size in bytes
+            width  (int, optional) - image width (image content types only)
+            height (int, optional) - image height (image content types only)
+
+    Raises:
+        httpx.HTTPStatusError: COS answered with a non-2xx status.
+        RuntimeError:          a required credentials field is missing.
+    """
+    secret_id: str = credentials.get("encryptTmpSecretId", "")
+    secret_key: str = credentials.get("encryptTmpSecretKey", "")
+    session_token: str = credentials.get("encryptToken", "")
+    cos_key: str = credentials.get("location", "")
+    resource_url: str = credentials.get("resourceUrl", "")
+    start_time: Optional[int] = credentials.get("startTime")
+    expired_time: Optional[int] = credentials.get("expiredTime")
+
+    if not secret_id or not secret_key or not cos_key:
+        raise RuntimeError(
+            f"COS credentials 不完整: secretId={bool(secret_id)}, "
+            f"secretKey={bool(secret_key)}, location={bool(cos_key)}"
+        )
+
+    # Build the upload host (the global-acceleration domain takes precedence).
+    if COS_USE_ACCELERATE:
+        cos_host = f"{bucket}.cos.accelerate.myqcloud.com"
+    else:
+        cos_host = f"{bucket}.cos.{region}.myqcloud.com"
+
+    # URL-encode the object key, keeping "/" as the path separator.
+    encoded_key = urllib.parse.quote(cos_key, safe="/")
+    cos_url = f"https://{cos_host}/{encoded_key.lstrip('/')}"
+
+    # Resolve the Content-Type: only image file names get a guessed MIME type.
+    if not content_type or content_type == "application/octet-stream":
+        if is_image(filename):
+            content_type = guess_mime_type(filename)
+        else:
+            content_type = "application/octet-stream"
+
+    # Content hash and size reported back to the caller.
+    file_uuid = md5_hex(file_bytes)
+    file_size = len(file_bytes)
+
+    # Request headers that take part in the signature.
+    sign_headers = {
+        "host": cos_host,
+        "content-type": content_type,
+        "x-cos-security-token": session_token,
+    }
+
+    # Signature validity window. _cos_sign() computes the end of the window as
+    # start + expire_seconds, so the duration must be measured from sign_start
+    # (not from now); otherwise credentials issued in the past would yield a
+    # window that ends early, possibly before the current time.
+    now = int(time.time())
+    sign_start = start_time if start_time else now
+    sign_end = expired_time if expired_time and expired_time > now else now + 3600
+    if sign_end <= sign_start:
+        # Degenerate window (startTime at/after the chosen end): start from now.
+        sign_start = now
+    sign_expire = sign_end - sign_start
+
+    authorization = _cos_sign(
+        method="put",
+        path=f"/{encoded_key.lstrip('/')}",
+        params={},
+        headers=sign_headers,
+        secret_id=secret_id,
+        secret_key=secret_key,
+        start_time=sign_start,
+        expire_seconds=sign_expire,
+    )
+
+    put_headers = {
+        "Authorization": authorization,
+        "Content-Type": content_type,
+        "x-cos-security-token": session_token,
+    }
+
+    logger.info(
+        "COS PUT: bucket=%s region=%s key=%s size=%d mime=%s",
+        bucket, region, cos_key, file_size, content_type,
+    )
+
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        resp = await client.put(
+            cos_url,
+            content=file_bytes,
+            headers=put_headers,
+        )
+        resp.raise_for_status()
+
+    result: dict[str, Any] = {
+        "url": resource_url or cos_url,
+        "uuid": file_uuid,
+        "size": file_size,
+    }
+
+    # Image dimensions are attached only for image content types.
+    if content_type.startswith("image/"):
+        size_info = parse_image_size(file_bytes)
+        if size_info:
+            result["width"] = size_info["width"]
+            result["height"] = size_info["height"]
+
+    logger.info(
+        "COS 上传成功: url=%s size=%d",
+        result["url"], file_size,
+    )
+    return result
+
+
+# ============ TIM 媒体消息构建 ============
+
+def build_image_msg_body(
+    url: str,
+    uuid: Optional[str] = None,
+    filename: Optional[str] = None,
+    size: int = 0,
+    width: int = 0,
+    height: int = 0,
+    mime_type: str = "",
+) -> list[dict]:
+    """
+    Build a Tencent IM TIMImageElem message body.
+    Reference: https://cloud.tencent.com/document/product/269/2720
+
+    Args:
+        url:       Public URL of the image (COS resourceUrl).
+        uuid:      File identifier (MD5 or any other unique value).
+        filename:  File name; used as the identifier when uuid is empty.
+        size:      File size in bytes.
+        width:     Image width in pixels.
+        height:    Image height in pixels.
+        mime_type: MIME type passed to get_image_format(); when empty the
+                   image_format is 255.
+
+    Returns:
+        A one-element list holding the TIMImageElem entry, ready to be used
+        as msg_body.
+    """
+    # Identifier fallback chain: uuid -> filename -> URL basename -> "image".
+    identifier = uuid or filename or _basename_from_url(url) or "image"
+
+    if mime_type:
+        fmt = get_image_format(mime_type)
+    else:
+        fmt = 255
+
+    # type 1 = original image
+    original_image = {
+        "type": 1,
+        "size": size,
+        "width": width,
+        "height": height,
+        "url": url,
+    }
+    content = {
+        "uuid": identifier,
+        "image_format": fmt,
+        "image_info_array": [original_image],
+    }
+    return [{"msg_type": "TIMImageElem", "msg_content": content}]
+
+
+def build_file_msg_body(
+    url: str,
+    filename: str,
+    uuid: Optional[str] = None,
+    size: int = 0,
+) -> list[dict]:
+    """
+    Build a Tencent IM TIMFileElem message body.
+    Reference: https://cloud.tencent.com/document/product/269/2720
+
+    Args:
+        url:      Public URL of the file (COS resourceUrl).
+        filename: File name (including extension).
+        uuid:     File identifier (MD5 or any other unique value); filename is
+                  used when it is empty.
+        size:     File size in bytes.
+
+    Returns:
+        A one-element list holding the TIMFileElem entry, ready to be used as
+        msg_body.
+    """
+    content = {
+        "uuid": uuid if uuid else filename,
+        "file_name": filename,
+        "file_size": size,
+        "url": url,
+    }
+    return [{"msg_type": "TIMFileElem", "msg_content": content}]
+
+
+# ============ 内部工具 ============
+
+def _basename_from_url(url: str) -> str:
+    """Return the last path component of *url*, or "" if it cannot be parsed."""
+    try:
+        url_path = urllib.parse.urlparse(url).path
+        name = os.path.basename(url_path)
+    except Exception:
+        name = ""
+    return name
@@ -0,0 +1,558 @@
+"""
+Yuanbao sticker (TIMFaceElem) support.
+
+Ported from yuanbao-openclaw-plugin/src/sticker/.
+
+TIMFaceElem wire format:
+    {
+        "msg_type": "TIMFaceElem",
+        "msg_content": {
+            "index": 0,          # always 0 per Yuanbao convention
+            "data": "<json>",    # serialised sticker metadata
+        }
+    }
+
+The `data` field carries a JSON string with the sticker's metadata so the
+receiver can look up the correct asset in the emoji pack.
+"""
+
+from __future__ import annotations
+
+import json
+import random
+import re
+import unicodedata
+from typing import Optional
+
+# ---------------------------------------------------------------------------
+# Sticker catalogue – ported from builtin-stickers.json
+# Key   : canonical sticker name (mostly Chinese; same string as the entry's "name")
+# Value : {sticker_id, package_id, name, description, width, height, formats}
+# ---------------------------------------------------------------------------
+STICKER_MAP: dict[str, dict] = {
+    "六六六": {
+        "sticker_id": "278", "package_id": "1003", "name": "六六六",
+        "description": "666 厉害 牛 棒 绝了 好强 awesome",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "我想开了": {
+        "sticker_id": "262", "package_id": "1003", "name": "我想开了",
+        "description": "想开 佛系 释怀 顿悟 看淡了 无所谓",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "害羞": {
+        "sticker_id": "130", "package_id": "1003", "name": "害羞",
+        "description": "腼腆 不好意思 脸红 娇羞 羞涩 捂脸",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "比心": {
+        "sticker_id": "252", "package_id": "1003", "name": "比心",
+        "description": "笔芯 爱你 爱心手势 love heart 喜欢你",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "委屈": {
+        "sticker_id": "125", "package_id": "1003", "name": "委屈",
+        "description": "难过 想哭 可怜巴巴 瘪嘴 受伤 被欺负",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "亲亲": {
+        "sticker_id": "146", "package_id": "1003", "name": "亲亲",
+        "description": "么么 mua 亲一下 kiss 飞吻 啵",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "酷": {
+        "sticker_id": "131", "package_id": "1003", "name": "酷",
+        "description": "帅 墨镜 cool 高冷 有型 swagger",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "睡": {
+        "sticker_id": "145", "package_id": "1003", "name": "睡",
+        "description": "睡觉 困 zzZ 打盹 躺平 休眠 sleepy",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "发呆": {
+        "sticker_id": "152", "package_id": "1003", "name": "发呆",
+        "description": "懵 愣住 放空 呆滞 出神 脑子空白",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "可怜": {
+        "sticker_id": "157", "package_id": "1003", "name": "可怜",
+        "description": "卖萌 求饶 委屈巴巴 弱小 拜托 眼巴巴",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "摊手": {
+        "sticker_id": "200", "package_id": "1003", "name": "摊手",
+        "description": "无奈 没办法 耸肩 随便 那咋整 whatever",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "头大": {
+        "sticker_id": "213", "package_id": "1003", "name": "头大",
+        "description": "头疼 烦恼 郁闷 难搞 崩溃 一团乱",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "吓": {
+        "sticker_id": "256", "package_id": "1003", "name": "吓",
+        "description": "害怕 惊恐 震惊 吓一跳 恐怖 怂",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "吐血": {
+        "sticker_id": "203", "package_id": "1003", "name": "吐血",
+        "description": "无语 崩溃 被雷 内伤 一口老血 屮",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "哼": {
+        "sticker_id": "185", "package_id": "1003", "name": "哼",
+        "description": "傲娇 生气 不满 撇嘴 不理 赌气",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "嘿嘿": {
+        "sticker_id": "220", "package_id": "1003", "name": "嘿嘿",
+        "description": "坏笑 猥琐笑 偷笑 憨笑 得意 你懂的",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "头秃": {
+        "sticker_id": "218", "package_id": "1003", "name": "头秃",
+        "description": "程序员 加班 焦虑 没头发 秃了 肝爆",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "暗中观察": {
+        "sticker_id": "221", "package_id": "1003", "name": "暗中观察",
+        "description": "窥屏 潜水 偷偷看 角落 围观 屏住呼吸",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "我酸了": {
+        "sticker_id": "224", "package_id": "1003", "name": "我酸了",
+        "description": "嫉妒 柠檬精 羡慕 吃柠檬 眼红 恰柠檬",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "打call": {
+        "sticker_id": "246", "package_id": "1003", "name": "打call",
+        "description": "应援 加油 支持 喝彩 助威 call",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "庆祝": {
+        "sticker_id": "251", "package_id": "1003", "name": "庆祝",
+        "description": "祝贺 开心 耶 party 胜利 干杯",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "奋斗": {
+        "sticker_id": "151", "package_id": "1003", "name": "奋斗",
+        "description": "努力 加油 拼搏 冲 干劲 卷起来",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "惊讶": {
+        "sticker_id": "143", "package_id": "1003", "name": "惊讶",
+        "description": "震惊 哇 不敢相信 OMG 居然 这么离谱",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "疑问": {
+        "sticker_id": "144", "package_id": "1003", "name": "疑问",
+        "description": "问号 不懂 啥 为什么 啥情况 懵逼问",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "仔细分析": {
+        "sticker_id": "248", "package_id": "1003", "name": "仔细分析",
+        "description": "思考 推敲 认真 研究 琢磨 让我想想",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "撅嘴": {
+        "sticker_id": "184", "package_id": "1003", "name": "撅嘴",
+        "description": "嘟嘴 卖萌 不高兴 撒娇 嘴翘",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "泪奔": {
+        "sticker_id": "199", "package_id": "1003", "name": "泪奔",
+        "description": "大哭 伤心 破防 感动哭 泪流满面 呜呜",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "尊嘟假嘟": {
+        "sticker_id": "276", "package_id": "1003", "name": "尊嘟假嘟",
+        "description": "真的假的 真假 可爱问 你骗我 是不是",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "略略略": {
+        "sticker_id": "113", "package_id": "1003", "name": "略略略",
+        "description": "调皮 吐舌 不服 略 气死你 鬼脸",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "困": {
+        "sticker_id": "180", "package_id": "1003", "name": "困",
+        "description": "想睡 倦 打哈欠 睁不开眼 好困啊 sleepy",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "折磨": {
+        "sticker_id": "181", "package_id": "1003", "name": "折磨",
+        "description": "难受 痛苦 煎熬 蚌埠住了 受不了 要命",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "抠鼻": {
+        "sticker_id": "182", "package_id": "1003", "name": "抠鼻",
+        "description": "不屑 无聊 淡定 无所谓 鄙视 挖鼻",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "鼓掌": {
+        "sticker_id": "183", "package_id": "1003", "name": "鼓掌",
+        "description": "拍手 叫好 赞同 666 喝彩 掌声",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "斜眼笑": {
+        "sticker_id": "204", "package_id": "1003", "name": "斜眼笑",
+        "description": "滑稽 坏笑 doge 意味深长 阴阳怪气 嘿嘿嘿",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "辣眼睛": {
+        "sticker_id": "216", "package_id": "1003", "name": "辣眼睛",
+        "description": "看不下去 cringe 毁三观 太丑了 瞎了",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "哦哟": {
+        "sticker_id": "217", "package_id": "1003", "name": "哦哟",
+        "description": "惊讶 起哄 哇哦 有戏 不简单 哟",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "吃瓜": {
+        "sticker_id": "222", "package_id": "1003", "name": "吃瓜",
+        "description": "围观 看戏 八卦 路人 看热闹 板凳",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "狗头": {
+        "sticker_id": "225", "package_id": "1003", "name": "狗头",
+        "description": "doge 保命 开玩笑 滑稽 反讽 懂的都懂",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "敬礼": {
+        "sticker_id": "227", "package_id": "1003", "name": "敬礼",
+        "description": "salute 尊重 收到 遵命 致敬 报告",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "哦": {
+        "sticker_id": "231", "package_id": "1003", "name": "哦",
+        "description": "知道了 明白 敷衍 嗯 这样啊 收到",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "拿到红包": {
+        "sticker_id": "236", "package_id": "1003", "name": "拿到红包",
+        "description": "红包 谢谢老板 发财 开心 抢到了 欧气",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "牛吖": {
+        "sticker_id": "239", "package_id": "1003", "name": "牛吖",
+        "description": "牛 厉害 强 666 佩服 大佬",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "贴贴": {
+        "sticker_id": "272", "package_id": "1003", "name": "贴贴",
+        "description": "抱抱 亲昵 蹭蹭 亲密 靠靠 撒娇贴",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "爱心": {
+        "sticker_id": "138", "package_id": "1003", "name": "爱心",
+        "description": "心 love 喜欢你 红心 示爱 么么哒",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "晚安": {
+        "sticker_id": "170", "package_id": "1003", "name": "晚安",
+        "description": "好梦 睡了 night 早点休息 安啦 moon",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "太阳": {
+        "sticker_id": "176", "package_id": "1003", "name": "太阳",
+        "description": "晴天 早上好 阳光 morning 好天气 日",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "柠檬": {
+        "sticker_id": "266", "package_id": "1003", "name": "柠檬",
+        "description": "酸 嫉妒 柠檬精 羡慕 我酸 恰柠檬",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "大冤种": {
+        "sticker_id": "267", "package_id": "1003", "name": "大冤种",
+        "description": "倒霉 吃亏 自嘲 好心没好报 背锅 工具人",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "吐了": {
+        "sticker_id": "132", "package_id": "1003", "name": "吐了",
+        "description": "恶心 yue 受不了 嫌弃 想吐 生理不适",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "怒": {
+        "sticker_id": "134", "package_id": "1003", "name": "怒",
+        "description": "生气 愤怒 火大 暴躁 气炸 怼",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "玫瑰": {
+        "sticker_id": "165", "package_id": "1003", "name": "玫瑰",
+        "description": "花 示爱 表白 浪漫 送你花 情人节",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "凋谢": {
+        "sticker_id": "119", "package_id": "1003", "name": "凋谢",
+        "description": "花谢 失恋 难过 枯萎 心碎 凉了",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "点赞": {
+        "sticker_id": "159", "package_id": "1003", "name": "点赞",
+        "description": "赞 认同 好棒 good like 大拇指 顶",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "握手": {
+        "sticker_id": "164", "package_id": "1003", "name": "握手",
+        "description": "合作 你好 商务 hello deal 成交 友好",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "抱拳": {
+        "sticker_id": "163", "package_id": "1003", "name": "抱拳",
+        "description": "谢谢 失敬 江湖 承让 拜托 有礼",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "ok": {
+        "sticker_id": "169", "package_id": "1003", "name": "ok",
+        "description": "好的 收到 没问题 okay 行 可以 懂了",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "拳头": {
+        "sticker_id": "174", "package_id": "1003", "name": "拳头",
+        "description": "加油 干 冲 fight 力量 击拳 硬气",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "鞭炮": {
+        "sticker_id": "191", "package_id": "1003", "name": "鞭炮",
+        "description": "过年 喜庆 爆竹 春节 噼里啪啦 红",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "烟花": {
+        "sticker_id": "258", "package_id": "1003", "name": "烟花",
+        "description": "庆典 漂亮 新年 嘭 绽放 节日快乐",
+        "width": 128, "height": 128, "formats": "png",
+    },
+}
+
+
+def get_sticker_by_name(name: str) -> Optional[dict]:
+    """
+    Look up a sticker by name, falling back to progressively fuzzier matching.
+
+    Match priority:
+      1. Exact key match in STICKER_MAP.
+      2. Substring match in either direction between the query and a key
+         (first hit in catalogue order).
+      3. The query is a substring of a sticker's description (synonym search).
+      4. Fuzzy scoring via search_stickers(); its top-ranked result is used.
+
+    Returns the sticker dict, or None when the query is empty or
+    whitespace-only, or when search_stickers() yields nothing. Note that
+    search_stickers() still returns entries when no field scores above zero,
+    so step 4 rarely produces None for a non-blank query.
+    """
+    if not name:
+        return None
+
+    query = name.strip()
+    # "" is a substring of every key, so a blank query would otherwise
+    # "match" the first catalogue entry in step 2.
+    if not query:
+        return None
+
+    if query in STICKER_MAP:
+        return STICKER_MAP[query]
+
+    for key, sticker in STICKER_MAP.items():
+        if query in key or key in query:
+            return sticker
+
+    for sticker in STICKER_MAP.values():
+        desc = sticker.get("description", "")
+        if query in desc:
+            return sticker
+
+    matches = search_stickers(query, limit=1)
+    return matches[0] if matches else None
+
+
+def get_random_sticker(category: Optional[str] = None) -> dict:
+    """
+    Return a randomly chosen sticker.
+
+    When *category* is given, the choice is made among stickers whose
+    description or name contains that keyword; if none match (or *category*
+    is None/empty) the choice is made from the whole catalogue.
+    """
+    if category:
+        candidates = [
+            s for s in STICKER_MAP.values()
+            if category in s.get("description", "") or category in s.get("name", "")
+        ]
+        if candidates:
+            return random.choice(candidates)
+    return random.choice(list(STICKER_MAP.values()))
+
+
+def get_sticker_by_id(sticker_id: str) -> Optional[dict]:
+    """Return the sticker whose sticker_id equals *sticker_id* exactly, else None."""
+    if not sticker_id:
+        return None
+    wanted = str(sticker_id).strip()
+    return next(
+        (entry for entry in STICKER_MAP.values() if entry.get("sticker_id") == wanted),
+        None,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Fuzzy search (aligned with searchStickers in chatbot-web
+# yuanbao-openclaw-plugin/sticker-cache.ts)
+# ---------------------------------------------------------------------------
+
+# Matches a run of whitespace, separators and ASCII / full-width punctuation;
+# _compact_text() removes every such run before fuzzy comparison.
+_PUNCT_RE = re.compile(r"[\s\u3000\-_·.,，。!！?？\"“”'‘’、/\\]+")
+
+
+def _normalize_text(raw: str) -> str:
+    """NFKC-normalise, trim and lowercase *raw*; falsy input becomes ""."""
+    text = str(raw or "")
+    folded = unicodedata.normalize("NFKC", text)
+    return folded.strip().lower()
+
+
+def _compact_text(raw: str) -> str:
+    """Normalise *raw* and drop every run matched by _PUNCT_RE."""
+    normalized = _normalize_text(raw)
+    return _PUNCT_RE.sub("", normalized)
+
+
+def _multiset_char_hit_ratio(needle: str, haystack: str) -> float:
+    """
+    Fraction of *needle* characters that can be paired with a distinct
+    occurrence of the same character in *haystack* (order is ignored).
+
+    Returns 0.0 for an empty needle.
+    """
+    if not needle:
+        return 0.0
+
+    # Remaining occurrences of each haystack character.
+    available: dict[str, int] = {}
+    for ch in haystack:
+        available[ch] = available.get(ch, 0) + 1
+
+    matched = 0
+    for ch in needle:
+        left = available.get(ch, 0)
+        if left <= 0:
+            continue
+        # Consume one occurrence so repeated characters are not double-counted.
+        available[ch] = left - 1
+        matched += 1
+    return matched / len(needle)
+
+
+def _bigram_jaccard(a: str, b: str) -> float:
+    """
+    Jaccard similarity of the sets of adjacent character pairs of *a* and *b*.
+
+    Returns 0.0 when either string is shorter than two characters.
+    """
+    if min(len(a), len(b)) < 2:
+        return 0.0
+    grams_a = {left + right for left, right in zip(a, a[1:])}
+    grams_b = {left + right for left, right in zip(b, b[1:])}
+    shared = len(grams_a & grams_b)
+    total = len(grams_a | grams_b)
+    if not total:
+        return 0.0
+    return shared / total
+
+
+def _longest_subsequence_ratio(needle: str, haystack: str) -> float:
+    """
+    Fraction of *needle* matched as an in-order subsequence of *haystack*
+    by a single greedy left-to-right scan.
+
+    Returns 0.0 for an empty needle.
+    """
+    if not needle:
+        return 0.0
+    remaining = iter(haystack)
+    matched = 0
+    for ch in needle:
+        # Membership on an iterator consumes it up to and including the first
+        # occurrence, so each needle character is searched after the last hit.
+        if ch not in remaining:
+            break
+        matched += 1
+    return matched / len(needle)
+
+
+def _score_field(haystack: str, query: str) -> float:
+    """
+    Score how well *query* matches one text field; higher is better.
+
+    The result is the maximum over several signals: exact equality (100),
+    substring (92 plus up to 6 for query length), prefix (88), substring
+    after punctuation is removed (86), and three fuzzy ratios scaled by
+    62 / 58 / 52. Returns 0.0 when either side normalises to "".
+    """
+    hay_norm = _normalize_text(haystack)
+    query_norm = _normalize_text(query)
+    if not (hay_norm and query_norm):
+        return 0.0
+
+    hay_compact = _compact_text(haystack)
+    query_compact = _compact_text(query)
+
+    # Collect every applicable signal, then keep the strongest one.
+    signals = [0.0]
+    if hay_norm == query_norm:
+        signals.append(100.0)
+    if query_norm in hay_norm:
+        signals.append(92 + min(6, len(query_norm)))
+    if len(query_norm) >= 2 and hay_norm.startswith(query_norm):
+        signals.append(88.0)
+    if query_compact and query_compact in hay_compact:
+        signals.append(86.0)
+    signals.append(_multiset_char_hit_ratio(query_compact, hay_compact) * 62)
+    signals.append(_bigram_jaccard(query_compact, hay_compact) * 58)
+    signals.append(_longest_subsequence_ratio(query_compact, hay_compact) * 52)
+    if len(query_norm) == 1 and query_norm in hay_norm:
+        signals.append(68.0)
+    return max(signals)
+
+
+def search_stickers(query: str, limit: int = 10) -> list[dict]:
+    """
+    Rank the built-in stickers against *query* and return the top results.
+
+    Each sticker's score is the best of its name score, its description
+    score (weighted x0.88) and a sticker_id match (100 for equality, 84 for a
+    substring). Results below a floor derived from the top score are dropped,
+    unless that would leave nothing.
+
+    Args:
+        query: Free-text search term. A blank query returns the first entries
+               in catalogue (insertion) order.
+        limit: Maximum number of results; clamped to the range 1..500, and a
+               falsy value means 10.
+
+    Returns:
+        Up to *limit* sticker dicts, best match first.
+    """
+    safe_limit = max(1, min(500, int(limit) if limit else 10))
+
+    # Normalise once; _normalize_text() maps falsy input to "".
+    q_norm = _normalize_text(query)
+    if not q_norm:
+        return list(STICKER_MAP.values())[:safe_limit]
+
+    scored: list[tuple[float, dict]] = []
+    for sticker in STICKER_MAP.values():
+        name_s = _score_field(sticker.get("name", ""), query)
+        desc_s = _score_field(sticker.get("description", ""), query) * 0.88
+
+        id_s = 0.0
+        sid = str(sticker.get("sticker_id", "")).strip()
+        if sid:
+            sid_norm = _normalize_text(sid)
+            if sid_norm == q_norm:
+                id_s = 100.0
+            elif q_norm in sid_norm:
+                id_s = 84.0
+        scored.append((max(name_s, desc_s, id_s), sticker))
+
+    # Stable sort: equally scored stickers keep their catalogue order.
+    scored.sort(key=lambda x: x[0], reverse=True)
+    top = scored[0][0] if scored else 0
+    if top <= 0:
+        return [s for _, s in scored[:safe_limit]]
+
+    # Cut-off below which results are treated as noise; it scales with the
+    # quality of the best hit.
+    if top >= 22:
+        floor = 18.0
+    elif top >= 12:
+        floor = max(10.0, top * 0.5)
+    else:
+        floor = max(6.0, top * 0.35)
+
+    filtered = [pair for pair in scored if pair[0] >= floor]
+    # The floor can exceed a very low top score; fall back to the full ranking.
+    out = filtered if filtered else scored
+    return [s for _, s in out[:safe_limit]]
+
+
+def build_face_msg_body(
+    face_index: int,
+    face_type: int = 1,
+    data: Optional[str] = None,
+) -> list:
+    """
+    Build a TIMFaceElem message body.
+
+    Yuanbao convention (see the module docstring): index is 0 and the sticker
+    is identified through the JSON string carried in data.
+
+    Args:
+        face_index: Value written to msg_content["index"] as-is; Yuanbao
+                    callers pass 0.
+        face_type:  Accepted for compatibility with the older interface; not
+                    used by this function.
+        data:       Already-serialised JSON string; when None the "data" key
+                    is omitted and only index is sent.
+
+    Returns:
+        A one-element msg_body list, e.g.::
+
+            [{"msg_type": "TIMFaceElem", "msg_content": {"index": 0, "data": "..."}}]
+    """
+    if data is None:
+        content: dict = {"index": face_index}
+    else:
+        content = {"index": face_index, "data": data}
+    return [{"msg_type": "TIMFaceElem", "msg_content": content}]
+
+
+def build_sticker_msg_body(sticker: dict) -> list:
+    """
+    Build a TIMFaceElem message body from a STICKER_MAP-style sticker dict.
+
+    The sticker metadata is serialised to compact JSON (non-ASCII characters
+    kept as-is) and passed as the data field with index 0. sticker_id,
+    package_id and name are required keys; width, height and formats default
+    to 128, 128 and "png".
+    """
+    metadata = {
+        "sticker_id": sticker["sticker_id"],
+        "package_id": sticker["package_id"],
+        "width": sticker.get("width", 128),
+        "height": sticker.get("height", 128),
+        "formats": sticker.get("formats", "png"),
+        "name": sticker["name"],
+    }
+    serialized = json.dumps(metadata, ensure_ascii=False, separators=(",", ":"))
+    return build_face_msg_body(face_index=0, data=serialized)
@@ -0,0 +1,150 @@
+"""Gateway runtime-metadata footer.
+
+Renders a compact footer showing runtime state (model, context %, cwd) and
+appends it to the FINAL message of an agent turn when enabled.  Off by default
+to keep replies minimal.
+
+Config (``~/.hermes/config.yaml``)::
+
+    display:
+      runtime_footer:
+        enabled: true                       # off by default
+        fields: [model, context_pct, cwd]   # order shown; drop any to hide
+
+Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
+Users can toggle the global setting with ``/footer on|off`` from both the CLI
+and any gateway platform.
+
+The footer is appended to the final response text in ``gateway/run.py`` right
+before returning the response to the adapter send path — so it only lands on
+the final message a user sees, not on tool-progress updates or streaming
+partials.  When streaming is on and the final text has already been delivered
+piecemeal, the footer is sent as a separate trailing message via
+``send_trailing_footer()``.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
+_SEP = " · "
+
+
+def _home_relative_cwd(cwd: str) -> str:
+    """Absolute form of *cwd* with the home directory shown as ``~``; "" if unset.
+
+    Falls back to returning *cwd* unchanged if path resolution raises.
+    """
+    if not cwd:
+        return ""
+    try:
+        home_dir = os.path.expanduser("~")
+        absolute = os.path.abspath(cwd)
+        # Require a separator after the home prefix so "/home/ab" is not
+        # mistaken for a child of "/home/a".
+        under_home = bool(home_dir) and (
+            absolute == home_dir or absolute.startswith(home_dir + os.sep)
+        )
+        if under_home:
+            return "~" + absolute[len(home_dir):]
+        return absolute
+    except Exception:
+        return cwd
+
+
+def _model_short(model: Optional[str]) -> str:
+    """Strip any ``vendor/`` prefix (``openai/gpt-5.4`` → ``gpt-5.4``); "" if unset."""
+    if model:
+        # Everything after the last "/" (the whole string when there is none).
+        return model.rpartition("/")[2]
+    return ""
+
+
+def resolve_footer_config(
+    user_config: dict[str, Any] | None,
+    platform_key: str | None = None,
+) -> dict[str, Any]:
+    """Compute the effective runtime-footer settings for *platform_key*.
+
+    Layers are applied in this order, later ones overriding earlier ones:
+        1. Built-in defaults (enabled=False, default field list)
+        2. ``display.runtime_footer``
+        3. ``display.platforms.<platform_key>.runtime_footer``
+
+    A layer only overrides ``enabled`` when it has that key, and only
+    overrides ``fields`` when it supplies a non-empty list.
+    """
+    effective: dict[str, Any] = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
+
+    def _overlay(layer: Any) -> None:
+        # Non-dict layers (missing or malformed config) are ignored.
+        if not isinstance(layer, dict):
+            return
+        if "enabled" in layer:
+            effective["enabled"] = bool(layer.get("enabled"))
+        if isinstance(layer.get("fields"), list) and layer["fields"]:
+            effective["fields"] = [str(item) for item in layer["fields"]]
+
+    display = (user_config or {}).get("display") or {}
+    _overlay(display.get("runtime_footer"))
+
+    if platform_key:
+        per_platform = (display.get("platforms") or {}).get(platform_key)
+        if isinstance(per_platform, dict):
+            _overlay(per_platform.get("runtime_footer"))
+
+    return effective
+
+
+def format_runtime_footer(
+    *,
+    model: Optional[str],
+    context_tokens: int,
+    context_length: Optional[int],
+    cwd: Optional[str] = None,
+    fields: Iterable[str] = _DEFAULT_FIELDS,
+) -> str:
+    """Build the footer text from the requested *fields*, in the order given.
+
+    A field contributes nothing when its data is unavailable, and unknown
+    field names are ignored, so the result never contains placeholders.
+    Returns "" when no field produced any text.
+    """
+    rendered: list[str] = []
+    for name in fields:
+        piece = ""
+        if name == "model":
+            piece = _model_short(model)
+        elif name == "context_pct":
+            if context_length and context_length > 0 and context_tokens >= 0:
+                # Clamp to 0..100 so an over-full context still shows 100%.
+                pct = max(0, min(100, round((context_tokens / context_length) * 100)))
+                piece = f"{pct}%"
+        elif name == "cwd":
+            # An explicit cwd wins; otherwise fall back to TERMINAL_CWD.
+            piece = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
+        if piece:
+            rendered.append(piece)
+
+    return _SEP.join(rendered) if rendered else ""
+
+
+def build_footer_line(
+    *,
+    user_config: dict[str, Any] | None,
+    platform_key: str | None,
+    model: Optional[str],
+    context_tokens: int,
+    context_length: Optional[int],
+    cwd: Optional[str] = None,
+) -> str:
+    """Resolve the footer config and render the footer in one call.
+
+    Intended as the entry point for gateway/run.py. Yields "" when the footer
+    is disabled for this platform or when no field has data; the caller is
+    responsible for appending the text to the final response with a blank
+    line of separation.
+    """
+    effective = resolve_footer_config(user_config, platform_key)
+    if effective.get("enabled"):
+        return format_runtime_footer(
+            model=model,
+            context_tokens=context_tokens,
+            context_length=context_length,
+            cwd=cwd,
+            fields=effective.get("fields") or _DEFAULT_FIELDS,
+        )
+    return ""
@@ -62,8 +62,8 @@ from .config import (
 )
 from .whatsapp_identity import (
    canonical_whatsapp_identifier,
-    normalize_whatsapp_identifier,
 )
+from utils import atomic_replace


@dataclass
@@ -310,8 +310,9 @@ def build_session_context_prompt(
            "**Platform notes:** You are running inside Slack. "
            "You do NOT have access to Slack-specific APIs — you cannot search "
            "channel history, pin/unpin messages, manage channels, or list users. "
-            "Do not promise to perform these actions. If the user asks, explain "
-            "that you can only read messages sent directly to you and respond."
+            "Do not promise to perform these actions. The gateway may inline the "
+            "current message's Slack block/attachment payload when available, but "
+            "you still cannot call Slack APIs yourself."
        )
    elif context.source.platform == Platform.DISCORD:
        # Inject the Discord IDs block only when the agent actually has
@@ -353,6 +354,14 @@ def build_session_context_prompt(
            "If the user needs a detailed answer, give the short version first "
            "and offer to elaborate."
        )
+    elif context.source.platform == Platform.YUANBAO:
+        lines.append("")
+        lines.append(
+            "**Platform notes:** You are running inside Yuanbao. "
+            "You CAN send private (DM) messages via the send_message tool. "
+            "Use target='yuanbao:direct:<account_id>' for DM "
+            "and target='yuanbao:group:<group_code>' for group chat."
+        )

    # Connected platforms
    platforms_list = ["local (files on this machine)"]
@@ -696,7 +705,7 @@ class SessionStore:
                json.dump(data, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
-            os.replace(tmp_path, sessions_file)
+            atomic_replace(tmp_path, sessions_file)
        except BaseException:
            try:
                os.unlink(tmp_path)
@@ -1232,6 +1241,7 @@ class SessionStore:
                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
+                    codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1247,24 +1257,11 @@ class SessionStore:
        Used by /retry, /undo, and /compress to persist modified conversation history.
        Rewrites both SQLite and legacy JSONL storage.
        """
-        # SQLite: clear old messages and re-insert
+        # SQLite: replace atomically so a mid-rewrite failure doesn't leave
+        # the session half-empty in the DB while JSONL still has history.
        if self._db:
            try:
-                self._db.clear_messages(session_id)
-                for msg in messages:
-                    role = msg.get("role", "unknown")
-                    self._db.append_message(
-                        session_id=session_id,
-                        role=role,
-                        content=msg.get("content"),
-                        tool_name=msg.get("tool_name"),
-                        tool_calls=msg.get("tool_calls"),
-                        tool_call_id=msg.get("tool_call_id"),
-                        reasoning=msg.get("reasoning") if role == "assistant" else None,
-                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
-                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
-                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
-                    )
+                self._db.replace_messages(session_id, messages)
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
        
@@ -44,6 +44,14 @@ class StreamConsumerConfig:
    buffer_threshold: int = 40
    cursor: str = " ▉"
    buffer_only: bool = False
+    # When >0, the final edit for a streamed response is delivered as a
+    # fresh message if the original preview has been visible for at least
+    # this many seconds.  This makes the platform's visible timestamp
+    # reflect completion time instead of first-token time for long-running
+    # responses (e.g. reasoning models that stream slowly).  Ported from
+    # openclaw/openclaw#72038.  Default 0 = always edit in place (legacy
+    # behavior).  The gateway enables this selectively per-platform.
+    fresh_final_after_seconds: float = 0.0


 class GatewayStreamConsumer:
@@ -83,14 +91,29 @@ class GatewayStreamConsumer:
        chat_id: str,
        config: Optional[StreamConsumerConfig] = None,
        metadata: Optional[dict] = None,
+        on_new_message: Optional[callable] = None,
    ):
        self.adapter = adapter
        self.chat_id = chat_id
        self.cfg = config or StreamConsumerConfig()
        self.metadata = metadata
+        # Fired whenever a fresh content bubble is created on the platform
+        # (first-send of a new message, commentary, overflow chunk, or
+        # fallback continuation). The gateway uses this to linearize the
+        # tool-progress bubble: when content resumes after a tool batch,
+        # the next tool.started should open a NEW progress bubble below
+        # the content, not edit the old bubble above it.
+        # Called with no arguments. Exceptions are swallowed.
+        self._on_new_message = on_new_message
        self._queue: queue.Queue = queue.Queue()
        self._accumulated = ""
        self._message_id: Optional[str] = None
+        # Monotonic-clock timestamp (time.monotonic) when ``_message_id`` was
+        # first assigned from a successful first-send.  Used by the
+        # fresh-final logic to detect long-lived previews whose edit
+        # timestamps would be stale by completion time.  Ported from
+        # openclaw/openclaw#72038.
+        self._message_created_ts: Optional[float] = None
        self._already_sent = False
        self._edit_supported = True  # Disabled when progressive edits are no longer usable
        self._last_edit_time = 0.0
@@ -132,10 +155,21 @@ class GatewayStreamConsumer:
        if text:
            self._queue.put((_COMMENTARY, text))

+    def _notify_new_message(self) -> None:
+        """Fire the on_new_message callback, swallowing any errors."""
+        cb = self._on_new_message
+        if cb is None:
+            return
+        try:
+            cb()
+        except Exception:
+            logger.debug("on_new_message callback error", exc_info=True)
+
    def _reset_segment_state(self, *, preserve_no_edit: bool = False) -> None:
        if preserve_no_edit and self._message_id == "__no_edit__":
            return
        self._message_id = None
+        self._message_created_ts = None
        self._accumulated = ""
        self._last_sent_text = ""
        self._fallback_final_send = False
@@ -514,6 +548,9 @@ class GatewayStreamConsumer:
                self._message_id = str(result.message_id)
                self._already_sent = True
                self._last_sent_text = text
+                # Fresh content bubble — close off any stale tool bubble
+                # above so the next tool starts a new bubble below.
+                self._notify_new_message()
                return str(result.message_id)
            else:
                self._edit_supported = False
@@ -646,6 +683,9 @@ class GatewayStreamConsumer:
            sent_any_chunk = True
            last_successful_chunk = chunk
            last_message_id = result.message_id or last_message_id
+            # Each fallback chunk is a fresh platform message — notify
+            # so any stale tool-progress bubble gets closed off.
+            self._notify_new_message()

        self._message_id = last_message_id
        self._already_sent = True
@@ -729,11 +769,91 @@ class GatewayStreamConsumer:
            # tool..."), not the final response. Setting already_sent would cause
            # the final response to be incorrectly suppressed when there are
            # multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
+            if result.success:
+                # Commentary counts as fresh content — close off any
+                # stale tool bubble above it so the next tool starts a
+                # new bubble below.
+                self._notify_new_message()
            return result.success
        except Exception as e:
            logger.error("Commentary send error: %s", e)
            return False

+    def _should_send_fresh_final(self) -> bool:
+        """Return True when a long-lived preview should be replaced with a
+        fresh final message instead of an edit.
+
+        Conditions:
+        - Fresh-final is enabled (``fresh_final_after_seconds > 0``).
+        - We have a real preview message id (not the ``__no_edit__`` sentinel
+          and not ``None``).
+        - The preview has been visible for at least the configured threshold.
+
+        Ported from openclaw/openclaw#72038.
+        """
+        threshold = getattr(self.cfg, "fresh_final_after_seconds", 0.0) or 0.0
+        if threshold <= 0:
+            return False
+        if not self._message_id or self._message_id == "__no_edit__":
+            return False
+        if self._message_created_ts is None:
+            return False
+        age = time.monotonic() - self._message_created_ts
+        return age >= threshold
+
+    async def _try_fresh_final(self, text: str) -> bool:
+        """Send ``text`` as a brand-new message (best-effort delete the old
+        preview) so the platform's visible timestamp reflects completion
+        time.  Returns True on successful delivery, False on any failure so
+        the caller falls back to the normal edit path.
+
+        Ported from openclaw/openclaw#72038.
+        """
+        old_message_id = self._message_id
+        try:
+            result = await self.adapter.send(
+                chat_id=self.chat_id,
+                content=text,
+                metadata=self.metadata,
+            )
+        except Exception as e:
+            logger.debug("Fresh-final send failed, falling back to edit: %s", e)
+            return False
+        if not getattr(result, "success", False):
+            return False
+        # Successful fresh send — try to delete the stale preview so the
+        # user doesn't see the old edit-stuck message underneath.  Cleanup
+        # is best-effort; platforms that don't implement ``delete_message``
+        # just leave the preview behind (still an acceptable outcome —
+        # the visible final timestamp is the important part).
+        if old_message_id and old_message_id != "__no_edit__":
+            delete_fn = getattr(self.adapter, "delete_message", None)
+            if delete_fn is not None:
+                try:
+                    await delete_fn(self.chat_id, old_message_id)
+                except Exception as e:
+                    logger.debug(
+                        "Fresh-final preview cleanup failed (%s): %s",
+                        old_message_id, e,
+                    )
+        # Adopt the new message id as the current message so subsequent
+        # callers (e.g. overflow split loops, finalize retries) see a
+        # consistent state.
+        new_message_id = getattr(result, "message_id", None)
+        if new_message_id:
+            self._message_id = new_message_id
+            self._message_created_ts = time.monotonic()
+        else:
+            # Send succeeded but platform didn't return an id — treat the
+            # delivery as final-only and fall back to "__no_edit__" so we
+            # don't try to edit something we can't address.
+            self._message_id = "__no_edit__"
+            self._message_created_ts = None
+        self._already_sent = True
+        self._last_sent_text = text
+        self._final_response_sent = True
+        return True
+
    async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool:
        """Send or edit the streaming message.

@@ -786,6 +906,22 @@ class GatewayStreamConsumer:
                        finalize and self._adapter_requires_finalize
                    ):
                        return True
+                    # Fresh-final for long-lived previews: when finalizing
+                    # the last edit in a streaming sequence, if the
+                    # original preview has been visible for at least
+                    # ``fresh_final_after_seconds``, send the completed
+                    # reply as a fresh message so the platform's visible
+                    # timestamp reflects completion time instead of the
+                    # preview creation time.  Best-effort cleanup of the
+                    # old preview follows.  Ported from
+                    # openclaw/openclaw#72038.  Gated by config so the
+                    # legacy edit-in-place path stays the default.
+                    if (
+                        finalize
+                        and self._should_send_fresh_final()
+                        and await self._try_fresh_final(text)
+                    ):
+                        return True
                    # Edit existing message
                    result = await self.adapter.edit_message(
                        chat_id=self.chat_id,
@@ -852,6 +988,10 @@ class GatewayStreamConsumer:
                if result.success:
                    if result.message_id:
                        self._message_id = result.message_id
+                        # Track when the preview first became visible to
+                        # the user so fresh-final logic can detect stale
+                        # preview timestamps on long-running responses.
+                        self._message_created_ts = time.monotonic()
                    else:
                        self._edit_supported = False
                    self._already_sent = True
@@ -863,6 +1003,11 @@ class GatewayStreamConsumer:
                        # every delta/tool boundary when platforms accept a
                        # message but do not return an editable message id.
                        self._message_id = "__no_edit__"
+                    # Notify the gateway that a fresh content bubble was
+                    # created so any accumulated tool-progress bubble above
+                    # gets closed off — the next tool fires into a new
+                    # bubble below, preserving chronological order.
+                    self._notify_new_message()
                    return True
                else:
                    # Initial send failed — disable streaming for this session
@@ -31,8 +31,17 @@ Hermes' own session keys.
 from __future__ import annotations

 import json
+import logging
+import re
 from typing import Set

+logger = logging.getLogger(__name__)
+
+# Allow-list for WhatsApp identifiers: ASCII letters and digits plus the
+# ``@``, ``.``, ``+`` and ``-`` characters. The class is spelled out (no
+# ``\w``) so full-width digits / Unicode word chars can't sneak through.
+_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$")
+
 from hermes_constants import get_hermes_home


@@ -81,6 +90,16 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
        current = queue.pop(0)
        if not current or current in resolved:
            continue
+        # Defense-in-depth: reject identifiers that could sneak path
+        # separators / traversal segments into the ``lid-mapping-{current}``
+        # filename below. The hardcoded ``lid-mapping-`` prefix already
+        # prevents escape via pathlib's component split (an attacker can't
+        # create ``lid-mapping-..`` as a real directory in session_dir), but
+        # this keeps the identifier space to the characters WhatsApp JIDs
+        # actually use and avoids depending on that filesystem-layout
+        # invariant.
+        if not _SAFE_IDENTIFIER_RE.match(current):
+            continue

        resolved.add(current)
        for suffix in ("", "_reverse"):
@@ -91,7 +110,8 @@ def expand_whatsapp_aliases(identifier: str) -> Set[str]:
                mapped = normalize_whatsapp_identifier(
                    json.loads(mapping_path.read_text(encoding="utf-8"))
                )
-            except Exception:
+            except (OSError, json.JSONDecodeError) as exc:
+                logger.debug("whatsapp_identity: failed to read %s: %s", mapping_path, exc)
                continue
            if mapped and mapped not in resolved:
                queue.append(mapped)
@@ -43,6 +43,7 @@ import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -71,6 +72,14 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
 DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113"
+MINIMAX_OAUTH_SCOPE = "group_id profile model.completion"
+MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code"
+MINIMAX_OAUTH_GLOBAL_BASE = "https://api.minimax.io"
+MINIMAX_OAUTH_CN_BASE = "https://api.minimaxi.com"
+MINIMAX_OAUTH_GLOBAL_INFERENCE = "https://api.minimax.io/anthropic"
+MINIMAX_OAUTH_CN_INFERENCE = "https://api.minimaxi.com/anthropic"
+MINIMAX_OAUTH_REFRESH_SKEW_SECONDS = 60
 DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
@@ -109,6 +118,12 @@ SERVICE_PROVIDER_NAMES: Dict[str, str] = {
 DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
 GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60  # refresh 60s before expiry

+# LM Studio's default no-auth mode still requires *some* non-empty bearer for
+# the API-key code paths (auxiliary_client, runtime resolver) to treat the
+# provider as configured. This sentinel is sent only to LM Studio, never to
+# any remote service.
+LMSTUDIO_NOAUTH_PLACEHOLDER = "dummy-lm-api-key"
+

 # =============================================================================
 # Provider Registry
@@ -119,7 +134,7 @@ class ProviderConfig:
    """Describes a known inference provider."""
    id: str
    name: str
-    auth_type: str  # "oauth_device_code", "oauth_external", or "api_key"
+    auth_type: str  # "oauth_device_code", "oauth_external", "oauth_minimax", or "api_key"
    portal_base_url: str = ""
    inference_base_url: str = ""
    client_id: str = ""
@@ -159,6 +174,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="oauth_external",
        inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
    ),
+    "lmstudio": ProviderConfig(
+        id="lmstudio",
+        name="LM Studio",
+        auth_type="api_key",
+        inference_base_url="http://127.0.0.1:1234/v1",
+        api_key_env_vars=("LM_API_KEY",),
+        base_url_env_var="LM_BASE_URL",
+    ),
    "copilot": ProviderConfig(
        id="copilot",
        name="GitHub Copilot",
@@ -224,6 +247,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("ARCEEAI_API_KEY",),
        base_url_env_var="ARCEE_BASE_URL",
    ),
+    "gmi": ProviderConfig(
+        id="gmi",
+        name="GMI Cloud",
+        auth_type="api_key",
+        inference_base_url="https://api.gmi-serving.com/v1",
+        api_key_env_vars=("GMI_API_KEY",),
+        base_url_env_var="GMI_BASE_URL",
+    ),
    "minimax": ProviderConfig(
        id="minimax",
        name="MiniMax",
@@ -232,6 +263,17 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("MINIMAX_API_KEY",),
        base_url_env_var="MINIMAX_BASE_URL",
    ),
+    "minimax-oauth": ProviderConfig(
+        id="minimax-oauth",
+        name="MiniMax (OAuth \u00b7 minimax.io)",
+        auth_type="oauth_minimax",
+        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
+        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
+        client_id=MINIMAX_OAUTH_CLIENT_ID,
+        scope=MINIMAX_OAUTH_SCOPE,
+        extra={"region": "global", "cn_portal_base_url": MINIMAX_OAUTH_CN_BASE,
+               "cn_inference_base_url": MINIMAX_OAUTH_CN_INFERENCE},
+    ),
    "anthropic": ProviderConfig(
        id="anthropic",
        name="Anthropic",
@@ -340,6 +382,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XIAOMI_API_KEY",),
        base_url_env_var="XIAOMI_BASE_URL",
    ),
+    "tencent-tokenhub": ProviderConfig(
+        id="tencent-tokenhub",
+        name="Tencent TokenHub",
+        auth_type="api_key",
+        inference_base_url="https://tokenhub.tencentmaas.com/v1",
+        api_key_env_vars=("TOKENHUB_API_KEY",),
+        base_url_env_var="TOKENHUB_BASE_URL",
+    ),
    "ollama-cloud": ProviderConfig(
        id="ollama-cloud",
        name="Ollama Cloud",
@@ -356,6 +406,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=(),
        base_url_env_var="BEDROCK_BASE_URL",
    ),
+    "azure-foundry": ProviderConfig(
+        id="azure-foundry",
+        name="Azure Foundry",
+        auth_type="api_key",
+        inference_base_url="",  # User-provided endpoint
+        api_key_env_vars=("AZURE_FOUNDRY_API_KEY",),
+        base_url_env_var="AZURE_FOUNDRY_BASE_URL",
+    ),
 }


@@ -459,11 +517,27 @@ def _resolve_api_key_provider_secret(
            pass
        return "", ""

+    from hermes_cli.config import get_env_value
    for env_var in pconfig.api_key_env_vars:
-        val = os.getenv(env_var, "").strip()
+        # Check both os.environ and ~/.hermes/.env file
+        val = (get_env_value(env_var) or "").strip()
        if has_usable_secret(val):
            return val, env_var

+    # Fallback: try credential pool (e.g. zai key stored via auth.json)
+    try:
+        from agent.credential_pool import load_pool
+        pool = load_pool(provider_id)
+        if pool and pool.has_credentials():
+            entry = pool.peek()
+            if entry:
+                key = getattr(entry, "access_token", "") or getattr(entry, "runtime_api_key", "")
+                key = str(key).strip()
+                if has_usable_secret(key):
+                    return key, f"credential_pool:{provider_id}"
+    except Exception:
+        pass
+
    return "", ""


@@ -788,7 +862,7 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
            handle.write(payload)
            handle.flush()
            os.fsync(handle.fileno())
-        os.replace(tmp_path, auth_file)
+        atomic_replace(tmp_path, auth_file)
        try:
            dir_fd = os.open(str(auth_file.parent), os.O_RDONLY)
        except OSError:
@@ -1096,7 +1170,9 @@ def resolve_provider(
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
+        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
+        "minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -1108,11 +1184,13 @@ def resolve_provider(
        "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
+        "tencent": "tencent-tokenhub", "tokenhub": "tencent-tokenhub",
+        "tencent-cloud": "tencent-tokenhub", "tencentmaas": "tencent-tokenhub",
        "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
+        "lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio",
        # Local server aliases — route through the generic custom provider
-        "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
        "ollama": "custom", "ollama_cloud": "ollama-cloud",
        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
@@ -1159,8 +1237,11 @@ def resolve_provider(
            continue
        # GitHub tokens are commonly present for repo/tool access but should not
        # hijack inference auto-selection unless the user explicitly chooses
-        # Copilot/GitHub Models as the provider.
-        if pid == "copilot":
+        # Copilot/GitHub Models as the provider. LM Studio is a local server
+        # whose availability isn't implied by LM_API_KEY presence (it may be
+        # offline, and the no-auth setup uses a placeholder value), so it
+        # also requires explicit selection.
+        if pid in ("copilot", "lmstudio"):
            continue
        for env_var in pconfig.api_key_env_vars:
            if has_usable_secret(os.getenv(env_var, "")):
@@ -3438,6 +3519,13 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    key_source = ""
    api_key, key_source = _resolve_api_key_provider_secret(provider_id, pconfig)

+    # No-auth LM Studio: substitute a placeholder so runtime / auxiliary_client
+    # see the local server as configured. doctor still reports unconfigured
+    # because get_api_key_provider_status uses the raw secret resolver.
+    if not api_key and provider_id == "lmstudio":
+        api_key = LMSTUDIO_NOAUTH_PLACEHOLDER
+        key_source = key_source or "default"
+
    env_url = ""
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()
@@ -4048,6 +4136,326 @@ def _codex_device_code_login() -> Dict[str, Any]:
    }


+# ==================== MiniMax Portal OAuth ====================
+
+def _minimax_pkce_pair() -> tuple:
+    """Build fresh PKCE material for one MiniMax OAuth attempt.
+
+    Returns:
+        A ``(code_verifier, code_challenge, state)`` tuple.  The challenge is
+        the unpadded URL-safe base64 encoding of the verifier's SHA-256
+        digest (the ``S256`` method); ``state`` is a separate random token.
+    """
+    from secrets import token_urlsafe
+
+    # The verifier is random URL-safe text, capped at 96 characters.
+    code_verifier = token_urlsafe(64)[:96]
+    digest = hashlib.sha256(code_verifier.encode()).digest()
+    code_challenge = base64.urlsafe_b64encode(digest).decode().rstrip("=")
+    return code_verifier, code_challenge, token_urlsafe(16)
+
+
+def _minimax_request_user_code(
+    client: httpx.Client, *, portal_base_url: str, client_id: str,
+    code_challenge: str, state: str,
+) -> Dict[str, Any]:
+    """Start the MiniMax OAuth flow by requesting a user code.
+
+    POSTs the PKCE challenge and ``state`` to ``{portal_base_url}/oauth/code``
+    and returns the decoded JSON payload.
+
+    Raises:
+        AuthError: on a non-200 response (``authorization_failed``), when
+            ``user_code``, ``verification_uri`` or ``expired_in`` is absent
+            (``authorization_incomplete``), or when the echoed ``state``
+            differs from the one that was sent (``state_mismatch``).
+    """
+    form = {
+        "response_type": "code",
+        "client_id": client_id,
+        "scope": MINIMAX_OAUTH_SCOPE,
+        "code_challenge": code_challenge,
+        "code_challenge_method": "S256",
+        "state": state,
+    }
+    request_headers = {
+        "Content-Type": "application/x-www-form-urlencoded",
+        "Accept": "application/json",
+        "x-request-id": str(uuid.uuid4()),
+    }
+    response = client.post(
+        f"{portal_base_url}/oauth/code", data=form, headers=request_headers,
+    )
+    if response.status_code != 200:
+        detail = response.text or response.reason_phrase
+        raise AuthError(
+            f"MiniMax OAuth authorization failed: {detail}",
+            provider="minimax-oauth", code="authorization_failed",
+        )
+
+    payload = response.json()
+    required = ("user_code", "verification_uri", "expired_in")
+    absent = [name for name in required if name not in payload]
+    if absent:
+        # Report the first missing field, in declaration order.
+        raise AuthError(
+            f"MiniMax OAuth response missing field: {absent[0]}",
+            provider="minimax-oauth", code="authorization_incomplete",
+        )
+    # The server must echo back the exact state value we generated.
+    if payload.get("state") != state:
+        raise AuthError(
+            "MiniMax OAuth state mismatch (possible CSRF).",
+            provider="minimax-oauth", code="state_mismatch",
+        )
+    return payload
+
+
+def _minimax_poll_token(
+    client: httpx.Client, *, portal_base_url: str, client_id: str,
+    user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
+) -> Dict[str, Any]:
+    """Poll ``{portal_base_url}/oauth/token`` until approval or the deadline.
+
+    Args:
+        client: HTTP client used for every poll request.
+        portal_base_url: Portal root; ``/oauth/token`` is appended to it.
+        client_id: OAuth client identifier sent with each poll.
+        user_code: Code obtained from the ``/oauth/code`` step.
+        code_verifier: PKCE verifier matching the challenge sent earlier.
+        expired_in: Either a unix-millisecond deadline or a duration in
+            seconds; which one is decided heuristically below.
+        interval_ms: Requested delay between polls in milliseconds.
+            ``None`` or ``0`` means 2000 ms; the delay is never below 2 s.
+
+    Returns:
+        The token payload once ``status == "success"`` and it carries truthy
+        ``access_token``, ``refresh_token`` and ``expired_in`` values.
+
+    Raises:
+        AuthError: ``token_exchange_failed`` on a non-200 response,
+            ``authorization_denied`` when ``status == "error"``,
+            ``token_incomplete`` when a success payload lacks a required
+            field, and ``timeout`` when the deadline passes first.
+    """
+    # OpenClaw treats expired_in as a unix-ms timestamp (Date.now() < expireTimeMs).
+    # Defensive parsing: if it's small enough to be a duration, treat as seconds.
+    import time as _time
+    now_ms = int(_time.time() * 1000)
+    if expired_in > now_ms // 2:
+        # Looks like a unix-ms timestamp.
+        deadline = expired_in / 1000.0
+    else:
+        # Treat as duration in seconds from now.
+        deadline = _time.time() + max(1, expired_in)
+    # Poll no faster than once every two seconds.
+    interval = max(2.0, (interval_ms or 2000) / 1000.0)
+
+    while _time.time() < deadline:
+        response = client.post(
+            f"{portal_base_url}/oauth/token",
+            data={
+                "grant_type": MINIMAX_OAUTH_GRANT_TYPE,
+                "client_id": client_id,
+                "user_code": user_code,
+                "code_verifier": code_verifier,
+            },
+            headers={
+                "Content-Type": "application/x-www-form-urlencoded",
+                "Accept": "application/json",
+            },
+        )
+        # An empty or non-JSON body is treated as an empty payload.
+        try:
+            payload = response.json() if response.text else {}
+        except Exception:
+            payload = {}
+
+        if response.status_code != 200:
+            # Prefer the structured status message, else the raw body text.
+            msg = (payload.get("base_resp", {}) or {}).get("status_msg") or response.text
+            raise AuthError(
+                f"MiniMax OAuth error: {msg or 'unknown'}",
+                provider="minimax-oauth", code="token_exchange_failed",
+            )
+
+        status = payload.get("status")
+        if status == "error":
+            raise AuthError(
+                "MiniMax OAuth reported an error. Please try again later.",
+                provider="minimax-oauth", code="authorization_denied",
+            )
+        if status == "success":
+            if not all(payload.get(k) for k in ("access_token", "refresh_token", "expired_in")):
+                raise AuthError(
+                    "MiniMax OAuth success payload missing required token fields.",
+                    provider="minimax-oauth", code="token_incomplete",
+                )
+            return payload
+        # "pending" or any other status -> keep polling
+        _time.sleep(interval)
+
+    raise AuthError(
+        "MiniMax OAuth timed out before authorization completed.",
+        provider="minimax-oauth", code="timeout",
+    )
+
+
+def _minimax_save_auth_state(auth_state: Dict[str, Any]) -> None:
+    """Store ``auth_state`` under ``minimax-oauth`` in the Hermes auth store
+    (~/.hermes/auth.json).
+
+    The load / update / save sequence runs while holding the auth-store lock.
+    """
+    provider_key = "minimax-oauth"
+    with _auth_store_lock():
+        store = _load_auth_store()
+        _save_provider_state(store, provider_key, auth_state)
+        _save_auth_store(store)
+
+
+def _minimax_oauth_login(
+    *, region: str = "global", open_browser: bool = True,
+    timeout_seconds: float = 15.0,
+) -> Dict[str, Any]:
+    """Run the MiniMax OAuth flow, persist the tokens, and return the auth state.
+
+    Args:
+        region: ``"cn"`` selects the CN portal / inference URLs from the
+            provider config's ``extra`` mapping; any other value uses the
+            provider's default URLs.
+        open_browser: Try to open the verification URL in a browser.  Forced
+            off when ``_is_remote_session()`` reports a remote session.
+        timeout_seconds: Timeout applied to the HTTP client used for the
+            code request and every poll.
+
+    Returns:
+        The auth-state dict that was saved via ``_minimax_save_auth_state``.
+
+    Raises:
+        AuthError: propagated from the user-code request or token polling.
+    """
+    pconfig = PROVIDER_REGISTRY["minimax-oauth"]
+    if region == "cn":
+        portal_base_url = pconfig.extra["cn_portal_base_url"]
+        inference_base_url = pconfig.extra["cn_inference_base_url"]
+    else:
+        portal_base_url = pconfig.portal_base_url
+        inference_base_url = pconfig.inference_base_url
+
+    verifier, challenge, state = _minimax_pkce_pair()
+
+    if _is_remote_session():
+        open_browser = False
+
+    print(f"Starting Hermes login via MiniMax ({region}) OAuth...")
+    print(f"Portal: {portal_base_url}")
+
+    with httpx.Client(timeout=httpx.Timeout(timeout_seconds),
+                      headers={"Accept": "application/json"}) as client:
+        code_data = _minimax_request_user_code(
+            client, portal_base_url=portal_base_url,
+            client_id=pconfig.client_id,
+            code_challenge=challenge, state=state,
+        )
+        verification_url = str(code_data["verification_uri"])
+        user_code = str(code_data["user_code"])
+
+        print()
+        print("To continue:")
+        print(f"  1. Open: {verification_url}")
+        print(f"  2. If prompted, enter code: {user_code}")
+        if open_browser:
+            if webbrowser.open(verification_url):
+                print("  (Opened browser for verification)")
+            else:
+                print("  Could not open browser automatically -- use the URL above.")
+
+        interval_raw = code_data.get("interval")
+        interval_ms = int(interval_raw) if interval_raw is not None else None
+        print("Waiting for approval...")
+
+        token_data = _minimax_poll_token(
+            client, portal_base_url=portal_base_url,
+            client_id=pconfig.client_id,
+            user_code=user_code, code_verifier=verifier,
+            expired_in=int(code_data["expired_in"]),
+            interval_ms=interval_ms,
+        )
+
+    now = datetime.now(timezone.utc)
+    now_s = now.timestamp()
+    # Same defensive parsing as _minimax_poll_token: ``expired_in`` may be a
+    # unix-ms timestamp rather than a duration.  Adding a timestamp-sized
+    # value to "now" would overflow datetime.fromtimestamp() below and lose
+    # tokens that were already issued.
+    raw_expiry = int(token_data["expired_in"])
+    if raw_expiry > int(now_s * 1000) // 2:
+        # Looks like a unix-ms timestamp.
+        expires_at = raw_expiry / 1000.0
+        expires_in_s = max(0, int(expires_at - now_s))
+    else:
+        # Treat as duration in seconds from now.
+        expires_in_s = raw_expiry
+        expires_at = now_s + expires_in_s
+
+    auth_state = {
+        "provider": "minimax-oauth",
+        "region": region,
+        "portal_base_url": portal_base_url,
+        "inference_base_url": inference_base_url,
+        "client_id": pconfig.client_id,
+        "scope": MINIMAX_OAUTH_SCOPE,
+        "token_type": token_data.get("token_type", "Bearer"),
+        "access_token": token_data["access_token"],
+        "refresh_token": token_data["refresh_token"],
+        "resource_url": token_data.get("resource_url"),
+        "obtained_at": now.isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
+        "expires_in": expires_in_s,
+    }
+
+    _minimax_save_auth_state(auth_state)
+    print("\u2713 MiniMax OAuth login successful.")
+    if msg := token_data.get("notification_message"):
+        print(f"Note from MiniMax: {msg}")
+    return auth_state
+
+
+def _refresh_minimax_oauth_state(
+    state: Dict[str, Any], *, timeout_seconds: float = 15.0,
+    force: bool = False,
+) -> Dict[str, Any]:
+    """Refresh the MiniMax OAuth access token if close to expiry (or forced).
+
+    Args:
+        state: Stored auth state; must contain ``refresh_token``,
+            ``portal_base_url`` and ``client_id``.
+        timeout_seconds: Timeout for the refresh HTTP request.
+        force: Refresh even when the stored token is not near expiry.
+
+    Returns:
+        ``state`` unchanged when no refresh was needed, otherwise a new dict
+        with updated token fields (also persisted to the auth store).
+
+    Raises:
+        AuthError: ``no_refresh_token`` when nothing can be refreshed, or
+            ``refresh_failed`` when the portal rejects the refresh or returns
+            an unusable payload.
+    """
+    if not state.get("refresh_token"):
+        raise AuthError(
+            "MiniMax OAuth state has no refresh_token; please re-login.",
+            provider="minimax-oauth", code="no_refresh_token", relogin_required=True,
+        )
+    try:
+        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+    except Exception:
+        # Missing or unparsable expiry: treat the token as already expired.
+        expires_at = 0.0
+    now = time.time()
+    if not force and (expires_at - now) > MINIMAX_OAUTH_REFRESH_SKEW_SECONDS:
+        return state
+
+    portal_base_url = state["portal_base_url"]
+    with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client:
+        response = client.post(
+            f"{portal_base_url}/oauth/token",
+            data={
+                "grant_type": "refresh_token",
+                "client_id": state["client_id"],
+                "refresh_token": state["refresh_token"],
+            },
+            headers={
+                "Content-Type": "application/x-www-form-urlencoded",
+                "Accept": "application/json",
+            },
+        )
+    if response.status_code != 200:
+        body = response.text.lower()
+        # These markers mean the refresh token itself is no longer usable.
+        relogin = any(m in body for m in
+                      ("invalid_grant", "refresh_token_reused", "invalid_refresh_token"))
+        raise AuthError(
+            f"MiniMax OAuth refresh failed: {response.text or response.reason_phrase}",
+            provider="minimax-oauth", code="refresh_failed",
+            relogin_required=relogin,
+        )
+    payload = response.json()
+    if payload.get("status") != "success":
+        raise AuthError(
+            "MiniMax OAuth refresh did not return success.",
+            provider="minimax-oauth", code="refresh_failed",
+            relogin_required=True,
+        )
+    # Surface an incomplete payload as AuthError rather than a bare KeyError.
+    if not payload.get("access_token") or payload.get("expired_in") is None:
+        raise AuthError(
+            "MiniMax OAuth refresh payload missing required token fields.",
+            provider="minimax-oauth", code="refresh_failed",
+        )
+    now_dt = datetime.now(timezone.utc)
+    now_s = now_dt.timestamp()
+    # Same defensive parsing as _minimax_poll_token: ``expired_in`` may be a
+    # unix-ms timestamp rather than a duration in seconds.
+    raw_expiry = int(payload["expired_in"])
+    if raw_expiry > int(now_s * 1000) // 2:
+        new_expires_at = raw_expiry / 1000.0
+        expires_in_s = max(0, int(new_expires_at - now_s))
+    else:
+        expires_in_s = raw_expiry
+        new_expires_at = now_s + expires_in_s
+    new_state = dict(state)
+    new_state.update({
+        "access_token": payload["access_token"],
+        # Keep the stored refresh token when the response omits it or sends
+        # an explicit null, so the next refresh is still possible.
+        "refresh_token": payload.get("refresh_token") or state["refresh_token"],
+        "obtained_at": now_dt.isoformat(),
+        "expires_at": datetime.fromtimestamp(new_expires_at,
+                                             tz=timezone.utc).isoformat(),
+        "expires_in": expires_in_s,
+    })
+    _minimax_save_auth_state(new_state)
+    return new_state
+
+
+def resolve_minimax_oauth_runtime_credentials(
+    *, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
+) -> Dict[str, Any]:
+    """Return {provider, api_key, base_url, source} for minimax-oauth.
+
+    Args:
+        min_token_ttl_seconds: Minimum remaining lifetime the returned access
+            token should have.  When the stored token has less than this
+            left, a refresh is forced.  The refresh helper additionally
+            applies its own ``MINIMAX_OAUTH_REFRESH_SKEW_SECONDS`` threshold,
+            so values below that constant have no extra effect.
+
+    Raises:
+        AuthError: ``not_logged_in`` when no access token is stored, plus
+            anything raised by ``_refresh_minimax_oauth_state``.
+    """
+    state = get_provider_auth_state("minimax-oauth")
+    if not state or not state.get("access_token"):
+        raise AuthError(
+            "Not logged into MiniMax OAuth. Run `hermes model` and select "
+            "MiniMax (OAuth).",
+            provider="minimax-oauth", code="not_logged_in", relogin_required=True,
+        )
+    # Honour the caller's TTL requirement (previously accepted but ignored).
+    try:
+        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+    except Exception:
+        # Missing or unparsable expiry: treat the token as already expired.
+        expires_at = 0.0
+    needs_refresh = (expires_at - time.time()) <= min_token_ttl_seconds
+    state = _refresh_minimax_oauth_state(state, force=needs_refresh)
+    return {
+        "provider": "minimax-oauth",
+        "api_key": state["access_token"],
+        "base_url": state["inference_base_url"].rstrip("/"),
+        "source": "oauth",
+    }
+
+
+def get_minimax_oauth_auth_status() -> Dict[str, Any]:
+    """Summarise the stored MiniMax OAuth login state.
+
+    Returns a dict with ``logged_in`` and ``provider``; when an access token
+    is stored it also carries ``region`` and ``expires_at``.  ``logged_in``
+    reflects whether the stored expiry is still in the future; if the expiry
+    cannot be parsed, the presence of an access token decides.
+    """
+    state = get_provider_auth_state("minimax-oauth")
+    if not (state and state.get("access_token")):
+        return {"logged_in": False, "provider": "minimax-oauth"}
+
+    try:
+        expiry_ts = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+    except Exception:
+        still_valid = bool(state.get("access_token"))
+    else:
+        still_valid = (expiry_ts - time.time()) > 0
+
+    status: Dict[str, Any] = {
+        "logged_in": still_valid,
+        "provider": "minimax-oauth",
+    }
+    status["region"] = state.get("region", "global")
+    status["expires_at"] = state.get("expires_at")
+    return status
+
+
+def _login_minimax_oauth(args, pconfig: ProviderConfig) -> None:
+    """CLI entry point: run the MiniMax OAuth login using parsed ``args``.
+
+    Reads the optional ``region``, ``no_browser`` and ``timeout`` attributes
+    from ``args`` (defaults: ``"global"``, browser enabled, 15 s).  On
+    ``AuthError`` the formatted error is printed and the process exits with
+    status 1.
+    """
+    login_kwargs = {
+        "region": getattr(args, "region", None) or "global",
+        "open_browser": not getattr(args, "no_browser", False),
+        "timeout_seconds": getattr(args, "timeout", None) or 15.0,
+    }
+    try:
+        _minimax_oauth_login(**login_kwargs)
+    except AuthError as exc:
+        print(format_auth_error(exc))
+        raise SystemExit(1)
+
+
 def _nous_device_code_login(
    *,
    portal_base_url: Optional[str] = None,
@@ -4236,10 +4644,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                _PROVIDER_MODELS, get_pricing_for_provider,
+                get_curated_nous_model_ids, get_pricing_for_provider,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
-            model_ids = _PROVIDER_MODELS.get("nous", [])
+            model_ids = get_curated_nous_model_ids()

            print()
            unavailable_models: list = []
@@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL


 # Providers that support OAuth login in addition to API keys.
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}


 def _get_custom_provider_names() -> list:
@@ -170,7 +170,7 @@ def auth_add_command(args) -> None:
        if provider.startswith(CUSTOM_POOL_PREFIX):
            requested_type = AUTH_TYPE_API_KEY
        else:
-            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY
+            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY

    pool = load_pool(provider)

@@ -333,6 +333,27 @@ def auth_add_command(args) -> None:
        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
        return

+    if provider == "minimax-oauth":
+        from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
+        creds = resolve_minimax_oauth_runtime_credentials()
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["api_key"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:minimax_oauth",
+            access_token=creds["api_key"],
+            base_url=creds.get("base_url"),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
    raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")


@@ -0,0 +1,300 @@
+"""Azure Foundry endpoint auto-detection.
+
+Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
+  - API transport (OpenAI-style ``chat_completions`` vs
+    Anthropic-style ``anthropic_messages``)
+  - Available models (best effort — Azure does not expose a deployment
+    listing via the inference API key, but Azure OpenAI v1 endpoints
+    return the resource's model catalog via ``GET /models``)
+  - Context length for each discovered/entered model, via the existing
+    :func:`agent.model_metadata.get_model_context_length` resolver.
+
+Rationale:
+
+Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
+deployment enumeration requires ARM management-plane auth.  Azure
+OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
+a ``/models`` list, but it reflects the resource's *available* models
+rather than the user's *deployed* deployment names.  In practice it is
+still a useful hint — the user picks a familiar model name and we look
+up its context length from the catalog.
+
+The detector never crashes on errors (every HTTP call is wrapped in a
+broad try/except).  Callers get a :class:`DetectionResult` with whatever
+information could be gathered, and fall back to manual entry for the
+rest.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from typing import Optional
+from urllib import request as urllib_request
+from urllib.error import HTTPError, URLError
+from urllib.parse import urlparse
+
+logger = logging.getLogger(__name__)
+
+
+# Azure OpenAI ``api-version`` values to probe with, tried in the order
+# listed.  The v1 GA endpoint accepts requests without ``api-version``
+# entirely, so these are only used as a fallback for pre-v1 resources
+# that still require it.
+_AZURE_OPENAI_PROBE_API_VERSIONS = (
+    "2025-04-01-preview",
+    "2024-10-21",  # oldest GA that supports /models
+)
+
+# Default Azure Anthropic ``api-version``.  Matches the value used by
+# ``agent/anthropic_adapter.py`` when building the Anthropic client.
+_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
+
+
+@dataclass
+class DetectionResult:
+    """Everything auto-detection could gather from a base URL + API key.
+
+    Every field has a default, so a bare ``DetectionResult()`` represents
+    "nothing detected yet"; :func:`detect` fills in whatever it learns.
+    """
+
+    #: Detected API transport: ``"chat_completions"``,
+    #: ``"anthropic_messages"``, or ``None`` when detection failed.
+    api_mode: Optional[str] = None
+
+    #: Deployment / model IDs returned by ``/models`` (best effort).
+    #: Empty when the endpoint doesn't expose the list with an API key.
+    models: list[str] = field(default_factory=list)
+
+    #: Lowercased host from the base URL (used for display messages).
+    #: Empty string when the URL could not be parsed.
+    hostname: str = ""
+
+    #: Human-readable reason the detector chose ``api_mode``.  Useful
+    #: for explaining auto-detection to the user in the wizard.
+    reason: str = ""
+
+    #: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
+    models_probe_ok: bool = False
+
+    #: ``True`` when the URL was determined to be an Anthropic-style
+    #: endpoint (from path suffix or live probe).
+    is_anthropic: bool = False
+
+
+def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
+    """GET a URL with ``api-key`` + ``Authorization`` headers.
+
+    Args:
+        url: Absolute URL to fetch.
+        api_key: Key sent both as ``api-key`` and as a Bearer token.
+        timeout: Socket timeout in seconds passed to ``urlopen``.
+
+    Returns:
+        ``(status_code, parsed_json_or_None)``.  The status is the HTTP
+        status on any HTTP response (including error responses, whose body
+        is not parsed) and ``0`` when no HTTP response was obtained
+        (malformed URL, network failure, timeout).
+
+    Never raises.
+    """
+    try:
+        # Request() raises ValueError for a URL without a scheme, so it must
+        # be built inside the try block to honour the "never raises" contract.
+        req = urllib_request.Request(url, method="GET")
+        # Azure OpenAI uses ``api-key``.  Some Azure deployments (and
+        # Anthropic-style routes) use ``Authorization: Bearer``.  Send both
+        # so we probe once per URL rather than twice.
+        req.add_header("api-key", api_key)
+        req.add_header("Authorization", f"Bearer {api_key}")
+        req.add_header("User-Agent", "hermes-agent/azure-detect")
+        with urllib_request.urlopen(req, timeout=timeout) as resp:
+            body = resp.read()
+            try:
+                return resp.status, json.loads(body.decode("utf-8", errors="replace"))
+            except Exception:
+                # Non-JSON body: report the status without a payload.
+                return resp.status, None
+    except HTTPError as exc:
+        return exc.code, None
+    except (URLError, TimeoutError, OSError, ValueError) as exc:
+        logger.debug("azure_detect: GET %s failed: %s", url, exc)
+        return 0, None
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
+        return 0, None
+
+
+def _strip_trailing_v1(url: str) -> str:
+    """Drop a trailing ``/v1`` (with or without a final slash) from ``url``.
+
+    Trailing slashes are trimmed before the suffix is removed, so callers
+    can append sub-paths such as ``/v1/messages`` to the result.
+    """
+    without_slash = url.rstrip("/")
+    return re.sub(r"/v1/?$", "", without_slash)
+
+
+def _looks_like_anthropic_path(url: str) -> bool:
+    """Tell whether the URL path has an ``anthropic`` segment.
+
+    Matches a path that ends in ``/anthropic`` as well as one that contains
+    ``/anthropic/`` anywhere (case-insensitive).  Only the path is examined,
+    not the host.  Returns ``False`` if the URL cannot be parsed.
+    """
+    try:
+        raw_path = urlparse(url).path or ""
+        # Appending "/" lets one substring test cover both a trailing
+        # ``/anthropic`` and an interior ``/anthropic/`` segment.
+        normalized = raw_path.lower().rstrip("/") + "/"
+        return "/anthropic/" in normalized
+    except Exception:
+        return False
+
+
+def _extract_model_ids(payload: dict) -> list[str]:
+    """Pull model IDs out of an OpenAI-shaped ``/models`` response.
+
+    Each dict entry under ``data`` contributes its first truthy value among
+    ``id``, ``model`` and ``name``, provided that value is a non-empty
+    string.  Any shape mismatch yields ``[]``.
+    """
+    if not isinstance(payload, dict):
+        return []
+    entries = payload.get("data")
+    if not isinstance(entries, list):
+        return []
+    # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
+    candidates = (
+        entry.get("id") or entry.get("model") or entry.get("name")
+        for entry in entries
+        if isinstance(entry, dict)
+    )
+    return [value for value in candidates if isinstance(value, str) and value]
+
+
+def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
+    """Probe ``<base>/models`` for an OpenAI-shaped response.
+
+    Returns ``(ok, models)``.  ``ok`` is True as soon as one candidate URL
+    answers 200 with a JSON body that either yields model IDs or is a dict
+    containing a ``data`` key; otherwise ``(False, [])``.
+    """
+    root = base_url.rstrip("/")
+
+    # Plain /models first (Azure OpenAI v1 GA paths need no api-version),
+    # then one explicit api-version per known pre-v1 fallback.
+    probe_urls = [f"{root}/models"] + [
+        f"{root}/models?api-version={version}"
+        for version in _AZURE_OPENAI_PROBE_API_VERSIONS
+    ]
+
+    for probe_url in probe_urls:
+        status, body = _http_get_json(probe_url, api_key)
+        if status != 200 or body is None:
+            continue
+        model_ids = _extract_model_ids(body)
+        if model_ids:
+            logger.info(
+                "azure_detect: /models probe OK at %s (%d models)",
+                probe_url, len(model_ids),
+            )
+            return True, model_ids
+        # A 200 with a ``data`` key but no usable IDs still counts as an
+        # OpenAI-shaped endpoint; the user can enter the model manually.
+        if isinstance(body, dict) and "data" in body:
+            return True, []
+    return False, []
+
+
+def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
+    """Check whether ``<base>/v1/messages`` recognises the Anthropic
+    Messages shape.
+
+    Sends a minimal request (model ``"probe"``, ``max_tokens=1``) that is
+    not expected to complete a real chat.  Returns ``True`` when:
+
+    * the request succeeds with a status below 500, or
+    * an HTTP error body mentions ``anthropic``, or contains both
+      ``"type"`` and ``"error"``, or
+    * a 400 error body mentions ``messages`` or ``model``.
+
+    Returns ``False`` on anything else, including malformed URLs, network
+    failures and timeouts.  Never raises.
+    """
+    payload = json.dumps({
+        "model": "probe",
+        "max_tokens": 1,
+        "messages": [{"role": "user", "content": "ping"}],
+    }).encode("utf-8")
+    try:
+        # URL building and Request() stay inside the try: Request() raises
+        # ValueError for a URL without a scheme, and this probe must not
+        # crash the wizard.
+        base = _strip_trailing_v1(base_url)
+        url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
+        req = urllib_request.Request(url, method="POST", data=payload)
+        req.add_header("api-key", api_key)
+        req.add_header("Authorization", f"Bearer {api_key}")
+        req.add_header("anthropic-version", "2023-06-01")
+        req.add_header("content-type", "application/json")
+        req.add_header("User-Agent", "hermes-agent/azure-detect")
+        with urllib_request.urlopen(req, timeout=6.0) as resp:
+            # Should never 200 — "probe" isn't a real deployment.  But
+            # if it does, the endpoint definitely speaks Anthropic.
+            return resp.status < 500
+    except HTTPError as exc:
+        # An error response with an Anthropic-shaped body = Anthropic endpoint.
+        try:
+            body = exc.read().decode("utf-8", errors="replace")
+            lowered = body.lower()
+            # NOTE(review): OpenAI-style error bodies may also carry
+            # "type"/"error" keys, so this check could be over-broad — confirm.
+            if "anthropic" in lowered or ('"type"' in lowered and '"error"' in lowered):
+                return True
+            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
+            # Anthropic-style calls on non-Anthropic deployments.  A
+            # 400 "model not found" IS Anthropic though.
+            if exc.code == 400 and ("messages" in lowered or "model" in lowered):
+                return True
+            return False
+        except Exception:
+            return False
+    except (URLError, TimeoutError, OSError, ValueError):
+        return False
+    except Exception:  # pragma: no cover
+        return False
+
+
+def detect(base_url: str, api_key: str) -> DetectionResult:
+    """Work out which API transport an Azure endpoint speaks.
+
+    Checks run in order and stop at the first match: an ``/anthropic`` path
+    sniff (no network), the OpenAI-style ``/models`` probe, then the
+    Anthropic Messages probe.  The result is *advisory* — when ``api_mode``
+    is ``None`` the caller should ask the user to choose manually.
+    """
+    try:
+        host = (urlparse(base_url).hostname or "").lower()
+    except Exception:
+        host = ""
+    result = DetectionResult(hostname=host)
+
+    # 1. Path sniff.  Azure Foundry exposes Anthropic-style deployments
+    #    under a dedicated ``/anthropic`` path.
+    if _looks_like_anthropic_path(base_url):
+        result.is_anthropic = True
+        result.api_mode = "anthropic_messages"
+        result.reason = "URL path ends in /anthropic → Anthropic Messages API"
+        return result
+
+    # 2. OpenAI-style /models probe.  Success means the endpoint speaks
+    #    the OpenAI wire format.
+    probe_ok, model_ids = _probe_openai_models(base_url, api_key)
+    if probe_ok:
+        if model_ids:
+            why = f"GET /models returned {len(model_ids)} model(s) — OpenAI-style endpoint"
+        else:
+            why = "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
+        result.models_probe_ok = True
+        result.models = model_ids
+        result.api_mode = "chat_completions"
+        result.reason = why
+        return result
+
+    # 3. Last resort: the Anthropic Messages probe.  Slower and more
+    #    intrusive than /models, so it only runs once that probe failed.
+    if _probe_anthropic_messages(base_url, api_key):
+        result.is_anthropic = True
+        result.api_mode = "anthropic_messages"
+        result.reason = "Endpoint accepts Anthropic Messages shape"
+        return result
+
+    # Nothing matched; api_mode stays None and the caller falls back to
+    # manual selection.
+    result.reason = (
+        "Could not probe endpoint (private network, missing model list, or "
+        "non-standard path) — falling back to manual API-mode selection"
+    )
+    return result
+
+
+def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
+    """Resolve a model's context length, or ``None`` when it is only a guess.
+
+    Delegates to :func:`agent.model_metadata.get_model_context_length` and
+    returns its answer only when that is a positive integer different from
+    ``DEFAULT_FALLBACK_CONTEXT``, so the wizard can distinguish "we actually
+    know this" from "we guessed."  Import failures and lookup errors also
+    yield ``None``.
+    """
+    try:
+        from agent.model_metadata import (
+            DEFAULT_FALLBACK_CONTEXT,
+            get_model_context_length,
+        )
+    except Exception:
+        return None
+
+    try:
+        length = get_model_context_length(model, base_url=base_url, api_key=api_key)
+    except Exception as exc:
+        logger.debug("azure_detect: context length lookup failed: %s", exc)
+        return None
+
+    is_known = (
+        isinstance(length, int)
+        and length > 0
+        and length != DEFAULT_FALLBACK_CONTEXT
+    )
+    return length if is_known else None
+
+
+__all__ = ["DetectionResult", "detect", "lookup_context_length"]
@@ -36,12 +36,23 @@ _EXCLUDED_DIRS = {
    "__pycache__",      # bytecode caches — regenerated on import
    ".git",             # nested git dirs (profiles shouldn't have these, but safety)
    "node_modules",     # js deps if website/ somehow leaks in
+    "backups",          # prior auto-backups — don't nest backups exponentially
+    "checkpoints",      # session-local trajectory caches — regenerated per-session,
+                        # session-hash-keyed so they don't port to another machine anyway
 }

 # File-name suffixes to skip
 _EXCLUDED_SUFFIXES = (
    ".pyc",
    ".pyo",
+    # SQLite sidecar files — the backup takes a consistent snapshot of ``*.db``
+    # via ``sqlite3.backup()``, so shipping the live WAL / shared-memory /
+    # rollback-journal alongside would pair a fresh snapshot with stale sidecar
+    # state and produce a torn restore on the next open. They're transient and
+    # regenerated on first connection anyway.
+    ".db-wal",
+    ".db-shm",
+    ".db-journal",
 )

 # File names to skip (runtime state that's meaningless on another machine)
@@ -454,6 +465,12 @@ def run_import(args) -> None:
 # Critical state files to include in quick snapshots (relative to HERMES_HOME).
 # Everything else is either regeneratable (logs, cache) or managed separately
 # (skills, repo, sessions/).
+#
+# Entries may be individual files OR directories.  Directories are captured
+# recursively; missing entries are silently skipped.  Pairing data lives in
+# platform-specific JSON blobs outside state.db, so it's listed here explicitly
+# — `hermes update` snapshots this set before pulling so approved-user lists
+# are recoverable if anything goes wrong (issue #15733).
 _QUICK_STATE_FILES = (
    "state.db",
    "config.yaml",
@@ -463,6 +480,10 @@ _QUICK_STATE_FILES = (
    "gateway_state.json",
    "channel_directory.json",
    "processes.json",
+    # Pairing stores (generic + per-platform JSONs outside state.db)
+    "pairing",                          # legacy location (gateway/pairing.py)
+    "platforms/pairing",                # new location (gateway/pairing.py)
+    "feishu_comment_pairing.json",      # Feishu comment subscription pairings
 )

 _QUICK_SNAPSHOTS_DIR = "state-snapshots"
@@ -498,7 +519,27 @@ def create_quick_snapshot(

    for rel in _QUICK_STATE_FILES:
        src = home / rel
-        if not src.exists() or not src.is_file():
+        if not src.exists():
+            continue
+
+        if src.is_dir():
+            # Walk the directory and record each file individually in the
+            # manifest so restore can treat them uniformly.  Empty dirs are
+            # skipped (nothing to snapshot).
+            for sub in src.rglob("*"):
+                if not sub.is_file():
+                    continue
+                sub_rel = sub.relative_to(home).as_posix()
+                dst = snap_dir / sub_rel
+                dst.parent.mkdir(parents=True, exist_ok=True)
+                try:
+                    shutil.copy2(sub, dst)
+                    manifest[sub_rel] = dst.stat().st_size
+                except (OSError, PermissionError) as exc:
+                    logger.warning("Could not snapshot %s: %s", sub_rel, exc)
+            continue
+
+        if not src.is_file():
            continue

        dst = snap_dir / rel
@@ -653,3 +694,233 @@ def run_quick_backup(args) -> None:
        print(f"  Restore with: /snapshot restore {snap_id}")
    else:
        print("No state files found to snapshot.")
+
+
+# ---------------------------------------------------------------------------
+# Shared full-zip backup helper
+# ---------------------------------------------------------------------------
+
+def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
+    """Write a full zip snapshot of ``hermes_root`` to ``out_path``.
+
+    Uses the same exclusion rules and SQLite safe-copy as :func:`run_backup`;
+    files that raise while being added are skipped with a debug log.  Returns
+    ``out_path``, or None if the walk fails, nothing qualifies, or OSError.
+    """
+    files_to_add: list[tuple[Path, Path]] = []  # (absolute path, path relative to root)
+    try:
+        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
+            dp = Path(dirpath)
+            # Prune excluded directories in-place so os.walk doesn't descend
+            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
+
+            for fname in filenames:
+                fpath = dp / fname
+                try:
+                    rel = fpath.relative_to(hermes_root)
+                except ValueError:
+                    continue
+
+                if _should_exclude(rel):
+                    continue
+
+                # Skip the output zip itself if it already exists inside root.
+                try:
+                    if fpath.resolve() == out_path.resolve():
+                        continue
+                except (OSError, ValueError):
+                    pass  # cannot resolve: fall through and keep the file
+
+                files_to_add.append((fpath, rel))
+    except OSError as exc:
+        logger.warning("Full-zip backup: walk failed: %s", exc)
+        return None
+
+    if not files_to_add:
+        return None  # nothing to archive; no zip file is created
+
+    try:
+        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
+            for abs_path, rel_path in files_to_add:
+                try:
+                    if abs_path.suffix == ".db":
+                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                            tmp_db = Path(tmp.name)  # only the path is kept; handle closes on exit
+                        try:
+                            if _safe_copy_db(abs_path, tmp_db):  # falsy result: db is left out
+                                zf.write(tmp_db, arcname=str(rel_path))
+                        finally:
+                            tmp_db.unlink(missing_ok=True)
+                    else:
+                        zf.write(abs_path, arcname=str(rel_path))
+                except (PermissionError, OSError, ValueError) as exc:
+                    logger.debug("Skipping %s in zip backup: %s", rel_path, exc)
+                    continue
+    except OSError as exc:
+        logger.warning("Full-zip backup: zip write failed: %s", exc)
+        # Best-effort cleanup of partial file
+        try:
+            out_path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        return None
+
+    return out_path
+
+
+# ---------------------------------------------------------------------------
+# Pre-update auto-backup
+# ---------------------------------------------------------------------------
+
+_PRE_UPDATE_BACKUPS_DIR = "backups"  # directory name joined onto the Hermes home
+_PRE_UPDATE_PREFIX = "pre-update-"  # file-name prefix of pre-update zips (matched when pruning)
+_PRE_UPDATE_DEFAULT_KEEP = 5  # default ``keep`` for create_pre_update_backup
+
+
+def _pre_update_backup_dir(hermes_home: Optional[Path] = None) -> Path:
+    """Return the ``backups`` directory under ``hermes_home`` (or the default home)."""
+    return (hermes_home or get_hermes_home()) / _PRE_UPDATE_BACKUPS_DIR
+
+
+def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
+    """Delete surplus ``pre-update-*.zip`` archives, retaining ``keep`` of them.
+
+    Archives are ranked by file name (descending) and the first ``keep``
+    survive; a negative ``keep`` counts as 0.  Files that do not match
+    ``pre-update-*.zip`` are never touched.  Returns the number removed.
+    """
+    keep = max(keep, 0)
+    if not backup_dir.exists():
+        return 0
+
+    def _is_pre_update_zip(entry: Path) -> bool:
+        if not entry.is_file():
+            return False
+        return entry.name.startswith(_PRE_UPDATE_PREFIX) and entry.suffix.lower() == ".zip"
+
+    archives = [entry for entry in backup_dir.iterdir() if _is_pre_update_zip(entry)]
+    archives.sort(key=lambda entry: entry.name, reverse=True)
+
+    removed = 0
+    for stale in archives[keep:]:
+        try:
+            stale.unlink()
+        except OSError as exc:
+            logger.warning("Failed to prune backup %s: %s", stale.name, exc)
+        else:
+            removed += 1
+    return removed
+
+
+def create_pre_update_backup(
+    hermes_home: Optional[Path] = None,
+    keep: int = _PRE_UPDATE_DEFAULT_KEEP,
+) -> Optional[Path]:
+    """Create a full zip backup of HERMES_HOME under ``backups/``.
+
+    Mirrors :func:`run_backup` (same exclusion rules, same SQLite safe-copy)
+    but writes to ``<HERMES_HOME>/backups/pre-update-<timestamp>.zip`` and
+    auto-prunes old pre-update backups (prune errors are logged, not raised).
+
+    Returns the created zip's path, or ``None`` if nothing was found or the
+    backup could not be written, so ``hermes update`` can continue either way.
+    """
+    hermes_root = hermes_home or get_default_hermes_root()
+    if not hermes_root.is_dir():
+        return None
+
+    backup_dir = _pre_update_backup_dir(hermes_root)
+    try:
+        backup_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as exc:
+        logger.warning("Could not create pre-update backup dir %s: %s", backup_dir, exc)
+        return None
+
+    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
+    if _write_full_zip_backup(out_path, hermes_root) is None:
+        return None
+
+    try:  # a prune failure must not turn a finished backup into an error
+        _prune_pre_update_backups(backup_dir, keep=keep)
+    except OSError as exc:
+        logger.warning("Could not prune pre-update backups in %s: %s", backup_dir, exc)
+    return out_path
+
+
+# ---------------------------------------------------------------------------
+# Pre-migration auto-backup (used by `hermes claw migrate`)
+# ---------------------------------------------------------------------------
+
+_PRE_MIGRATION_PREFIX = "pre-migration-"  # file-name prefix of pre-migration zips (matched when pruning)
+_PRE_MIGRATION_DEFAULT_KEEP = 5  # default ``keep`` for create_pre_migration_backup
+
+
+def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
+    """Trim ``pre-migration-*.zip`` archives in ``backup_dir`` down to ``keep``.
+
+    Candidates are sorted by name in descending order; everything after the
+    first ``keep`` is unlinked.  Returns the count of files removed.
+    """
+    limit = 0 if keep < 0 else keep
+    if not backup_dir.exists():
+        return 0
+
+    matches = []
+    for candidate in backup_dir.iterdir():
+        if not candidate.is_file():
+            continue
+        if candidate.name.startswith(_PRE_MIGRATION_PREFIX) and candidate.suffix.lower() == ".zip":
+            matches.append(candidate)
+    matches.sort(key=lambda candidate: candidate.name, reverse=True)
+
+    removed = 0
+    for victim in matches[limit:]:
+        try:
+            victim.unlink()
+            removed += 1
+        except OSError as exc:
+            logger.warning("Failed to prune pre-migration backup %s: %s", victim.name, exc)
+
+    return removed
+
+
+def create_pre_migration_backup(
+    hermes_home: Optional[Path] = None,
+    keep: int = _PRE_MIGRATION_DEFAULT_KEEP,
+) -> Optional[Path]:
+    """Create a full zip backup of HERMES_HOME under ``backups/`` before a
+    ``hermes claw migrate`` apply.
+
+    Shares implementation with :func:`create_pre_update_backup` via
+    ``_write_full_zip_backup`` — same exclusions, same SQLite safe-copy,
+    restorable with ``hermes import <archive>``.  Writes to
+    ``<HERMES_HOME>/backups/pre-migration-<timestamp>.zip`` and auto-prunes
+    old pre-migration backups (prune errors are logged, never raised).
+
+    Returns the created zip's path, or ``None`` if nothing was found to back
+    up (fresh install) or the write failed; the caller decides what to do.
+    """
+    hermes_root = hermes_home or get_default_hermes_root()
+    if not hermes_root.is_dir():
+        return None
+
+    # Reuses the shared backups/ directory so `hermes import` and the
+    # update-backup listing pick up pre-migration archives too.
+    backup_dir = _pre_update_backup_dir(hermes_root)
+    try:
+        backup_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as exc:
+        logger.warning("Could not create pre-migration backup dir %s: %s", backup_dir, exc)
+        return None
+
+    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    out_path = backup_dir / f"{_PRE_MIGRATION_PREFIX}{stamp}.zip"
+    if _write_full_zip_backup(out_path, hermes_root) is None:
+        return None
+
+    try:  # a prune failure must not turn a finished backup into an error
+        _prune_pre_migration_backups(backup_dir, keep=keep)
+    except OSError as exc:
+        logger.warning("Could not prune pre-migration backups in %s: %s", backup_dir, exc)
+    return out_path
@@ -562,7 +562,6 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    right_content = "\n".join(right_lines)
    layout_table.add_row(left_content, right_content)

-    agent_name = _skin_branding("agent_name", "Hermes Agent")
    title_color = _skin_color("banner_title", "#FFD700")
    border_color = _skin_color("banner_border", "#CD7F32")
    version_label = format_banner_version_label()
@@ -0,0 +1,138 @@
+"""Shared helpers for attaching Hermes to a local Chrome CDP port."""
+
+from __future__ import annotations
+
+import os
+import platform
+import shlex
+import shutil
+import subprocess
+
+from hermes_constants import get_hermes_home
+
+
+DEFAULT_BROWSER_CDP_PORT = 9222  # default value for --remote-debugging-port
+DEFAULT_BROWSER_CDP_URL = f"http://127.0.0.1:{DEFAULT_BROWSER_CDP_PORT}"
+
+_DARWIN_APPS = (  # absolute executable paths probed when system == "Darwin"
+    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+    "/Applications/Chromium.app/Contents/MacOS/Chromium",
+    "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
+    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
+)
+
+_WINDOWS_INSTALL_PARTS = (  # path components joined onto each install base directory
+    ("Google", "Chrome", "Application", "chrome.exe"),
+    ("Chromium", "Application", "chrome.exe"),
+    ("Chromium", "Application", "chromium.exe"),
+    ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
+    ("Microsoft", "Edge", "Application", "msedge.exe"),
+)
+
+_LINUX_BIN_NAMES = (  # resolved with shutil.which when system is neither Darwin nor Windows
+    "google-chrome", "google-chrome-stable", "chromium-browser",
+    "chromium", "brave-browser", "microsoft-edge",
+)
+
+_WINDOWS_BIN_NAMES = (  # resolved with shutil.which when system == "Windows"
+    "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
+    "chrome", "msedge", "brave", "chromium",
+)
+
+
+def get_chrome_debug_candidates(system: str) -> list[str]:
+    """Return existing Chromium-family executables for ``system``, de-duplicated.
+
+    Any ``system`` other than "Darwin"/"Windows" takes the Linux lookup path.
+    """
+    found: list[str] = []
+    known: set[str] = set()
+
+    def add(path: str | None) -> None:  # record once, and only if the file exists
+        if path:
+            key = os.path.normcase(os.path.normpath(path))
+            if key not in known and os.path.isfile(path):
+                found.append(path)
+                known.add(key)
+
+    def add_install_paths(bases: tuple[str | None, ...]) -> None:
+        for base in bases:
+            if base:
+                for parts in _WINDOWS_INSTALL_PARTS:
+                    add(os.path.join(base, *parts))
+
+    if system == "Darwin":
+        for app in _DARWIN_APPS:
+            add(app)
+    elif system == "Windows":
+        for name in _WINDOWS_BIN_NAMES:
+            add(shutil.which(name))
+        add_install_paths((
+            os.environ.get("ProgramFiles"),
+            os.environ.get("ProgramFiles(x86)"),
+            os.environ.get("LOCALAPPDATA"),
+        ))
+    else:
+        for name in _LINUX_BIN_NAMES:
+            add(shutil.which(name))
+        add_install_paths(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"))
+    return found
+
+
+def chrome_debug_data_dir() -> str:  # value handed to --user-data-dir
+    return str(get_hermes_home().joinpath("chrome-debug"))
+
+
+def _chrome_debug_args(port: int) -> list[str]:
+    """Build the browser flags: debugging ``port`` plus the Hermes data dir."""
+    profile_dir = chrome_debug_data_dir()
+    flags = [f"--remote-debugging-port={port}", f"--user-data-dir={profile_dir}"]
+    # Remaining switches are fixed and always appended in this order.
+    flags += ["--no-first-run", "--no-default-browser-check"]
+    return flags
+
+
+def manual_chrome_debug_command(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> str | None:
+    """Return one command-line string that launches a browser with the CDP flags, or None."""
+    system = system or platform.system()
+    candidates = get_chrome_debug_candidates(system)
+    if not candidates:
+        if system != "Darwin":
+            return None
+        # No executable found on Darwin: fall back to opening the app by name.
+        return (
+            f'open -a "Google Chrome" --args --remote-debugging-port={port} '
+            f'--user-data-dir="{chrome_debug_data_dir()}" --no-first-run --no-default-browser-check'
+        )
+
+    argv = [candidates[0], *_chrome_debug_args(port)]
+    quote_argv = subprocess.list2cmdline if system == "Windows" else shlex.join
+    return quote_argv(argv)
+
+
+def _detach_kwargs(system: str) -> dict:
+    """Popen kwargs that detach the child: new session, or Windows creation flags."""
+    if system == "Windows":
+        detached = getattr(subprocess, "DETACHED_PROCESS", 0)
+        new_group = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0)
+        return {"creationflags": detached | new_group} if detached | new_group else {}
+    return {"start_new_session": True}
+
+
+def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
+    """Best-effort detached launch of the first browser candidate; True if it was spawned."""
+    system = system or platform.system()
+    candidates = get_chrome_debug_candidates(system)
+    if not candidates:
+        return False
+    try:  # makedirs lives in here so an unwritable home yields False, not a raise
+        os.makedirs(chrome_debug_data_dir(), exist_ok=True)
+        subprocess.Popen(
+            [candidates[0], *_chrome_debug_args(port)],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            **_detach_kwargs(system),
+        )
+        return True
+    except Exception:
+        return False
@@ -4,7 +4,8 @@ Usage:
    hermes claw migrate              # Preview then migrate (always shows preview first)
    hermes claw migrate --dry-run    # Preview only, no changes
    hermes claw migrate --yes        # Skip confirmation prompt
-    hermes claw migrate --preset full --overwrite  # Full migration, overwrite conflicts
+    hermes claw migrate --preset full --overwrite --migrate-secrets  # Full run w/ secrets
+    hermes claw migrate --no-backup  # Skip pre-migration snapshot
    hermes claw cleanup              # Archive leftover OpenClaw directories
    hermes claw cleanup --dry-run    # Preview what would be archived
 """
@@ -15,6 +16,7 @@ import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
+from typing import Optional

 from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
 from hermes_constants import get_optional_skills_dir
@@ -321,10 +323,13 @@ def _cmd_migrate(args):
    migrate_secrets = getattr(args, "migrate_secrets", False)
    workspace_target = getattr(args, "workspace_target", None)
    skill_conflict = getattr(args, "skill_conflict", "skip")
+    no_backup = getattr(args, "no_backup", False)

-    # If using the "full" preset, secrets are included by default
-    if preset == "full":
-        migrate_secrets = True
+    # Secrets are never included implicitly — they must be explicitly requested
+    # via --migrate-secrets, even under --preset full.  This mirrors OpenClaw's
+    # migrate-hermes posture (two-phase: run once without secrets, rerun with
+    # --include-secrets) and prevents a --preset full invocation from silently
+    # importing API keys that the user may not have intended to copy.

    print()
    print(
@@ -431,15 +436,24 @@ def _cmd_migrate(args):

    preview_summary = preview_report.get("summary", {})
    preview_count = preview_summary.get("migrated", 0)
+    preview_conflicts = preview_summary.get("conflict", 0)

-    if preview_count == 0:
+    # "Nothing to migrate" means nothing migrated AND nothing blocked by
+    # conflicts.  If there are conflicts, we still want to show the plan and
+    # surface the refusal/--overwrite guidance instead of silently bailing.
+    if preview_count == 0 and preview_conflicts == 0:
        print()
        print_info("Nothing to migrate from OpenClaw.")
        _print_migration_report(preview_report, dry_run=True)
        return

    print()
-    print_header(f"Migration Preview — {preview_count} item(s) would be imported")
+    if preview_count > 0:
+        print_header(f"Migration Preview — {preview_count} item(s) would be imported")
+    else:
+        print_header(
+            f"Migration Preview — {preview_conflicts} conflict(s), nothing would be imported"
+        )
    print_info("No changes have been made yet. Review the list below:")
    _print_migration_report(preview_report, dry_run=True)

@@ -447,6 +461,24 @@ def _cmd_migrate(args):
    if dry_run:
        return

+    # ── Phase 1b: Refuse if the plan has conflicts and --overwrite is not set ─
+    # Modelled on OpenClaw's assertConflictFreePlan() — apply is a safe no-op
+    # on conflicts unless the user explicitly opts in to overwriting.  Without
+    # this guard, the user would answer "yes, proceed" and silently end up
+    # with a migration that skipped every conflicting item.
+    if preview_conflicts > 0 and not overwrite:
+        print()
+        print_error(
+            f"Plan has {preview_conflicts} conflict(s). Refusing to apply."
+        )
+        print_info(
+            "Each conflict is an item whose target already exists in ~/.hermes/. "
+            "Re-run with --overwrite to replace conflicting targets (item-level "
+            "backups are written to the migration report directory)."
+        )
+        print_info("Or re-run with --dry-run to review the full plan.")
+        return
+
    # ── Phase 2: Confirm and execute ───────────────────────────
    print()
    if not auto_yes:
@@ -458,6 +490,32 @@ def _cmd_migrate(args):
            print_info("Migration cancelled.")
            return

+    # ── Phase 2b: Pre-apply backup of the Hermes home ─────────
+    # Delegates to hermes_cli.backup.create_pre_migration_backup(), which
+    # shares implementation with the pre-update backup (same exclusion
+    # rules, same SQLite safe-copy, zip format) so the archive is
+    # restorable with `hermes import`.  Mirrors OpenClaw's
+    # createPreMigrationBackup posture — one atomic restore point before
+    # any mutation, auto-pruned to the last 5 pre-migration zips.
+    backup_archive: Optional[Path] = None
+    if not no_backup:
+        try:
+            from hermes_cli.backup import create_pre_migration_backup, _format_size
+            backup_archive = create_pre_migration_backup(hermes_home=hermes_home)
+            if backup_archive:
+                size_str = _format_size(backup_archive.stat().st_size)
+                print()
+                print_success(f"Pre-migration backup: {backup_archive} ({size_str})")
+                print_info(f"Restore with: hermes import {backup_archive.name}")
+        except Exception as e:
+            print()
+            print_error(f"Could not create pre-migration backup: {e}")
+            print_info(
+                "Re-run with --no-backup to skip, or free up disk space under the Hermes home."
+            )
+            logger.debug("Pre-migration backup error", exc_info=True)
+            return
+
    try:
        migrator = mod.Migrator(
            source_root=source_dir.resolve(),
@@ -476,6 +534,9 @@ def _cmd_migrate(args):
        print()
        print_error(f"Migration failed: {e}")
        logger.debug("OpenClaw migration error", exc_info=True)
+        if backup_archive:
+            print_info(f"A pre-migration backup is available at: {backup_archive}")
+            print_info(f"Restore with: hermes import {backup_archive.name}")
        return

    # Print results
@@ -62,6 +62,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
               aliases=("reset",)),
    CommandDef("clear", "Clear screen and start a new session", "Session",
               cli_only=True),
+    CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session",
+               cli_only=True),
    CommandDef("history", "Show conversation history", "Session",
               cli_only=True),
    CommandDef("save", "Save the current conversation", "Session",
@@ -84,9 +86,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("deny", "Deny a pending dangerous command", "Session",
               gateway_only=True),
    CommandDef("background", "Run a prompt in the background", "Session",
-               aliases=("bg",), args_hint="<prompt>"),
-    CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
-               args_hint="<question>"),
+               aliases=("bg", "btw"), args_hint="<prompt>"),
    CommandDef("agents", "Show active agents and running tasks", "Session",
               aliases=("tasks",)),
    CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
@@ -115,6 +115,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
               "Configuration", cli_only=True,
               gateway_config_gate="display.tool_progress_command"),
+    CommandDef("footer", "Toggle gateway runtime-metadata footer on final replies",
+               "Configuration", args_hint="[on|off|status]",
+               subcommands=("on", "off", "status")),
    CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
               "Configuration"),
    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
@@ -125,11 +128,14 @@ COMMAND_REGISTRY: list[CommandDef] = [
               subcommands=("normal", "fast", "status", "on", "off")),
    CommandDef("skin", "Show or change the display skin/theme", "Configuration",
               cli_only=True, args_hint="[name]"),
+    CommandDef("indicator", "Pick the TUI busy-indicator style", "Configuration",
+               cli_only=True, args_hint="[kaomoji|emoji|unicode|ascii]",
+               subcommands=("kaomoji", "emoji", "unicode", "ascii")),
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
-               cli_only=True, args_hint="[queue|interrupt|status]",
-               subcommands=("queue", "interrupt", "status")),
+               cli_only=True, args_hint="[queue|steer|interrupt|status]",
+               subcommands=("queue", "steer", "interrupt", "status")),

    # Tools & Skills
    CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
@@ -142,6 +148,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
+    CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
+               "Tools & Skills", args_hint="[subcommand]",
+               subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
               cli_only=True),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
@@ -808,6 +817,114 @@ def discord_skill_commands_by_category(
    return trimmed_categories, uncategorized, hidden


+# ---------------------------------------------------------------------------
+# Slack native slash commands
+# ---------------------------------------------------------------------------
+
+# Slack slash command name constraints: lowercase a-z, 0-9, hyphens,
+# underscores. Max 32 chars. Slack app manifest accepts up to 50 slash
+# commands per app.
+_SLACK_MAX_SLASH_COMMANDS = 50
+_SLACK_NAME_LIMIT = 32
+_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
+
+
+def _sanitize_slack_name(raw: str) -> str:
+    """Convert a command name to a valid Slack slash command name.
+
+    Lowercases ``raw``, drops every char outside ``[a-z0-9_-]``, trims
+    leading/trailing ``-``/``_`` and caps the result at 32 chars — then
+    trims the tail again in case the cap left a separator dangling.
+    """
+    name = _SLACK_INVALID_CHARS.sub("", raw.lower()).strip("-_")
+    # Trim again after the cut: truncation can expose a trailing "-"/"_".
+    return name[:_SLACK_NAME_LIMIT].rstrip("-_")
+
+
+def slack_native_slashes() -> list[tuple[str, str, str]]:
+    """Return (slash_name, description, usage_hint) triples for Slack.
+
+    Every gateway-available command in ``COMMAND_REGISTRY`` is surfaced as
+    a standalone Slack slash command (e.g. ``/btw``, ``/stop``, ``/model``),
+    matching Discord's and Telegram's model where every command is a
+    first-class slash and not a ``/hermes <verb>`` subcommand.
+
+    Both canonical names and aliases are included so users can type any
+    documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work).
+    Plugin-registered slash commands are included too.
+
+    Results are clamped to Slack's 50-command limit with duplicate-name
+    avoidance. ``/hermes`` is always reserved as the first entry so the
+    legacy ``/hermes <subcommand>`` form keeps working for anything that
+    gets dropped by the clamp or for free-form questions.
+    """
+    overrides = _resolve_config_gates()
+    entries: list[tuple[str, str, str]] = []
+    seen: set[str] = set()
+
+    # Reserve /hermes as the catch-all top-level command.
+    entries.append(("hermes", "Talk to Hermes or run a subcommand", "[subcommand] [args]"))
+    seen.add("hermes")
+
+    def _add(name: str, desc: str | None, hint: str | None) -> None:
+        slack_name = _sanitize_slack_name(name)
+        if not slack_name or slack_name in seen:
+            return
+        if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
+            return
+        # Slack description cap is 2000 chars; keep it short.  ``or ""``
+        # keeps an entry registered without a description from raising here.
+        entries.append((slack_name, (desc or "")[:140], (hint or "")[:100]))
+        seen.add(slack_name)
+
+    # First pass: canonical names (so they win slots if we hit the cap).
+    for cmd in COMMAND_REGISTRY:
+        if not _is_gateway_available(cmd, overrides):
+            continue
+        _add(cmd.name, cmd.description, cmd.args_hint or "")
+
+    # Second pass: aliases.
+    for cmd in COMMAND_REGISTRY:
+        if not _is_gateway_available(cmd, overrides):
+            continue
+        for alias in cmd.aliases:
+            # Aliases that sanitize to an already-seen name are dropped by _add.
+            _add(alias, f"Alias for /{cmd.name} — {cmd.description}", cmd.args_hint or "")
+
+    # Third pass: plugin commands.
+    for name, description, args_hint in _iter_plugin_command_entries():
+        _add(name, description, args_hint or "")
+
+    return entries
+
+
+def slack_app_manifest(request_url: str = "https://hermes-agent.local/slack/commands") -> dict[str, Any]:
+    """Build the ``features.slash_commands`` fragment of a Slack app manifest.
+
+    One entry is emitted per triple from :func:`slack_native_slashes`.
+    ``request_url`` is copied into every entry's ``url`` field; Slack's
+    manifest schema requires it, but in Socket Mode (which we use) command
+    events arrive over the WebSocket, so a placeholder URL is fine.
+
+    Only the slash-command fragment is returned (wrapped as
+    ``{"features": {"slash_commands": [...]}}``); callers merge it into a
+    full manifest that carries display_information, oauth_config, etc.
+    """
+
+    def _entry(name: str, desc: str, usage: str) -> dict[str, Any]:
+        item: dict[str, Any] = {
+            "command": f"/{name}",
+            "description": desc or f"Run /{name}",
+            "should_escape": False,
+            "url": request_url,
+        }
+        if usage:  # omit the key entirely when there is no hint
+            item["usage_hint"] = usage
+        return item
+
+    return {"features": {"slash_commands": [_entry(*triple) for triple in slack_native_slashes()]}}
+
+
 def slack_subcommand_map() -> dict[str, str]:
    """Return subcommand -> /command mapping for Slack /hermes handler.

@@ -835,6 +952,42 @@ def slack_subcommand_map() -> dict[str, str]:
 # Autocomplete
 # ---------------------------------------------------------------------------

+
+# Per-process cache for /model<space> LM Studio autocomplete. Probing on
+# every keystroke would block the UI; a short TTL keeps it live without
+# hammering the server.
+_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None
+
+
+def _lmstudio_completion_models() -> list[str]:
+    """Return LM Studio models for /model autocomplete (gated, cached for 30 s)."""
+    global _LMSTUDIO_COMPLETION_CACHE
+    if not os.environ.get("LM_API_KEY") and not os.environ.get("LM_BASE_URL"):
+        # No env hint: only probe when the auth store mentions LM Studio.
+        try:
+            from hermes_cli.auth import _load_auth_store
+            store = _load_auth_store() or {}
+            if not any("lmstudio" in (store.get(k) or {}) for k in ("providers", "credential_pool")):
+                return []
+        except Exception:
+            return []
+    now = time.time()
+    cached = _LMSTUDIO_COMPLETION_CACHE
+    if cached and (now - cached[0]) < 30.0:
+        return cached[1]
+    try:
+        from hermes_cli.models import fetch_lmstudio_models
+        models = fetch_lmstudio_models(
+            api_key=os.environ.get("LM_API_KEY", ""),
+            base_url=os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1",
+            timeout=0.8,
+        )
+    except Exception:
+        models = []
+    _LMSTUDIO_COMPLETION_CACHE = (now, models)
+    return models
+
+
 class SlashCommandCompleter(Completer):
    """Autocomplete for built-in slash commands, subcommands, and skill commands."""

@@ -1258,6 +1411,19 @@ class SlashCommandCompleter(Completer):
                    )
        except Exception:
            pass
+        # LM Studio: surface locally-loaded models. Gated on the user actually
+        # having LM Studio configured (env var or auth-store entry) so we
+        # don't probe 127.0.0.1 on every keystroke for users who don't use it.
+        for name in _lmstudio_completion_models():
+            if name in seen:
+                continue
+            if name.startswith(sub_lower) and name != sub_lower:
+                yield Completion(
+                    name,
+                    start_position=-len(sub_text),
+                    display=name,
+                    display_meta="LM Studio",
+                )

    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
@@ -30,34 +30,67 @@ logger = logging.getLogger(__name__)
 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 _LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
+# path -> (mtime_ns, size, cached expanded config dict).
+# load_config() returns a deepcopy of the cached value when the file
+# hasn't changed since the last load, skipping yaml.safe_load +
+# _deep_merge + _normalize_* + _expand_env_vars (~13 ms/call).
+# save_config() + migrate_config() write via atomic_yaml_write which
+# produces a fresh inode, so stat() sees a new mtime_ns and the next
+# load repopulates automatically — no explicit invalidation hook.
+_LOAD_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
+# path -> (mtime_ns, size, cached raw yaml dict). Same pattern as
+# _LOAD_CONFIG_CACHE but for read_raw_config() — used when callers want
+# the user's on-disk values without defaults merged in.
+_RAW_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
 # Env var names written to .env that aren't in OPTIONAL_ENV_VARS
 # (managed by setup/provider flows directly).
 _EXTRA_ENV_KEYS = frozenset({
    "OPENAI_API_KEY", "OPENAI_BASE_URL",
    "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
-    "DISCORD_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL",
+    "DISCORD_HOME_CHANNEL", "DISCORD_HOME_CHANNEL_NAME",
+    "TELEGRAM_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL_NAME",
+    "SLACK_HOME_CHANNEL", "SLACK_HOME_CHANNEL_NAME",
    "SIGNAL_ACCOUNT", "SIGNAL_HTTP_URL",
    "SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
+    "SIGNAL_HOME_CHANNEL", "SIGNAL_HOME_CHANNEL_NAME",
+    "SMS_HOME_CHANNEL", "SMS_HOME_CHANNEL_NAME",
    "DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
+    "DINGTALK_HOME_CHANNEL", "DINGTALK_HOME_CHANNEL_NAME",
    "FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
+    "FEISHU_HOME_CHANNEL", "FEISHU_HOME_CHANNEL_NAME",
+    "YUANBAO_HOME_CHANNEL", "YUANBAO_HOME_CHANNEL_NAME",
    "WECOM_BOT_ID", "WECOM_SECRET",
    "WECOM_CALLBACK_CORP_ID", "WECOM_CALLBACK_CORP_SECRET", "WECOM_CALLBACK_AGENT_ID",
    "WECOM_CALLBACK_TOKEN", "WECOM_CALLBACK_ENCODING_AES_KEY",
    "WECOM_CALLBACK_HOST", "WECOM_CALLBACK_PORT",
+    "WECOM_HOME_CHANNEL", "WECOM_HOME_CHANNEL_NAME",
    "WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
    "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
    "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
    "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
+    "BLUEBUBBLES_HOME_CHANNEL", "BLUEBUBBLES_HOME_CHANNEL_NAME",
    "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
    "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",  # legacy aliases (pre-rename, still read for back-compat)
    "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
    "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
-    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
+    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_HOME_CHANNEL_NAME", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
-    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
+    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD", "MATRIX_DM_AUTO_THREAD",
    "MATRIX_RECOVERY_KEY",
+    # Langfuse observability plugin — optional tuning keys + standard SDK vars.
+    # Activation is via plugins.enabled (opt-in through `hermes plugins enable
+    # observability/langfuse` or `hermes tools → Langfuse`); credentials gate
+    # the plugin at runtime.
+    "HERMES_LANGFUSE_ENV",
+    "HERMES_LANGFUSE_RELEASE",
+    "HERMES_LANGFUSE_SAMPLE_RATE",
+    "HERMES_LANGFUSE_MAX_CHARS",
+    "HERMES_LANGFUSE_DEBUG",
+    "LANGFUSE_PUBLIC_KEY",
+    "LANGFUSE_SECRET_KEY",
+    "LANGFUSE_BASE_URL",
 })
 import yaml

@@ -206,6 +239,7 @@ def get_container_exec_info() -> Optional[dict]:

 # Re-export from hermes_constants — canonical definition lives there.
 from hermes_constants import get_hermes_home  # noqa: F811,E402
+from utils import atomic_replace

 def get_config_path() -> Path:
    """Get the main config file path."""
@@ -389,6 +423,34 @@ DEFAULT_CONFIG = {
        # (60+ tool iterations with tiny output) before users assume the
        # bot is dead and /restart.
        "gateway_notify_interval": 180,
+        # Freshness window for the gateway auto-continue note (seconds).
+        # After a gateway crash/restart/SIGTERM mid-run, the next user
+        # message gets a "[System note: your previous turn was
+        # interrupted — process the unfinished tool result(s) first]"
+        # prepended so the model picks up where it left off.  That's the
+        # right behaviour while the interruption is fresh, but stale
+        # markers (transcript last touched hours or days ago) can revive
+        # an unrelated old task when the user's next message starts new
+        # work.  This window is the max age of the last persisted
+        # transcript row for which we still inject the continue note.
+        # Default 3600s comfortably covers a long turn (gateway_timeout
+        # default is 1800s) plus runtime slack.  Set to 0 to disable the
+        # gate and restore pre-fix behaviour (always inject).
+        "gateway_auto_continue_freshness": 3600,
+        # How user-attached images are presented to the main model on each turn.
+        #   "auto"   — attach natively when the active model reports
+        #              supports_vision=True AND the user hasn't explicitly
+        #              configured auxiliary.vision.provider.  Otherwise fall
+        #              back to text (vision_analyze pre-analysis).
+        #   "native" — always attach natively; non-vision models will either
+        #              error at the provider or get a last-chance text fallback
+        #              (see run_agent._prepare_messages_for_api).
+        #   "text"   — always pre-analyze with vision_analyze and prepend the
+        #              description as text; the main model never sees pixels.
+        # Affects gateway platforms, the TUI, and CLI /attach.  vision_analyze
+        # remains available as a tool regardless of this setting — the routing
+        # only controls how inbound user images are presented.
+        "image_input_mode": "auto",
    },
    
    "terminal": {
@@ -437,7 +499,8 @@ DEFAULT_CONFIG = {
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
-        # Container resource limits (docker, singularity, modal, daytona — ignored for local/ssh)
+        "vercel_runtime": "node24",
+        # Container resource limits (docker, singularity, modal, daytona, vercel_sandbox — ignored for local/ssh)
        "container_cpu": 1,
        "container_memory": 5120,       # MB (default 5GB)
        "container_disk": 51200,        # MB (default 50GB)
@@ -453,6 +516,16 @@ DEFAULT_CONFIG = {
        # Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
        # Default off because passing host directories into a sandbox weakens isolation.
        "docker_mount_cwd_to_workspace": False,
+        # Explicit opt-in: run the Docker container as the host user's uid:gid
+        # (via `--user`).  When enabled, files written into bind-mounted dirs
+        # (docker_volumes, the persistent workspace, or the auto-mounted cwd)
+        # are owned by your host user instead of root, which avoids needing
+        # `sudo chown` after container runs. Default off to preserve behavior
+        # for images whose entrypoints expect to start as root (e.g. the
+        # bundled Hermes image, which drops to the `hermes` user via gosu).
+        # When on, SETUID/SETGID caps are omitted from the container since
+        # no privilege drop is needed.
+        "docker_run_as_host_user": False,
        # Persistent shell — keep a long-lived bash shell across execute() calls
        # so cwd/env vars/shell variables survive between commands.
        # Enabled by default for non-local backends (SSH); local is always opt-in
@@ -465,6 +538,7 @@ DEFAULT_CONFIG = {
        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
+        "auto_local_for_private_urls": True,  # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud
        "cdp_url": "",  # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
        # CDP supervisor — dialog + frame detection via a persistent WebSocket.
        # Active only when a CDP-capable backend is attached (Browserbase or
@@ -486,6 +560,19 @@ DEFAULT_CONFIG = {
    "checkpoints": {
        "enabled": True,
        "max_snapshots": 50,  # Max checkpoints to keep per directory
+        # Auto-maintenance: shadow repos accumulate forever under
+        # ~/.hermes/checkpoints/ (one per cd'd working directory). Field
+        # reports put the typical offender at 1000+ repos / ~12 GB. When
+        # auto_prune is on, hermes sweeps at startup (at most once per
+        # min_interval_hours) and deletes:
+        #   * orphan repos: HERMES_WORKDIR no longer exists on disk
+        #   * stale repos:  newest mtime older than retention_days
+        # Opt-in so users who rely on /rollback against long-ago sessions
+        # never lose data silently.
+        "auto_prune": False,
+        "retention_days": 7,
+        "delete_orphans": True,
+        "min_interval_hours": 24,
    },

    # Maximum characters returned by a single read_file call.  Reads that
@@ -518,7 +605,7 @@ DEFAULT_CONFIG = {
        "threshold": 0.50,            # compress when context usage exceeds this ratio
        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
-
+        "hygiene_hard_message_limit": 400,  # gateway session-hygiene force-compress threshold by message count
    },

    # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
@@ -626,7 +713,12 @@ DEFAULT_CONFIG = {
        "compact": False,
        "personality": "kawaii",
        "resume_display": "full",
-        "busy_input_mode": "interrupt",
+        "busy_input_mode": "interrupt",  # interrupt | queue | steer
+        # When true, `hermes --tui` auto-resumes the most recent human-
+        # facing session on launch instead of forging a fresh one.
+        # Mirrors `hermes -c` muscle memory.  Default off so existing
+        # users aren't surprised.  HERMES_TUI_RESUME=<id> always wins.
+        "tui_auto_resume_recent": False,
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
@@ -634,6 +726,9 @@ DEFAULT_CONFIG = {
        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
+        # TUI busy indicator style: kaomoji (default), emoji, unicode (braille
+        # spinner), or ascii.  Live-swappable via `/indicator <style>`.
+        "tui_status_indicator": "kaomoji",
        "user_message_preview": {  # CLI: how many submitted user-message lines to echo back in scrollback
            "first_lines": 2,
            "last_lines": 2,
@@ -643,6 +738,14 @@ DEFAULT_CONFIG = {
        "tool_progress_overrides": {},  # DEPRECATED — use display.platforms instead
        "tool_preview_length": 0,  # Max chars for tool call previews (0 = no limit, show full paths/commands)
        "platforms": {},  # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
+        # Gateway runtime-metadata footer appended to the FINAL message of a turn
+        # (disabled by default to keep replies minimal). When enabled, renders
+        # e.g. `model · 68% · ~/projects/hermes`. Per-platform overrides go under
+        # display.platforms.<platform>.runtime_footer.
+        "runtime_footer": {
+            "enabled": False,
+            "fields": ["model", "context_pct", "cwd"],  # Order shown; drop any to hide
+        },
    },

    # Web dashboard settings
@@ -823,6 +926,35 @@ DEFAULT_CONFIG = {
        "guard_agent_created": False,
    },

+    # Curator — background skill maintenance.
+    #
+    # Periodically reviews AGENT-CREATED skills (never bundled or
+    # hub-installed) and keeps the collection tidy: marks long-unused skills
+    # as stale, archives genuinely obsolete ones (archive only, never
+    # deletes), and spawns a forked aux-model agent to consolidate overlaps
+    # and patch drift. Runs inactivity-triggered from session start — no
+    # cron daemon.
+    #
+    # See `hermes curator status` for the last run summary.
+    "curator": {
+        "enabled": True,
+        # How long to wait between curator runs (hours).  Default: 7 days.
+        "interval_hours": 24 * 7,
+        # Only run when the agent has been idle at least this long (hours).
+        "min_idle_hours": 2,
+        # Mark a skill as "stale" after this many days without use.
+        "stale_after_days": 30,
+        # Archive a skill (move to skills/.archive/) after this many days
+        # without use. Archived skills are recoverable — no auto-deletion.
+        "archive_after_days": 90,
+        # Optional per-task override for the curator's aux model. Leave null
+        # to use Hermes' main auxiliary client resolution.
+        "auxiliary": {
+            "provider": None,
+            "model": None,
+        },
+    },
+
    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
    # This section is only needed for hermes-specific overrides; everything else
    # (apiKey, workspace, peerName, sessions, enabled) comes from the global config.
@@ -860,6 +992,7 @@ DEFAULT_CONFIG = {

    # Telegram platform settings (gateway mode)
    "telegram": {
+        "reactions": False,            # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
    },

@@ -914,7 +1047,7 @@ DEFAULT_CONFIG = {
    # Pre-exec security scanning via tirith
    "security": {
        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
-        "redact_secrets": True,
+        "redact_secrets": False,
        "tirith_enabled": True,
        "tirith_path": "tirith",
        "tirith_timeout": 5,
@@ -959,6 +1092,27 @@ DEFAULT_CONFIG = {
        "backup_count": 3,     # Number of rotated backup files to keep
    },

+    # Remotely-hosted model catalog manifest.  When enabled, the CLI fetches
+    # curated model lists for OpenRouter and Nous Portal from this URL,
+    # falling back to the in-repo snapshot on network failure.  Lets us
+    # update model picker lists without shipping a hermes-agent release.
+    # The default URL is served by the docs site GitHub Pages deploy.
+    "model_catalog": {
+        "enabled": True,
+        "url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json",
+        # Disk cache TTL in hours.  Beyond this, the CLI refetches on the
+        # next /model or `hermes model` invocation; network failures
+        # silently fall back to the stale cache.
+        "ttl_hours": 24,
+        # Optional per-provider override URLs for third parties that want
+        # to self-host their own curation list using the same schema.
+        # Example:
+        #   providers:
+        #     openrouter:
+        #       url: https://example.com/my-curation.json
+        "providers": {},
+    },
+
    # Network settings — workarounds for connectivity issues.
    "network": {
        # Force IPv4 connections.  On servers with broken or unreachable IPv6,
@@ -995,6 +1149,27 @@ DEFAULT_CONFIG = {
        "min_interval_hours": 24,
    },

+    # Contextual first-touch onboarding hints (see agent/onboarding.py).
+    # Each hint is shown once per install and then latched here so it
+    # never fires again.  Users can wipe the section to re-see all hints.
+    "onboarding": {
+        "seen": {},
+    },
+
+    # ``hermes update`` behaviour.
+    "updates": {
+        # Run a full ``hermes backup``-style zip of HERMES_HOME before every
+        # ``hermes update``.  Backups land in ``<HERMES_HOME>/backups/`` and
+        # can be restored with ``hermes import <path>``.  Off by default —
+        # on large HERMES_HOME directories the zip can add minutes to every
+        # update.  Set to true to re-enable, or pass ``--backup`` to opt in
+        # for a single update run.
+        "pre_update_backup": False,
+        # How many pre-update backup zips to retain.  Older ones are pruned
+        # automatically after each successful backup.
+        "backup_keep": 5,
+    },
+
    # Config schema version - bump this when adding new required fields
    "_config_version": 22,
 }
@@ -1096,6 +1271,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "LM_API_KEY": {
+        "description": "LM Studio bearer token for auth-enabled local servers",
+        "prompt": "LM Studio API key / bearer token",
+        "url": None,
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "LM_BASE_URL": {
+        "description": "LM Studio base URL override",
+        "prompt": "LM Studio base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "GLM_API_KEY": {
        "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
        "prompt": "Z.AI / GLM API key",
@@ -1184,6 +1375,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "GMI_API_KEY": {
+        "description": "GMI Cloud API key",
+        "prompt": "GMI Cloud API key",
+        "url": "https://www.gmicloud.ai/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "GMI_BASE_URL": {
+        "description": "GMI Cloud base URL override",
+        "prompt": "GMI Cloud base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "MINIMAX_API_KEY": {
        "description": "MiniMax API key (international)",
        "prompt": "MiniMax API key",
@@ -1371,6 +1578,21 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "AZURE_FOUNDRY_API_KEY": {
+        "description": "Azure Foundry API key for custom Azure endpoints",
+        "prompt": "Azure Foundry API Key",
+        "url": "https://ai.azure.com/",
+        "password": True,
+        "category": "provider",
+    },
+    "AZURE_FOUNDRY_BASE_URL": {
+        "description": "Azure Foundry base URL (set via 'hermes model' for endpoint-specific config)",
+        "prompt": "Azure Foundry base URL",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },

    # ── Tool API keys ──
    "EXA_API_KEY": {
@@ -1538,6 +1760,44 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
    },

+    # ── Bundled skills (opt-in: only needed if the user uses that skill) ──
+    # These use category="skill" (distinct from "tool") so the sandbox
+    # env blocklist in tools/environments/local.py does NOT rewrite them —
+    # skills legitimately need these passed through to curl via
+    # tools/env_passthrough.py when the user's skill calls out.
+    "NOTION_API_KEY": {
+        "description": "Notion integration token (used by the `notion` skill)",
+        "prompt": "Notion API key",
+        "url": "https://www.notion.so/my-integrations",
+        "password": True,
+        "category": "skill",
+        "advanced": True,
+    },
+    "LINEAR_API_KEY": {
+        "description": "Linear personal API key (used by the `linear` skill)",
+        "prompt": "Linear API key",
+        "url": "https://linear.app/settings/api",
+        "password": True,
+        "category": "skill",
+        "advanced": True,
+    },
+    "AIRTABLE_API_KEY": {
+        "description": "Airtable personal access token (used by the `airtable` skill)",
+        "prompt": "Airtable API key",
+        "url": "https://airtable.com/create/tokens",
+        "password": True,
+        "category": "skill",
+        "advanced": True,
+    },
+    "TENOR_API_KEY": {
+        "description": "Tenor API key for GIF search (used by the `gif-search` skill)",
+        "prompt": "Tenor API key",
+        "url": "https://developers.google.com/tenor/guides/quickstart",
+        "password": True,
+        "category": "skill",
+        "advanced": True,
+    },
+
    # ── Honcho ──
    "HONCHO_API_KEY": {
        "description": "Honcho API key for AI-native persistent memory",
@@ -1553,6 +1813,30 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
    },

+    # ── Langfuse observability ──
+    "HERMES_LANGFUSE_PUBLIC_KEY": {
+        "description": "Langfuse project public key (pk-lf-...)",
+        "prompt": "Langfuse public key",
+        "url": "https://cloud.langfuse.com",
+        "password": False,
+        "category": "tool",
+    },
+    "HERMES_LANGFUSE_SECRET_KEY": {
+        "description": "Langfuse project secret key (sk-lf-...)",
+        "prompt": "Langfuse secret key",
+        "url": "https://cloud.langfuse.com",
+        "password": True,
+        "category": "tool",
+    },
+    "HERMES_LANGFUSE_BASE_URL": {
+        "description": "Langfuse server URL (default: https://cloud.langfuse.com)",
+        "prompt": "Langfuse server URL (leave empty for cloud.langfuse.com)",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
+
    # ── Messaging platforms ──
    "TELEGRAM_BOT_TOKEN": {
        "description": "Telegram bot token from @BotFather",
@@ -1700,6 +1984,14 @@ OPTIONAL_ENV_VARS = {
        "category": "messaging",
        "advanced": True,
    },
+    "MATRIX_DM_AUTO_THREAD": {
+        "description": "Auto-create threads for DM messages in Matrix (default: false)",
+        "prompt": "Auto-create threads in DMs (true/false)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+        "advanced": True,
+    },
    "MATRIX_DEVICE_ID": {
        "description": "Stable Matrix device ID for E2EE persistence across restarts (e.g. HERMES_BOT)",
        "prompt": "Matrix device ID (stable across restarts)",
@@ -2041,14 +2333,21 @@ def _normalize_custom_provider_entry(
        "baseUrl": "base_url",
        "apiMode": "api_mode",
        "keyEnv": "key_env",
+        "apiKeyEnv": "key_env",  # alias — OpenClaw-compatible + docs variant
        "defaultModel": "default_model",
        "contextLength": "context_length",
        "rateLimitDelay": "rate_limit_delay",
    }
+    # api_key_env is a documented snake_case alias for key_env (see
+    # website/docs/guides/azure-foundry.md).  Normalize it up front so the
+    # rest of the normalizer treats it as the canonical field.
+    if "api_key_env" in entry and "key_env" not in entry:
+        entry["key_env"] = entry["api_key_env"]
    _KNOWN_KEYS = {
-        "name", "api", "url", "base_url", "api_key", "key_env",
+        "name", "api", "url", "base_url", "api_key", "key_env", "api_key_env",
        "api_mode", "transport", "model", "default_model", "models",
        "context_length", "rate_limit_delay",
+        "request_timeout_seconds", "stale_timeout_seconds",
    }
    for camel, snake in _CAMEL_ALIASES.items():
        if camel in entry and snake not in entry:
@@ -2206,6 +2505,71 @@ def get_compatible_custom_providers(
    return compatible


+def get_custom_provider_context_length(
+    model: str,
+    base_url: str,
+    custom_providers: Optional[List[Dict[str, Any]]] = None,
+    config: Optional[Dict[str, Any]] = None,
+) -> Optional[int]:
+    """Look up a per-model ``context_length`` override from ``custom_providers``.
+
+    Matches any entry whose ``base_url`` equals ``base_url`` (trailing-slash
+    insensitive) and returns ``custom_providers[i].models.<model>.context_length``
+    if present and valid.  Returns ``None`` when no override applies.
+
+    Malformed data is skipped, never raised on: non-dict entries, non-string
+    ``base_url`` values, and bool / non-numeric / non-positive context lengths.
+
+    This is the single source of truth for custom-provider context overrides,
+    used by:
+      * ``AIAgent.__init__`` (startup resolution)
+      * ``AIAgent.switch_model`` (mid-session ``/model`` switch)
+      * ``hermes_cli.model_switch.resolve_display_context_length`` (``/model`` confirmation display)
+      * ``gateway.run._format_session_info`` (``/info`` display)
+      * ``agent.model_metadata.get_model_context_length`` (when custom_providers is threaded through)
+
+    Before this helper existed, the lookup was duplicated in ``run_agent.py``'s
+    startup path only; every other path (notably ``/model`` switch) fell back
+    to the 128K default.  See #15779.
+    """
+    if not model or not isinstance(base_url, str):
+        return None
+    target_url = base_url.rstrip("/")
+    if not target_url:
+        return None
+    if custom_providers is None:
+        try:
+            custom_providers = get_compatible_custom_providers(config)
+        except Exception:
+            # Best-effort fallback: use the raw list from ``config`` if there is one.
+            raw = config.get("custom_providers") if isinstance(config, dict) else None
+            custom_providers = raw if isinstance(raw, list) else []
+    if not isinstance(custom_providers, list):
+        return None
+
+    for entry in custom_providers:
+        if not isinstance(entry, dict):
+            continue
+        entry_url = entry.get("base_url")
+        if not isinstance(entry_url, str) or entry_url.rstrip("/") != target_url:
+            continue
+        models = entry.get("models")
+        model_cfg = models.get(model) if isinstance(models, dict) else None
+        if not isinstance(model_cfg, dict):
+            continue
+        raw_ctx = model_cfg.get("context_length")
+        # bool is an int subclass: ``context_length: true`` must not become 1.
+        if raw_ctx is None or isinstance(raw_ctx, bool):
+            continue
+        try:
+            ctx = int(raw_ctx)
+        except (TypeError, ValueError, OverflowError):
+            continue
+        if ctx > 0:
+            return ctx
+    return None
+
+
 def check_config_version() -> Tuple[int, int]:
    """
    Check config version.
@@ -2235,6 +2599,9 @@ _KNOWN_ROOT_KEYS = {
 _VALID_CUSTOM_PROVIDER_FIELDS = {
    "name", "base_url", "api_key", "api_mode", "model", "models",
    "context_length", "rate_limit_delay",
+    # key_env is read at runtime by runtime_provider.py and auxiliary_client.py
+    # — include it here so the set accurately describes the supported schema.
+    "key_env",
 }

 # Fields that look like they should be inside custom_providers, not at root
@@ -2311,10 +2678,32 @@ def validate_config_structure(config: Optional[Dict[str, Any]] = None) -> List["
                        "Add the API endpoint URL, e.g.: base_url: https://api.example.com/v1",
                    ))

-    # ── fallback_model must be a top-level dict with provider + model ────
+    # ── fallback_model: single dict OR list of dicts (chain) ─────────────
    fb = config.get("fallback_model")
    if fb is not None:
-        if not isinstance(fb, dict):
+        if isinstance(fb, list):
+            # Chain fallback — validate each entry
+            for i, entry in enumerate(fb):
+                if not isinstance(entry, dict):
+                    issues.append(ConfigIssue(
+                        "error",
+                        f"fallback_model[{i}] should be a dict, got {type(entry).__name__}",
+                        "Each entry needs provider + model",
+                    ))
+                else:
+                    if not entry.get("provider"):
+                        issues.append(ConfigIssue(
+                            "warning",
+                            f"fallback_model[{i}] is missing 'provider' field",
+                            "Add: provider: openrouter (or another provider)",
+                        ))
+                    if not entry.get("model"):
+                        issues.append(ConfigIssue(
+                            "warning",
+                            f"fallback_model[{i}] is missing 'model' field",
+                            "Add: model: <model-name>",
+                        ))
+        elif not isinstance(fb, dict):
            issues.append(ConfigIssue(
                "error",
                f"fallback_model should be a dict with 'provider' and 'model', got {type(fb).__name__}",
@@ -3099,6 +3488,52 @@ def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    return config


+def cfg_get(cfg: Optional[Dict[str, Any]], *keys: str, default: Any = None) -> Any:
+    """Walk ``keys`` down through nested dicts; yield ``default`` on any miss.
+
+    One shared replacement for the ``cfg.get("X", {}).get("Y", default)``
+    idiom that is repeated 50+ times across the codebase.  ``default`` comes
+    back in each of these situations:
+
+      1. A key along the path is absent (no KeyError is raised).
+      2. A value along the path is not a dict — for instance the user wrote
+         a string where a section belongs (no AttributeError from ``.get()``).
+      3. ``cfg`` itself is not a dict, e.g. ``None`` (callers sometimes pass
+         ``load_config() or None``).
+
+    The name is ``cfg_get`` and not ``cfg_path`` so it cannot shadow the
+    common ``cfg_path = _hermes_home / "config.yaml"`` local variable found
+    in gateway/run.py, cron/scheduler.py, main.py, etc.
+
+    A key that is present but holds ``None`` yields ``None``, exactly like
+    ``dict.get(key, default)``: ``default`` stands in for *absent* keys only.
+
+    Examples:
+        >>> cfg_get({"agent": {"reasoning_effort": "high"}}, "agent", "reasoning_effort")
+        'high'
+        >>> cfg_get({}, "agent", "reasoning_effort", default="medium")
+        'medium'
+        >>> cfg_get({"agent": "oops_a_string"}, "agent", "reasoning_effort", default="low")
+        'low'
+        >>> cfg_get(None, "anything", default=42)
+        42
+        >>> cfg_get({"a": {"b": None}}, "a", "b", default="def")  # explicit None preserved
+        >>> cfg_get({"a": {"b": False}}, "a", "b", default=True)  # falsy values preserved
+        False
+    """
+    if isinstance(cfg, dict):
+        current: Any = cfg
+        # Descend one level per key; stop as soon as the path breaks.
+        for step in keys:
+            if isinstance(current, dict) and step in current:
+                current = current[step]
+            else:
+                return default
+        return current
+    # ``cfg`` was None or some other non-dict value.
+    return default
+
+

 def read_raw_config() -> Dict[str, Any]:
    """Read ~/.hermes/config.yaml as-is, without merging defaults or migrating.
@@ -3107,25 +3542,62 @@ def read_raw_config() -> Dict[str, Any]:
    be parsed.  Use this for lightweight config reads where you just need a
    single value and don't want the overhead of ``load_config()``'s deep-merge
    + migration pipeline.
+
+    Cached on the config file's (mtime_ns, size) — same strategy as
+    ``load_config()``. Returns a deepcopy on every call since some callers
+    mutate the result before passing to ``save_config()``.
    """
    try:
        config_path = get_config_path()
-        if config_path.exists():
-            with open(config_path, encoding="utf-8") as f:
-                return yaml.safe_load(f) or {}
+        st = config_path.stat()
+        cache_key = (st.st_mtime_ns, st.st_size)
+    except (FileNotFoundError, OSError):
+        return {}
+
+    path_key = str(config_path)
+    cached = _RAW_CONFIG_CACHE.get(path_key)
+    if cached is not None and cached[:2] == cache_key:
+        return copy.deepcopy(cached[2])
+
+    try:
+        with open(config_path, encoding="utf-8") as f:
+            data = yaml.safe_load(f) or {}
    except Exception:
-        pass
-    return {}
+        return {}
+
+    if not isinstance(data, dict):
+        data = {}
+    _RAW_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(data))
+    return data


 def load_config() -> Dict[str, Any]:
-    """Load configuration from ~/.hermes/config.yaml."""
+    """Load configuration from ~/.hermes/config.yaml.
+
+    Cached on the config file's (mtime_ns, size). Returns a deepcopy of
+    the cached value when unchanged, since most call sites mutate the
+    result (e.g. ``cfg["model"]["default"] = ...`` before ``save_config``).
+    The cache is keyed on ``str(config_path)`` so profile switches
+    (which change ``HERMES_HOME`` and therefore ``get_config_path()``)
+    don't collide.
+    """
    ensure_hermes_home()
    config_path = get_config_path()
-    
+    path_key = str(config_path)
+
+    try:
+        st = config_path.stat()
+        cache_key: Optional[Tuple[int, int]] = (st.st_mtime_ns, st.st_size)
+    except FileNotFoundError:
+        cache_key = None
+
+    cached = _LOAD_CONFIG_CACHE.get(path_key)
+    if cached is not None and cache_key is not None and cached[:2] == cache_key:
+        return copy.deepcopy(cached[2])
+
    config = copy.deepcopy(DEFAULT_CONFIG)
-    
-    if config_path.exists():
+
+    if cache_key is not None:
        try:
            with open(config_path, encoding="utf-8") as f:
                user_config = yaml.safe_load(f) or {}
@@ -3143,20 +3615,26 @@ def load_config() -> Dict[str, Any]:

    normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
    expanded = _expand_env_vars(normalized)
-    _LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded)
+    _LAST_EXPANDED_CONFIG_BY_PATH[path_key] = copy.deepcopy(expanded)
+    if cache_key is not None:
+        _LOAD_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(expanded))
+    else:
+        _LOAD_CONFIG_CACHE.pop(path_key, None)
    return expanded


 _SECURITY_COMMENT = """
 # ── Security ──────────────────────────────────────────────────────────
-# API keys, tokens, and passwords are redacted from tool output by default.
-# Set to false to see full values (useful for debugging auth issues).
+# Secret redaction is OFF by default — tool output (terminal stdout,
+# read_file results, web content) passes through unmodified. Set
+# redact_secrets to true to mask strings that look like API keys, tokens,
+# and passwords before they enter the model context and logs.
 # tirith pre-exec scanning is enabled by default when the tirith binary
 # is available. Configure via security.tirith_* keys or env vars
 # (TIRITH_ENABLED, TIRITH_BIN, TIRITH_TIMEOUT, TIRITH_FAIL_OPEN).
 #
 # security:
-#   redact_secrets: false
+#   redact_secrets: true
 #   tirith_enabled: true
 #   tirith_path: "tirith"
 #   tirith_timeout: 5
@@ -3189,11 +3667,11 @@ _FALLBACK_COMMENT = """

 _COMMENTED_SECTIONS = """
 # ── Security ──────────────────────────────────────────────────────────
-# API keys, tokens, and passwords are redacted from tool output by default.
-# Set to false to see full values (useful for debugging auth issues).
+# Secret redaction is OFF by default. Set to true to mask strings that
+# look like API keys, tokens, and passwords in tool output and logs.
 #
 # security:
-#   redact_secrets: false
+#   redact_secrets: true

 # ── Fallback Model ────────────────────────────────────────────────────
 # Automatic provider failover when primary is unavailable.
@@ -3244,7 +3722,12 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
+    fb_is_valid = False
+    if isinstance(fb, list):
+        fb_is_valid = any(isinstance(e, dict) and e.get("provider") and e.get("model") for e in fb)
+    elif isinstance(fb, dict):
+        fb_is_valid = bool(fb.get("provider") and fb.get("model"))
+    if not fb_is_valid:
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -3313,18 +3796,27 @@ def _sanitize_env_lines(lines: list) -> list:

        # Detect concatenated KEY=VALUE pairs on one line.
        # Search for known KEY= patterns at any position in the line.
-        split_positions = []
+        # We collect full needle ranges so we can drop matches that are
+        # fully contained within a longer overlapping needle. Without this,
+        # suffix collisions corrupt the file: e.g. LM_API_KEY= inside
+        # GLM_API_KEY= would otherwise split the line into "G\nLM_API_KEY=...".
+        match_ranges: list[tuple[int, int]] = []
        for key_name in known_keys:
            needle = key_name + "="
            idx = stripped.find(needle)
            while idx >= 0:
-                split_positions.append(idx)
+                match_ranges.append((idx, idx + len(needle)))
                idx = stripped.find(needle, idx + len(needle))

+        split_positions = sorted({
+            s for s, e in match_ranges
+            if not any(
+                s2 <= s and e2 >= e and (s2, e2) != (s, e)
+                for s2, e2 in match_ranges
+            )
+        })
+
        if len(split_positions) > 1:
-            split_positions.sort()
-            # Deduplicate (shouldn't happen, but be safe)
-            split_positions = sorted(set(split_positions))
            for i, pos in enumerate(split_positions):
                end = split_positions[i + 1] if i + 1 < len(split_positions) else len(stripped)
                part = stripped[pos:end].strip()
@@ -3370,7 +3862,7 @@ def sanitize_env_file() -> int:
            f.writelines(sanitized)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, env_path)
+        atomic_replace(tmp_path, env_path)
    except BaseException:
        try:
            os.unlink(tmp_path)
@@ -3433,7 +3925,7 @@ def save_env_value(key: str, value: str):
    value = _check_non_ascii_credential(key, value)
    ensure_hermes_home()
    env_path = get_env_path()
-    
+
    # On Windows, open() defaults to the system locale (cp1252) which can
    # cause OSError errno 22 on UTF-8 .env files.
    read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
@@ -3445,7 +3937,7 @@ def save_env_value(key: str, value: str):
            lines = f.readlines()
        # Sanitize on every read: split concatenated keys, drop stale placeholders
        lines = _sanitize_env_lines(lines)
-    
+
    # Find and update or append
    found = False
    for i, line in enumerate(lines):
@@ -3453,7 +3945,7 @@ def save_env_value(key: str, value: str):
            lines[i] = f"{key}={value}\n"
            found = True
            break
-    
+
    if not found:
        # Ensure there's a newline at the end of the file before appending
        if lines and not lines[-1].endswith("\n"):
@@ -3473,7 +3965,7 @@ def save_env_value(key: str, value: str):
            f.writelines(lines)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, env_path)
+        atomic_replace(tmp_path, env_path)
        # Restore original permissions before _secure_file may tighten them.
        if original_mode is not None:
            try:
@@ -3529,7 +4021,7 @@ def remove_env_value(key: str) -> bool:
                f.writelines(new_lines)
                f.flush()
                os.fsync(f.fileno())
-            os.replace(tmp_path, env_path)
+            atomic_replace(tmp_path, env_path)
            if original_mode is not None:
                try:
                    os.chmod(env_path, original_mode)
@@ -3616,12 +4108,13 @@ def get_env_value(key: str) -> Optional[str]:
 # =============================================================================

 def redact_key(key: str) -> str:
-    """Redact an API key for display."""
-    if not key:
-        return color("(not set)", Colors.DIM)
-    if len(key) < 12:
-        return "***"
-    return key[:4] + "..." + key[-4:]
+    """Redact an API key for display.
+
+    Thin wrapper over :func:`agent.redact.mask_secret` — preserves the
+    "(not set)" placeholder in dim color for the empty case.
+
+    Args:
+        key: The secret to mask; forwarded unchanged to ``mask_secret``.
+
+    Returns:
+        Whatever ``mask_secret`` produces for *key*, with the dim-colored
+        "(not set)" string passed as its ``empty`` argument.
+
+    NOTE(review): the inline implementation this replaces returned ``"***"``
+    for values shorter than 12 characters and ``first4...last4`` otherwise —
+    confirm ``mask_secret`` keeps the same thresholds.
+    """
+    from agent.redact import mask_secret
+    return mask_secret(key, empty=color("(not set)", Colors.DIM))


 def show_config():
@@ -3701,6 +4194,9 @@ def show_config():
        print(f"  Daytona image: {terminal.get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')}")
        daytona_key = get_env_value('DAYTONA_API_KEY')
        print(f"  API key:      {'configured' if daytona_key else '(not set)'}")
+    elif terminal.get('backend') == 'vercel_sandbox':
+        print(f"  Vercel runtime: {terminal.get('vercel_runtime', 'node24')}")
+        print(f"  Vercel auth:    {'configured' if get_env_value('VERCEL_OIDC_TOKEN') or (get_env_value('VERCEL_TOKEN') and get_env_value('VERCEL_PROJECT_ID') and get_env_value('VERCEL_TEAM_ID')) else '(not set)'}")
    elif terminal.get('backend') == 'ssh':
        ssh_host = get_env_value('TERMINAL_SSH_HOST')
        ssh_user = get_env_value('TERMINAL_SSH_USER')
@@ -3893,7 +4389,9 @@ def set_config_value(key: str, value: str):
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
+        "terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
        "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
+        "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
        "terminal.cwd": "TERMINAL_CWD",
        "terminal.timeout": "TERMINAL_TIMEOUT",
        "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
@@ -0,0 +1,235 @@
+"""CLI subcommand: `hermes curator <subcommand>`.
+
+Thin shell around agent/curator.py and tools/skill_usage.py. Renders a status
+table, triggers a run, pauses/resumes, and pins/unpins skills.
+
+This module intentionally has no side effects at import time — main.py wires
+the argparse subparsers on demand.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from datetime import datetime, timezone
+from typing import Optional
+
+
+def _fmt_ts(ts: Optional[str]) -> str:
+    """Render an ISO-8601 timestamp as a coarse relative age.
+
+    Args:
+        ts: Timestamp string accepted by ``datetime.fromisoformat``; naive
+            values are interpreted as UTC. Falsy values mean "no timestamp".
+
+    Returns:
+        ``"never"`` for a falsy *ts*, the raw value (via ``str``) when it
+        cannot be parsed, otherwise ``"<n>s ago"``, ``"<n>m ago"``,
+        ``"<n>h ago"`` or ``"<n>d ago"``.
+    """
+    if not ts:
+        return "never"
+    try:
+        dt = datetime.fromisoformat(ts)
+    except (TypeError, ValueError):
+        # Unparseable input is shown verbatim rather than failing the command.
+        return str(ts)
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    delta = datetime.now(timezone.utc) - dt
+    # Clamp at zero: a timestamp slightly in the future (clock skew between
+    # the writer and this process) must not render as a negative age such
+    # as "-12s ago".
+    secs = max(0, int(delta.total_seconds()))
+    if secs < 60:
+        return f"{secs}s ago"
+    if secs < 3600:
+        return f"{secs // 60}m ago"
+    if secs < 86400:
+        return f"{secs // 3600}h ago"
+    return f"{secs // 86400}d ago"
+
+
+def _cmd_status(args) -> int:
+    """Print the curator's state, settings and per-skill statistics.
+
+    Output, in order: the ENABLED/PAUSED/DISABLED headline, the run counter,
+    last-run age and summary (plus the report path when one is recorded),
+    the configured interval and stale/archive thresholds, then a breakdown
+    of agent-created skills by state, the pinned skill names, and the five
+    active skills with the oldest ``last_used_at`` (falling back to
+    ``created_at``).
+
+    Args:
+        args: Parsed argparse namespace; not read by this command.
+
+    Returns:
+        Always ``0``.
+    """
+    from agent import curator
+    from tools import skill_usage
+
+    saved = curator.load_state()
+    is_on = curator.is_enabled()
+    is_paused = saved.get("paused", False)
+    last_run_at = saved.get("last_run_at")
+    last_summary = saved.get("last_run_summary") or "(none)"
+    run_total = saved.get("run_count", 0)
+
+    # A paused curator reports PAUSED whether or not it is enabled.
+    if is_paused:
+        headline = "PAUSED"
+    elif is_on:
+        headline = "ENABLED"
+    else:
+        headline = "DISABLED"
+
+    print(f"curator: {headline}")
+    print(f"  runs:           {run_total}")
+    print(f"  last run:       {_fmt_ts(last_run_at)}")
+    print(f"  last summary:   {last_summary}")
+    report_path = saved.get("last_report_path")
+    if report_path:
+        print(f"  last report:    {report_path}")
+
+    # Whole multiples of a day are shown as "Nd"; anything else stays in hours.
+    hours = curator.get_interval_hours()
+    if hours % 24 == 0 and hours >= 24:
+        cadence = f"{hours // 24}d"
+    else:
+        cadence = f"{hours}h"
+    print(f"  interval:       every {cadence}")
+    print(f"  stale after:    {curator.get_stale_after_days()}d unused")
+    print(f"  archive after:  {curator.get_archive_after_days()}d unused")
+
+    rows = skill_usage.agent_created_report()
+    if not rows:
+        print("\nno agent-created skills")
+        return 0
+
+    # Pre-seed the three known states so each is listed even when empty;
+    # rows carrying any other state are still bucketed but never printed.
+    known_states = ("active", "stale", "archived")
+    buckets = {name: [] for name in known_states}
+    pinned_names = []
+    for row in rows:
+        buckets.setdefault(row.get("state", "active"), []).append(row)
+        if row.get("pinned"):
+            pinned_names.append(row["name"])
+
+    print(f"\nagent-created skills: {len(rows)} total")
+    for name in known_states:
+        print(f"  {name:10s} {len(buckets[name])}")
+
+    if pinned_names:
+        print(f"\npinned ({len(pinned_names)}): {', '.join(pinned_names)}")
+
+    def _recency(row):
+        # Never-used skills sort by creation time; rows with neither sort first.
+        return row.get("last_used_at") or row.get("created_at") or ""
+
+    coldest = sorted(buckets["active"], key=_recency)[:5]
+    if coldest:
+        print("\nleast recently used (top 5):")
+        for row in coldest:
+            when = _fmt_ts(row.get("last_used_at"))
+            print(f"  {row['name']:40s}  use={row.get('use_count', 0):3d}  last_used={when}")
+
+    return 0
+
+
+def _cmd_run(args) -> int:
+    """Trigger a curator review pass and report its automatic transitions.
+
+    Args:
+        args: Parsed argparse namespace; ``args.synchronous`` selects whether
+            the review is requested synchronously.
+
+    Returns:
+        ``1`` when the curator is disabled in config (nothing is run),
+        otherwise ``0``.
+    """
+    from agent import curator
+
+    if not curator.is_enabled():
+        print("curator: disabled via config; enable with `curator.enabled: true`")
+        return 1
+
+    print("curator: running review pass...")
+
+    # Summaries reported by the review are echoed straight to stdout.
+    outcome = curator.run_curator_review(
+        on_summary=lambda msg: print(msg),
+        synchronous=bool(args.synchronous),
+    )
+
+    transitions = outcome.get("auto_transitions", {})
+    if transitions:
+        print(
+            f"auto: checked={transitions.get('checked', 0)} "
+            f"stale={transitions.get('marked_stale', 0)} "
+            f"archived={transitions.get('archived', 0)} "
+            f"reactivated={transitions.get('reactivated', 0)}"
+        )
+
+    if args.synchronous:
+        return 0
+    print("llm pass running in background — check `hermes curator status` later")
+    return 0
+
+
+def _cmd_pause(args) -> int:
+    """Handle the ``pause`` subcommand: set the curator's paused flag.
+
+    Args:
+        args: Parsed argparse namespace; not read by this command.
+
+    Returns:
+        Always ``0``.
+    """
+    from agent import curator
+    curator.set_paused(True)
+    print("curator: paused")
+    return 0
+
+
+def _cmd_resume(args) -> int:
+    """Handle the ``resume`` subcommand: clear the curator's paused flag.
+
+    Args:
+        args: Parsed argparse namespace; not read by this command.
+
+    Returns:
+        Always ``0``.
+    """
+    from agent import curator
+    curator.set_paused(False)
+    print("curator: resumed")
+    return 0
+
+
+def _cmd_pin(args) -> int:
+    """Pin an agent-created skill.
+
+    Args:
+        args: Parsed argparse namespace; ``args.skill`` is the skill name.
+
+    Returns:
+        ``0`` after pinning, ``1`` when the skill is not agent-created (in
+        which case nothing is changed).
+    """
+    from tools import skill_usage
+
+    name = args.skill
+    if skill_usage.is_agent_created(name):
+        skill_usage.set_pinned(name, True)
+        print(f"curator: pinned '{name}' (will bypass auto-transitions)")
+        return 0
+
+    print(
+        f"curator: '{name}' is bundled or hub-installed — cannot pin "
+        "(only agent-created skills participate in curation)"
+    )
+    return 1
+
+
+def _cmd_unpin(args) -> int:
+    """Remove the pin from an agent-created skill.
+
+    Args:
+        args: Parsed argparse namespace; ``args.skill`` is the skill name.
+
+    Returns:
+        ``0`` after unpinning, ``1`` when the skill is not agent-created (in
+        which case nothing is changed).
+    """
+    from tools import skill_usage
+
+    name = args.skill
+    if skill_usage.is_agent_created(name):
+        skill_usage.set_pinned(name, False)
+        print(f"curator: unpinned '{name}'")
+        return 0
+
+    print(
+        f"curator: '{name}' is bundled or hub-installed — "
+        "there's nothing to unpin (curator only tracks agent-created skills)"
+    )
+    return 1
+
+
+def _cmd_restore(args) -> int:
+    """Restore a skill via ``skill_usage.restore_skill`` and print its message.
+
+    Args:
+        args: Parsed argparse namespace; ``args.skill`` is the skill name.
+
+    Returns:
+        ``0`` when ``restore_skill`` reports success, ``1`` otherwise.
+    """
+    from tools import skill_usage
+
+    restored, message = skill_usage.restore_skill(args.skill)
+    print(f"curator: {message}")
+    if restored:
+        return 0
+    return 1
+
+
+# ---------------------------------------------------------------------------
+# argparse wiring (called from hermes_cli.main)
+# ---------------------------------------------------------------------------
+
+def register_cli(parent: argparse.ArgumentParser) -> None:
+    """Wire every ``curator`` subcommand onto *parent*.
+
+    *parent* is the parser main.py obtains from
+    ``subparsers.add_parser("curator", ...)``. Each subcommand stores its
+    handler in the namespace attribute ``func``; when no subcommand is
+    given, the default ``func`` prints *parent*'s help and returns ``0``.
+    """
+    def _show_help(_ns) -> int:
+        parent.print_help()
+        return 0
+
+    parent.set_defaults(func=_show_help)
+    subs = parent.add_subparsers(dest="curator_command")
+
+    def _add(name: str, summary: str, handler) -> argparse.ArgumentParser:
+        # Create one subcommand parser and bind its handler as ``func``.
+        sub = subs.add_parser(name, help=summary)
+        sub.set_defaults(func=handler)
+        return sub
+
+    _add("status", "Show curator status and skill stats", _cmd_status)
+
+    run_parser = _add("run", "Trigger a curator review now", _cmd_run)
+    run_parser.add_argument(
+        "--sync", "--synchronous", dest="synchronous", action="store_true",
+        help="Wait for the LLM review pass to finish (default: background thread)",
+    )
+
+    _add("pause", "Pause the curator until resumed", _cmd_pause)
+    _add("resume", "Resume a paused curator", _cmd_resume)
+
+    # pin / unpin / restore each take exactly one positional skill name.
+    for name, summary, handler in (
+        ("pin", "Pin a skill so the curator never auto-transitions it", _cmd_pin),
+        ("unpin", "Unpin a skill", _cmd_unpin),
+        ("restore", "Restore an archived skill", _cmd_restore),
+    ):
+        _add(name, summary, handler).add_argument("skill", help="Skill name")
+
+
+def cli_main(argv=None) -> int:
+    """Standalone entry point (also usable by hermes_cli.main fallthrough).
+
+    Args:
+        argv: Argument list handed to ``parse_args``; ``None`` lets argparse
+            read ``sys.argv``.
+
+    Returns:
+        The selected handler's result coerced to ``int`` (a falsy result
+        becomes ``0``), or ``0`` after printing help when no handler is set.
+    """
+    parser = argparse.ArgumentParser(prog="hermes curator")
+    register_cli(parser)
+    namespace = parser.parse_args(argv)
+    handler = getattr(namespace, "func", None)
+    if handler is not None:
+        return int(handler(namespace) or 0)
+    parser.print_help()
+    return 0
+
+
+if __name__ == "__main__":  # pragma: no cover
+    sys.exit(cli_main())
@@ -7,7 +7,6 @@ Currently supports:

 import io
 import json
-import os
 import sys
 import time
 import urllib.error
@@ -18,6 +17,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_home
+from utils import atomic_replace


 # ---------------------------------------------------------------------------
@@ -45,8 +45,13 @@ def _pending_file() -> Path:
    Each entry: ``{"url": "...", "expire_at": <unix_ts>}``.  Scheduled
    DELETEs used to be handled by spawning a detached Python process per
    paste that slept for 6 hours; those accumulated forever if the user
-    ran ``hermes debug share`` repeatedly.  We now persist the schedule
-    to disk and sweep expired entries on the next debug invocation.
+    ran ``hermes debug share`` repeatedly.
+
+    Deletion is now driven by the gateway's cron ticker
+    (``gateway/run.py::_start_cron_ticker``) which calls
+    ``_sweep_expired_pastes`` once per hour.  ``hermes debug share`` also
+    runs an opportunistic sweep on entry as a fallback for CLI-only users
+    who never start the gateway.
    """
    return get_hermes_home() / "pastes" / "pending.json"

@@ -74,7 +79,7 @@ def _save_pending(entries: list[dict]) -> None:
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(".json.tmp")
        tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
-        os.replace(tmp, path)
+        atomic_replace(tmp, path)
    except OSError:
        # Non-fatal — worst case the user has to run ``hermes debug delete``
        # manually.
@@ -223,9 +228,10 @@ def _schedule_auto_delete(urls: list[str], delay_seconds: int = _AUTO_DELETE_SEC
    interpreters that never exited until the sleep completed.

    The replacement is stateless: we append to ``~/.hermes/pastes/pending.json``
-    and rely on opportunistic sweeps (``_sweep_expired_pastes``) called from
-    every ``hermes debug`` invocation.  If the user never runs ``hermes debug``
-    again, paste.rs's own retention policy handles cleanup.
+    and the gateway's cron ticker sweeps expired entries once per hour.
+    ``hermes debug share`` also runs an opportunistic sweep as a fallback
+    for CLI-only users.  If neither runs again, paste.rs's own retention
+    policy handles cleanup.
    """
    _record_pending(urls, delay_seconds=delay_seconds)

@@ -13,7 +13,6 @@ automatically.

 from __future__ import annotations

-import io
 import os
 import sys
 import time
@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+import importlib.util
 from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
@@ -30,6 +31,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

 from hermes_cli.colors import Colors, color
 from hermes_cli.models import _HERMES_USER_AGENT
+from hermes_cli.vercel_auth import describe_vercel_auth
 from hermes_constants import OPENROUTER_MODELS_URL
 from utils import base_url_host_matches

@@ -46,6 +48,7 @@ _PROVIDER_ENV_HINTS = (
    "Z_AI_API_KEY",
    "KIMI_API_KEY",
    "KIMI_CN_API_KEY",
+    "GMI_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
@@ -56,6 +59,7 @@ _PROVIDER_ENV_HINTS = (
    "OPENCODE_ZEN_API_KEY",
    "OPENCODE_GO_API_KEY",
    "XIAOMI_API_KEY",
+    "TOKENHUB_API_KEY",
 )


@@ -291,15 +295,23 @@ def run_doctor(args):

            known_providers: set = set()
            try:
-                from hermes_cli.auth import PROVIDER_REGISTRY
+                from hermes_cli.auth import (
+                    PROVIDER_REGISTRY,
+                    resolve_provider as _resolve_auth_provider,
+                )
                known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"}
            except Exception:
+                _resolve_auth_provider = None
                pass
            try:
                from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
-                from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
+                from hermes_cli.providers import (
+                    normalize_provider as _normalize_catalog_provider,
+                    resolve_provider_full as _resolve_provider_full,
+                )
            except Exception:
                _compatible_custom_providers = None
+                _normalize_catalog_provider = None
                _resolve_provider_full = None

            custom_providers = []
@@ -319,13 +331,43 @@ def run_doctor(args):
                if name:
                    known_providers.add("custom:" + name.lower().replace(" ", "-"))

-            canonical_provider = provider
-            if provider and _resolve_provider_full is not None and provider != "auto":
+            valid_provider_ids = set(known_providers)
+            provider_ids_to_accept = {provider} if provider else set()
+            if _normalize_catalog_provider is not None:
+                for known_provider in known_providers:
+                    try:
+                        valid_provider_ids.add(_normalize_catalog_provider(known_provider))
+                    except Exception:
+                        continue
+
+            runtime_provider = provider
+            if (
+                provider
+                and _resolve_auth_provider is not None
+                and provider not in ("auto", "custom")
+            ):
+                try:
+                    runtime_provider = _resolve_auth_provider(provider)
+                    provider_ids_to_accept.add(runtime_provider)
+                except Exception:
+                    runtime_provider = provider
+
+            catalog_provider = provider
+            if (
+                provider
+                and _resolve_provider_full is not None
+                and provider not in ("auto", "custom")
+            ):
                provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
-                canonical_provider = provider_def.id if provider_def is not None else None
+                catalog_provider = provider_def.id if provider_def is not None else None
+                if catalog_provider is not None:
+                    provider_ids_to_accept.add(catalog_provider)

            if provider and provider != "auto":
-                if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
+                if catalog_provider is None or (
+                    known_providers
+                    and not (provider_ids_to_accept & valid_provider_ids)
+                ):
                    known_list = ", ".join(sorted(known_providers)) if known_providers else "(unavailable)"
                    check_fail(
                        f"model.provider '{provider_raw}' is not a recognised provider",
@@ -338,7 +380,24 @@ def run_doctor(args):
                    )

            # Warn if model is set to a provider-prefixed name on a provider that doesn't use them
-            if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"):
+            provider_for_policy = runtime_provider or catalog_provider
+            providers_accepting_vendor_slugs = {
+                "openrouter",
+                "custom",
+                "auto",
+                "ai-gateway",
+                "kilocode",
+                "opencode-zen",
+                "huggingface",
+                "lmstudio",
+                "nous",
+            }
+            if (
+                default_model
+                and "/" in default_model
+                and provider_for_policy
+                and provider_for_policy not in providers_accepting_vendor_slugs
+            ):
                check_warn(
                    f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'",
                    "(vendor-prefixed slugs belong to aggregators like openrouter)",
@@ -354,20 +413,24 @@ def run_doctor(args):
            # own env-var checks elsewhere in doctor, and get_auth_status()
            # returns a bare {logged_in: False} for anything it doesn't
            # explicitly dispatch, which would produce false positives.
-            if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"):
+            if runtime_provider and runtime_provider not in ("auto", "custom", "openrouter"):
                try:
                    from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
-                    pconfig = PROVIDER_REGISTRY.get(canonical_provider)
+                    pconfig = PROVIDER_REGISTRY.get(runtime_provider)
                    if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
-                        status = get_auth_status(canonical_provider) or {}
-                        configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key"))
+                        status = get_auth_status(runtime_provider) or {}
+                        configured = bool(
+                            status.get("configured")
+                            or status.get("logged_in")
+                            or status.get("api_key")
+                        )
                        if not configured:
                            check_fail(
-                                f"model.provider '{canonical_provider}' is set but no API key is configured",
+                                f"model.provider '{runtime_provider}' is set but no API key is configured",
                                "(check ~/.hermes/.env or run 'hermes setup')",
                            )
                            issues.append(
-                                f"No credentials found for provider '{canonical_provider}'. "
+                                f"No credentials found for provider '{runtime_provider}'. "
                                f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
                                f"or switch providers with 'hermes config set model.provider <name>'"
                            )
@@ -476,6 +539,7 @@ def run_doctor(args):
            get_nous_auth_status,
            get_codex_auth_status,
            get_gemini_oauth_auth_status,
+            get_minimax_oauth_auth_status,
        )

        nous_status = get_nous_auth_status()
@@ -505,13 +569,27 @@ def run_doctor(args):
            check_ok("Google Gemini OAuth", f"(logged in{suffix})")
        else:
            check_warn("Google Gemini OAuth", "(not logged in)")
+
+        minimax_status = get_minimax_oauth_auth_status()
+        if minimax_status.get("logged_in"):
+            region = minimax_status.get("region", "global")
+            check_ok("MiniMax OAuth", f"(logged in, region={region})")
+        else:
+            check_warn("MiniMax OAuth", "(not logged in)")
    except Exception as e:
        check_warn("Auth provider status", f"(could not check: {e})")

    if shutil.which("codex"):
        check_ok("codex CLI")
    else:
-        check_warn("codex CLI not found", "(required for openai-codex login)")
+        # Native OAuth uses Hermes' own device-code flow — the Codex CLI is
+        # only needed if you want to import existing tokens from
+        # ~/.codex/auth.json.  Downgrade to info so users running
+        # `hermes auth openai-codex` aren't told they're missing something.
+        check_info(
+            "codex CLI not installed "
+            "(optional — only required to import tokens from an existing Codex CLI login)"
+        )

    # =========================================================================
    # Check: Directory structure
@@ -795,6 +873,50 @@ def run_doctor(args):
            check_fail("daytona SDK not installed", "(pip install daytona)")
            issues.append("Install daytona SDK: pip install daytona")

+    # Vercel Sandbox (if using vercel_sandbox backend)
+    if terminal_env == "vercel_sandbox":
+        runtime = os.getenv("TERMINAL_VERCEL_RUNTIME", "node24").strip() or "node24"
+        from tools.terminal_tool import _SUPPORTED_VERCEL_RUNTIMES
+        if runtime in _SUPPORTED_VERCEL_RUNTIMES:
+            check_ok("Vercel runtime", f"({runtime})")
+        else:
+            supported = ", ".join(_SUPPORTED_VERCEL_RUNTIMES)
+            check_fail("Vercel runtime unsupported", f"({runtime}; use {supported})")
+            issues.append(f"Set TERMINAL_VERCEL_RUNTIME to one of: {supported}")
+
+        disk = os.getenv("TERMINAL_CONTAINER_DISK", "51200").strip()
+        if disk in ("", "0", "51200"):
+            check_ok("Vercel disk setting", "(uses platform default)")
+        else:
+            check_fail("Vercel custom disk unsupported", "(reset terminal.container_disk to 51200)")
+            issues.append("Vercel Sandbox does not support custom container_disk; use the shared default 51200")
+
+        if importlib.util.find_spec("vercel") is not None:
+            check_ok("vercel SDK", "(installed)")
+        else:
+            check_fail("vercel SDK not installed", "(pip install 'hermes-agent[vercel]')")
+            issues.append("Install the Vercel optional dependency: pip install 'hermes-agent[vercel]'")
+
+        auth_status = describe_vercel_auth()
+        if auth_status.ok:
+            check_ok("Vercel auth", f"({auth_status.label})")
+        elif auth_status.label.startswith("partial"):
+            check_fail("Vercel auth incomplete", f"({auth_status.label})")
+            issues.append("Set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID together")
+        else:
+            check_fail("Vercel auth not configured", f"({auth_status.label})")
+            issues.append(
+                "Configure Vercel Sandbox auth with VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID"
+            )
+        for line in auth_status.detail_lines:
+            check_info(f"Vercel auth {line}")
+
+        persistent = os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("1", "true", "yes", "on")
+        if persistent:
+            check_info("Vercel persistence: snapshot filesystem only; live processes do not survive sandbox recreation")
+        else:
+            check_info("Vercel persistence: ephemeral filesystem")
+
    # Node.js + agent-browser (for browser automation tools)
    if shutil.which("node"):
        check_ok("Node.js")
@@ -933,6 +1055,7 @@ def run_doctor(args):
        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
+        ("GMI Cloud",        ("GMI_API_KEY",),                                "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("NVIDIA NIM",       ("NVIDIA_API_KEY",),                             "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
@@ -33,12 +33,14 @@ def _get_git_commit(project_root: Path) -> str:


 def _redact(value: str) -> str:
-    """Redact all but first 4 and last 4 chars."""
-    if not value:
-        return ""
-    if len(value) < 12:
-        return "***"
-    return value[:4] + "..." + value[-4:]
+    """Redact all but first 4 and last 4 chars.
+
+    Thin wrapper over :func:`agent.redact.mask_secret`. Returns ``""`` for
+    an empty value (matches the historical behavior of this helper —
+    ``hermes dump`` formats empty values as blank, not as ``"(not set)"``).
+    """
+    from agent.redact import mask_secret
+    return mask_secret(value)


 def _gateway_status() -> str:
@@ -7,6 +7,7 @@ import sys
 from pathlib import Path

 from dotenv import load_dotenv
+from utils import atomic_replace


 # Env var name suffixes that indicate credential values.  These are the
@@ -127,7 +128,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
                    f.writelines(sanitized)
                    f.flush()
                    os.fsync(f.fileno())
-                os.replace(tmp, path)
+                atomic_replace(tmp, path)
            except BaseException:
                try:
                    os.unlink(tmp)
@@ -0,0 +1,361 @@
+"""
+hermes fallback — manage the fallback provider chain.
+
+Fallback providers are tried in order when the primary model fails with
+rate-limit, overload, or connection errors. See:
+https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers
+
+Subcommands:
+  hermes fallback [list]   Show the current fallback chain (default when no subcommand)
+  hermes fallback add      Pick provider + model via the same picker as `hermes model`,
+                           then append the selection to the chain
+  hermes fallback remove   Pick an entry to delete from the chain
+  hermes fallback clear    Remove all fallback entries
+
+Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of
+``{provider, model, base_url?, api_mode?}`` dicts).  The legacy single-dict
+``fallback_model`` key is migrated to the list format on the next add, remove, or clear.
+"""
+from __future__ import annotations
+
+import copy
+from typing import Any, Dict, List, Optional
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """Return the normalized fallback chain as a list of dicts.
+
+    Reads the list format (``fallback_providers``) first and falls back to the
+    legacy ``fallback_model`` key (a single dict, or a list of dicts).  Entries
+    lacking ``provider`` or ``model`` are dropped; every entry is a shallow copy.
+    """
+    chain = config.get("fallback_providers") or []
+    if isinstance(chain, list):
+        result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")]
+        if result:
+            return result
+    legacy = config.get("fallback_model")
+    if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"):
+        return [dict(legacy)]
+    if isinstance(legacy, list):
+        return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")]
+    return []
+
+
+def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None:
+    """Store ``chain`` under ``fallback_providers`` in ``config`` (in memory only) and drop the legacy key."""
+    config["fallback_providers"] = chain
+    # Drop the legacy single-dict key on write so there's only one source of truth.
+    if "fallback_model" in config:
+        config.pop("fallback_model", None)
+
+
+def _format_entry(entry: Dict[str, Any]) -> str:
+    """Render an entry on one line as ``model  (via provider)``, plus ``  [base_url]`` when one is set."""
+    provider = entry.get("provider", "?")
+    model = entry.get("model", "?")
+    base = entry.get("base_url")
+    suffix = f"  [{base}]" if base else ""
+    return f"{model}  (via {provider}){suffix}"
+
+
+def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]:
+    """Build a ``{provider, model, base_url?, api_mode?}`` entry from a ``config["model"]`` snapshot, else ``None``."""
+    if not isinstance(model_cfg, dict):
+        return None
+    provider = (model_cfg.get("provider") or "").strip()
+    # The picker writes the selected model to ``model.default``; ``model.model`` is the fallback key.
+    model = (model_cfg.get("default") or model_cfg.get("model") or "").strip()
+    if not provider or not model:
+        return None
+    entry: Dict[str, Any] = {"provider": provider, "model": model}
+    base_url = (model_cfg.get("base_url") or "").strip()
+    if base_url:
+        entry["base_url"] = base_url
+    api_mode = (model_cfg.get("api_mode") or "").strip()
+    if api_mode:
+        entry["api_mode"] = api_mode
+    return entry
+
+
+def _snapshot_auth_active_provider() -> Any:
+    """Return the current ``active_provider`` in auth.json, or ``None`` if it is unset or the store cannot be read."""
+    try:
+        from hermes_cli.auth import _load_auth_store
+        store = _load_auth_store()
+        return store.get("active_provider")
+    except Exception:
+        return None
+
+
+def _restore_auth_active_provider(value: Any) -> None:
+    """Write back a previously snapshotted ``active_provider`` value (best-effort; errors are ignored)."""
+    try:
+        from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store
+        with _auth_store_lock():
+            store = _load_auth_store()
+            store["active_provider"] = value
+            _save_auth_store(store)
+    except Exception:
+        # Best-effort — if auth.json can't be restored, the user's primary
+        # provider may have been deactivated by the picker.  They can re-run
+        # `hermes model` to fix it.  Don't fail the fallback add.
+        pass
+
+
+# ---------------------------------------------------------------------------
+# Subcommand handlers
+# ---------------------------------------------------------------------------
+
+def cmd_fallback_list(args) -> None:  # noqa: ARG001
+    """Print the fallback chain (with the primary model when known), or a hint if the chain is empty."""
+    from hermes_cli.config import load_config
+
+    config = load_config()
+    chain = _read_chain(config)
+
+    print()
+    if not chain:
+        print("  No fallback providers configured.")
+        print()
+        print("  Add one with:  hermes fallback add")
+        print()
+        return
+
+    primary = _describe_primary(config)
+    if primary:
+        print(f"  Primary:   {primary}")
+        print()
+    print(f"  Fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
+    for i, entry in enumerate(chain, 1):
+        print(f"    {i}. {_format_entry(entry)}")
+    print()
+    print("  Tried in order when the primary fails (rate-limit, 5xx, connection errors).")
+    print("  Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers")
+    print()
+
+
+def _describe_primary(config: Dict[str, Any]) -> Optional[str]:
+    """One-line description of the primary model; ``None`` unless ``model`` is a dict or a non-blank string."""
+    model_cfg = config.get("model")
+    if isinstance(model_cfg, dict):
+        provider = (model_cfg.get("provider") or "?").strip() or "?"
+        model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?"
+        return f"{model}  (via {provider})"
+    if isinstance(model_cfg, str) and model_cfg.strip():
+        return model_cfg.strip()
+    return None
+
+
+def cmd_fallback_add(args) -> None:
+    """Launch the same picker as `hermes model`, then append the selection to the chain."""
+    from hermes_cli.main import _require_tty, select_provider_and_model
+    from hermes_cli.config import load_config, save_config
+
+    _require_tty("fallback add")
+
+    # Snapshot BEFORE the picker runs so we can distinguish "user actually
+    # picked something" from "user cancelled" by comparing before/after.
+    before_cfg = load_config()
+    model_before = copy.deepcopy(before_cfg.get("model"))
+    active_provider_before = _snapshot_auth_active_provider()
+
+    print()
+    print("  Adding a fallback provider.  The picker below is the same one used by")
+    print("  `hermes model` — select the provider + model you want as a fallback.")
+    print()
+
+    try:
+        select_provider_and_model(args=args)
+    except SystemExit:
+        # Some provider flows exit on auth failure — restore state and re-raise.
+        _restore_model_cfg(model_before)
+        _restore_auth_active_provider(active_provider_before)
+        raise
+
+    # Read the post-picker state to see what the user selected.
+    after_cfg = load_config()
+    model_after = after_cfg.get("model")
+
+    new_entry = _extract_fallback_from_model_cfg(model_after)
+    if not new_entry:
+        # Picker didn't complete (user cancelled or flow bailed).  Nothing to do.
+        _restore_model_cfg(model_before)
+        _restore_auth_active_provider(active_provider_before)
+        print()
+        print("  No fallback added.")
+        return
+
+    # Picker picked the same thing that's already the primary → nothing changed,
+    # and there's nothing useful to add as a fallback to itself.
+    primary_entry = _extract_fallback_from_model_cfg(model_before)
+    if primary_entry and primary_entry["provider"] == new_entry["provider"] \
+            and primary_entry["model"] == new_entry["model"]:
+        _restore_model_cfg(model_before)
+        _restore_auth_active_provider(active_provider_before)
+        print()
+        print(f"  Selected model matches the current primary ({_format_entry(new_entry)}).")
+        print("  A provider cannot be a fallback for itself — no change.")
+        return
+
+    # Reload the config with the primary restored, then append the new entry
+    # to ``fallback_providers``.  We deliberately re-load (rather than mutating
+    # ``after_cfg``) because the picker may have touched other top-level keys
+    # (custom_providers, providers credentials) that we want to keep.
+    _restore_model_cfg(model_before)
+    _restore_auth_active_provider(active_provider_before)
+
+    final_cfg = load_config()
+    chain = _read_chain(final_cfg)
+
+    # Reject exact-duplicate fallback entries.
+    for existing in chain:
+        if existing.get("provider") == new_entry["provider"] \
+                and existing.get("model") == new_entry["model"]:
+            print()
+            print(f"  {_format_entry(new_entry)} is already in the fallback chain — skipped.")
+            return
+
+    chain.append(new_entry)
+    _write_chain(final_cfg, chain)
+    save_config(final_cfg)
+
+    print()
+    print(f"  Added fallback: {_format_entry(new_entry)}")
+    print(f"  Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
+    print()
+    print("  Run `hermes fallback list` to view, or `hermes fallback remove` to delete.")
+
+
+def _restore_model_cfg(model_before: Any) -> None:
+    """Restore ``config["model"]`` to a captured snapshot and save; a ``None`` snapshot removes the key."""
+    from hermes_cli.config import load_config, save_config
+
+    cfg = load_config()
+    if model_before is None:
+        cfg.pop("model", None)
+    else:
+        cfg["model"] = copy.deepcopy(model_before)
+    save_config(cfg)
+
+
+def cmd_fallback_remove(args) -> None:  # noqa: ARG001
+    """Prompt for one chain entry (curses picker, numbered-list fallback) and remove it; "Cancel" changes nothing."""
+    from hermes_cli.config import load_config, save_config
+
+    config = load_config()
+    chain = _read_chain(config)
+
+    if not chain:
+        print()
+        print("  No fallback providers configured — nothing to remove.")
+        print()
+        return
+
+    choices = [_format_entry(e) for e in chain]
+    choices.append("Cancel")
+
+    try:
+        from hermes_cli.setup import _curses_prompt_choice
+        idx = _curses_prompt_choice("Select a fallback to remove:", choices, 0)
+    except Exception:
+        idx = _numbered_pick("Select a fallback to remove:", choices)
+
+    if idx is None or idx < 0 or idx >= len(chain):
+        print()
+        print("  Cancelled — no change.")
+        return
+
+    removed = chain.pop(idx)
+    _write_chain(config, chain)
+    save_config(config)
+
+    print()
+    print(f"  Removed fallback: {_format_entry(removed)}")
+    if chain:
+        print(f"  Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.")
+    else:
+        print("  Fallback chain is now empty.")
+    print()
+
+
+def cmd_fallback_clear(args) -> None:  # noqa: ARG001
+    """Remove all fallback entries after a ``y``/``yes`` confirmation; any other answer cancels."""
+    from hermes_cli.config import load_config, save_config
+
+    config = load_config()
+    chain = _read_chain(config)
+
+    if not chain:
+        print()
+        print("  No fallback providers configured — nothing to clear.")
+        print()
+        return
+
+    print()
+    print(f"  Current fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):")
+    for i, entry in enumerate(chain, 1):
+        print(f"    {i}. {_format_entry(entry)}")
+    print()
+    try:
+        resp = input("  Clear all entries? [y/N]: ").strip().lower()
+    except (KeyboardInterrupt, EOFError):
+        print()
+        print("  Cancelled.")
+        return
+    if resp not in ("y", "yes"):
+        print("  Cancelled — no change.")
+        return
+
+    _write_chain(config, [])
+    save_config(config)
+    print()
+    print("  Fallback chain cleared.")
+    print()
+
+
+def _numbered_pick(question: str, choices: List[str]) -> Optional[int]:
+    """Numbered-list picker for when curses is unavailable; returns a 0-based index, or ``None`` on blank/Ctrl-C/EOF."""
+    print(question)
+    for i, c in enumerate(choices, 1):
+        print(f"  {i}. {c}")
+    print()
+    while True:
+        try:
+            val = input(f"Choice [1-{len(choices)}]: ").strip()
+            if not val:
+                return None
+            idx = int(val) - 1
+            if 0 <= idx < len(choices):
+                return idx
+            print(f"Please enter 1-{len(choices)}")
+        except ValueError:
+            print("Please enter a number")
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return None
+
+
+# ---------------------------------------------------------------------------
+# Dispatch
+# ---------------------------------------------------------------------------
+
+def cmd_fallback(args) -> None:
+    """Dispatch ``hermes fallback [subcommand]``; no subcommand means ``list``, unknown ones exit with status 2."""
+    sub = getattr(args, "fallback_command", None)
+    if sub in (None, "", "list", "ls"):
+        cmd_fallback_list(args)
+    elif sub == "add":
+        cmd_fallback_add(args)
+    elif sub in ("remove", "rm"):
+        cmd_fallback_remove(args)
+    elif sub == "clear":
+        cmd_fallback_clear(args)
+    else:
+        print(f"Unknown fallback subcommand: {sub}")
+        print("Use one of: list, add, remove, clear")
+        raise SystemExit(2)
@@ -279,9 +279,11 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
                ["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"],
                capture_output=True,
                text=True,
+                encoding="utf-8",
+                errors="ignore",
                timeout=10,
            )
-            if result.returncode != 0:
+            if result.returncode != 0 or result.stdout is None:
                return []
            current_cmd = ""
            for line in result.stdout.split("\n"):
@@ -830,6 +832,22 @@ def _user_dbus_socket_path() -> Path:
    return Path(xdg) / "bus"


+def _user_systemd_private_socket_path() -> Path:
+    """Return the per-user systemd private socket path (regardless of existence)."""
+    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
+    return Path(xdg) / "systemd" / "private"
+
+
+def _user_systemd_socket_ready() -> bool:
+    """Return True when user-scope systemd has a reachable control socket.
+
+    Some distros expose only the per-user systemd private socket even when the
+    D-Bus session bus socket is absent. ``systemctl --user`` can still work in
+    that configuration, so preflight checks must treat either socket as valid.
+    """
+    return _user_dbus_socket_path().exists() or _user_systemd_private_socket_path().exists()
+
+
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -853,28 +871,29 @@ def _ensure_user_systemd_env() -> None:


 def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
-    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
+    """Poll for the user systemd runtime socket(s), up to ``timeout`` seconds.

-    Linger-enabled user@.service can take a second or two to spawn the socket
-    after ``loginctl enable-linger`` runs.  Returns True once the socket exists.
+    Linger-enabled user@.service can take a second or two to spawn its control
+    socket(s) after ``loginctl enable-linger`` runs. Returns True once either
+    the user D-Bus socket or the per-user systemd private socket exists.
    """
    import time

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
-        if _user_dbus_socket_path().exists():
+        if _user_systemd_socket_ready():
            _ensure_user_systemd_env()
            return True
        time.sleep(0.2)
-    return _user_dbus_socket_path().exists()
+    return _user_systemd_socket_ready()


 def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
-    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
+    """Ensure ``systemctl --user`` will reach the user-scope systemd instance.

-    No-op when the bus socket is already there (the common case on desktops
-    and linger-enabled servers).  On fresh SSH sessions where the socket is
-    missing:
+    No-op when the user D-Bus socket or per-user systemd private socket is
+    already there (the common case on desktops and linger-enabled servers). On
+    fresh SSH sessions where both are missing:

    * If linger is already enabled, wait briefly for user@.service to spawn
      the socket.
@@ -888,8 +907,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
    systemd operations and surface the message to the user.
    """
    _ensure_user_systemd_env()
-    bus_path = _user_dbus_socket_path()
-    if bus_path.exists():
+    if _user_systemd_socket_ready():
        return

    import getpass
@@ -903,7 +921,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
        # Linger is on but socket still missing — unusual; fall through to error.
        _raise_user_systemd_unavailable(
            username,
-            reason="User D-Bus socket is missing even though linger is enabled.",
+            reason="User systemd control sockets are missing even though linger is enabled.",
            fix_hint=(
                f"  systemctl start user@{os.getuid()}.service\n"
                "  (may require sudo; try again after the command succeeds)"
@@ -2724,6 +2742,24 @@ _PLATFORMS = [
             "help": "OpenID to deliver cron results and notifications to."},
        ],
    },
+    {
+        "key": "yuanbao",
+        "label": "Yuanbao",
+        "emoji": "💎",
+        "token_var": "YUANBAO_APP_ID",
+        "setup_instructions": [
+            "1. Download the Yuanbao app from https://yuanbao.tencent.com/",
+            "2. In the app, go to PAI → My Bot and create a new bot",
+            "3. After the bot is created, copy the App ID and App Secret",
+            "4. Enter them below and Hermes will connect automatically over WebSocket",
+        ],
+        "vars": [
+            {"name": "YUANBAO_APP_ID", "prompt": "App ID", "password": False,
+             "help": "The App ID from your Yuanbao IM Bot credentials."},
+            {"name": "YUANBAO_APP_SECRET", "prompt": "App Secret", "password": True,
+             "help": "The App Secret (used for HMAC signing) from your Yuanbao IM Bot."},
+        ],
+    },
 ]


@@ -2935,7 +2971,7 @@ def _setup_sms():
 def _setup_dingtalk():
    """Configure DingTalk — QR scan (recommended) or manual credential entry."""
    from hermes_cli.setup import (
-        prompt_choice, prompt_yes_no, print_info, print_success, print_warning,
+        prompt_choice, prompt_yes_no, print_success, print_warning,
    )

    dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk")
@@ -3108,6 +3144,12 @@ def _setup_wecom():
    print_success("💬 WeCom configured!")


+def _setup_yuanbao():
+    """Configure Yuanbao via the standard platform setup."""
+    yuanbao_platform = next(p for p in _PLATFORMS if p["key"] == "yuanbao")
+    _setup_standard_platform(yuanbao_platform)
+
+
 def _is_service_installed() -> bool:
    """Check if the gateway is installed as a system service."""
    if supports_systemd_services():
@@ -3253,6 +3295,12 @@ def _setup_weixin():
        print_warning("  Direct messages disabled.")

    print()
+    print_info("  Note: QR login connects an iLink bot identity (e.g. ...@im.bot), not a")
+    print_info("  scriptable personal WeChat account. Ordinary WeChat groups typically cannot")
+    print_info("  invite an @im.bot identity, and iLink does not deliver ordinary-group events")
+    print_info("  to most bot accounts. The settings below only apply when iLink actually")
+    print_info("  delivers group events for your account type — otherwise DM remains the only")
+    print_info("  working channel regardless of this choice.")
    group_choices = [
        "Disable group chats (recommended)",
        "Allow all group chats",
@@ -3266,12 +3314,12 @@ def _setup_weixin():
    elif group_idx == 1:
        save_env_value("WEIXIN_GROUP_POLICY", "open")
        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
-        print_warning("  All group chats enabled.")
+        print_warning("  All group chats enabled (only takes effect if iLink delivers group events).")
    else:
-        allow_groups = prompt("  Allowed group chat IDs (comma-separated)", "", password=False).replace(" ", "")
+        allow_groups = prompt("  Allowed group chat IDs (comma-separated, not member user IDs)", "", password=False).replace(" ", "")
        save_env_value("WEIXIN_GROUP_POLICY", "allowlist")
        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", allow_groups)
-        print_success("  Group allowlist saved.")
+        print_success("  Group allowlist saved (only takes effect if iLink delivers group events).")

    if user_id:
        print()
@@ -3480,7 +3528,6 @@ def _setup_qqbot():
    method_idx = prompt_choice("  How would you like to set up QQ Bot?", method_choices, 0)

    credentials = None
-    used_qr = False

    if method_idx == 0:
        # ── QR scan-to-configure ──
@@ -3491,8 +3538,6 @@ def _setup_qqbot():
            print()
            print_warning("  QQ Bot setup cancelled.")
            return
-        if credentials:
-            used_qr = True
        if not credentials:
            print_info("  QR setup did not complete. Continuing with manual input.")

@@ -19,9 +19,8 @@ format) lives there.
 from __future__ import annotations

 import json
-import os
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List


 def hooks_command(args) -> None:
@@ -125,6 +124,7 @@ _DEFAULT_PAYLOADS = {
        "task_id": "test-task",
        "tool_call_id": "test-call",
        "result": '{"output": "hello"}',
+        "duration_ms": 42,
    },
    "pre_llm_call": {
        "session_id": "test-session",
@@ -16,6 +16,7 @@ import time
 from typing import Any, Dict, List, Optional, Tuple

 from hermes_cli.config import (
+    cfg_get,
    load_config,
    save_config,
    get_env_value,
@@ -716,7 +717,7 @@ def cmd_mcp_configure(args):

    # Update config
    config = load_config()
-    server_entry = config.get("mcp_servers", {}).get(name, {})
+    server_entry = cfg_get(config, "mcp_servers", name, default={})

    if len(chosen) == total:
        # All selected → remove include/exclude (register all)
@@ -0,0 +1,329 @@
+"""Remote model catalog fetcher.
+
+The Hermes docs site hosts a JSON manifest of curated models for providers
+we want to update without shipping a release (currently OpenRouter and
+Nous Portal). This module fetches, validates, and caches that manifest,
+falling back to the in-repo hardcoded lists when the network is unavailable.
+
+Pipeline
+--------
+1. ``get_catalog()`` — returns a parsed manifest dict.
+   - Checks in-process cache (invalidated by TTL).
+   - Reads disk cache at ``~/.hermes/cache/model_catalog.json``.
+   - Fetches the master URL if disk cache is stale or missing.
+   - On any fetch failure, keeps using the stale cache (or empty dict).
+
+2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` —
+   thin accessors returning the shapes existing callers expect. Each
+   falls back to the in-repo hardcoded list on any lookup failure.
+
+Schema (version 1)
+------------------
+::
+
+    {
+      "version": 1,
+      "updated_at": "2026-04-25T22:00:00Z",
+      "metadata": {...},                # free-form
+      "providers": {
+        "openrouter": {
+          "metadata": {...},            # free-form
+          "models": [
+            {"id": "vendor/model", "description": "recommended",
+             "metadata": {...}}          # free-form, model-level
+          ]
+        },
+        "nous": {...}
+      }
+    }
+
+Unknown fields are ignored — extra metadata can be added at either level
+without bumping ``version``. ``version`` bumps are reserved for
+breaking changes (renaming ``providers``, changing ``models`` shape).
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import time
+import urllib.error
+import urllib.request
+from pathlib import Path
+from typing import Any
+
+from hermes_cli import __version__ as _HERMES_VERSION
+from utils import atomic_replace
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+DEFAULT_CATALOG_URL = (
+    "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json"
+)
+DEFAULT_TTL_HOURS = 24
+DEFAULT_FETCH_TIMEOUT = 8.0
+SUPPORTED_SCHEMA_VERSION = 1
+
+_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
+
+# In-process cache to avoid repeated disk + parse work across multiple
+# calls within the same session. Invalidated by TTL against the disk file's
+# mtime, so calling code never has to think about this.
+_catalog_cache: dict[str, Any] | None = None
+_catalog_cache_source_mtime: float = 0.0
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+
+def _load_catalog_config() -> dict[str, Any]:
+    """Load the ``model_catalog`` config block with defaults filled in."""
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config() or {}
+    except Exception:
+        cfg = {}
+
+    raw = cfg.get("model_catalog")
+    if not isinstance(raw, dict):
+        raw = {}
+
+    return {
+        "enabled": bool(raw.get("enabled", True)),
+        "url": str(raw.get("url") or DEFAULT_CATALOG_URL),
+        "ttl_hours": float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS),
+        "providers": raw.get("providers") if isinstance(raw.get("providers"), dict) else {},
+    }
+
+
+def _cache_path() -> Path:
+    """Return the disk cache path. Import lazily so tests can monkeypatch home."""
+    from hermes_constants import get_hermes_home
+    return get_hermes_home() / "cache" / "model_catalog.json"
+
+
+# ---------------------------------------------------------------------------
+# Fetch + validate + cache
+# ---------------------------------------------------------------------------
+
+
+def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None:
+    """HTTP GET the manifest URL and return the parsed dict, or None on any fetch, parse, or schema failure."""
+    try:
+        req = urllib.request.Request(
+            url,
+            headers={
+                "Accept": "application/json",
+                "User-Agent": _HERMES_USER_AGENT,
+            },
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc:
+        logger.info("model catalog fetch failed (%s): %s", url, exc)
+        return None
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.info("model catalog fetch errored (%s): %s", url, exc)
+        return None
+
+    if not _validate_manifest(data):
+        logger.info("model catalog at %s failed schema validation", url)
+        return None
+
+    return data
+
+
+def _validate_manifest(data: Any) -> bool:
+    """Return True when ``data`` matches the minimum manifest shape."""
+    if not isinstance(data, dict):
+        return False
+    version = data.get("version")
+    if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION:
+        # Future schema version we don't understand — refuse rather than
+        # guess. Older schemas (version < 1) aren't supported either.
+        return False
+    providers = data.get("providers")
+    if not isinstance(providers, dict):
+        return False
+    for pname, pblock in providers.items():
+        if not isinstance(pname, str) or not isinstance(pblock, dict):
+            return False
+        models = pblock.get("models")
+        if not isinstance(models, list):
+            return False
+        for m in models:
+            if not isinstance(m, dict):
+                return False
+            if not isinstance(m.get("id"), str) or not m["id"].strip():
+                return False
+    return True
+
+
+def _read_disk_cache() -> tuple[dict[str, Any] | None, float]:
+    """Return ``(data_or_none, mtime)``. mtime is 0 if file is missing."""
+    path = _cache_path()
+    try:
+        mtime = path.stat().st_mtime
+    except (OSError, FileNotFoundError):
+        return (None, 0.0)
+    try:
+        with open(path) as fh:
+            data = json.load(fh)
+    except (OSError, json.JSONDecodeError):
+        return (None, 0.0)
+    if not _validate_manifest(data):
+        return (None, 0.0)
+    return (data, mtime)
+
+
+def _write_disk_cache(data: dict[str, Any]) -> None:
+    path = _cache_path()
+    try:  # best-effort: an OSError is logged below, never raised
+        path.parent.mkdir(parents=True, exist_ok=True)
+        tmp = path.with_suffix(path.suffix + ".tmp")
+        with open(tmp, "w") as fh:
+            json.dump(data, fh, indent=2)
+            fh.write("\n")
+        atomic_replace(tmp, path)
+    except OSError as exc:
+        logger.info("model catalog cache write failed: %s", exc)
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]:
+    """Return the parsed model catalog manifest, or an empty dict on failure.
+
+    Callers should treat a missing provider/model as "use the in-repo fallback"
+    — never raise from this function so the CLI keeps working offline.
+
+    Resolution order:
+
+    1. If the catalog is disabled in config, return ``{}``.
+    2. Unless ``force_refresh`` is set, return the in-process cache when the
+       disk cache it was loaded from is unchanged and still within the TTL.
+    3. Unless ``force_refresh`` is set, return the disk cache when it is
+       within the TTL.
+    4. Otherwise fetch the manifest from the configured URL and cache it.
+    5. If the fetch fails, fall back to any disk copy (even a stale one),
+       and finally to ``{}``.
+
+    Args:
+        force_refresh: When True, skip both cache shortcuts and attempt a
+            fetch before falling back to disk.
+
+    Returns:
+        The manifest dict, or ``{}`` when the catalog is disabled or no copy
+        could be obtained.
+    """
+    global _catalog_cache, _catalog_cache_source_mtime
+
+    cfg = _load_catalog_config()
+    if not cfg["enabled"]:
+        return {}
+
+    # Negative TTL values are clamped to zero.
+    ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0)
+
+    # The disk cache is consulted on every call; its mtime is what decides
+    # whether the in-process copy is still current.
+    disk_data, disk_mtime = _read_disk_cache()
+    now = time.time()
+    disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds
+
+    # In-process cache hit: disk hasn't changed since we loaded it and still fresh.
+    if (
+        not force_refresh
+        and _catalog_cache is not None
+        and disk_data is not None
+        and disk_mtime == _catalog_cache_source_mtime
+        and disk_fresh
+    ):
+        return _catalog_cache
+
+    # Disk is fresh enough — use it without a network hit.
+    if not force_refresh and disk_fresh and disk_data is not None:
+        _catalog_cache = disk_data
+        _catalog_cache_source_mtime = disk_mtime
+        return disk_data
+
+    # Need to (re)fetch. If it fails, fall back to any stale disk copy.
+    fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT)
+    if fetched is not None:
+        _write_disk_cache(fetched)
+        # Re-read what was just written so the remembered mtime matches the
+        # file now on disk.
+        new_disk_data, new_mtime = _read_disk_cache()
+        if new_disk_data is not None:
+            _catalog_cache = new_disk_data
+            _catalog_cache_source_mtime = new_mtime
+            return new_disk_data
+        # The read-back returned nothing (e.g. the write failed): keep the
+        # fetched manifest in memory only, stamped with the current time.
+        _catalog_cache = fetched
+        _catalog_cache_source_mtime = now
+        return fetched
+
+    # Fetch failed: a disk copy of any age beats returning nothing.
+    if disk_data is not None:
+        _catalog_cache = disk_data
+        _catalog_cache_source_mtime = disk_mtime
+        return disk_data
+
+    return {}
+
+
+def _fetch_provider_override(provider: str) -> dict[str, Any] | None:
+    """Fetch the manifest named by ``model_catalog.providers.<name>.url``.
+
+    Returns ``None`` when the catalog is disabled, when ``provider`` has no
+    dict-shaped config entry, or when that entry has no non-blank string
+    ``url``. Otherwise returns whatever ``_fetch_manifest`` yields for the
+    stripped URL.
+    """
+    settings = _load_catalog_config()
+    if not settings["enabled"]:
+        return None
+
+    entry = settings["providers"].get(provider)
+    raw_url = entry.get("url") if isinstance(entry, dict) else None
+    target = raw_url.strip() if isinstance(raw_url, str) else ""
+    if not target:
+        return None
+
+    # Override fetches skip the disk cache because they're usually
+    # third-party self-hosted. Re-request on every call but with a short
+    # timeout so they don't block the picker.
+    return _fetch_manifest(target, DEFAULT_FETCH_TIMEOUT)
+
+
+def _get_provider_block(provider: str) -> dict[str, Any] | None:
+    """Look up ``provider``'s manifest block, preferring a per-provider override.
+
+    The override manifest is consulted first; the shared catalog is only
+    loaded when the override is absent or has no dict block for ``provider``.
+    Returns ``None`` when neither source yields a dict.
+    """
+    manifest = _fetch_provider_override(provider)
+    if manifest is not None:
+        candidate = manifest.get("providers", {}).get(provider)
+        if isinstance(candidate, dict):
+            return candidate
+
+    manifest = get_catalog()
+    if manifest:
+        candidate = manifest.get("providers", {}).get(provider)
+        if isinstance(candidate, dict):
+            return candidate
+    return None
+
+
+def get_curated_openrouter_models() -> list[tuple[str, str]] | None:
+    """Return OpenRouter's curated ``[(id, description), ...]`` from the manifest.
+
+    Entries whose ``id`` is missing or blank are skipped; a missing
+    ``description`` becomes the empty string. Returns ``None`` when the
+    provider block is unavailable or yields no usable entries, so callers
+    can fall back to their hardcoded list.
+    """
+    block = _get_provider_block("openrouter")
+    if not block:
+        return None
+    # Pair every entry with its normalized id, lazily, so the description
+    # is only read for entries that survive the blank-id filter.
+    candidates = (
+        (str(entry.get("id") or "").strip(), entry)
+        for entry in block.get("models", [])
+    )
+    pairs: list[tuple[str, str]] = [
+        (model_id, str(entry.get("description") or ""))
+        for model_id, entry in candidates
+        if model_id
+    ]
+    return pairs or None
+
+
+def get_curated_nous_models() -> list[str] | None:
+    """Return Nous Portal's curated list of model ids from the manifest.
+
+    Ids are stripped of surrounding whitespace and blank ones are dropped.
+    Returns ``None`` when the provider block is unavailable or contains no
+    usable ids.
+    """
+    block = _get_provider_block("nous")
+    if not block:
+        return None
+    stripped_ids = (
+        str(entry.get("id") or "").strip() for entry in block.get("models", [])
+    )
+    ids: list[str] = [model_id for model_id in stripped_ids if model_id]
+    return ids or None
+
+
+def reset_cache() -> None:
+    """Drop the in-process catalog copy and its remembered source mtime.
+
+    Used by tests and ``hermes model --refresh``.
+    """
+    global _catalog_cache, _catalog_cache_source_mtime
+    _catalog_cache, _catalog_cache_source_mtime = None, 0.0
@@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "kimi-coding",
    "kimi-coding-cn",
    "minimax",
+    "minimax-oauth",
    "minimax-cn",
    "alibaba",
    "qwen-oauth",
@@ -213,10 +213,15 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:


 def _ensure_direct_aliases() -> None:
-    """Lazy-load direct aliases on first use."""
-    global DIRECT_ALIASES
+    """Lazy-load direct aliases on first use.
+
+    Mutates the existing DIRECT_ALIASES dict in place rather than rebinding
+    the module attribute. This keeps `from hermes_cli.model_switch import
+    DIRECT_ALIASES` references valid in callers — rebinding would leave them
+    pointing at a stale empty dict.
+    """
    if not DIRECT_ALIASES:
-        DIRECT_ALIASES = _load_direct_aliases()
+        DIRECT_ALIASES.update(_load_direct_aliases())


 # ---------------------------------------------------------------------------
@@ -533,6 +538,7 @@ def resolve_display_context_length(
    base_url: str = "",
    api_key: str = "",
    model_info: Optional[ModelInfo] = None,
+    custom_providers: list | None = None,
 ) -> Optional[int]:
    """Resolve the context length to show in /model output.

@@ -543,6 +549,11 @@ def resolve_display_context_length(
    about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
    rest.

+    When ``custom_providers`` is provided, per-model ``context_length``
+    overrides from ``custom_providers[].models.<id>.context_length`` are
+    honored — this closes #15779 where ``/model`` switch ignored user-set
+    overrides.
+
    Prefer the provider-aware value; fall back to ``model_info.context_window``
    only if the resolver returns nothing.
    """
@@ -553,6 +564,7 @@ def resolve_display_context_length(
            base_url=base_url or "",
            api_key=api_key or "",
            provider=provider or None,
+            custom_providers=custom_providers,
        )
        if ctx:
            return int(ctx)
@@ -831,9 +843,14 @@ def switch_model(
                requested=current_provider,
                target_model=new_model,
            )
-            api_key = runtime.get("api_key", "")
-            base_url = runtime.get("base_url", "")
-            api_mode = runtime.get("api_mode", "")
+            # If resolution fell through to "custom" (e.g. named custom provider like
+            # "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
+            # credentials. Otherwise use the resolved values (picks up credential rotation,
+            # base_url adjustments for OpenCode, etc.).
+            if runtime.get("provider") != "custom":
+                api_key = runtime.get("api_key", "")
+                base_url = runtime.get("base_url", "")
+                api_mode = runtime.get("api_mode", "")
        except Exception:
            pass

@@ -867,16 +884,31 @@ def switch_model(
            "message": f"Could not validate `{new_model}`: {e}",
        }

+    # Override rejection if model is in the user's saved provider config.
+    # API /v1/models may not list cloud/aliased models even though the server supports them.
    if not validation.get("accepted"):
-        msg = validation.get("message", "Invalid model")
-        return ModelSwitchResult(
-            success=False,
-            new_model=new_model,
-            target_provider=target_provider,
-            provider_label=provider_label,
-            is_global=is_global,
-            error_message=msg,
-        )
+        override = False
+        if user_providers:
+            for up in user_providers:
+                if isinstance(up, dict) and up.get("provider") == target_provider:
+                    cfg_models = up.get("models", [])
+                    if new_model in cfg_models or any(
+                        m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
+                    ):
+                        override = True
+                        break
+        if override:
+            validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
+        else:
+            msg = validation.get("message", "Invalid model")
+            return ModelSwitchResult(
+                success=False,
+                new_model=new_model,
+                target_provider=target_provider,
+                provider_label=provider_label,
+                is_global=is_global,
+                error_message=msg,
+            )

    # Apply auto-correction if validation found a closer match
    if validation.get("corrected_model"):
@@ -952,6 +984,7 @@ def list_authenticated_providers(
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
+    current_model: str = "",
 ) -> List[dict]:
    """Detect which providers have credentials and list their curated models.

@@ -985,6 +1018,37 @@ def list_authenticated_providers(
    results: List[dict] = []
    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
    seen_mdev_ids: set = set()  # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
+    # Effective base URLs of every built-in row we emit (normalized lower+rstrip).
+    # Section 4 uses this to hide ``custom_providers`` entries that point at the
+    # same endpoint as a built-in (e.g. a user-defined "my-dashscope" on
+    # https://coding-intl.dashscope.aliyuncs.com/v1 collides with the built-in
+    # alibaba-coding-plan row when DASHSCOPE_API_KEY is present). Fixes #16970.
+    _builtin_endpoints: set = set()
+
+    def _norm_url(url: str) -> str:
+        return str(url or "").strip().rstrip("/").lower()
+
+    def _record_builtin_endpoint(slug: str) -> None:
+        """Record the effective base URL for a built-in provider row.
+
+        Prefers the live env-override (e.g. DASHSCOPE_BASE_URL) over the
+        static inference_base_url so the dedup matches what a user typing
+        that URL into custom_providers would actually hit."""
+        try:
+            from hermes_cli.auth import PROVIDER_REGISTRY as _reg
+        except Exception:
+            return
+        pcfg = _reg.get(slug)
+        if not pcfg:
+            return
+        url = ""
+        if getattr(pcfg, "base_url_env_var", ""):
+            url = os.environ.get(pcfg.base_url_env_var, "") or ""
+        if not url:
+            url = getattr(pcfg, "inference_base_url", "") or ""
+        normed = _norm_url(url)
+        if normed:
+            _builtin_endpoints.add(normed)

    data = fetch_models_dev()

@@ -998,6 +1062,34 @@ def list_authenticated_providers(
    if "ollama-cloud" not in curated:
        from hermes_cli.models import fetch_ollama_cloud_models
        curated["ollama-cloud"] = fetch_ollama_cloud_models()
+    # LM Studio has no static catalog — probe its native /api/v1/models
+    # endpoint live so the picker reflects whatever the user has loaded.
+    # Base URL precedence: LM_BASE_URL env var > active config's base_url
+    # (when current provider is lmstudio) > 127.0.0.1 default.
+    # On auth rejection or unreachable server, fall back to the caller-supplied
+    # current model so the picker still shows something when offline / mis-keyed.
+    if "lmstudio" not in curated and (
+        os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL") or current_provider.strip().lower() == "lmstudio"
+    ):
+        from hermes_cli.models import fetch_lmstudio_models
+        from hermes_cli.auth import AuthError
+        is_current_lmstudio = current_provider.strip().lower() == "lmstudio"
+        lm_base = (
+            os.environ.get("LM_BASE_URL")
+            or (current_base_url if is_current_lmstudio and current_base_url else None)
+            or "http://127.0.0.1:1234/v1"
+        )
+        try:
+            live = fetch_lmstudio_models(
+                api_key=os.environ.get("LM_API_KEY", ""),
+                base_url=lm_base,
+                timeout=1.5, # Smaller timeout for picker
+            )
+        except AuthError:
+            live = []
+        if not live and is_current_lmstudio and current_model:
+            live = [current_model]
+        curated["lmstudio"] = live

    # --- 1. Check Hermes-mapped providers ---
    for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
@@ -1063,6 +1155,7 @@ def list_authenticated_providers(
        })
        seen_slugs.add(slug.lower())
        seen_mdev_ids.add(mdev_id)
+        _record_builtin_endpoint(slug)

    # --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
    from hermes_cli.providers import HERMES_OVERLAYS
@@ -1148,6 +1241,15 @@ def list_authenticated_providers(

        if hermes_slug in {"copilot", "copilot-acp"}:
            model_ids = provider_model_ids(hermes_slug)
+        # For aws_sdk providers (bedrock), use live discovery so the list
+        # reflects the active region (eu.*, ap.*) not the static us.* list.
+        elif overlay.auth_type == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import bedrock_model_ids_or_none
+                _ids = bedrock_model_ids_or_none()
+                model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
+            except Exception:
+                model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
        else:
            # Use curated list — look up by Hermes slug, fall back to overlay key
            model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
@@ -1168,6 +1270,7 @@ def list_authenticated_providers(
        })
        seen_slugs.add(pid.lower())
        seen_slugs.add(hermes_slug.lower())
+        _record_builtin_endpoint(hermes_slug)

    # --- 2b. Cross-check canonical provider list ---
    # Catches providers that are in CANONICAL_PROVIDERS but weren't found
@@ -1210,10 +1313,30 @@ def list_authenticated_providers(
            except Exception:
                pass

+        # Special case: aws_sdk auth (bedrock) — no API key env vars,
+        # credentials come from the boto3 credential chain (env vars,
+        # ~/.aws/credentials, instance roles, etc.)
+        if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import has_aws_credentials
+                _cp_has_creds = has_aws_credentials()
+            except Exception:
+                pass
+
        if not _cp_has_creds:
            continue

-        _cp_model_ids = curated.get(_cp.slug, [])
+        # For bedrock, use live discovery so the list reflects the active
+        # region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
+        if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import bedrock_model_ids_or_none
+                _ids = bedrock_model_ids_or_none()
+                _cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
+            except Exception:
+                _cp_model_ids = curated.get(_cp.slug, [])
+        else:
+            _cp_model_ids = curated.get(_cp.slug, [])
        _cp_total = len(_cp_model_ids)
        _cp_top = _cp_model_ids[:max_models]

@@ -1227,6 +1350,7 @@ def list_authenticated_providers(
            "source": "canonical",
        })
        seen_slugs.add(_cp.slug.lower())
+        _record_builtin_endpoint(_cp.slug)

    # --- 3. User-defined endpoints from config ---
    # Track (name, base_url) of what section 3 emits so section 4 can skip
@@ -1285,8 +1409,23 @@ def list_authenticated_providers(
                    if fb:
                        models_list = list(fb)

-            # Try to probe /v1/models if URL is set (but don't block on it)
-            # For now just show what we know from config
+            # Prefer the endpoint's live /models list when credentials are
+            # available. This keeps OpenAI-compatible relays (for example CRS)
+            # in sync when the server catalog changes without requiring the
+            # user to mirror every model into config.yaml.
+            api_key = str(ep_cfg.get("api_key", "") or "").strip()
+            if not api_key:
+                key_env = str(ep_cfg.get("key_env", "") or "").strip()
+                api_key = os.environ.get(key_env, "").strip() if key_env else ""
+            if api_url and api_key:
+                try:
+                    from hermes_cli.models import fetch_api_models
+                    live_models = fetch_api_models(api_key, api_url)
+                    if live_models:
+                        models_list = live_models
+                except Exception:
+                    pass
+
            results.append({
                "slug": ep_name,
                "name": display_name,
@@ -1421,6 +1560,15 @@ def list_authenticated_providers(
            )
            if _pair_key[0] and _pair_key[1] and _pair_key in _section3_emitted_pairs:
                continue
+            # Skip if a built-in row (sections 1/2/2b) already represents this
+            # endpoint. Fixes #16970: a user-defined "my-dashscope" pointing at
+            # https://coding-intl.dashscope.aliyuncs.com/v1 duplicates the
+            # built-in alibaba-coding-plan row whenever DASHSCOPE_API_KEY is
+            # set. The built-in row carries the curated model list, correct
+            # auth wiring, and canonical slug — keep it and hide the shadow.
+            _grp_url_norm = _pair_key[1]
+            if _grp_url_norm and _grp_url_norm in _builtin_endpoints:
+                continue
            results.append({
                "slug": slug,
                "name": grp["name"],
--- a/Show More
+++ b/Show More