fix(docker): run bundled TUI without npm install

fix: use skill activity in curator status
Treat skill views and edits as activity when curator reports and applies lifecycle transitions, so recently loaded or patched skills are not displayed or transitioned as never used.\n\nAdds regression tests for activity derivation, automatic transitions, and CLI status output.
2026-04-30 12:36:02 -05:00 · 2026-04-30 10:31:47 -07:00 · 2026-04-30 10:31:44 -07:00 · 2026-04-30 10:31:38 -07:00 · 2026-04-30 10:31:23 -07:00 · 2026-04-30 10:31:16 -07:00
723 changed files with 77870 additions and 5969 deletions
@@ -5,7 +5,9 @@

 # Dependencies
 node_modules
+**/node_modules
 .venv
+**/.venv

 # CI/CD
 .github
@@ -398,3 +398,19 @@ IMAGE_TOOLS_DEBUG=false
 # Override STT provider endpoints (for proxies or self-hosted instances)
 # GROQ_BASE_URL=https://api.groq.com/openai/v1
 # STT_OPENAI_BASE_URL=https://api.openai.com/v1
+
+# =============================================================================
+# MICROSOFT TEAMS INTEGRATION
+# =============================================================================
+# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
+# Or use Azure Portal: Azure Active Directory → App registrations → New registration
+# Then add the bot to Teams via the Bot Framework or App Studio.
+#
+# TEAMS_CLIENT_ID=                     # Azure AD App (client) ID
+# TEAMS_CLIENT_SECRET=                 # Azure AD client secret value
+# TEAMS_TENANT_ID=                     # Azure AD tenant ID (or "common" for multi-tenant)
+# TEAMS_ALLOWED_USERS=                 # Comma-separated AAD object IDs or UPNs
+# TEAMS_ALLOW_ALL_USERS=false          # Set true to skip the allowlist
+# TEAMS_HOME_CHANNEL=                  # Default channel/chat ID for cron delivery
+# TEAMS_HOME_CHANNEL_NAME=             # Display name for the home channel
+# TEAMS_PORT=3978                      # Webhook listen port (Bot Framework default)
@@ -1,8 +1,18 @@
 name: 'Setup Nix'
-description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
+description: 'Install Nix and configure Cachix binary cache'
+
+inputs:
+  cachix-auth-token:
+    description: 'Cachix auth token (enables push). Omit for read-only.'
+    required: false
+    default: ''

 runs:
  using: composite
  steps:
    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
-    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
+    - uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
+      with:
+        name: hermes-agent
+        authToken: ${{ inputs.cachix-auth-token }}
+      continue-on-error: true
@@ -1,68 +0,0 @@
-name: Nix Lockfile Check
-
-on:
-  pull_request:
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-check-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  check:
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Resolve head SHA
-        id: sha
-        shell: bash
-        run: |
-          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
-          echo "full=$FULL" >> "$GITHUB_OUTPUT"
-          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
-
-      - name: Check lockfile hashes
-        id: check
-        continue-on-error: true
-        env:
-          LINK_SHA: ${{ steps.sha.outputs.full }}
-        run: nix run .#fix-lockfiles -- --check
-
-      - name: Post sticky PR comment (stale)
-        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          message: |
-            ### ⚠️ npm lockfile hash out of date
-
-            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
-
-            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
-
-            ${{ steps.check.outputs.report }}
-
-            #### Apply the fix
-
-            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
-            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
-            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
-
-      - name: Clear sticky PR comment (resolved)
-        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          delete: true
-
-      - name: Fail if stale
-        if: steps.check.outputs.stale == 'true'
-        run: exit 1
@@ -1,6 +1,13 @@
 name: Nix Lockfile Fix

 on:
+  push:
+    branches: [main]
+    paths:
+      - 'ui-tui/package-lock.json'
+      - 'ui-tui/package.json'
+      - 'web/package-lock.json'
+      - 'web/package.json'
  workflow_dispatch:
    inputs:
      pr_number:
@@ -19,9 +26,105 @@ concurrency:
  cancel-in-progress: false

 jobs:
+  # ── Auto-fix on main ───────────────────────────────────────────────
+  # Fires when a push to main touches package.json or package-lock.json
+  # in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
+  # update commit directly to main so Nix builds never stay broken.
+  #
+  # Safety invariants:
+  #   1. The fix commit only touches nix/*.nix files, which are NOT in
+  #      the paths filter above, so this cannot re-trigger itself.
+  #   2. An explicit file-whitelist check before commit aborts if
+  #      fix-lockfiles ever modifies unexpected files.
+  #   3. Job-level concurrency with cancel-in-progress: true ensures
+  #      back-to-back pushes collapse to the newest; ref: main checkout
+  #      always operates on the latest branch state.
+  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
+  #      triggers downstream nix.yml verification.
+  auto-fix-main:
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    concurrency:
+      group: auto-fix-main
+      cancel-in-progress: true
+    steps:
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
+        with:
+          app-id: ${{ secrets.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          ref: main
+          token: ${{ steps.app-token.outputs.token }}
+
+      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Ensure only nix files were modified — prevents accidental
+          # self-triggering if fix-lockfiles ever touches package files.
+          unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
+          if [ -n "$unexpected" ]; then
+            echo "::error::Unexpected modified files: $unexpected"
+            exit 1
+          fi
+
+          # Record the base SHA before committing — used to detect package
+          # file changes if we need to rebase after a non-fast-forward push.
+          BASE_SHA="$(git rev-parse HEAD)"
+
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
+            -m "Source: $GITHUB_SHA" \
+            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+
+          # Retry push with rebase in case main advanced with an unrelated
+          # commit during the nix build. Without this, a non-fast-forward
+          # rejection silently loses the fix. If package files changed during
+          # the rebase, abort — a fresh auto-fix run will handle the new state.
+          for attempt in 1 2 3; do
+            if git push origin HEAD:main; then
+              exit 0
+            fi
+            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
+            git fetch origin main
+
+            # If package files changed between our base and the new main,
+            # our computed hashes are stale. Abort and let the next triggered
+            # run recompute from the correct package-lock state.
+            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
+              'ui-tui/package-lock.json' 'ui-tui/package.json' \
+              'web/package-lock.json' 'web/package.json' || true)"
+            if [ -n "$pkg_changed" ]; then
+              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
+              exit 0
+            fi
+
+            git rebase origin/main
+          done
+          echo "::error::Failed to push after 3 rebase attempts"
+          exit 1
+
+  # ── PR fix (manual / checkbox) ─────────────────────────────────────
+  # Existing behavior: run on manual dispatch OR when a task-list
+  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
  fix:
-    # Run on manual dispatch OR when a task-list checkbox in the sticky
-    # lockfile-check comment flips from `[ ]` to `[x]`.
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment'
@@ -99,10 +202,12 @@ jobs:
          fetch-depth: 0

      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}

      - name: Apply lockfile hashes
        id: apply
-        run: nix run .#fix-lockfiles -- --apply
+        run: nix run .#fix-lockfiles

      - name: Commit & push
        if: steps.apply.outputs.changed == 'true'
@@ -7,6 +7,7 @@ on:

 permissions:
  contents: read
+  pull-requests: write

 concurrency:
  group: nix-${{ github.ref }}
@@ -22,12 +23,95 @@ jobs:
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Resolve head SHA
+        if: github.event_name == 'pull_request'
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
      - name: Check flake
+        id: flake
        if: runner.os == 'Linux'
+        continue-on-error: true
        run: nix flake check --print-build-logs
+
      - name: Build package
+        id: build
        if: runner.os == 'Linux'
+        continue-on-error: true
        run: nix build --print-build-logs
+
+      # When the real Nix build fails, run a targeted diagnostic to see if
+      # the failure is specifically a stale npm lockfile hash in one of the
+      # known npm subpackages (tui / web).  This avoids surfacing a generic
+      # "build failed" message when the fix is a single known command.
+      - name: Diagnose npm lockfile hashes
+        id: hash_check
+        if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      # If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
+      # etc.) it won't set stale=true/false.  Treat that as a distinct failure
+      # mode rather than silently ignoring it.
+      - name: Fail if hash check crashed without reporting
+        if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
+        run: |
+          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
+          exit 1
+
+      - name: Post sticky PR comment (stale hashes)
+        if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.hash_check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles` and commit the diff
+
+      # Clear the sticky comment when either the build passed outright (no
+      # hash check needed) or the hash check explicitly returned stale=false
+      # (build failed for a non-hash reason).
+      - name: Clear sticky PR comment (resolved)
+        if: |
+          github.event_name == 'pull_request' &&
+          runner.os == 'Linux' &&
+          (steps.hash_check.outputs.stale == 'false' ||
+           (steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Final fail if build or flake failed
+        if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
+        run: |
+          if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
+            echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
+          else
+            echo "::error::Nix build/flake check failed. See logs above."
+          fi
+          exit 1
+
      - name: Evaluate flake (macOS)
        if: runner.os == 'macOS'
        run: nix flake show --json > /dev/null
@@ -38,7 +38,7 @@ hermes-agent/
 │   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
 │   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
 │   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
-│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
+│   └── builtin_hooks/    # Extension point for always-registered gateway hooks (none shipped)
 ├── plugins/              # Plugin system (see "Plugins" section below)
 │   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
 │   ├── context_engine/   # Context-engine plugins
@@ -494,7 +494,7 @@ branding:
  agent_name: "My Agent"
  welcome: "Welcome message"
  response_label: " ⚔ Agent "
-  prompt_symbol: "⚔ ❯ "
+  prompt_symbol: "⚔"

 tool_prefix: "╎"             # Tool output line prefix
 ```
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
+    build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -45,7 +45,14 @@ COPY --chown=hermes:hermes . .

 # Build browser dashboard and terminal UI assets.
 RUN cd web && npm run build && \
-    cd ../ui-tui && npm run build
+    cd ../ui-tui && npm run build && \
+    rm -rf node_modules/@hermes/ink && \
+    rm -rf packages/hermes-ink/node_modules && \
+    cp -R packages/hermes-ink node_modules/@hermes/ink && \
+    npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
+    rm -rf node_modules/@hermes/ink/node_modules/react && \
+    node --input-type=module -e "await import('@hermes/ink')" && \
+    touch .hermes-prebuilt-tui

 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
@@ -112,6 +112,17 @@ def main() -> None:
    import acp
    from .server import HermesACPAgent

+    # MCP tool discovery from config.yaml — run before asyncio.run() so
+    # it's safe to use blocking waits.  (ACP also registers per-session
+    # MCP servers dynamically via asyncio.to_thread inside the event
+    # loop; that path is unaffected.)  Moved from model_tools.py module
+    # scope to avoid freezing the gateway's loop on lazy import (#16856).
+    try:
+        from tools.mcp_tool import discover_mcp_tools
+        discover_mcp_tools()
+    except Exception:
+        logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
+
    agent = HermesACPAgent()
    try:
        asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
@@ -3,6 +3,7 @@
 from __future__ import annotations

 import asyncio
+import contextvars
 import logging
 import os
 from collections import defaultdict, deque
@@ -12,6 +13,7 @@ from typing import Any, Deque, Optional
 import acp
 from acp.schema import (
    AgentCapabilities,
+    AgentMessageChunk,
    AuthenticateResponse,
    AvailableCommand,
    AvailableCommandsUpdate,
@@ -29,6 +31,7 @@ from acp.schema import (
    McpServerStdio,
    ModelInfo,
    NewSessionResponse,
+    PromptCapabilities,
    PromptResponse,
    ResumeSessionResponse,
    SetSessionConfigOptionResponse,
@@ -44,6 +47,7 @@ from acp.schema import (
    TextContentBlock,
    UnstructuredCommandInput,
    Usage,
+    UserMessageChunk,
 )

 # AuthMethodAgent was renamed from AuthMethod in agent-client-protocol 0.9.0
@@ -87,17 +91,69 @@ def _extract_text(
        | EmbeddedResourceContentBlock
    ],
 ) -> str:
-    """Extract plain text from ACP content blocks."""
+    """Extract plain text from ACP content blocks for display/commands."""
    parts: list[str] = []
    for block in prompt:
        if isinstance(block, TextContentBlock):
            parts.append(block.text)
        elif hasattr(block, "text"):
            parts.append(str(block.text))
-        # Non-text blocks are ignored for now.
    return "\n".join(parts)


+def _image_block_to_openai_part(block: ImageContentBlock) -> dict[str, Any] | None:
+    """Convert an ACP image content block to OpenAI-style multimodal content."""
+    data = str(getattr(block, "data", "") or "").strip()
+    uri = str(getattr(block, "uri", "") or "").strip()
+    mime_type = str(getattr(block, "mime_type", "") or "image/png").strip() or "image/png"
+
+    if data:
+        url = data if data.startswith("data:") else f"data:{mime_type};base64,{data}"
+    elif uri:
+        url = uri
+    else:
+        return None
+
+    return {"type": "image_url", "image_url": {"url": url}}
+
+
+def _content_blocks_to_openai_user_content(
+    prompt: list[
+        TextContentBlock
+        | ImageContentBlock
+        | AudioContentBlock
+        | ResourceContentBlock
+        | EmbeddedResourceContentBlock
+    ],
+) -> str | list[dict[str, Any]]:
+    """Convert ACP prompt blocks into a Hermes/OpenAI-compatible user content payload."""
+    parts: list[dict[str, Any]] = []
+    text_parts: list[str] = []
+
+    for block in prompt:
+        if isinstance(block, TextContentBlock):
+            if block.text:
+                parts.append({"type": "text", "text": block.text})
+                text_parts.append(block.text)
+            continue
+        if isinstance(block, ImageContentBlock):
+            image_part = _image_block_to_openai_part(block)
+            if image_part is not None:
+                parts.append(image_part)
+            continue
+
+    if not parts:
+        return _extract_text(prompt)
+
+    # Keep pure text prompts as strings so slash-command handling and text-only
+    # providers keep the exact legacy path. Switch to structured content only
+    # when an actual non-text block is present.
+    if all(part.get("type") == "text" for part in parts):
+        return "\n".join(text_parts)
+
+    return parts
+
+
 class HermesACPAgent(acp.Agent):
    """ACP Agent implementation wrapping Hermes AIAgent."""

@@ -351,6 +407,7 @@ class HermesACPAgent(acp.Agent):
            agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
            agent_capabilities=AgentCapabilities(
                load_session=True,
+                prompt_capabilities=PromptCapabilities(image=True),
                session_capabilities=SessionCapabilities(
                    fork=SessionForkCapabilities(),
                    list=SessionListCapabilities(),
@@ -376,6 +433,78 @@ class HermesACPAgent(acp.Agent):

    # ---- Session management -------------------------------------------------

+    @staticmethod
+    def _history_message_text(message: dict[str, Any]) -> str:
+        """Extract displayable text from a persisted OpenAI-style message."""
+        content = message.get("content")
+        if isinstance(content, str):
+            return content.strip()
+        if isinstance(content, list):
+            parts: list[str] = []
+            for item in content:
+                if isinstance(item, dict):
+                    text = item.get("text")
+                    if isinstance(text, str):
+                        parts.append(text)
+                    elif item.get("type") == "text" and isinstance(item.get("content"), str):
+                        parts.append(item["content"])
+                elif isinstance(item, str):
+                    parts.append(item)
+            return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
+        return ""
+
+    @staticmethod
+    def _history_message_update(
+        *,
+        role: str,
+        text: str,
+    ) -> UserMessageChunk | AgentMessageChunk | None:
+        """Build an ACP history replay update for a user/assistant message."""
+        block = TextContentBlock(type="text", text=text)
+        if role == "user":
+            return UserMessageChunk(
+                session_update="user_message_chunk",
+                content=block,
+            )
+        if role == "assistant":
+            return AgentMessageChunk(
+                session_update="agent_message_chunk",
+                content=block,
+            )
+        return None
+
+    async def _replay_session_history(self, state: SessionState) -> None:
+        """Send persisted user/assistant history to clients during session/load.
+
+        Zed's ACP history UI calls ``session/load`` after the user picks an item
+        from the Agents sidebar. The agent must then replay the full conversation
+        as ``user_message_chunk`` / ``agent_message_chunk`` notifications; merely
+        restoring server-side state makes Hermes remember context, but leaves the
+        editor looking like a clean thread.
+        """
+        if not self._conn or not state.history:
+            return
+
+        for message in state.history:
+            role = str(message.get("role") or "")
+            if role not in {"user", "assistant"}:
+                continue
+            text = self._history_message_text(message)
+            if not text:
+                continue
+            update = self._history_message_update(role=role, text=text)
+            if update is None:
+                continue
+            try:
+                await self._conn.session_update(session_id=state.session_id, update=update)
+            except Exception:
+                logger.warning(
+                    "Failed to replay ACP history for session %s",
+                    state.session_id,
+                    exc_info=True,
+                )
+                return
+
    async def new_session(
        self,
        cwd: str,
@@ -404,6 +533,7 @@ class HermesACPAgent(acp.Agent):
            return None
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Loaded session %s", session_id)
+        await self._replay_session_history(state)
        self._schedule_available_commands_update(session_id)
        return LoadSessionResponse(models=self._build_model_state(state))

@@ -420,6 +550,7 @@ class HermesACPAgent(acp.Agent):
            state = self.session_manager.create_session(cwd=cwd)
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Resumed session %s", state.session_id)
+        await self._replay_session_history(state)
        self._schedule_available_commands_update(state.session_id)
        return ResumeSessionResponse(models=self._build_model_state(state))

@@ -516,11 +647,18 @@ class HermesACPAgent(acp.Agent):
            return PromptResponse(stop_reason="refusal")

        user_text = _extract_text(prompt).strip()
-        if not user_text:
+        user_content = _content_blocks_to_openai_user_content(prompt)
+        has_content = bool(user_text) or (
+            isinstance(user_content, list) and bool(user_content)
+        )
+        if not has_content:
            return PromptResponse(stop_reason="end_turn")

-        # Intercept slash commands — handle locally without calling the LLM
-        if user_text.startswith("/"):
+        # Intercept slash commands — handle locally without calling the LLM.
+        # Slash commands are text-only; if the client included images/resources,
+        # send the whole multimodal prompt to the agent instead of treating it as
+        # an ACP command.
+        if isinstance(user_content, str) and user_text.startswith("/"):
            response_text = self._handle_slash_command(user_text, state)
            if response_text is not None:
                if self._conn:
@@ -574,6 +712,22 @@ class HermesACPAgent(acp.Agent):

        def _run_agent() -> dict:
            nonlocal previous_approval_cb, previous_interactive
+            # Bind HERMES_SESSION_KEY for this session so per-session caches
+            # (e.g. the interactive sudo password cache in tools.terminal_tool)
+            # scope to the ACP session rather than leaking across sessions
+            # that land on the same reused executor thread. This call runs
+            # inside a contextvars.copy_context() below, so the ContextVar
+            # write is isolated from other concurrent ACP sessions.
+            try:
+                from gateway.session_context import (
+                    clear_session_vars,
+                    set_session_vars,
+                )
+                session_tokens = set_session_vars(session_key=session_id)
+            except Exception:
+                session_tokens = None
+                clear_session_vars = None  # type: ignore[assignment]
+                logger.debug("Could not set ACP session context", exc_info=True)
            if approval_cb:
                try:
                    from tools import terminal_tool as _terminal_tool
@@ -587,9 +741,10 @@ class HermesACPAgent(acp.Agent):
            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
-                    user_message=user_text,
+                    user_message=user_content,
                    conversation_history=state.history,
                    task_id=session_id,
+                    persist_user_message=user_text or "[Image attachment]",
                )
                return result
            except Exception as e:
@@ -607,9 +762,19 @@ class HermesACPAgent(acp.Agent):
                        _terminal_tool.set_approval_callback(previous_approval_cb)
                    except Exception:
                        logger.debug("Could not restore approval callback", exc_info=True)
+                if session_tokens is not None and clear_session_vars is not None:
+                    try:
+                        clear_session_vars(session_tokens)
+                    except Exception:
+                        logger.debug("Could not clear ACP session context", exc_info=True)

        try:
-            result = await loop.run_in_executor(_executor, _run_agent)
+            # Wrap the executor call in a fresh copy of the current context so
+            # concurrent ACP sessions on the shared ThreadPoolExecutor don't
+            # stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
+            # particular — used by the interactive sudo password cache scope).
+            ctx = contextvars.copy_context()
+            result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
        except Exception:
            logger.exception("Executor error for session %s", session_id)
            return PromptResponse(stop_reason="end_turn")
@@ -20,12 +20,27 @@ from pathlib import Path

 from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple
-from utils import normalize_proxy_env_vars
+from utils import base_url_host_matches, normalize_proxy_env_vars

-try:
-    import anthropic as _anthropic_sdk
-except ImportError:
-    _anthropic_sdk = None  # type: ignore[assignment]
+# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
+# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
+# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
+# read_claude_code_credentials_from_keychain) are all on cold user-triggered
+# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
+# the module after the first call and returns None on ImportError.
+_anthropic_sdk: Any = ...  # sentinel — None means "tried and missing"
+
+
+def _get_anthropic_sdk():
+    """Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
+    global _anthropic_sdk
+    if _anthropic_sdk is ...:
+        try:
+            import anthropic as _sdk
+            _anthropic_sdk = _sdk
+        except ImportError:
+            _anthropic_sdk = None
+    return _anthropic_sdk

 logger = logging.getLogger(__name__)

@@ -202,19 +217,33 @@ def _forbids_sampling_params(model: str) -> bool:


 # Beta headers for enhanced features (sent with ALL auth types).
-# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
+# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
 # beta headers are still accepted (harmless no-op) but not required. Kept
 # here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
 # that still gate on the headers continue to get the enhanced features.
-# Migration guide: remove these if you no longer support ≤4.5 models.
+#
+# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
+# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
+# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
+# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
+# at 200K even though model_metadata.py advertises 1M. The header is a harmless
+# no-op on endpoints where 1M is GA.
+#
+# Migration guide: remove these if you no longer support ≤4.5 models or once
+# Bedrock/Azure promote 1M to GA.
 _COMMON_BETAS = [
    "interleaved-thinking-2025-05-14",
    "fine-grained-tool-streaming-2025-05-14",
+    "context-1m-2025-08-07",
 ]
 # MiniMax's Anthropic-compatible endpoints fail tool-use requests when
 # the fine-grained tool streaming beta is present.  Omit it so tool calls
 # fall back to the provider's default response path.
 _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
+# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
+# Bearer-auth (MiniMax) endpoints since they host their own models and
+# unknown Anthropic beta headers risk request rejection.
+_CONTEXT_1M_BETA = "context-1m-2025-08-07"

 # Fast mode beta — enables the ``speed: "fast"`` request parameter for
 # significantly higher output token throughput on Opus 4.6 (~2.5x).
@@ -336,6 +365,88 @@ def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")


+# Model-name prefixes that identify the Kimi / Moonshot family.  Covers
+# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
+# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
+# Matched case-insensitively against the post-``normalize_model_name`` form,
+# so a caller's ``provider/vendor/model`` slug is handled the same as a
+# bare name.
+_KIMI_FAMILY_MODEL_PREFIXES = (
+    "kimi-", "kimi_",
+    "moonshot-", "moonshot_",
+    "k1.", "k1-",
+    "k2.", "k2-",
+    "k25", "k2.5",
+)
+
+
+def _model_name_is_kimi_family(model: str | None) -> bool:
+    if not isinstance(model, str):
+        return False
+    m = model.strip().lower()
+    if not m:
+        return False
+    # Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
+    if "/" in m:
+        m = m.rsplit("/", 1)[-1]
+    return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
+
+
+def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
+    """Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.
+
+    Broader than ``_is_kimi_coding_endpoint`` — matches:
+
+    - Kimi's official ``/coding`` URL (legacy check, preserved)
+    - Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host
+    - Custom or proxied endpoints whose *model* name is in the Kimi / Moonshot
+      family (``kimi-*``, ``moonshot-*``, ``k1.*``, ``k2.*``, …).  Users with
+      ``api_mode: anthropic_messages`` on a private gateway fronting Kimi
+      fall into this branch — the upstream still enforces Kimi's thinking
+      semantics (reasoning_content required on every replayed tool-call
+      message) regardless of the gateway's hostname.
+
+    Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
+    preserve unsigned reasoning_content-derived thinking blocks on replay.
+    See hermes-agent#13848, #17057.
+    """
+    if _is_kimi_coding_endpoint(base_url):
+        return True
+    for _domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn"):
+        if base_url_host_matches(base_url or "", _domain):
+            return True
+    if _model_name_is_kimi_family(model):
+        return True
+    return False
+
+
+def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
+    """Return True for DeepSeek's Anthropic-compatible endpoint.
+
+    DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
+    but, when thinking mode is enabled, requires the ``thinking`` blocks
+    from prior assistant turns to round-trip on subsequent requests — the
+    generic third-party path strips them and triggers HTTP 400::
+
+        The content[].thinking in the thinking mode must be passed back
+        to the API.
+
+    Per DeepSeek's published compatibility matrix the blocks are unsigned
+    (no Anthropic-proprietary signature, no ``redacted_thinking`` support),
+    so this endpoint is handled with the same strip-signed / keep-unsigned
+    policy used for Kimi's ``/coding`` endpoint.  The match is pinned to
+    the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
+    base URL (which never reaches this adapter) is not misclassified.
+    See hermes-agent#16748.
+    """
+    if not base_url_host_matches(base_url or "", "api.deepseek.com"):
+        return False
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return "/anthropic" in normalized.rstrip("/").lower()
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -350,20 +461,45 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
    return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))


-def _common_betas_for_base_url(base_url: str | None) -> list[str]:
+def _common_betas_for_base_url(
+    base_url: str | None,
+    *,
+    drop_context_1m_beta: bool = False,
+) -> list[str]:
    """Return the beta headers that are safe for the configured endpoint.

    MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
    that include Anthropic's ``fine-grained-tool-streaming`` beta — every
    tool-use message triggers a connection error.  Strip that beta for
    Bearer-auth endpoints while keeping all other betas intact.
+
+    The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
+    endpoints — MiniMax hosts its own models, not Claude, so the header is
+    irrelevant at best and risks request rejection at worst.
+
+    ``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
+    otherwise-unrelated endpoints. The OAuth retry path flips this flag after
+    a subscription rejects the beta with
+    "The long context beta is not yet available for this subscription" so
+    subsequent requests in the same session don't repeat the probe. See the
+    reactive recovery loop in ``run_agent.py`` and issue-comment history on
+    PR #17680 for the full rationale.
    """
    if _requires_bearer_auth(base_url):
-        return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
+        _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
+        return [b for b in _COMMON_BETAS if b not in _stripped]
+    if drop_context_1m_beta:
+        return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
    return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
+def build_anthropic_client(
+    api_key: str,
+    base_url: str = None,
+    timeout: float = None,
+    *,
+    drop_context_1m_beta: bool = False,
+):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
@@ -372,8 +508,15 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
    Anthropic-compatible providers respect the same knob as OpenAI-wire
    providers.

+    ``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
+    client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
+    path in ``run_agent.py`` when a subscription rejects the beta; leave at
+    its default on fresh clients so 1M-capable subscriptions keep the
+    capability.
+
    Returns an anthropic.Anthropic instance.
    """
+    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
@@ -400,7 +543,10 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
            kwargs["default_query"] = {"api-version": "2025-04-15"}
        else:
            kwargs["base_url"] = normalized_base_url
-    common_betas = _common_betas_for_base_url(normalized_base_url)
+    common_betas = _common_betas_for_base_url(
+        normalized_base_url,
+        drop_context_1m_beta=drop_context_1m_beta,
+    )

    if _is_kimi_coding_endpoint(base_url):
        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
@@ -456,8 +602,16 @@ def build_anthropic_bedrock_client(region: str):
    Claude feature parity: prompt caching, thinking budgets, adaptive
    thinking, fast mode — features not available via the Converse API.

+    Attaches the common Anthropic beta headers as client-level defaults so
+    that Bedrock-hosted Claude models get the same enhanced features as
+    native Anthropic. The ``context-1m-2025-08-07`` beta in particular
+    unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
+    it, Bedrock caps these models at 200K even though the Anthropic API
+    serves them with 1M natively.
+
    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
    """
+    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
@@ -473,6 +627,7 @@ def build_anthropic_bedrock_client(region: str):
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
+        default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
    )


@@ -488,9 +643,6 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
-    import platform
-    import subprocess
-
    if platform.system() != "Darwin":
        return None

@@ -1035,9 +1187,12 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
        # These must not be converted to hyphens.  See issue #12295.
        if _is_bedrock_model_id(model):
            return model
-        # OpenRouter uses dots for version separators (claude-opus-4.6),
-        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
-        model = model.replace(".", "-")
+        # Only convert dots to hyphens for Anthropic/Claude models.
+        # Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
+        # as part of their canonical names.  See issue #17171.
+        _lower = model.lower()
+        if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
+            model = model.replace(".", "-")
    return model


@@ -1054,6 +1209,33 @@ def _sanitize_tool_id(tool_id: str) -> str:
    return sanitized or "tool_0"


+def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
+    """Normalize tool schemas before sending them to Anthropic.
+
+    Anthropic's tool schema validator rejects nullable unions such as
+    ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
+    commonly emits for optional fields. Tool optionality is represented by
+    the parent ``required`` array, so we delegate to the shared
+    ``strip_nullable_unions`` helper to collapse nullable unions to the
+    non-null branch while preserving metadata like description/default.
+
+    ``keep_nullable_hint=False`` because the Anthropic validator does not
+    recognize the OpenAPI-style ``nullable: true`` extension and strict
+    schema-to-grammar converters may reject unknown keywords.
+    """
+    if not schema:
+        return {"type": "object", "properties": {}}
+
+    from tools.schema_sanitizer import strip_nullable_unions
+
+    normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
+    if not isinstance(normalized, dict):
+        return {"type": "object", "properties": {}}
+    if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
+        normalized = {**normalized, "properties": {}}
+    return normalized
+
+
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
@@ -1064,7 +1246,9 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
        result.append({
            "name": fn.get("name", ""),
            "description": fn.get("description", ""),
-            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+            "input_schema": _normalize_tool_input_schema(
+                fn.get("parameters", {"type": "object", "properties": {}})
+            ),
        })
    return result

@@ -1195,6 +1379,7 @@ def _convert_content_to_anthropic(content: Any) -> Any:
 def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
+    model: str | None = None,
 ) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

@@ -1206,6 +1391,12 @@ def convert_messages_to_anthropic(
    endpoint, all thinking block signatures are stripped.  Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".
+
+    When *model* is provided and matches the Kimi / Moonshot family (or
+    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
+    synthesised from ``reasoning_content`` are preserved on replayed
+    assistant tool-call messages — Kimi requires the field to exist, even
+    if empty.
    """
    system = None
    result = []
@@ -1434,7 +1625,16 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    _is_kimi = _is_kimi_coding_endpoint(base_url)
+    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
+    # Anthropic Messages protocol upstream but require that thinking blocks
+    # synthesised from reasoning_content round-trip on subsequent turns when
+    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
+    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
+    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
+    _preserve_unsigned_thinking = (
+        _is_kimi_family_endpoint(base_url, model)
+        or _is_deepseek_anthropic_endpoint(base_url)
+    )

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1446,22 +1646,22 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_kimi:
-            # Kimi's /coding endpoint enables thinking server-side and
-            # requires unsigned thinking blocks on replayed assistant
-            # tool-call messages.  Strip signed Anthropic blocks (Kimi
-            # can't validate signatures) but preserve the unsigned ones
-            # we synthesised from reasoning_content above.
+        if _preserve_unsigned_thinking:
+            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
+            # thinking server-side and require unsigned thinking blocks on
+            # replayed assistant tool-call messages.  Strip signed Anthropic
+            # blocks (neither upstream can validate Anthropic signatures) but
+            # preserve the unsigned ones we synthesised from reasoning_content.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — Kimi can't validate, strip
+                    # Anthropic-signed block — upstream can't validate, strip
                    continue
                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: Kimi needs it for message-history validation.
+                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
@@ -1518,6 +1718,7 @@ def build_anthropic_kwargs(
    context_length: Optional[int] = None,
    base_url: str | None = None,
    fast_mode: bool = False,
+    drop_context_1m_beta: bool = False,
 ) -> Dict[str, Any]:
    """Build kwargs for anthropic.messages.create().

@@ -1557,7 +1758,9 @@ def build_anthropic_kwargs(
    Currently only supported on native Anthropic endpoints (not third-party
    compatible ones).
    """
-    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
+    system, anthropic_messages = convert_messages_to_anthropic(
+        messages, base_url=base_url, model=model
+    )
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
@@ -1663,7 +1866,7 @@ def build_anthropic_kwargs(
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    _is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
@@ -1704,7 +1907,10 @@ def build_anthropic_kwargs(
        kwargs.setdefault("extra_body", {})["speed"] = "fast"
        # Build extra_headers with ALL applicable betas (the per-request
        # extra_headers override the client-level anthropic-beta header).
-        betas = list(_common_betas_for_base_url(base_url))
+        betas = list(_common_betas_for_base_url(
+            base_url,
+            drop_context_1m_beta=drop_context_1m_beta,
+        ))
        if is_oauth:
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
@@ -5,11 +5,11 @@ session search, web extraction, vision analysis, browser vision) picks up
 the best available backend without duplicating fallback logic.

 Resolution order for text tasks (auto mode):
-  1. OpenRouter  (OPENROUTER_API_KEY)
-  2. Nous Portal (~/.hermes/auth.json active provider)
-  3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
-  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
-     wrapped to look like a chat.completions client)
+  1. User's main provider + main model (used regardless of provider type —
+     aggregators, direct API-key providers, native Anthropic, Codex, etc.)
+  2. OpenRouter  (OPENROUTER_API_KEY)
+  3. Nous Portal (~/.hermes/auth.json active provider)
+  4. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
  5. Native Anthropic
  6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
  7. None
@@ -18,10 +18,16 @@ Resolution order for vision/multimodal tasks (auto mode):
  1. Selected main provider, if it is one of the supported vision backends below
  2. OpenRouter
  3. Nous Portal
-  4. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
-  5. Native Anthropic
-  6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
-  7. None
+  4. Native Anthropic
+  5. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
+  6. None
+
+Codex OAuth (ChatGPT-account auth) is intentionally NOT in either
+fallback chain: OpenAI gates this endpoint behind an undocumented,
+shifting model allow-list, so "just try Codex with a hardcoded model"
+rots on its own.  Codex is used only when the user's main provider *is*
+openai-codex (Step 1 above) or when a caller explicitly requests it with
+a model (auxiliary.<task>.provider + auxiliary.<task>.model).

 Per-task overrides are configured in config.yaml under the ``auxiliary:`` section
 (e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
@@ -41,10 +47,57 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
 from urllib.parse import urlparse, parse_qs, urlunparse

-from openai import OpenAI
+# NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
+# openai SDK pulls a large type tree (~240 ms cold, including responses/*,
+# graders/*). We expose `OpenAI` here as a thin proxy that imports the SDK on
+# first call and forwards, so:
+#   (a) the 15+ in-module `OpenAI(...)` construction sites work unchanged
+#       (Python's function-scope name lookup resolves `OpenAI` to the proxy
+#       object bound in module globals here, without triggering any import);
+#   (b) external code can still do `auxiliary_client.OpenAI` or
+#       `patch("agent.auxiliary_client.OpenAI", ...)` — tests see the proxy,
+#       and patch replaces the module attribute as usual;
+#   (c) `OpenAI` as a type annotation resolves at runtime to the proxy class
+#       (which is harmless — annotations aren't type-checked at runtime).
+# See tests/agent/test_auxiliary_client.py for patch patterns this supports.
+if TYPE_CHECKING:
+    from openai import OpenAI  # noqa: F401 — type hints only
+
+_OPENAI_CLS_CACHE: Optional[type] = None
+
+
+def _load_openai_cls() -> type:
+    """Import and cache ``openai.OpenAI``."""
+    global _OPENAI_CLS_CACHE
+    if _OPENAI_CLS_CACHE is None:
+        from openai import OpenAI as _cls
+        _OPENAI_CLS_CACHE = _cls
+    return _OPENAI_CLS_CACHE
+
+
+class _OpenAIProxy:
+    """Module-level proxy that looks like the ``openai.OpenAI`` class.
+
+    Forwards ``OpenAI(...)`` calls and ``isinstance(x, OpenAI)`` checks to the
+    real SDK class, importing the SDK lazily on first use.
+    """
+
+    __slots__ = ()
+
+    def __call__(self, *args, **kwargs):
+        return _load_openai_cls()(*args, **kwargs)
+
+    def __instancecheck__(self, obj):
+        return isinstance(obj, _load_openai_cls())
+
+    def __repr__(self):
+        return "<lazy openai.OpenAI proxy>"
+
+
+OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance

 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
@@ -54,6 +107,14 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
 logger = logging.getLogger(__name__)


+def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
+    """Return False instead of raising when a patched symbol is not a type."""
+    try:
+        return isinstance(obj, maybe_type)
+    except TypeError:
+        return False
+
+
 def _extract_url_query_params(url: str):
    """Extract query params from URL, return (clean_url, default_query dict or None)."""
    parsed = urlparse(url)
@@ -94,6 +155,10 @@ _PROVIDER_ALIASES = {
    "github-models": "copilot",
    "github-copilot-acp": "copilot-acp",
    "copilot-acp-agent": "copilot-acp",
+    "tencent": "tencent-tokenhub",
+    "tokenhub": "tencent-tokenhub",
+    "tencent-cloud": "tencent-tokenhub",
+    "tencentmaas": "tencent-tokenhub",
 }


@@ -159,6 +224,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "gmi": "google/gemini-3.1-flash-lite-preview",
    "minimax": "MiniMax-M2.7",
+    "minimax-oauth": "MiniMax-M2.7-highspeed",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
    "ai-gateway": "google/gemini-3-flash",
@@ -166,6 +232,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "opencode-go": "glm-5",
    "kilocode": "google/gemini-3-flash-preview",
    "ollama-cloud": "nemotron-3-nano:30b",
+    "tencent-tokenhub": "hy3-preview",
 }

 # Vision-specific model overrides for direct providers.
@@ -177,6 +244,21 @@ _PROVIDER_VISION_MODELS: Dict[str, str] = {
    "zai": "glm-5v-turbo",
 }

+# Providers whose endpoint does not accept image input, even though the
+# provider's broader ecosystem has vision models available elsewhere.  When
+# `auxiliary.vision.provider: auto` sees one of these as the main provider,
+# it must skip straight to the aggregator chain instead of returning a client
+# that will 404 on every vision request.
+#
+# kimi-coding / kimi-coding-cn: the Kimi Coding Plan routes through
+# api.kimi.com/coding (Anthropic Messages wire) which Kimi's own docs
+# describe as having no image_in capability. Vision lives on the separate
+# Kimi Platform (api.moonshot.ai, OpenAI-wire, pay-as-you-go).  See #17076.
+_PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
+    "kimi-coding",
+    "kimi-coding-cn",
+})
+
 # OpenRouter app attribution headers
 _OR_HEADERS = {
    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
@@ -209,12 +291,14 @@ _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"

-# Codex fallback: uses the Responses API (the only endpoint the Codex
-# OAuth token can access) with a fast model for auxiliary tasks.
-# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these
-# auxiliary flows, while gpt-5.2-codex remains broadly available and supports
-# vision via Responses.
-_CODEX_AUX_MODEL = "gpt-5.2-codex"
+# Codex OAuth endpoint used when a caller explicitly requests
+# provider="openai-codex".  There is deliberately no hardcoded default
+# model: the set of models OpenAI accepts on this endpoint for
+# ChatGPT-account auth is an undocumented, shifting allow-list, and
+# pinning one here has drifted silently twice (gpt-5.3-codex → gpt-5.2-codex
+# → gpt-5.4 over 6 weeks in early 2026).  Callers must pass the model
+# they want explicitly (from config.yaml model.model, auxiliary.<task>.model,
+# or the user's active Codex model selection).
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


@@ -271,6 +355,13 @@ def _to_openai_base_url(base_url: str) -> str:
        rewritten = url[: -len("/anthropic")] + "/v1"
        logger.debug("Auxiliary client: rewrote base URL %s → %s", url, rewritten)
        return rewritten
+    if "api.kimi.com" in url and url.endswith("/coding"):
+        # Kimi Code uses /coding/v1/messages for Anthropic SDK (appends /v1/messages)
+        # but /coding/v1/chat/completions for OpenAI SDK (appends /chat/completions)
+        # Without /v1 here, OpenAI SDK hits /coding/chat/completions — a 404.
+        rewritten = url + "/v1"
+        logger.debug("Auxiliary client: rewrote Kimi base URL %s → %s", url, rewritten)
+        return rewritten
    return url


@@ -405,6 +496,33 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

+        # Translate extra_body.reasoning (chat.completions shape) into the
+        # Responses API's top-level reasoning + include fields.  Mirrors
+        # agent/transports/codex.py::build_kwargs() so auxiliary callers
+        # that configure reasoning via auxiliary.<task>.extra_body get the
+        # same behavior as the main agent's Codex transport.
+        extra_body = kwargs.get("extra_body") or {}
+        if isinstance(extra_body, dict):
+            reasoning_cfg = extra_body.get("reasoning")
+            if isinstance(reasoning_cfg, dict):
+                if reasoning_cfg.get("enabled") is False:
+                    # Reasoning explicitly disabled — do not set reasoning
+                    # or include.  The Codex backend still thinks by
+                    # default, but we honor the caller's intent where the
+                    # API allows it.
+                    pass
+                else:
+                    effort = reasoning_cfg.get("effort", "medium")
+                    # Codex backend rejects "minimal"; clamp to "low" to
+                    # match the main-agent Codex transport behavior.
+                    if effort == "minimal":
+                        effort = "low"
+                    resp_kwargs["reasoning"] = {
+                        "effort": effort,
+                        "summary": "auto",
+                    }
+                    resp_kwargs["include"] = ["reasoning.encrypted_content"]
+
        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
        tools = kwargs.get("tools")
        if tools:
@@ -714,6 +832,116 @@ class AsyncAnthropicAuxiliaryClient:
        self.base_url = sync_wrapper.base_url


+def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
+    """True if the endpoint at ``base_url`` speaks the Anthropic Messages
+    protocol instead of OpenAI chat.completions.
+
+    Mirrors ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the
+    auxiliary client and the main agent stay in sync on transport selection.
+    Covers:
+
+    - Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
+      Anthropic-compatible gateways).
+    - ``api.kimi.com/coding`` (Kimi Coding Plan — the /coding route only
+      speaks Claude-Code's native Anthropic shape; ``chat.completions``
+      returns 404 on Anthropic-only model aliases like ``kimi-for-coding``).
+    - ``api.anthropic.com`` (native Anthropic).
+    """
+    normalized = (base_url or "").strip().lower().rstrip("/")
+    if not normalized:
+        return False
+    if normalized.endswith("/anthropic"):
+        return True
+    hostname = base_url_hostname(normalized)
+    if hostname == "api.anthropic.com":
+        return True
+    if hostname == "api.kimi.com" and "/coding" in normalized:
+        return True
+    return False
+
+
+def _maybe_wrap_anthropic(
+    client_obj: Any,
+    model: str,
+    api_key: str,
+    base_url: str,
+    api_mode: Optional[str] = None,
+) -> Any:
+    """Rewrap a plain OpenAI client in ``AnthropicAuxiliaryClient`` when
+    the endpoint actually speaks Anthropic Messages.
+
+    This is the single chokepoint for aux-client transport correction.
+    Runs at the end of every ``resolve_provider_client`` branch so that
+    api_key providers (Kimi Coding Plan), the ``custom`` endpoint, and
+    future /anthropic gateways all land on the right wire format
+    regardless of which branch built the client.
+
+    Returns ``client_obj`` unchanged when:
+
+    - It's already an Anthropic/Codex/Gemini/CopilotACP wrapper.
+    - The endpoint is an OpenAI-wire endpoint.
+    - ``api_mode`` is explicitly set to a non-Anthropic transport.
+    - The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
+    """
+    # Already wrapped — don't double-wrap.
+    if _safe_isinstance(client_obj, AnthropicAuxiliaryClient):
+        return client_obj
+    # Other specialized adapters we should never re-dispatch.
+    if _safe_isinstance(client_obj, CodexAuxiliaryClient):
+        return client_obj
+    try:
+        from agent.gemini_native_adapter import GeminiNativeClient
+        if _safe_isinstance(client_obj, GeminiNativeClient):
+            return client_obj
+    except ImportError:
+        pass
+    try:
+        from agent.copilot_acp_client import CopilotACPClient
+        if _safe_isinstance(client_obj, CopilotACPClient):
+            return client_obj
+    except ImportError:
+        pass
+
+    # Explicit non-anthropic api_mode wins over URL heuristics.
+    if api_mode and api_mode != "anthropic_messages":
+        return client_obj
+
+    should_wrap = (
+        api_mode == "anthropic_messages"
+        or _endpoint_speaks_anthropic_messages(base_url)
+    )
+    if not should_wrap:
+        return client_obj
+
+    try:
+        from agent.anthropic_adapter import build_anthropic_client
+    except ImportError:
+        logger.warning(
+            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
+            "not installed — falling back to OpenAI-wire (will likely 404).",
+            base_url,
+        )
+        return client_obj
+
+    try:
+        real_client = build_anthropic_client(api_key, base_url)
+    except Exception as exc:
+        logger.warning(
+            "Failed to build Anthropic client for %s (%s) — falling back to "
+            "OpenAI-wire client.", base_url, exc,
+        )
+        return client_obj
+
+    logger.debug(
+        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
+        "(model=%s, base_url=%s, api_mode=%s)",
+        model, base_url[:60] if base_url else "", api_mode or "auto-detected",
+    )
+    return AnthropicAuxiliaryClient(
+        real_client, model, api_key, base_url, is_oauth=False,
+    )
+
+
 def _read_nous_auth() -> Optional[dict]:
    """Read and validate ~/.hermes/auth.json for an active Nous provider.

@@ -865,9 +1093,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            if not api_key:
                continue

-            base_url = _to_openai_base_url(
-                _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
-            )
+            raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
+            base_url = _to_openai_base_url(raw_base_url)
            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
            if model is None:
                continue  # skip provider if we don't know a valid aux model
@@ -884,16 +1111,17 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
-            return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
+            _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
+            return _client, model

        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
        if not api_key:
            continue

-        base_url = _to_openai_base_url(
-            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
-        )
+        raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
+        base_url = _to_openai_base_url(raw_base_url)
        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
        if model is None:
            continue  # skip provider if we don't know a valid aux model
@@ -910,7 +1138,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
-        return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
+        _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
+        return _client, model

    return None, None

@@ -1194,10 +1424,32 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
            model,
        )
-    return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
+    # URL-based anthropic detection for custom endpoints that didn't set
+    # api_mode explicitly (e.g. kimi.com/coding reached via custom config).
+    _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
+    _fallback_client = _maybe_wrap_anthropic(
+        _fallback_client, model, custom_key, custom_base, custom_mode,
+    )
+    return _fallback_client, model


-def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
+def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
+    """Build a CodexAuxiliaryClient for an explicitly-requested model.
+
+    There is no auto-selection of the Codex model: the ChatGPT-account
+    Codex endpoint's accepted model list is an undocumented, drifting
+    allow-list, so any hardcoded default we pick goes stale.  The caller
+    is responsible for passing the model (e.g. from the user's own
+    ``model.model`` or ``auxiliary.<task>.model`` config).
+
+    Returns (None, None) when no Codex OAuth token is available.
+    """
+    if not model:
+        logger.warning(
+            "Auxiliary client: openai-codex requested without a model; "
+            "pass model explicitly (auxiliary.<task>.model in config.yaml)."
+        )
+        return None, None
    pool_present, entry = _select_pool_entry("openai-codex")
    if pool_present:
        codex_token = _pool_runtime_api_key(entry)
@@ -1213,13 +1465,13 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
        if not codex_token:
            return None, None
        base_url = _CODEX_AUX_BASE_URL
-    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
+    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
    real_client = OpenAI(
        api_key=codex_token,
        base_url=base_url,
        default_headers=_codex_cloudflare_headers(codex_token),
    )
-    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
+    return CodexAuxiliaryClient(real_client, model), model


 def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
@@ -1274,7 +1526,6 @@ _AUTO_PROVIDER_LABELS = {
    "_try_openrouter": "openrouter",
    "_try_nous": "nous",
    "_try_custom_endpoint": "local/custom",
-    "_try_codex": "openai-codex",
    "_resolve_api_key_provider": "api-key",
 }

@@ -1301,12 +1552,18 @@ def _get_provider_chain() -> List[tuple]:

    Built at call time (not module level) so that test patches
    on the ``_try_*`` functions are picked up correctly.
+
+    NOTE: ``openai-codex`` is deliberately NOT in this chain.  The
+    ChatGPT-account Codex endpoint only accepts a shifting, undocumented
+    allow-list of model IDs, so falling back to it with a guessed model
+    fails more often than not.  Codex is used only when the user's main
+    provider *is* openai-codex (see Step 1 of ``_resolve_auto``) or when
+    a caller explicitly requests it with a model.
    """
    return [
        ("openrouter", _try_openrouter),
        ("nous", _try_nous),
        ("local/custom", _try_custom_endpoint),
-        ("openai-codex", _try_codex),
        ("api-key", _resolve_api_key_provider),
    ]

@@ -1745,8 +2002,20 @@ def resolve_provider_client(
                return True
        return False

-    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
-        """Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
+    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "",
+                        api_key_str: str = ""):
+        """Wrap a plain OpenAI client in the correct transport adapter.
+
+        Handles two cases:
+        - ``CodexAuxiliaryClient`` when the endpoint needs the Responses API
+          (explicit ``api_mode=codex_responses`` or api.openai.com + codex
+          model name).
+        - ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic
+          Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic``
+          suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``).
+
+        Clients that are already specialized wrappers pass through unchanged.
+        """
        if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
            logger.debug(
                "resolve_provider_client: wrapping client in CodexAuxiliaryClient "
@@ -1754,7 +2023,11 @@ def resolve_provider_client(
                api_mode or "auto-detected", final_model_str,
                base_url_str[:60] if base_url_str else "")
            return CodexAuxiliaryClient(client_obj, final_model_str)
-        return client_obj
+        # Anthropic-wire endpoints: rewrap plain OpenAI clients so
+        # chat.completions.create() is translated to /v1/messages.
+        return _maybe_wrap_anthropic(
+            client_obj, final_model_str, api_key_str, base_url_str, api_mode,
+        )

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
@@ -1806,6 +2079,13 @@ def resolve_provider_client(

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
    if provider == "openai-codex":
+        if not model:
+            logger.warning(
+                "resolve_provider_client: openai-codex requested without a "
+                "model; pass model explicitly (e.g. model.model in config.yaml "
+                "or auxiliary.<task>.model for per-task aux routing)."
+            )
+            return None, None
        if raw_codex:
            # Return the raw OpenAI client for callers that need direct
            # access to responses.stream() (e.g., the main agent loop).
@@ -1814,7 +2094,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: openai-codex requested "
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
-            final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
+            final_model = _normalize_resolved_model(model, provider)
            raw_client = OpenAI(
                api_key=codex_token,
                base_url=_CODEX_AUX_BASE_URL,
@@ -1822,7 +2102,7 @@ def resolve_provider_client(
            )
            return (raw_client, final_model)
        # Standard path: wrap in CodexAuxiliaryClient adapter
-        client, default = _try_codex()
+        client, default = _build_codex_client(model)
        if client is None:
            logger.warning("resolve_provider_client: openai-codex requested "
                           "but no Codex OAuth token found (run: hermes model)")
@@ -1834,7 +2114,7 @@ def resolve_provider_client(
    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        if explicit_base_url:
-            custom_base = explicit_base_url.strip()
+            custom_base = _to_openai_base_url(explicit_base_url).strip()
            custom_key = (
                (explicit_api_key or "").strip()
                or os.getenv("OPENAI_API_KEY", "").strip()
@@ -1847,7 +2127,7 @@ def resolve_provider_client(
                )
                return None, None
            final_model = _normalize_resolved_model(
-                model or _read_main_model() or "gpt-4o-mini",
+                model or (main_runtime.get("model") if main_runtime else None) or "gpt-4o-mini",
                provider,
            )
            extra = {}
@@ -1862,17 +2142,18 @@ def resolve_provider_client(
                    is_agent_turn=True, is_vision=is_vision
                )
            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
-            client = _wrap_if_needed(client, final_model, custom_base)
+            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
-        # Try custom first, then codex, then API-key providers
-        for try_fn in (_try_custom_endpoint, _try_codex,
-                       _resolve_api_key_provider):
+        # Try custom first, then API-key providers (Codex excluded here:
+        # falling through to Codex with no model is a stale-constant trap).
+        for try_fn in (_try_custom_endpoint, _resolve_api_key_provider):
            client, default = try_fn()
            if client is not None:
                final_model = _normalize_resolved_model(model or default, provider)
                _cbase = str(getattr(client, "base_url", "") or "")
-                client = _wrap_if_needed(client, final_model, _cbase)
+                _ckey = str(getattr(client, "api_key", "") or "")
+                client = _wrap_if_needed(client, final_model, _cbase, _ckey)
                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
        logger.warning("resolve_provider_client: custom/main requested "
@@ -1895,10 +2176,24 @@ def resolve_provider_client(
            entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
            if custom_base:
                final_model = _normalize_resolved_model(
-                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
+                    model
+                    or custom_entry.get("model")
+                    or (main_runtime.get("model") if main_runtime else None)
+                    or _read_main_model()
+                    or "gpt-4o-mini",
                    provider,
                )
-                _clean_base2, _dq2 = _extract_url_query_params(custom_base)
+                # anthropic_messages talks to the /anthropic surface directly;
+                # OpenAI-wire paths (chat_completions / codex_responses) need the
+                # /v1 equivalent.  Rewrite only on the OpenAI-wire path so the
+                # Anthropic fallback SDK still sees the original URL.
+                if entry_api_mode == "anthropic_messages":
+                    openai_base = custom_base
+                    raw_base_for_wrap = custom_base
+                else:
+                    openai_base = _to_openai_base_url(custom_base)
+                    raw_base_for_wrap = custom_base
+                _clean_base2, _dq2 = _extract_url_query_params(openai_base)
                _extra2 = {"default_query": _dq2} if _dq2 else {}
                logger.debug(
                    "resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
@@ -1917,7 +2212,12 @@ def resolve_provider_client(
                            "installed — falling back to OpenAI-wire.",
                            provider,
                        )
-                        client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
+                        # Fallback went OpenAI-wire after all — redo the query
+                        # extraction against the rewritten /v1 URL.
+                        _fallback_base = _to_openai_base_url(custom_base)
+                        _fb_clean, _fb_dq = _extract_url_query_params(_fallback_base)
+                        _fb_extra = {"default_query": _fb_dq} if _fb_dq else {}
+                        client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
@@ -1936,7 +2236,7 @@ def resolve_provider_client(
                ):
                    client = CodexAuxiliaryClient(client, final_model)
                else:
-                    client = _wrap_if_needed(client, final_model, custom_base)
+                    client = _wrap_if_needed(client, final_model, raw_base_for_wrap, custom_key)
                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
            logger.warning(
@@ -1982,9 +2282,8 @@ def resolve_provider_client(
                         provider, ", ".join(tried_sources))
            return None, None

-        base_url = _to_openai_base_url(
-            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
-        )
+        raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
+        base_url = _to_openai_base_url(raw_base_url)

        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
        final_model = _normalize_resolved_model(model or default_model, provider)
@@ -2029,8 +2328,11 @@ def resolve_provider_client(

        # Honor api_mode for any API-key provider (e.g. direct OpenAI with
        # codex-family models).  The copilot-specific wrapping above handles
-        # copilot; this covers the general case (#6800).
-        client = _wrap_if_needed(client, final_model, base_url)
+        # copilot; this covers the general case (#6800).  Also rewraps
+        # Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
+        # /anthropic-suffixed gateways) so named providers like kimi-coding
+        # land on the right transport without needing per-provider branches.
+        client = _wrap_if_needed(client, final_model, raw_base_url, api_key)

        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
@@ -2038,7 +2340,12 @@ def resolve_provider_client(

    if pconfig.auth_type == "external_process":
        creds = resolve_external_process_provider_credentials(provider)
-        final_model = _normalize_resolved_model(model or _read_main_model(), provider)
+        final_model = _normalize_resolved_model(
+            model
+            or (main_runtime.get("model") if main_runtime else None)
+            or _read_main_model(),
+            provider,
+        )
        if provider == "copilot-acp":
            api_key = str(creds.get("api_key", "")).strip()
            base_url = str(creds.get("base_url", "")).strip()
@@ -2188,7 +2495,10 @@ def _resolve_strict_vision_backend(
    if provider == "nous":
        return _try_nous(vision=True)
    if provider == "openai-codex":
-        return _try_codex()
+        # Route through resolve_provider_client so the caller's explicit
+        # model is used.  There is no safe default Codex model (shifting
+        # allow-list); callers must specify via auxiliary.<task>.model.
+        return resolve_provider_client("openai-codex", model, is_vision=True)
    if provider == "anthropic":
        return _try_anthropic()
    if provider == "custom":
@@ -2293,6 +2603,19 @@ def resolve_vision_provider_client(
                        main_provider, default_model or resolved_model or main_model,
                    )
                    return _finalize(main_provider, sync_client, default_model)
+            elif main_provider in _PROVIDERS_WITHOUT_VISION:
+                # Kimi Coding Plan's /coding endpoint (Anthropic Messages wire)
+                # does not accept image input — Kimi's own docs say "Current
+                # model does not support image input, switch to a model with
+                # image_in capability" and vision lives on the separate Kimi
+                # Platform (api.moonshot.ai). Skip the main provider and fall
+                # through to the aggregator chain instead of returning a
+                # client that will 404 on every vision request (#17076).
+                logger.debug(
+                    "Vision auto-detect: skipping main provider %s (no "
+                    "vision support) — falling through to aggregator chain",
+                    main_provider,
+                )
            else:
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
@@ -2774,7 +3097,7 @@ def _get_task_extra_body(task: str) -> Dict[str, Any]:

 # Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
 # Their image content blocks must use Anthropic format, not OpenAI format.
-_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
+_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-oauth", "minimax-cn"})


 def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
@@ -291,14 +291,52 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
 def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
    """Resolve the AWS region for Bedrock API calls.

-    Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
+    Priority:
+      1. AWS_REGION env var
+      2. AWS_DEFAULT_REGION env var
+      3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
+      4. us-east-1 (hard fallback)
+
+    The boto3 fallback is critical for EU/AP users who configure their region
+    in ~/.aws/config via a named profile rather than env vars — without it,
+    live model discovery would always return us.* profile IDs regardless of
+    the user's actual region.
    """
    env = env if env is not None else os.environ
-    return (
+    explicit = (
        env.get("AWS_REGION", "").strip()
        or env.get("AWS_DEFAULT_REGION", "").strip()
-        or "us-east-1"
    )
+    if explicit:
+        return explicit
+    try:
+        import botocore.session
+        region = botocore.session.get_session().get_config_variable("region")
+        if region:
+            return region
+    except Exception:
+        pass
+    return "us-east-1"
+
+
+def bedrock_model_ids_or_none() -> Optional[List[str]]:
+    """Live-discover Bedrock model IDs for the active region.
+
+    Returns a list of model ID strings if discovery succeeds and yields
+    at least one model, or ``None`` on failure / empty result.  Callers
+    should fall back to the static curated list when ``None`` is returned.
+
+    This helper consolidates the discover → extract-ids → fallback
+    pattern that was previously duplicated across ``provider_model_ids``,
+    ``list_authenticated_providers`` section 2, and section 3.
+    """
+    try:
+        discovered = discover_bedrock_models(resolve_bedrock_region())
+        if discovered:
+            return [m["id"] for m in discovered]
+    except Exception:
+        pass
+    return None


 # ---------------------------------------------------------------------------
@@ -608,7 +608,7 @@ class CopilotACPClient:
                    end = start + limit if isinstance(limit, int) and limit > 0 else None
                    content = "".join(lines[start:end])
                if content:
-                    content = redact_sensitive_text(content)
+                    content = redact_sensitive_text(content, force=True)
                response = {
                    "jsonrpc": "2.0",
                    "id": message_id,
@@ -7,7 +7,6 @@ import random
 import threading
 import time
 import uuid
-import os
 import re
 from dataclasses import dataclass, fields, replace
 from datetime import datetime
@@ -456,6 +455,70 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

+    def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
+        """Sync a Codex device_code pool entry from auth.json if tokens differ.
+
+        When a Codex OAuth access token expires (or the ChatGPT account hits
+        its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
+        with a ``last_error_reset_at`` that can be many hours in the future.
+        Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
+        performs a fresh device-code login and writes new tokens to
+        ``auth.json`` under ``_auth_store_lock``.  Without this sync the pool
+        entry stays frozen until ``last_error_reset_at`` elapses — even
+        though fresh credentials are sitting on disk — and every request
+        fails with "no available entries (all exhausted or empty)".
+
+        Mirrors the Nous/Anthropic resync paths above.  Only applies to
+        device_code-sourced entries; env/API-key-sourced entries have no
+        auth.json shadow to sync from.
+        """
+        if self.provider != "openai-codex" or entry.source != "device_code":
+            return entry
+        try:
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                state = _load_provider_state(auth_store, "openai-codex")
+            if not isinstance(state, dict):
+                return entry
+            tokens = state.get("tokens")
+            if not isinstance(tokens, dict):
+                return entry
+            store_access = tokens.get("access_token", "")
+            store_refresh = tokens.get("refresh_token", "")
+            # Adopt auth.json tokens when either side differs.  Codex refresh
+            # tokens are single-use too, so a fresh refresh_token from
+            # another process means our entry's pair is consumed/stale.
+            entry_access = entry.access_token or ""
+            entry_refresh = entry.refresh_token or ""
+            if store_access and (
+                store_access != entry_access
+                or (store_refresh and store_refresh != entry_refresh)
+            ):
+                logger.debug(
+                    "Pool entry %s: syncing Codex tokens from auth.json "
+                    "(refreshed by another process)",
+                    entry.id,
+                )
+                field_updates: Dict[str, Any] = {
+                    "access_token": store_access,
+                    "refresh_token": store_refresh or entry.refresh_token,
+                    "last_status": None,
+                    "last_status_at": None,
+                    "last_error_code": None,
+                    "last_error_reason": None,
+                    "last_error_message": None,
+                    "last_error_reset_at": None,
+                }
+                if state.get("last_refresh"):
+                    field_updates["last_refresh"] = state["last_refresh"]
+                updated = replace(entry, **field_updates)
+                self._replace_entry(entry, updated)
+                self._persist()
+                return updated
+        except Exception as exc:
+            logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
+        return entry
+
    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
        """Sync a Nous pool entry from auth.json if tokens differ.

@@ -788,6 +851,18 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
+            # For openai-codex entries, same pattern: the user may have
+            # re-authed via `hermes model` / `hermes auth` after a 429/401,
+            # leaving fresh tokens on disk while the pool entry is still
+            # frozen behind last_error_reset_at (can be hours in the
+            # future for ChatGPT weekly windows).
+            if (self.provider == "openai-codex"
+                    and entry.source == "device_code"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_codex_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -1224,6 +1299,48 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

+    elif provider == "minimax-oauth":
+        # MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
+        # Seed the pool so `/auth list` reflects the logged-in state and the
+        # standard `hermes auth remove minimax-oauth <N>` flow works.
+        # Use refresh_if_expiring=False equivalent: resolve_minimax_oauth_runtime_credentials
+        # always refreshes on expiry, so instead read raw state here to avoid
+        # surprise network calls during provider discovery.
+        try:
+            from hermes_cli.auth import get_provider_auth_state
+            state = get_provider_auth_state("minimax-oauth")
+            if state and state.get("access_token"):
+                source_name = "oauth"
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    expires_at_ms = None
+                    try:
+                        from datetime import datetime as _dt
+                        raw = state.get("expires_at", "")
+                        if raw:
+                            expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
+                    except Exception:
+                        expires_at_ms = None
+                    base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_OAUTH,
+                            "access_token": state["access_token"],
+                            "refresh_token": state.get("refresh_token"),
+                            "expires_at_ms": expires_at_ms,
+                            "base_url": base_url,
+                            "label": state.get("label", "") or label_from_token(
+                                state.get("access_token", ""), source_name
+                            ),
+                        },
+                    )
+        except Exception as exc:
+            logger.debug("MiniMax OAuth token seed failed: %s", exc)
+
    elif provider == "openai-codex":
        # Respect user suppression — `hermes auth remove openai-codex` marks
        # the device_code source as suppressed so it won't be re-seeded from
@@ -47,7 +47,6 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass, field
-from pathlib import Path
 from typing import Callable, List, Optional


@@ -253,6 +252,19 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
    return result


+def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
+    """MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
+
+    Same pattern as Nous: single-source OAuth state with refresh tokens.
+    Suppression of the `oauth` source ensures the pool reseed path
+    (_seed_from_singletons) doesn't instantly undo the removal.
+    """
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    return result
+
+
 def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.

@@ -390,6 +402,11 @@ def _register_all_sources() -> None:
        remove_fn=_remove_qwen_cli,
        description="~/.qwen/oauth_creds.json",
    ))
+    register(RemovalStep(
+        provider="minimax-oauth", source_id="oauth",
+        remove_fn=_remove_minimax_oauth,
+        description="auth.json providers.minimax-oauth",
+    ))
    register(RemovalStep(
        provider="*", source_id="config:",
        match_fn=lambda src: src.startswith("config:") or src == "model_config",
@@ -54,6 +54,7 @@ class FailoverReason(enum.Enum):
    # Provider-specific
    thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
    long_context_tier = "long_context_tier"    # Anthropic "extra usage" tier gate
+    oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden"  # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry

    # Catch-all
    unknown = "unknown"                  # Unclassifiable — retry with backoff
@@ -91,6 +92,7 @@ class ClassifiedError:
 _BILLING_PATTERNS = [
    "insufficient credits",
    "insufficient_quota",
+    "insufficient balance",
    "credit balance",
    "credits have been exhausted",
    "top up your credits",
@@ -449,6 +451,25 @@ def classify_api_error(
            should_compress=True,
        )

+    # Anthropic OAuth subscription rejects the 1M-context beta header.
+    # Observed error body: "The long context beta is not yet available for
+    # this subscription." Returned as HTTP 400 from native Anthropic when
+    # the subscription doesn't include 1M context, even though the request
+    # carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
+    # in run_agent.py rebuilds the Anthropic client with the beta stripped
+    # and retries once. Pattern is narrow enough that it won't collide with
+    # the 429 tier-gate pattern above (different status, different phrase).
+    if (
+        status_code == 400
+        and "long context beta" in error_msg
+        and "not yet available" in error_msg
+    ):
+        return _result(
+            FailoverReason.oauth_long_context_beta_forbidden,
+            retryable=True,
+            should_compress=False,
+        )
+
    # ── 2. HTTP status code classification ──────────────────────────

    if status_code is not None:
@@ -30,7 +30,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import uuid
 from types import SimpleNamespace
@@ -42,7 +41,6 @@ from agent import google_oauth
 from agent.gemini_schema import sanitize_gemini_tool_parameters
 from agent.google_code_assist import (
    CODE_ASSIST_ENDPOINT,
-    FREE_TIER_ID,
    CodeAssistError,
    ProjectContext,
    resolve_project_context,
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import Any, Dict, List
+from typing import Any, Dict

 # Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
 # object, which is only a subset of OpenAPI 3.0 / JSON Schema.  Strip fields
@@ -29,7 +29,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import urllib.error
 import urllib.parse
@@ -49,14 +49,13 @@ import json
 import logging
 import os
 import secrets
-import socket
 import stat
 import threading
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple

@@ -98,6 +97,7 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"

 # Regex patterns for fallback scraping from an installed gemini-cli.
 import re as _re
+from utils import atomic_replace
 _CLIENT_ID_PATTERN = _re.compile(
    r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
 )
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
                fh.flush()
                os.fsync(fh.fileno())
            os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
-            os.replace(tmp_path, path)
+            atomic_replace(tmp_path, path)
        finally:
            try:
                if tmp_path.exists():
@@ -0,0 +1,48 @@
+"""LM Studio reasoning-effort resolution shared by the chat-completions
+transport and run_agent's iteration-limit summary path.
+
+LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
+``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
+graduated models). We map the user's ``reasoning_config`` onto LM Studio's
+OpenAI-compatible vocabulary, then clamp against the model's allowed set so
+the server doesn't 400 on an unsupported effort.
+"""
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+# LM Studio accepts these top-level reasoning_effort values via its
+# OpenAI-compatible chat.completions endpoint.
+_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
+
+# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
+# Map them onto the OpenAI-compatible request vocabulary.
+_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
+
+
+def resolve_lmstudio_effort(
+    reasoning_config: Optional[dict],
+    allowed_options: Optional[List[str]],
+) -> Optional[str]:
+    """Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
+
+    ``None`` means "omit the field": the user picked a level the model can't
+    honor, so let LM Studio fall back to the model's declared default rather
+    than silently substituting a different effort. When ``allowed_options`` is
+    falsy (probe failed), skip clamping and send the resolved effort anyway.
+    """
+    effort = "medium"
+    if reasoning_config and isinstance(reasoning_config, dict):
+        if reasoning_config.get("enabled") is False:
+            effort = "none"
+        else:
+            raw = (reasoning_config.get("effort") or "").strip().lower()
+            raw = _LM_EFFORT_ALIASES.get(raw, raw)
+            if raw in _LM_VALID_EFFORTS:
+                effort = raw
+    if allowed_options:
+        allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
+        if effort not in allowed:
+            return None
+    return effort
@@ -28,7 +28,6 @@ Usage in run_agent.py:

 from __future__ import annotations

-import json
 import logging
 import re
 import inspect
@@ -403,6 +402,41 @@ class MemoryManager:
                    provider.name, e,
                )

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Notify all providers that the agent's session_id has rotated.
+
+        Fires on ``/resume``, ``/branch``, ``/reset``, ``/new``, and
+        context compression — any path that reassigns
+        ``AIAgent.session_id`` without tearing the provider down.
+
+        Providers keep running; they only need to refresh cached
+        per-session state so subsequent writes land in the correct
+        session's record. See ``MemoryProvider.on_session_switch`` for
+        the full contract.
+        """
+        if not new_session_id:
+            return
+        for provider in self._providers:
+            try:
+                provider.on_session_switch(
+                    new_session_id,
+                    parent_session_id=parent_session_id,
+                    reset=reset,
+                    **kwargs,
+                )
+            except Exception as e:
+                logger.debug(
+                    "Memory provider '%s' on_session_switch failed: %s",
+                    provider.name, e,
+                )
+
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Notify all providers before context compression.

@@ -25,6 +25,7 @@ Lifecycle (called by MemoryManager, wired in run_agent.py):
 Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
+  on_session_switch(new_session_id, **kwargs) — mid-process session_id rotation
  on_pre_compress(messages) -> str       — extract before context compression
  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
@@ -160,6 +161,45 @@ class MemoryProvider(ABC):
        (CLI exit, /reset, gateway session expiry).
        """

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Called when the agent switches session_id mid-process.
+
+        Fires on ``/resume``, ``/branch``, ``/reset``, ``/new`` (CLI), the
+        gateway equivalents, and context compression — any path that
+        reassigns ``AIAgent.session_id`` without tearing the provider down.
+
+        Providers that cache per-session state in ``initialize()``
+        (``_session_id``, ``_document_id``, accumulated turn buffers,
+        counters) should update or reset that state here so subsequent
+        writes land in the correct session's record.
+
+        Parameters
+        ----------
+        new_session_id:
+            The session_id the agent just switched to.
+        parent_session_id:
+            The previous session_id, if meaningful — set for ``/branch``
+            (fork lineage), context compression (continuation lineage),
+            and ``/resume`` (the session we're leaving). Empty string
+            when no lineage applies.
+        reset:
+            ``True`` when this is a genuinely new conversation, not a
+            resumption of an existing one. Fired by ``/reset`` / ``/new``.
+            Providers should flush accumulated per-session buffers
+            (``_session_turns``, ``_turn_counter``, etc.) when this is
+            set. ``False`` for ``/resume`` / ``/branch`` / compression
+            where the logical conversation continues under the new id.
+
+        Default is no-op for backward compatibility.
+        """
+
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Called before context compression discards old messages.

@@ -46,12 +46,13 @@ def _resolve_requests_verify() -> bool | str:
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
    "arcee",
    "gmi",
+    "tencent-tokenhub",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -60,6 +61,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "ollama",
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
+    "tencent", "tokenhub", "tencent-cloud", "tencentmaas",
    "arcee-ai", "arceeai",
    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
@@ -208,6 +210,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "grok": 131072,             # catch-all (grok-beta, unknown grok-*)
    # Kimi
    "kimi": 262144,
+    # Tencent — Hy3 Preview (Hunyuan) with 256K context window
+    "hy3-preview": 256000,
    # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
    "nemotron": 131072,
    # Arcee
@@ -310,6 +314,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    "api.gmi-serving.com": "gmi",
+    "tokenhub.tencentmaas.com": "tencent-tokenhub",
    "ollama.com": "ollama-cloud",
 }

@@ -620,8 +625,6 @@ def fetch_endpoint_model_metadata(
                        if isinstance(ctx, int) and ctx > 0:
                            context_length = ctx
                            break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
                    if context_length is not None:
                        entry["context_length"] = context_length

@@ -1011,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
                                ctx = cfg.get("context_length")
                                if ctx and isinstance(ctx, (int, float)):
                                    return int(ctx)
-                            # Fall back to max_context_length (theoretical model max)
-                            ctx = m.get("max_context_length") or m.get("context_length")
-                            if ctx and isinstance(ctx, (int, float)):
-                                return int(ctx)
+                            break

            # LM Studio / vLLM / llama.cpp: try /v1/models/{model}
            resp = client.get(f"{server_url}/v1/models/{model}")
@@ -1247,7 +1247,7 @@ def get_model_context_length(
    6. Nous suffix-match via OpenRouter cache
    7. models.dev registry lookup (provider-aware)
    8. Thin hardcoded defaults (broad family patterns)
-    9. Default fallback (128K)
+    9. Default fallback (256K)
    """
    # 0. Explicit config override — user knows best
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
@@ -1276,7 +1276,10 @@ def get_model_context_length(
    model = _strip_provider_prefix(model)

    # 1. Check persistent cache (model+provider)
-    if base_url:
+    # LM Studio is excluded — its loaded context length is transient (the
+    # user can reload the model with a different context_length at any time
+    # via /api/v1/models/load), so a stale cached value would mask reloads.
+    if base_url and provider != "lmstudio":
        cached = get_cached_context_length(model, base_url)
        if cached is not None:
            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
@@ -1329,7 +1332,8 @@ def get_model_context_length(
            if is_local_endpoint(base_url):
                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
-                    save_context_length(model, base_url, local_ctx)
+                    if provider != "lmstudio":
+                        save_context_length(model, base_url, local_ctx)
                    return local_ctx
            logger.info(
                "Could not detect context length for model %r at %s — "
@@ -1419,10 +1423,11 @@ def get_model_context_length(
    if base_url and is_local_endpoint(base_url):
        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
-            save_context_length(model, base_url, local_ctx)
+            if provider != "lmstudio":
+                save_context_length(model, base_url, local_ctx)
            return local_ctx

-    # 10. Default fallback — 128K
+    # 10. Default fallback — 256K
    return DEFAULT_FALLBACK_CONTEXT


@@ -149,6 +149,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
+    "minimax-oauth": "minimax",
    "minimax-cn": "minimax-cn",
    "deepseek": "deepseek",
    "alibaba": "alibaba",
@@ -18,6 +18,7 @@ import os
 import tempfile
 import time
 from typing import Any, Mapping, Optional
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -118,7 +119,7 @@ def record_nous_rate_limit(
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(state, f)
-            os.replace(tmp_path, path)
+            atomic_replace(tmp_path, path)
        except Exception:
            # Clean up temp file on failure
            try:
@@ -98,17 +98,19 @@ def tool_progress_hint_cli() -> str:
 def openclaw_residue_hint_cli() -> str:
    """Banner shown the first time Hermes starts and finds ``~/.openclaw/``.

-    OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
-    otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
-    get carried forward and the agent dutifully reads them). ``hermes claw
-    cleanup`` renames the directory so the agent stops finding it.
+    Points users at ``hermes claw migrate`` (non-destructive port of config,
+    memory, and skills) first. ``hermes claw cleanup`` is mentioned as the
+    follow-up step for users who have already migrated and want to archive
+    the old directory — with a warning that archiving breaks OpenClaw.
    """
    return (
-        "Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
-        "After migrating, the agent can still get confused and read that "
-        "directory's config/memory instead of Hermes's.\n"
-        "Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
-        "This tip only shows once; rerun it any time with `hermes claw cleanup`."
+        "A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
+        "To port your config, memory, and skills over to Hermes, run "
+        "`hermes claw migrate`.\n"
+        "If you've already migrated and want to archive the old directory, "
+        "run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
+        "OpenClaw will stop working after this).\n"
+        "This tip only shows once."
    )


@@ -310,6 +310,10 @@ PLATFORM_HINTS = {
        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
+        "Telegram has NO table syntax — prefer bullet lists or labeled "
+        "key: value pairs over pipe tables (any tables you do emit are "
+        "auto-rewritten into row-group bullets, which you can produce "
+        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -56,8 +56,12 @@ _SENSITIVE_BODY_KEYS = frozenset({
 })

 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
-# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
-_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
+# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
+# mid-session.  OFF by default — user must opt in via
+# `security.redact_secrets: true` in config.yaml (bridged to this env var
+# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
+# in ~/.hermes/.env.
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")

 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
@@ -180,11 +184,59 @@ _PREFIX_RE = re.compile(
 )


+def mask_secret(
+    value: str,
+    *,
+    head: int = 4,
+    tail: int = 4,
+    floor: int = 12,
+    placeholder: str = "***",
+    empty: str = "",
+) -> str:
+    """Mask a secret for display, preserving ``head`` and ``tail`` characters.
+
+    Canonical helper for display-time redaction across Hermes — used by
+    ``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
+    a secret needs to be shown truncated for debuggability while still
+    keeping the bulk hidden.
+
+    Args:
+        value:       The secret to mask. ``None``/empty returns ``empty``.
+        head:        Leading characters to preserve. Default 4.
+        tail:        Trailing characters to preserve. Default 4.
+        floor:       Values shorter than ``head + tail + floor_margin`` are
+                     fully masked (returns ``placeholder``). Default 12 —
+                     matches the existing config/status/dump convention.
+        placeholder: Value returned for too-short inputs. Default ``"***"``.
+        empty:       Value returned when ``value`` is falsy (None, ""). The
+                     caller can override this to e.g. ``color("(not set)",
+                     Colors.DIM)`` for user-facing display.
+
+    Examples:
+        >>> mask_secret("sk-proj-abcdef1234567890")
+        'sk-p...7890'
+        >>> mask_secret("short")                         # fully masked
+        '***'
+        >>> mask_secret("")                              # empty default
+        ''
+        >>> mask_secret("", empty="(not set)")           # empty override
+        '(not set)'
+        >>> mask_secret("long-token", head=6, tail=4, floor=18)
+        '***'
+    """
+    if not value:
+        return empty
+    if len(value) < floor:
+        return placeholder
+    return f"{value[:head]}...{value[-tail:]}"
+
+
 def _mask_token(token: str) -> str:
-    """Mask a token, preserving prefix for long tokens."""
-    if len(token) < 18:
+    """Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
+    # Empty input: historically this returned "***" rather than "". Preserve.
+    if not token:
        return "***"
-    return f"{token[:6]}...{token[-4:]}"
+    return mask_secret(token, head=6, tail=4, floor=18)


 def _redact_query_string(query: str) -> str:
@@ -253,11 +305,13 @@ def _redact_form_body(text: str) -> str:
    return _redact_query_string(text.strip())


-def redact_sensitive_text(text: str) -> str:
+def redact_sensitive_text(text: str, *, force: bool = False) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
-    Disabled when security.redact_secrets is false in config.yaml.
+    Disabled by default — enable via security.redact_secrets: true in config.yaml.
+    Set force=True for safety boundaries that must never return raw secrets
+    regardless of the user's global logging redaction preference.
    """
    if text is None:
        return None
@@ -265,7 +319,7 @@ def redact_sensitive_text(text: str) -> str:
        text = str(text)
    if not text:
        return text
-    if not _REDACT_ENABLED:
+    if not (force or _REDACT_ENABLED):
        return text

    # Known prefixes (sk-, ghp_, etc.)
@@ -76,6 +76,7 @@ except ImportError:  # pragma: no cover
    fcntl = None  # type: ignore[assignment]

 from hermes_constants import get_hermes_home
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -568,7 +569,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
        try:
            with os.fdopen(fd, "w") as fh:
                fh.write(json.dumps(data, indent=2, sort_keys=True))
-            os.replace(tmp_path, p)
+            atomic_replace(tmp_path, p)
        except Exception:
            try:
                os.unlink(tmp_path)
@@ -234,7 +234,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:

        for scan_dir in dirs_to_scan:
            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
-                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
+                if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
                    continue
                try:
                    content = skill_md.read_text(encoding='utf-8')
@@ -284,6 +284,71 @@ def get_skill_commands() -> Dict[str, Dict[str, Any]]:
    return _skill_commands


+def reload_skills() -> Dict[str, Any]:
+    """Re-scan the skills directory and return a diff of what changed.
+
+    Rescans ``~/.hermes/skills/`` and any ``skills.external_dirs`` so the
+    slash-command map (``agent.skill_commands._skill_commands``) reflects
+    skills added or removed on disk.
+
+    This does NOT invalidate the skills system-prompt cache. Skills are
+    called by name via ``/skill-name``, ``skills_list``, or ``skill_view``
+    — they don't need to be in the system prompt for the model to use them.
+    Keeping the prompt cache intact preserves prefix caching across the
+    reload, so a user invoking ``/reload-skills`` pays no cache-reset cost.
+
+    Returns:
+        Dict with keys::
+
+            {
+              "added":      [{"name": str, "description": str}, ...],
+              "removed":    [{"name": str, "description": str}, ...],
+              "unchanged":  [skill names present before and after],
+              "total":      total skill count after rescan,
+              "commands":   total /slash-skill count after rescan,
+            }
+
+        ``description`` is the skill's full SKILL.md frontmatter
+        ``description:`` field — the same string the system prompt renders
+        as ``    - name: description`` for pre-existing skills.
+    """
+    # Snapshot pre-reload state (name -> description) from the current
+    # slash-command cache. Using dicts lets the post-rescan diff carry
+    # descriptions for newly-visible or just-removed skills without a
+    # second disk walk.
+    def _snapshot(cmds: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
+        out: Dict[str, str] = {}
+        for slash_key, info in cmds.items():
+            bare = slash_key.lstrip("/")
+            out[bare] = (info or {}).get("description") or ""
+        return out
+
+    before = _snapshot(_skill_commands)
+
+    # Rescan the skills dir. ``scan_skill_commands`` resets
+    # ``_skill_commands = {}`` internally and repopulates it.
+    new_commands = scan_skill_commands()
+
+    after = _snapshot(new_commands)
+
+    added_names = sorted(set(after) - set(before))
+    removed_names = sorted(set(before) - set(after))
+    unchanged = sorted(set(after) & set(before))
+
+    added = [{"name": n, "description": after[n]} for n in added_names]
+    # For removed skills, use the description we had cached pre-rescan
+    # (the skill file is gone so we can't re-read it).
+    removed = [{"name": n, "description": before[n]} for n in removed_names]
+
+    return {
+        "added": added,
+        "removed": removed,
+        "unchanged": unchanged,
+        "total": len(after),
+        "commands": len(new_commands),
+    }
+
+
 def resolve_skill_command_key(command: str) -> Optional[str]:
    """Resolve a user-typed /command to its canonical skill_cmds key.

@@ -328,6 +393,14 @@ def build_skill_invocation_message(
        return f"[Failed to load skill: {skill_info['name']}]"

    loaded_skill, skill_dir, skill_name = loaded
+
+    # Track active usage for Curator lifecycle management (#17782)
+    try:
+        from tools.skill_usage import bump_use
+        bump_use(skill_name)
+    except Exception:
+        pass  # Non-critical — skill invocation proceeds regardless
+
    activation_note = (
        f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
        "you to follow its instructions. The full skill content is loaded below.]"
@@ -367,6 +440,14 @@ def build_preloaded_skills_prompt(
            continue

        loaded_skill, skill_dir, skill_name = loaded
+
+        # Track active usage for Curator lifecycle management (#17782)
+        try:
+            from tools.skill_usage import bump_use
+            bump_use(skill_name)
+        except Exception:
+            pass  # Non-critical
+
        activation_note = (
            f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
            "preloaded. Treat its instructions as active guidance for the duration of this "
@@ -24,7 +24,7 @@ PLATFORM_MAP = {
    "windows": "win32",
 }

-EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))
+EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))

 # ── Lazy YAML loader ─────────────────────────────────────────────────────

@@ -200,6 +200,9 @@ def get_external_skills_dirs() -> List[Path]:
    if not isinstance(raw_dirs, list):
        return []

+    from hermes_constants import get_hermes_home
+
+    hermes_home = get_hermes_home()
    local_skills = get_skills_dir().resolve()
    seen: Set[Path] = set()
    result: List[Path] = []
@@ -210,7 +213,12 @@ def get_external_skills_dirs() -> List[Path]:
            continue
        # Expand ~ and environment variables
        expanded = os.path.expanduser(os.path.expandvars(entry))
-        p = Path(expanded).resolve()
+        p = Path(expanded)
+        # Resolve relative paths against HERMES_HOME, not cwd
+        if not p.is_absolute():
+            p = (hermes_home / p).resolve()
+        else:
+            p = p.resolve()
        if p == local_skills:
            continue
        if p in seen:
@@ -432,7 +440,7 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
 def iter_skill_index_files(skills_dir: Path, filename: str):
    """Walk skills_dir yielding sorted paths matching *filename*.

-    Excludes ``.git``, ``.github``, ``.hub`` directories.
+    Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
    """
    matches = []
    for root, dirs, files in os.walk(skills_dir, followlinks=True):
@@ -30,10 +30,12 @@ def generate_title(
    assistant_response: str,
    timeout: float = 30.0,
    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> Optional[str]:
    """Generate a session title from the first exchange.

-    Uses the auxiliary LLM client (cheapest/fastest available model).
+    Uses the main runtime's model when available, falling back to the
+    auxiliary LLM client (cheapest/fastest available model).
    Returns the title string or None on failure.

    ``failure_callback`` is invoked with ``(task, exception)`` when the
@@ -57,6 +59,7 @@ def generate_title(
            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
+            main_runtime=main_runtime,
        )
        title = (response.choices[0].message.content or "").strip()
        # Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
@@ -86,6 +89,7 @@ def auto_title_session(
    user_message: str,
    assistant_response: str,
    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -107,7 +111,7 @@ def auto_title_session(
        return

    title = generate_title(
-        user_message, assistant_response, failure_callback=failure_callback
+        user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
    )
    if not title:
        return
@@ -126,6 +130,7 @@ def maybe_auto_title(
    assistant_response: str,
    conversation_history: list,
    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -147,7 +152,7 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
-        kwargs={"failure_callback": failure_callback},
+        kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
        daemon=True,
        name="auto-title",
    )
@@ -58,6 +58,7 @@ class AnthropicTransport(ProviderTransport):
            context_length: int | None
            base_url: str | None
            fast_mode: bool
+            drop_context_1m_beta: bool
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

@@ -73,6 +74,7 @@ class AnthropicTransport(ProviderTransport):
            context_length=params.get("context_length"),
            base_url=params.get("base_url"),
            fast_mode=params.get("fast_mode", False),
+            drop_context_1m_beta=params.get("drop_context_1m_beta", False),
        )

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
@@ -12,12 +12,93 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

+from agent.lmstudio_reasoning import resolve_lmstudio_effort
 from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage


+def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
+    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
+    if reasoning_config is None or not isinstance(reasoning_config, dict):
+        return None
+
+    normalized_model = (model or "").strip().lower()
+    if normalized_model.startswith("google/"):
+        normalized_model = normalized_model.split("/", 1)[1]
+
+    # ``thinking_config`` is a Gemini-only request parameter. The same
+    # ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
+    # those reject the field with HTTP 400 "Unknown name 'thinking_config':
+    # Cannot find field" — including the polite ``{"includeThoughts": False}``
+    # form. Omit the field entirely on non-Gemini models. (#17426)
+    if not normalized_model.startswith("gemini"):
+        return None
+
+    if reasoning_config.get("enabled") is False:
+        # Gemini can hide thought parts even when internal thinking still
+        # happens; omit thinkingLevel to avoid model-specific validation quirks.
+        return {"includeThoughts": False}
+
+    effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
+    if effort == "none":
+        return {"includeThoughts": False}
+
+    thinking_config: Dict[str, Any] = {"includeThoughts": True}
+
+    # Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
+    # coarse effort levels. ``includeThoughts`` alone is enough to surface
+    # thought parts without risking request validation errors.
+    if normalized_model.startswith("gemini-2.5-"):
+        return thinking_config
+
+    if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
+        effort = "medium"
+
+    # Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
+    # is stricter (low/high). Clamp Hermes' wider effort set to what each
+    # family accepts so we never forward an undocumented level verbatim.
+    if normalized_model.startswith(("gemini-3", "gemini-3.1")):
+        if "flash" in normalized_model:
+            if effort in {"minimal", "low"}:
+                thinking_config["thinkingLevel"] = "low"
+            elif effort in {"high", "xhigh"}:
+                thinking_config["thinkingLevel"] = "high"
+            else:
+                thinking_config["thinkingLevel"] = "medium"
+        elif "pro" in normalized_model:
+            thinking_config["thinkingLevel"] = (
+                "high" if effort in {"high", "xhigh"} else "low"
+            )
+
+    return thinking_config
+
+
+def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
+    """Convert Gemini thinking config keys to the OpenAI-compat field names."""
+    if not isinstance(config, dict) or not config:
+        return None
+
+    translated: Dict[str, Any] = {}
+    if isinstance(config.get("includeThoughts"), bool):
+        translated["include_thoughts"] = config["includeThoughts"]
+    if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
+        translated["thinking_level"] = config["thinkingLevel"].strip().lower()
+    if isinstance(config.get("thinkingBudget"), (int, float)):
+        translated["thinking_budget"] = int(config["thinkingBudget"])
+    return translated or None
+
+
+def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
+    normalized = str(base_url or "").strip().rstrip("/").lower()
+    if not normalized:
+        return False
+    if "generativelanguage.googleapis.com" not in normalized:
+        return False
+    return normalized.endswith("/openai")
+
+
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@@ -101,6 +182,7 @@ class ChatCompletionsTransport(ProviderTransport):
            is_github_models: bool
            is_nvidia_nim: bool
            is_kimi: bool
+            is_lmstudio: bool
            is_custom_provider: bool
            ollama_num_ctx: int | None
            # Provider routing
@@ -114,6 +196,7 @@ class ChatCompletionsTransport(ProviderTransport):
            # Reasoning
            supports_reasoning: bool
            github_reasoning_extra: dict | None
+            lmstudio_reasoning_options: list[str] | None  # raw allowed_options from /api/v1/models
            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
            # Extra
@@ -188,6 +271,7 @@ class ChatCompletionsTransport(ProviderTransport):
        anthropic_max_out = params.get("anthropic_max_output")
        is_nvidia_nim = params.get("is_nvidia_nim", False)
        is_kimi = params.get("is_kimi", False)
+        is_tokenhub = params.get("is_tokenhub", False)
        reasoning_config = params.get("reasoning_config")

        if ephemeral is not None and max_tokens_fn:
@@ -219,12 +303,41 @@ class ChatCompletionsTransport(ProviderTransport):
                        _kimi_effort = _e
                api_kwargs["reasoning_effort"] = _kimi_effort

+        # Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
+        if is_tokenhub:
+            _tokenhub_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _tokenhub_thinking_off:
+                _tokenhub_effort = "high"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _tokenhub_effort = _e
+                api_kwargs["reasoning_effort"] = _tokenhub_effort
+
+        # LM Studio: top-level reasoning_effort. Only emit when the model
+        # declares reasoning support via /api/v1/models capabilities (gated
+        # upstream by params["supports_reasoning"]). resolve_lmstudio_effort
+        # is shared with run_agent's summary path so both stay in sync.
+        if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
+            _lm_effort = resolve_lmstudio_effort(
+                reasoning_config,
+                params.get("lmstudio_reasoning_options"),
+            )
+            if _lm_effort is not None:
+                api_kwargs["reasoning_effort"] = _lm_effort
+
        # extra_body assembly
        extra_body: Dict[str, Any] = {}

        is_openrouter = params.get("is_openrouter", False)
        is_nous = params.get("is_nous", False)
        is_github_models = params.get("is_github_models", False)
+        provider_name = str(params.get("provider_name") or "").strip().lower()
+        base_url = params.get("base_url")

        provider_prefs = params.get("provider_preferences")
        if provider_prefs and is_openrouter:
@@ -240,8 +353,9 @@ class ChatCompletionsTransport(ProviderTransport):
                "type": "enabled" if _kimi_thinking_enabled else "disabled",
            }

-        # Reasoning
-        if params.get("supports_reasoning", False):
+        # Reasoning. LM Studio is handled above via top-level reasoning_effort,
+        # so skip emitting extra_body.reasoning for it.
+        if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
            if is_github_models:
                gh_reasoning = params.get("github_reasoning_extra")
                if gh_reasoning is not None:
@@ -277,6 +391,23 @@ class ChatCompletionsTransport(ProviderTransport):
        if is_qwen:
            extra_body["vl_high_resolution_images"] = True

+        if provider_name == "gemini":
+            raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if _is_gemini_openai_compat_base_url(base_url):
+                thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
+                if thinking_config:
+                    openai_compat_extra = extra_body.get("extra_body", {})
+                    google_extra = openai_compat_extra.get("google", {})
+                    google_extra["thinking_config"] = thinking_config
+                    openai_compat_extra["google"] = google_extra
+                    extra_body["extra_body"] = openai_compat_extra
+            elif raw_thinking_config:
+                extra_body["thinking_config"] = raw_thinking_config
+        elif provider_name == "google-gemini-cli":
+            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if thinking_config:
+                extra_body["thinking_config"] = thinking_config
+
        # Merge any pre-built extra_body additions
        additions = params.get("extra_body_additions")
        if additions:
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
 from typing import Any, Dict, List, Optional

 from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
+from agent.transports.types import NormalizedResponse, ToolCall


 class ResponsesApiTransport(ProviderTransport):
@@ -151,8 +151,6 @@ class ResponsesApiTransport(ProviderTransport):
        """Normalize Codex Responses API response to NormalizedResponse."""
        from agent.codex_responses_adapter import (
            _normalize_codex_response,
-            _extract_responses_message_text,
-            _extract_responses_reasoning_text,
        )

        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
@@ -359,6 +359,25 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://aws.amazon.com/bedrock/pricing/",
        pricing_version="bedrock-pricing-2026-04",
    ),
+    # MiniMax
+    (
+        "minimax",
+        "minimax-m2.7",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.30"),
+        output_cost_per_million=Decimal("1.20"),
+        source="official_docs_snapshot",
+        pricing_version="minimax-pricing-2026-04",
+    ),
+    (
+        "minimax-cn",
+        "minimax-m2.7",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.30"),
+        output_cost_per_million=Decimal("1.20"),
+        source="official_docs_snapshot",
+        pricing_version="minimax-pricing-2026-04",
+    ),
 }


@@ -400,6 +419,8 @@ def resolve_billing_route(
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name == "openai":
        return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
+    if provider_name in {"minimax", "minimax-cn"}:
+        return BillingRoute(provider=provider_name, model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name in {"custom", "local"} or (base and "localhost" in base):
        return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
    return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
@@ -30,14 +30,13 @@ model:
  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
+  #   "lmstudio"     - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
  #
  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
-  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
-  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
-  #   Example for LM Studio:
-  #     provider: "lmstudio"
-  #     base_url: "http://localhost:1234/v1"
-  #   No API key needed — local servers typically ignore auth.
+  #   "custom"       - Any other OpenAI-compatible endpoint. Set base_url below.
+  #   Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
+  #   LM Studio is first-class and uses provider: "lmstudio".
+  #   It works with both no-auth and auth-enabled server modes.
  #
  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  provider: "auto"
@@ -181,6 +180,11 @@ terminal:
 #   lifetime_seconds: 300
 #   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 #   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
+#   # Optional: run the container as your host user's uid:gid so files written
+#   # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
+#   # caps too since no gosu privilege drop is needed. Leave off if your
+#   # chosen docker_image expects to start as root.
+#   docker_run_as_host_user: true
 #   # Optional: explicitly forward selected env vars into Docker.
 #   # These values come from your current shell first, then ~/.hermes/.env.
 #   # Warning: anything forwarded here is visible to commands run in the container.
@@ -566,7 +570,7 @@ agent:
 #   - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
 #   - A list of individual toolsets to compose your own (see list below)
 #
-# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot
+# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot, teams
 #
 # Examples:
 #
@@ -596,6 +600,7 @@ agent:
 #   signal:        hermes-signal         (same as telegram)
 #   homeassistant: hermes-homeassistant  (same as telegram)
 #   qqbot:            hermes-qqbot            (same as telegram)
+#   teams:            hermes-teams            (same as telegram)
 #
 platform_toolsets:
  cli: [hermes-cli]
@@ -607,6 +612,7 @@ platform_toolsets:
  homeassistant: [hermes-homeassistant]
  qqbot: [hermes-qqbot]
  yuanbao: [hermes-yuanbao]
+  teams: [hermes-teams]

 # =============================================================================
 # Gateway Platform Settings
@@ -928,7 +934,7 @@ display:
  #     agent_name: "My Agent"               # Banner title and branding
  #     welcome: "Welcome message"           # Shown at CLI startup
  #     response_label: " ⚔ Agent "         # Response box header label
-  #     prompt_symbol: "⚔ ❯ "              # Prompt symbol
+  #     prompt_symbol: "⚔"                  # Prompt symbol (bare token; renderers add trailing space)
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default
@@ -69,7 +69,9 @@ from agent.usage_pricing import (
    format_duration_compact,
    format_token_count_compact,
 )
-from agent.account_usage import fetch_account_usage, render_account_usage_lines
+# NOTE: `from agent.account_usage import ...` is deliberately NOT at module
+# top — it transitively pulls the OpenAI SDK chain (~230 ms cold) and is only
+# needed when the user runs `/limits`. Lazy-imported inside the handler below.
 from hermes_cli.banner import _format_context_length, format_banner_version_label

 _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
@@ -78,6 +80,11 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
 from hermes_constants import get_hermes_home, display_hermes_home
+from hermes_cli.browser_connect import (
+    DEFAULT_BROWSER_CDP_URL,
+    manual_chrome_debug_command,
+    try_launch_chrome_debug,
+)
 from hermes_cli.env_loader import load_hermes_dotenv
 from utils import base_url_host_matches

@@ -238,65 +245,6 @@ def _parse_service_tier_config(raw: str) -> str | None:
    logger.warning("Unknown service_tier '%s', ignoring", raw)
    return None

-
-
-def _get_chrome_debug_candidates(system: str) -> list[str]:
-    """Return likely browser executables for local CDP auto-launch."""
-    candidates: list[str] = []
-    seen: set[str] = set()
-
-    def _add_candidate(path: str | None) -> None:
-        if not path:
-            return
-        normalized = os.path.normcase(os.path.normpath(path))
-        if normalized in seen:
-            return
-        if os.path.isfile(path):
-            candidates.append(path)
-            seen.add(normalized)
-
-    def _add_from_path(*names: str) -> None:
-        for name in names:
-            _add_candidate(shutil.which(name))
-
-    if system == "Darwin":
-        for app in (
-            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
-            "/Applications/Chromium.app/Contents/MacOS/Chromium",
-            "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
-            "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
-        ):
-            _add_candidate(app)
-    elif system == "Windows":
-        _add_from_path(
-            "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
-            "chrome", "msedge", "brave", "chromium",
-        )
-
-        for base in (
-            os.environ.get("ProgramFiles"),
-            os.environ.get("ProgramFiles(x86)"),
-            os.environ.get("LOCALAPPDATA"),
-        ):
-            if not base:
-                continue
-            for parts in (
-                ("Google", "Chrome", "Application", "chrome.exe"),
-                ("Chromium", "Application", "chrome.exe"),
-                ("Chromium", "Application", "chromium.exe"),
-                ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
-                ("Microsoft", "Edge", "Application", "msedge.exe"),
-            ):
-                _add_candidate(os.path.join(base, *parts))
-    else:
-        _add_from_path(
-            "google-chrome", "google-chrome-stable", "chromium-browser",
-            "chromium", "brave-browser", "microsoft-edge",
-        )
-
-    return candidates
-
-
 def load_cli_config() -> Dict[str, Any]:
    """
    Load CLI configuration from config files.
@@ -549,18 +497,20 @@ def load_cli_config() -> Dict[str, Any]:
        "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "modal_image": "TERMINAL_MODAL_IMAGE",
        "daytona_image": "TERMINAL_DAYTONA_IMAGE",
+        "vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
        # SSH config
        "ssh_host": "TERMINAL_SSH_HOST",
        "ssh_user": "TERMINAL_SSH_USER",
        "ssh_port": "TERMINAL_SSH_PORT",
        "ssh_key": "TERMINAL_SSH_KEY",
-        # Container resource config (docker, singularity, modal, daytona -- ignored for local/ssh)
+        # Container resource config (docker, singularity, modal, daytona, vercel_sandbox -- ignored for local/ssh)
        "container_cpu": "TERMINAL_CONTAINER_CPU",
        "container_memory": "TERMINAL_CONTAINER_MEMORY",
        "container_disk": "TERMINAL_CONTAINER_DISK",
        "container_persistent": "TERMINAL_CONTAINER_PERSISTENT",
        "docker_volumes": "TERMINAL_DOCKER_VOLUMES",
        "docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
+        "docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
        "sandbox_dir": "TERMINAL_SANDBOX_DIR",
        # Persistent shell (non-local backends)
        "persistent_shell": "TERMINAL_PERSISTENT_SHELL",
@@ -1590,9 +1540,29 @@ def _strip_leaked_bracketed_paste_wrappers(text: str) -> str:
 # that appears when the ESC byte was stripped by a prior filter.
 _DSR_CPR_ESC_RE = re.compile(r"\x1b\[\d+;\d+R")
 _DSR_CPR_VISIBLE_RE = re.compile(r"\^\[\[\d+;\d+R")
+_SGR_MOUSE_ESC_RE = re.compile(r"\x1b\[<\d+;\d+;\d+[Mm]")
+_SGR_MOUSE_VISIBLE_RE = re.compile(r"\^\[\[<\d+;\d+;\d+[Mm]")
+# Some terminals/filters can drop ESC and literal "^[[", leaving only
+# "<btn;col;rowM" fragments in the buffer. Keep this broad on purpose:
+# these fragments are extremely unlikely to be intentional user input, and
+# stripping them is better than sending corrupted prompts.
+_SGR_MOUSE_BARE_RE = re.compile(r"<\d+;\d+;\d+[Mm]")
+_TERMINAL_INPUT_MODE_RESET_SEQ = (
+    "\x1b[?1006l"  # disable SGR mouse
+    "\x1b[?1003l"  # disable any-motion tracking
+    "\x1b[?1002l"  # disable button-motion tracking
+    "\x1b[?1000l"  # disable click tracking
+    "\x1b[?1004l"  # disable focus events
+    "\x1b[?2004l"  # disable bracketed paste
+    "\x1b[?1049l"  # leave alt screen (if stuck there)
+    "\x1b[<u"      # pop kitty keyboard mode
+    "\x1b[>4m"     # reset modifyOtherKeys
+    "\x1b[0m"      # reset text attributes
+    "\x1b[?25h"    # ensure cursor visible
+)


-def _strip_leaked_terminal_responses(text: str) -> str:
+def _strip_leaked_terminal_responses_with_meta(text: str) -> tuple[str, bool]:
    """Strip leaked terminal control-response sequences from user input.

    Covers Cursor Position Report (CPR / DSR) responses — ``ESC[<row>;<col>R``
@@ -1602,12 +1572,43 @@ def _strip_leaked_terminal_responses(text: str) -> str:
    (resize storms, multiplexer focus changes, slow PTYs) the response
    lands in the input buffer as literal text and corrupts what the user
    typed.
+
+    Also strips leaked SGR mouse-report fragments (``ESC[<...M/m`` and
+    degraded visible forms). Returns ``(cleaned_text, had_mouse_reports)``
+    so callers can trigger an in-place terminal mode recovery when needed.
    """
    if not text:
-        return text
-    text = _DSR_CPR_ESC_RE.sub("", text)
-    text = _DSR_CPR_VISIBLE_RE.sub("", text)
-    return text
+        return text, False
+
+    has_esc = "\x1b[" in text
+    has_visible = "^[" in text
+    has_bare_mouse = "<" in text and ";" in text and ("M" in text or "m" in text)
+    if not (has_esc or has_visible or has_bare_mouse):
+        return text, False
+
+    had_mouse_reports = False
+
+    if has_esc:
+        text = _DSR_CPR_ESC_RE.sub("", text)
+        text, count = _SGR_MOUSE_ESC_RE.subn("", text)
+        had_mouse_reports = had_mouse_reports or count > 0
+
+    if has_visible:
+        text = _DSR_CPR_VISIBLE_RE.sub("", text)
+        text, count = _SGR_MOUSE_VISIBLE_RE.subn("", text)
+        had_mouse_reports = had_mouse_reports or count > 0
+
+    if has_bare_mouse:
+        text, count = _SGR_MOUSE_BARE_RE.subn("", text)
+        had_mouse_reports = had_mouse_reports or count > 0
+
+    return text, had_mouse_reports
+
+
+def _strip_leaked_terminal_responses(text: str) -> str:
+    """Compatibility wrapper returning only cleaned text."""
+    cleaned, _ = _strip_leaked_terminal_responses_with_meta(text)
+    return cleaned


 def _collect_query_images(query: str | None, image_arg: str | None = None) -> tuple[str, list[Path]]:
@@ -1981,6 +1982,8 @@ class HermesCLI:
        self._stream_box_opened = False  # True once the response box header is printed
        self._reasoning_preview_buf = ""  # Coalesce tiny reasoning chunks for [thinking] output
        self._pending_edit_snapshots = {}
+        self._last_input_mode_recovery = 0.0
+        self._input_mode_recovery_notice_shown = False
        
        # Configuration - priority: CLI args > env vars > config file
        # Model comes from: CLI arg or config.yaml (single source of truth).
@@ -3157,6 +3160,8 @@ class HermesCLI:
            return "Processing skills command..."
        if cmd_lower == "/reload-mcp":
            return "Reloading MCP servers..."
+        if cmd_lower == "/reload-skills" or cmd_lower == "/reload_skills":
+            return "Reloading skills..."
        if cmd_lower.startswith("/browser"):
            return "Configuring browser..."
        return "Processing command..."
@@ -4170,6 +4175,37 @@ class HermesCLI:
        sys.stdout.write(seq)
        sys.stdout.flush()

+    def _recover_terminal_input_modes(self, *, reason: str) -> None:
+        """Best-effort reset when leaked mouse reports indicate mode drift."""
+        now = time.monotonic()
+        # Rate-limit to avoid thrashing if a terminal floods reports.
+        if now - self._last_input_mode_recovery < 0.5:
+            return
+        self._last_input_mode_recovery = now
+
+        out = getattr(self, "_app", None)
+        output = getattr(out, "output", None) if out else None
+        try:
+            if output and hasattr(output, "write_raw"):
+                output.write_raw(_TERMINAL_INPUT_MODE_RESET_SEQ)
+                output.flush()
+            elif output and hasattr(output, "write"):
+                output.write(_TERMINAL_INPUT_MODE_RESET_SEQ)
+                output.flush()
+            else:
+                sys.stdout.write(_TERMINAL_INPUT_MODE_RESET_SEQ)
+                sys.stdout.flush()
+        except Exception:
+            return
+
+        logger.warning("Recovered terminal input modes after leak: %s", reason)
+        if not self._input_mode_recovery_notice_shown:
+            self._input_mode_recovery_notice_shown = True
+            _cprint(
+                f"  {_DIM}Recovered terminal input modes after leaked mouse reports. "
+                f"If this repeats, run /new or restart this tab.{_RST}"
+            )
+
    def _handle_copy_command(self, cmd_original: str) -> None:
        """Handle /copy [number] — copy assistant output to clipboard."""
        parts = cmd_original.split(maxsplit=1)
@@ -4860,6 +4896,22 @@ class HermesCLI:
                    )
                except Exception:
                    pass
+            # Notify memory providers that session_id rotated to a fresh
+            # conversation. reset=True signals providers to flush accumulated
+            # per-session state (_session_turns, _turn_counter, _document_id).
+            # Fires BEFORE the plugin on_session_reset hook (shell hooks only
+            # see the new id; Python providers see the transition). See #6672.
+            try:
+                _mm = getattr(self.agent, "_memory_manager", None)
+                if _mm is not None:
+                    _mm.on_session_switch(
+                        self.session_id,
+                        parent_session_id=old_session_id or "",
+                        reset=True,
+                        reason="new_session",
+                    )
+            except Exception:
+                pass
            self._notify_session_boundary("on_session_reset")

        if not silent:
@@ -4912,6 +4964,7 @@ class HermesCLI:
            _cprint("  Already on that session.")
            return

+        old_session_id = self.session_id
        # End current session
        try:
            self._session_db.end_session(self.session_id, "resumed_other")
@@ -4949,6 +5002,22 @@ class HermesCLI:
            if hasattr(self.agent, "_invalidate_system_prompt"):
                self.agent._invalidate_system_prompt()

+            # Notify memory providers that session_id rotated to a resumed
+            # session. reset=False — the provider's accumulated state is
+            # still valid; it just needs to target the new session_id for
+            # subsequent writes. See #6672.
+            try:
+                _mm = getattr(self.agent, "_memory_manager", None)
+                if _mm is not None:
+                    _mm.on_session_switch(
+                        target_id,
+                        parent_session_id=old_session_id or "",
+                        reset=False,
+                        reason="resume",
+                    )
+            except Exception:
+                pass
+
        title_part = f" \"{session_meta['title']}\"" if session_meta.get("title") else ""
        msg_count = len([m for m in self.conversation_history if m.get("role") == "user"])
        if self.conversation_history:
@@ -5069,6 +5138,22 @@ class HermesCLI:
            if hasattr(self.agent, "_invalidate_system_prompt"):
                self.agent._invalidate_system_prompt()

+            # Notify memory providers that session_id forked to a new branch.
+            # reset=False — the branched session carries the transcript
+            # forward, so provider state tracks the lineage. parent_session_id
+            # links the branch back to the original. See #6672.
+            try:
+                _mm = getattr(self.agent, "_memory_manager", None)
+                if _mm is not None:
+                    _mm.on_session_switch(
+                        new_session_id,
+                        parent_session_id=parent_session_id or "",
+                        reset=False,
+                        reason="branch",
+                    )
+            except Exception:
+                pass
+
        msg_count = len([m for m in self.conversation_history if m.get("role") == "user"])
        _cprint(
            f"  ⑂ Branched session \"{branch_title}\""
@@ -5327,6 +5412,7 @@ class HermesCLI:
                base_url=result.base_url or self.base_url or "",
                api_key=result.api_key or self.api_key or "",
                model_info=mi,
+                config_context_length=getattr(self.agent, "_config_context_length", None) if self.agent else None,
            )
            if ctx:
                _cprint(f"    Context: {ctx:,} tokens")
@@ -5457,6 +5543,8 @@ class HermesCLI:
            try:
                providers = list_authenticated_providers(
                    current_provider=self.provider or "",
+                    current_base_url=self.base_url or "",
+                    current_model=self.model or "",
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=50,
@@ -5551,6 +5639,7 @@ class HermesCLI:
            base_url=result.base_url or self.base_url or "",
            api_key=result.api_key or self.api_key or "",
            model_info=mi,
+            config_context_length=getattr(self.agent, "_config_context_length", None) if self.agent else None,
        )
        if ctx:
            _cprint(f"    Context: {ctx:,} tokens")
@@ -5975,7 +6064,29 @@ class HermesCLI:

        print(f"(._.) Unknown cron command: {subcommand}")
        print("  Available: list, add, edit, pause, resume, run, remove")
-    
+
+    def _handle_curator_command(self, cmd: str):
+        """Handle /curator slash command.
+
+        Delegates to hermes_cli.curator so the CLI and the `hermes curator`
+        subcommand share the same handler set.
+        """
+        import shlex
+
+        tokens = shlex.split(cmd)[1:] if cmd else []
+        if not tokens:
+            tokens = ["status"]
+
+        try:
+            from hermes_cli.curator import cli_main
+            cli_main(tokens)
+        except SystemExit:
+            # argparse calls sys.exit() on --help or errors; swallow so we
+            # don't kill the interactive session.
+            pass
+        except Exception as exc:
+            print(f"(._.) curator: {exc}")
+
    def _handle_skills_command(self, cmd: str):
        """Handle /skills slash command — delegates to hermes_cli.skills_hub."""
        from hermes_cli.skills_hub import handle_skills_slash
@@ -6219,6 +6330,8 @@ class HermesCLI:
            self.save_conversation()
        elif canonical == "cron":
            self._handle_cron_command(cmd_original)
+        elif canonical == "curator":
+            self._handle_curator_command(cmd_original)
        elif canonical == "skills":
            with self._busy_command(self._slow_command_status(cmd_original)):
                self._handle_skills_command(cmd_original)
@@ -6232,6 +6345,8 @@ class HermesCLI:
            self._console_print(f"  Status bar {state}")
        elif canonical == "verbose":
            self._toggle_verbose()
+        elif canonical == "footer":
+            self._handle_footer_command(cmd_original)
        elif canonical == "yolo":
            self._toggle_yolo()
        elif canonical == "reasoning":
@@ -6257,8 +6372,13 @@ class HermesCLI:
            count = reload_env()
            print(f"  Reloaded .env ({count} var(s) updated)")
        elif canonical == "reload-mcp":
+            # Interactive reload: confirm first (unless the user has opted out).
+            # The auto-reload path (file watcher) calls _reload_mcp directly
+            # without this confirmation.
+            self._confirm_and_reload_mcp(cmd_original)
+        elif canonical == "reload-skills":
            with self._busy_command(self._slow_command_status(cmd_original)):
-                self._reload_mcp()
+                self._reload_skills()
        elif canonical == "browser":
            self._handle_browser_command(cmd_original)
        elif canonical == "plugins":
@@ -6600,34 +6720,7 @@ class HermesCLI:

        Returns True if a launch command was executed (doesn't guarantee success).
        """
-        import subprocess as _sp
-
-        candidates = _get_chrome_debug_candidates(system)
-
-        if not candidates:
-            return False
-
-        # Dedicated profile dir so debug Chrome won't collide with normal Chrome
-        data_dir = str(_hermes_home / "chrome-debug")
-        os.makedirs(data_dir, exist_ok=True)
-
-        chrome = candidates[0]
-        try:
-            _sp.Popen(
-                [
-                    chrome,
-                    f"--remote-debugging-port={port}",
-                    f"--user-data-dir={data_dir}",
-                    "--no-first-run",
-                    "--no-default-browser-check",
-                ],
-                stdout=_sp.DEVNULL,
-                stderr=_sp.DEVNULL,
-                start_new_session=True,  # detach from terminal
-            )
-            return True
-        except Exception:
-            return False
+        return try_launch_chrome_debug(port, system)

    def _handle_browser_command(self, cmd: str):
        """Handle /browser connect|disconnect|status — manage live Chrome CDP connection."""
@@ -6636,13 +6729,44 @@ class HermesCLI:
        parts = cmd.strip().split(None, 1)
        sub = parts[1].lower().strip() if len(parts) > 1 else "status"

-        _DEFAULT_CDP = "http://127.0.0.1:9222"
+        _DEFAULT_CDP = DEFAULT_BROWSER_CDP_URL
        current = os.environ.get("BROWSER_CDP_URL", "").strip()

        if sub.startswith("connect"):
            # Optionally accept a custom CDP URL: /browser connect ws://host:port
            connect_parts = cmd.strip().split(None, 2)  # ["/browser", "connect", "ws://..."]
            cdp_url = connect_parts[2].strip() if len(connect_parts) > 2 else _DEFAULT_CDP
+            parsed_cdp = urlparse(cdp_url if "://" in cdp_url else f"http://{cdp_url}")
+            if parsed_cdp.scheme not in {"http", "https", "ws", "wss"}:
+                print()
+                print(
+                    f"   ⚠ Unsupported browser url scheme: {parsed_cdp.scheme or '(missing)'} "
+                    "(expected one of: http, https, ws, wss)"
+                )
+                print()
+                return
+            try:
+                _port = parsed_cdp.port or (443 if parsed_cdp.scheme in {"https", "wss"} else 80)
+            except ValueError:
+                print()
+                print(f"   ⚠ Invalid port in browser url: {cdp_url}")
+                print()
+                return
+            if not parsed_cdp.hostname:
+                print()
+                print(f"   ⚠ Missing host in browser url: {cdp_url}")
+                print()
+                return
+            _host = parsed_cdp.hostname
+            if parsed_cdp.path.startswith("/devtools/browser/"):
+                cdp_url = parsed_cdp.geturl()
+            else:
+                cdp_url = parsed_cdp._replace(
+                    path="",
+                    params="",
+                    query="",
+                    fragment="",
+                ).geturl()

            # Clear any existing browser sessions so the next tool call uses the new backend
            try:
@@ -6653,20 +6777,13 @@ class HermesCLI:

            print()

-            # Extract port for connectivity checks
-            _port = 9222
-            try:
-                _port = int(cdp_url.rsplit(":", 1)[-1].split("/")[0])
-            except (ValueError, IndexError):
-                pass
-
            # Check if Chrome is already listening on the debug port
            import socket
            _already_open = False
            try:
                s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                s.settimeout(1)
-                s.connect(("127.0.0.1", _port))
+                s.connect((_host, _port))
                s.close()
                _already_open = True
            except (OSError, socket.timeout):
@@ -6684,7 +6801,7 @@ class HermesCLI:
                        try:
                            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                            s.settimeout(1)
-                            s.connect(("127.0.0.1", _port))
+                            s.connect((_host, _port))
                            s.close()
                            _already_open = True
                            break
@@ -6697,33 +6814,22 @@ class HermesCLI:
                        print("     Try again in a few seconds — the debug instance may still be starting")
                else:
                    print("   ⚠ Could not auto-launch Chrome")
-                    # Show manual instructions as fallback
-                    _data_dir = str(_hermes_home / "chrome-debug")
                    sys_name = _plat.system()
-                    if sys_name == "Darwin":
-                        chrome_cmd = (
-                            'open -a "Google Chrome" --args'
-                            f" --remote-debugging-port=9222"
-                            f' --user-data-dir="{_data_dir}"'
-                            " --no-first-run --no-default-browser-check"
-                        )
-                    elif sys_name == "Windows":
-                        chrome_cmd = (
-                            f'chrome.exe --remote-debugging-port=9222'
-                            f' --user-data-dir="{_data_dir}"'
-                            f" --no-first-run --no-default-browser-check"
-                        )
+                    chrome_cmd = manual_chrome_debug_command(_port, sys_name)
+                    if chrome_cmd:
+                        print(f"     Launch Chrome manually:")
+                        print(f"     {chrome_cmd}")
                    else:
-                        chrome_cmd = (
-                            f"google-chrome --remote-debugging-port=9222"
-                            f' --user-data-dir="{_data_dir}"'
-                            f" --no-first-run --no-default-browser-check"
-                        )
-                    print(f"     Launch Chrome manually:")
-                    print(f"     {chrome_cmd}")
+                        print("     No Chrome/Chromium executable found in this environment")
            else:
                print(f"   ⚠ Port {_port} is not reachable at {cdp_url}")

+            if not _already_open:
+                print()
+                print("Browser not connected — start Chrome with remote debugging and retry /browser connect")
+                print()
+                return
+
            os.environ["BROWSER_CDP_URL"] = cdp_url
            # Eagerly start the CDP supervisor so pending_dialogs + frame_tree
            # show up in the next browser_snapshot.  No-op if already started.
@@ -6859,6 +6965,58 @@ class HermesCLI:
        if self._apply_tui_skin_style():
            print("  Prompt + TUI colors updated.")

+    def _handle_footer_command(self, cmd_original: str) -> None:
+        """Toggle or inspect ``display.runtime_footer.enabled`` from the CLI.
+
+        Usage:
+            /footer           → toggle
+            /footer on|off    → explicit
+            /footer status    → show current state
+        """
+        from hermes_cli.config import load_config
+        from hermes_cli.colors import Colors as _Colors
+
+        # Parse arg
+        arg = ""
+        try:
+            parts = (cmd_original or "").strip().split(None, 1)
+            if len(parts) > 1:
+                arg = parts[1].strip().lower()
+        except Exception:
+            arg = ""
+
+        cfg = load_config() or {}
+        footer_cfg = ((cfg.get("display") or {}).get("runtime_footer") or {})
+        current = bool(footer_cfg.get("enabled", False))
+        fields = footer_cfg.get("fields") or ["model", "context_pct", "cwd"]
+
+        if arg in ("status", "?"):
+            state = "ON" if current else "OFF"
+            _cprint(
+                f"  {_Colors.BOLD}Runtime footer:{_Colors.RESET} {state}\n"
+                f"  Fields: {', '.join(fields)}"
+            )
+            return
+
+        if arg in ("on", "enable", "true", "1"):
+            new_state = True
+        elif arg in ("off", "disable", "false", "0"):
+            new_state = False
+        elif arg == "":
+            new_state = not current
+        else:
+            _cprint("  Usage: /footer [on|off|status]")
+            return
+
+        if save_config_value("display.runtime_footer.enabled", new_state):
+            state = (
+                f"{_Colors.GREEN}ON{_Colors.RESET}" if new_state
+                else f"{_Colors.DIM}OFF{_Colors.RESET}"
+            )
+            _cprint(f"  Runtime footer: {state}")
+        else:
+            _cprint("  Failed to save runtime_footer setting to config.yaml")
+
    def _toggle_verbose(self):
        """Cycle tool progress mode: off → new → all → verbose → off."""
        cycle = ["off", "new", "all", "verbose"]
@@ -7099,9 +7257,15 @@ class HermesCLI:
                else:
                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")

+                # Pass None as system_message so _compress_context rebuilds
+                # the system prompt from scratch via _build_system_prompt(None).
+                # Passing _cached_system_prompt caused duplication because
+                # _build_system_prompt appends system_message to prompt_parts
+                # which already contain the agent identity — resulting in the
+                # identity block appearing twice (issue #15281).
                compressed, _ = self.agent._compress_context(
                    original_history,
-                    self.agent._cached_system_prompt or "",
+                    None,
                    approx_tokens=approx_tokens,
                    focus_topic=focus_topic or None,
                )
@@ -7225,6 +7389,8 @@ class HermesCLI:
        provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
        base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
        api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
+        # Lazy import — pulls the OpenAI SDK chain, only needed here.
+        from agent.account_usage import fetch_account_usage, render_account_usage_lines
        account_snapshot = None
        if provider:
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
@@ -7339,6 +7505,77 @@ class HermesCLI:
        if _reload_thread.is_alive():
            print("  ⚠️  MCP reload timed out (30s). Some servers may not have reconnected.")

+    def _confirm_and_reload_mcp(self, cmd_original: str = "") -> None:
+        """Interactive /reload-mcp — confirm with the user, then reload.
+
+        Reloading MCP tools invalidates the provider prompt cache for the
+        active session (tool schemas are baked into the system prompt).
+        The next message re-sends full input tokens — can be expensive on
+        long-context or high-reasoning models.
+
+        Three options: Approve Once, Always Approve (persists
+        ``approvals.mcp_reload_confirm: false`` so future reloads run
+        without this prompt), Cancel.  Gated by
+        ``approvals.mcp_reload_confirm`` — default on.
+        """
+        # Gate check — respects prior "Always Approve" clicks.
+        try:
+            cfg = load_cli_config()
+            approvals = cfg.get("approvals") if isinstance(cfg, dict) else None
+            confirm_required = True
+            if isinstance(approvals, dict):
+                confirm_required = bool(approvals.get("mcp_reload_confirm", True))
+        except Exception:
+            confirm_required = True
+
+        if not confirm_required:
+            with self._busy_command(self._slow_command_status(cmd_original)):
+                self._reload_mcp()
+            return
+
+        # Render warning + prompt.  Use a single-line prompt so the user
+        # sees the warning as output and types a response into the composer.
+        print()
+        print("⚠️  /reload-mcp — Prompt cache invalidation warning")
+        print()
+        print("  Reloading MCP servers rebuilds the tool set for this session and")
+        print("  invalidates the provider prompt cache.  The next message will")
+        print("  re-send full input tokens (can be expensive on long-context or")
+        print("  high-reasoning models).")
+        print()
+        print("  [1] Approve Once   — reload now")
+        print("  [2] Always Approve — reload now and silence this prompt permanently")
+        print("  [3] Cancel         — leave MCP tools unchanged")
+        print()
+        raw = self._prompt_text_input("Choice [1/2/3]: ")
+        if raw is None:
+            print("🟡 /reload-mcp cancelled (no input).")
+            return
+        choice_raw = raw.strip().lower()
+        if choice_raw in ("1", "once", "approve", "yes", "y", "ok"):
+            choice = "once"
+        elif choice_raw in ("2", "always", "remember"):
+            choice = "always"
+        elif choice_raw in ("3", "cancel", "nevermind", "no", "n", ""):
+            choice = "cancel"
+        else:
+            print(f"🟡 Unrecognized choice '{raw}'. /reload-mcp cancelled.")
+            return
+
+        if choice == "cancel":
+            print("🟡 /reload-mcp cancelled. MCP tools unchanged.")
+            return
+
+        if choice == "always":
+            if save_config_value("approvals.mcp_reload_confirm", False):
+                print("🔒 Future /reload-mcp calls will run without confirmation.")
+                print("   Re-enable via `approvals.mcp_reload_confirm: true` in config.yaml.")
+            else:
+                print("⚠️  Couldn't persist opt-out — reloading once.")
+
+        with self._busy_command(self._slow_command_status(cmd_original)):
+            self._reload_mcp()
+
    def _reload_mcp(self):
        """Reload MCP servers: disconnect all, re-read config.yaml, reconnect.

@@ -7424,6 +7661,78 @@ class HermesCLI:
        except Exception as e:
            print(f"  ❌ MCP reload failed: {e}")

+    def _reload_skills(self) -> None:
+        """Reload skills: rescan ~/.hermes/skills/ and queue a note for the
+        next user turn.
+
+        Skills don't need to live in the system prompt for the model to use
+        them (they're invoked via ``/skill-name``, ``skills_list``, or
+        ``skill_view`` at runtime), so this does NOT clear the prompt cache.
+        It rescans the slash-command map, prints the diff for the user, and
+        — if any skills were added or removed — queues a one-shot note that
+        gets prepended to the next user message. This preserves message
+        alternation (no phantom user turn injected out of band) and keeps
+        prompt caching intact.
+        """
+        try:
+            from agent.skill_commands import reload_skills
+
+            if not self._command_running:
+                print("🔄 Reloading skills...")
+
+            result = reload_skills()
+            added = result.get("added", [])      # [{"name", "description"}, ...]
+            removed = result.get("removed", [])  # [{"name", "description"}, ...]
+            total = result.get("total", 0)
+
+            if not added and not removed:
+                print("  No new skills detected.")
+                print(f"  📚 {total} skill(s) available")
+                return
+
+            def _fmt_line(item: dict) -> str:
+                nm = item.get("name", "")
+                desc = item.get("description", "")
+                return f"    - {nm}: {desc}" if desc else f"    - {nm}"
+
+            if added:
+                print("  ➕ Added Skills:")
+                for item in added:
+                    print(f"  {_fmt_line(item)}")
+            if removed:
+                print("  ➖ Removed Skills:")
+                for item in removed:
+                    print(f"  {_fmt_line(item)}")
+            print(f"  📚 {total} skill(s) available")
+
+            # Queue a one-shot note for the NEXT user turn. The CLI's agent
+            # loop prepends ``_pending_skills_reload_note`` (if set) to the
+            # API-call-local message at ~L8770, then clears it — same
+            # pattern as ``_pending_model_switch_note``. Nothing is written
+            # to conversation_history here, so message alternation stays
+            # intact and no out-of-band user turn is persisted.
+            #
+            # Format matches how the system prompt renders pre-existing
+            # skills (``    - name: description``) so the model reads the
+            # diff in the same shape as its original skill catalog.
+            sections = ["[USER INITIATED SKILLS RELOAD:"]
+            if added:
+                sections.append("")
+                sections.append("Added Skills:")
+                for item in added:
+                    sections.append(_fmt_line(item))
+            if removed:
+                sections.append("")
+                sections.append("Removed Skills:")
+                for item in removed:
+                    sections.append(_fmt_line(item))
+            sections.append("")
+            sections.append("Use skills_list to see the updated catalog.]")
+            self._pending_skills_reload_note = "\n".join(sections)
+
+        except Exception as e:
+            print(f"  ❌ Skills reload failed: {e}")
+
    # ====================================================================
    # Tool-call generation indicator (shown during streaming)
    # ====================================================================
@@ -8505,7 +8814,8 @@ class HermesCLI:
                from agent.context_references import preprocess_context_references
                from agent.model_metadata import get_model_context_length
                _ctx_len = get_model_context_length(
-                    self.model, base_url=self.base_url or "", api_key=self.api_key or "")
+                    self.model, base_url=self.base_url or "", api_key=self.api_key or "",
+                    config_context_length=getattr(self.agent, "_config_context_length", None) if self.agent else None)
                _ctx_result = preprocess_context_references(
                    message, cwd=os.getcwd(), context_length=_ctx_len)
                if _ctx_result.expanded or _ctx_result.blocked:
@@ -8632,6 +8942,13 @@ class HermesCLI:
                if _msn:
                    agent_message = _msn + "\n\n" + agent_message
                    self._pending_model_switch_note = None
+                # Prepend pending /reload-skills note so the model sees which
+                # skills were added/removed before handling this turn. Same
+                # one-shot queue pattern as the model-switch note above.
+                _srn = getattr(self, '_pending_skills_reload_note', None)
+                if _srn:
+                    agent_message = _srn + "\n\n" + agent_message
+                    self._pending_skills_reload_note = None
                try:
                    result = self.agent.run_conversation(
                        user_message=agent_message,
@@ -8814,6 +9131,13 @@ class HermesCLI:
                        response,
                        self.conversation_history,
                        failure_callback=_title_failure_cb,
+                        main_runtime={
+                            "model": self.model,
+                            "provider": self.provider,
+                            "base_url": self.base_url,
+                            "api_key": self.api_key,
+                            "api_mode": self.api_mode,
+                        },
                    )
                except Exception:
                    pass
@@ -9271,6 +9595,21 @@ class HermesCLI:
            self._console_print(f"[dim {_tip_color}]✦ Tip: {_tip}[/]")
        except Exception:
            pass  # Tips are non-critical — never break startup
+
+        # Curator — kick off a background skill-maintenance pass on startup
+        # if the schedule says we're due.  Runs in a daemon thread so it
+        # never blocks the interactive loop.  Best-effort; any failure is
+        # swallowed to avoid breaking session startup.
+        try:
+            from agent.curator import maybe_run_curator
+            maybe_run_curator(
+                idle_for_seconds=float("inf"),  # CLI startup = fully idle
+                on_summary=lambda msg: self._console_print(
+                    f"[dim #6b7684]💾 {msg}[/]"
+                ),
+            )
+        except Exception:
+            pass
        if self.preloaded_skills and not self._startup_skills_line_shown:
            skills_label = ", ".join(self.preloaded_skills)
            self._console_print(
@@ -9940,7 +10279,9 @@ class HermesCLI:
            # so the 5-line collapse threshold and display are consistent.
            pasted_text = pasted_text.replace('\r\n', '\n').replace('\r', '\n')
            pasted_text = _strip_leaked_bracketed_paste_wrappers(pasted_text)
-            pasted_text = _strip_leaked_terminal_responses(pasted_text)
+            pasted_text, _had_mouse_reports = _strip_leaked_terminal_responses_with_meta(pasted_text)
+            if _had_mouse_reports:
+                self._recover_terminal_input_modes(reason="mouse reports leaked into bracketed paste payload")
            if _should_auto_attach_clipboard_image_on_paste(pasted_text) and self._try_attach_clipboard_image():
                event.app.invalidate()
            if pasted_text:
@@ -10094,7 +10435,9 @@ class HermesCLI:
               event so it never triggers this.
            """
            text = _strip_leaked_bracketed_paste_wrappers(buf.text)
-            text = _strip_leaked_terminal_responses(text)
+            text, _had_mouse_reports = _strip_leaked_terminal_responses_with_meta(text)
+            if _had_mouse_reports:
+                self._recover_terminal_input_modes(reason="mouse reports leaked into prompt buffer")
            if text != buf.text:
                cursor = min(buf.cursor_position, len(text))
                _paste_just_collapsed[0] = True
@@ -10847,7 +11190,9 @@ class HermesCLI:

                    if isinstance(user_input, str):
                        user_input = _strip_leaked_bracketed_paste_wrappers(user_input)
-                        user_input = _strip_leaked_terminal_responses(user_input)
+                        user_input, _had_mouse_reports = _strip_leaked_terminal_responses_with_meta(user_input)
+                        if _had_mouse_reports:
+                            self._recover_terminal_input_modes(reason="mouse reports leaked into submitted input")
                    
                    # Check for commands — but detect dragged/pasted file paths first.
                    # See _detect_file_drop() for details.
@@ -21,6 +21,7 @@ from typing import Optional, Dict, List, Any, Union
 logger = logging.getLogger(__name__)

 from hermes_time import now as _hermes_now
+from utils import atomic_replace

 try:
    from croniter import croniter
@@ -312,13 +313,21 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
    elif schedule["kind"] == "cron":
        if not HAS_CRONITER:
            logger.warning(
-                "Cannot compute next run for cron schedule %r: 'croniter' "
-                "is not installed. Install the 'cron' extra (pip install "
-                "'hermes-agent[cron]') to re-enable recurring cron jobs.",
+                "Cannot compute next run for cron schedule %r: 'croniter' is "
+                "not installed. croniter is a core dependency as of v0.9.x; "
+                "reinstall hermes-agent or run 'pip install croniter' in your "
+                "runtime env.",
                schedule.get("expr"),
            )
            return None
-        cron = croniter(schedule["expr"], now)
+        # Use last_run_at as the croniter base when available, consistent
+        # with interval jobs.  This ensures that after a crash/restart,
+        # the next run is anchored to the actual last execution time
+        # rather than to an arbitrary restart time.
+        base_time = now
+        if last_run_at:
+            base_time = _ensure_aware(datetime.fromisoformat(last_run_at))
+        cron = croniter(schedule["expr"], base_time)
        next_run = cron.get_next(datetime)
        return next_run.isoformat()

@@ -367,7 +376,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
            json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, JOBS_FILE)
+        atomic_replace(tmp_path, JOBS_FILE)
        _secure_file(JOBS_FILE)
    except BaseException:
        try:
@@ -863,7 +872,7 @@ def save_job_output(job_id: str, output: str):
            f.write(output)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, output_file)
+        atomic_replace(tmp_path, output_file)
        _secure_file(output_file)
    except BaseException:
        try:
@@ -198,7 +198,9 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
            if resolved:
                parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
                if resolved_is_explicit:
-                    chat_id, thread_id = parsed_chat_id, parsed_thread_id
+                    chat_id = parsed_chat_id
+                    if parsed_thread_id is not None:
+                        thread_id = parsed_thread_id
                else:
                    chat_id = resolved
        except Exception:
@@ -231,12 +233,32 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
    }


+def _normalize_deliver_value(deliver) -> str:
+    """Normalize a stored/submitted ``deliver`` value to its canonical string form.
+
+    The contract is that ``deliver`` is a string (``"local"``, ``"origin"``,
+    ``"telegram"``, ``"telegram:-1001:17"``, or comma-separated combinations).
+    Historically some callers — MCP clients passing an array, direct edits of
+    ``jobs.json``, or stale code paths — have stored a list/tuple like
+    ``["telegram"]``.  ``str(["telegram"])`` would serialize to the literal
+    string ``"['telegram']"``, which is not a known platform and fails
+    resolution silently.  Flatten lists/tuples into a comma-separated string
+    so both forms work.  Returns ``"local"`` for anything falsy.
+    """
+    if deliver is None or deliver == "":
+        return "local"
+    if isinstance(deliver, (list, tuple)):
+        parts = [str(p).strip() for p in deliver if str(p).strip()]
+        return ",".join(parts) if parts else "local"
+    return str(deliver)
+
+
 def _resolve_delivery_targets(job: dict) -> List[dict]:
    """Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
-    deliver = job.get("deliver", "local")
+    deliver = _normalize_deliver_value(job.get("deliver", "local"))
    if deliver == "local":
        return []
-    parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
+    parts = [p.strip() for p in deliver.split(",") if p.strip()]
    seen = set()
    targets = []
    for part in parts:
@@ -255,13 +277,21 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
    return targets[0] if targets else None


-# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
-_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
+# Media extension sets — audio routing is centralized in gateway.platforms.base
+# via should_send_media_as_audio() so Telegram-specific rules stay in one place.
 _VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
 _IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'})


-def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None:
+def _send_media_via_adapter(
+    adapter,
+    chat_id: str,
+    media_files: list,
+    metadata: dict | None,
+    loop,
+    job: dict,
+    platform=None,
+) -> None:
    """Send extracted MEDIA files as native platform attachments via a live adapter.

    Routes each file to the appropriate adapter method (send_voice, send_image_file,
@@ -270,10 +300,13 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
    """
    from pathlib import Path

+    from gateway.platforms.base import should_send_media_as_audio
+
    for media_path, _is_voice in media_files:
        try:
            ext = Path(media_path).suffix.lower()
-            if ext in _AUDIO_EXTS:
+            route_platform = platform if platform is not None else getattr(adapter, "platform", None)
+            if should_send_media_as_audio(route_platform, ext, is_voice=_is_voice):
                coro = adapter.send_voice(chat_id=chat_id, audio_path=media_path, metadata=metadata)
            elif ext in _VIDEO_EXTS:
                coro = adapter.send_video(chat_id=chat_id, video_path=media_path, metadata=metadata)
@@ -319,27 +352,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
    from tools.send_message_tool import _send_to_platform
    from gateway.config import load_gateway_config, Platform

-    platform_map = {
-        "telegram": Platform.TELEGRAM,
-        "discord": Platform.DISCORD,
-        "slack": Platform.SLACK,
-        "whatsapp": Platform.WHATSAPP,
-        "signal": Platform.SIGNAL,
-        "matrix": Platform.MATRIX,
-        "mattermost": Platform.MATTERMOST,
-        "homeassistant": Platform.HOMEASSISTANT,
-        "dingtalk": Platform.DINGTALK,
-        "feishu": Platform.FEISHU,
-        "wecom": Platform.WECOM,
-        "wecom_callback": Platform.WECOM_CALLBACK,
-        "weixin": Platform.WEIXIN,
-        "email": Platform.EMAIL,
-        "sms": Platform.SMS,
-        "bluebubbles": Platform.BLUEBUBBLES,
-        "qqbot": Platform.QQBOT,
-        "yuanbao": Platform.YUANBAO,
-    }
-
    # Optionally wrap the content with a header/footer so the user knows this
    # is a cron delivery.  Wrapping is on by default; set cron.wrap_response: false
    # in config.yaml for clean output.
@@ -396,13 +408,23 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                job["id"], platform_name, chat_id, thread_id,
            )

-        platform = platform_map.get(platform_name.lower())
-        if not platform:
+        # Built-in names resolve to their enum member; plugin platform names
+        # create dynamic members via Platform._missing_().
+        try:
+            platform = Platform(platform_name.lower())
+        except (ValueError, KeyError):
            msg = f"unknown platform '{platform_name}'"
            logger.warning("Job '%s': %s", job["id"], msg)
            delivery_errors.append(msg)
            continue

+        pconfig = config.platforms.get(platform)
+        if not pconfig or not pconfig.enabled:
+            msg = f"platform '{platform_name}' not configured/enabled"
+            logger.warning("Job '%s': %s", job["id"], msg)
+            delivery_errors.append(msg)
+            continue
+
        # Prefer the live adapter when the gateway is running — this supports E2EE
        # rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
        runtime_adapter = (adapters or {}).get(platform)
@@ -433,7 +455,15 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option

                # Send extracted media files as native attachments via the live adapter
                if adapter_ok and media_files:
-                    _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
+                    _send_media_via_adapter(
+                        runtime_adapter,
+                        chat_id,
+                        media_files,
+                        send_metadata,
+                        loop,
+                        job,
+                        platform=platform,
+                    )

                if adapter_ok:
                    logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
@@ -445,13 +475,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                )

        if not delivered:
-            pconfig = config.platforms.get(platform)
-            if not pconfig or not pconfig.enabled:
-                msg = f"platform '{platform_name}' not configured/enabled"
-                logger.warning("Job '%s': %s", job["id"], msg)
-                delivery_errors.append(msg)
-                continue
-
            # Standalone path: run the async send in a fresh event loop (safe from any thread)
            coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
            try:
@@ -838,6 +861,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        chat_id=str(origin["chat_id"]) if origin else "",
        chat_name=origin.get("chat_name", "") if origin else "",
    )
+    _cron_delivery_vars = (
+        "HERMES_CRON_AUTO_DELIVER_PLATFORM",
+        "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
+        "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
+    )
+    for _var_name in _cron_delivery_vars:
+        _VAR_MAP[_var_name].set("")

    # Per-job working directory.  When set (and validated at create/update
    # time), we point TERMINAL_CWD at it so:
@@ -876,8 +906,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        if delivery_target:
            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
-            if delivery_target.get("thread_id") is not None:
-                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
+            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(
+                ""
+                if delivery_target.get("thread_id") is None
+                else str(delivery_target["thread_id"])
+            )

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -1011,10 +1044,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            # When a workdir is configured, inject AGENTS.md / CLAUDE.md /
-            # .cursorrules from that directory; otherwise preserve the old
-            # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
+            # Cron jobs should always inherit the user's SOUL.md identity from
+            # HERMES_HOME. When a workdir is configured, also inject project
+            # context files (AGENTS.md / CLAUDE.md / .cursorrules) from there.
+            # Without a workdir, keep cwd context discovery disabled.
            skip_context_files=not bool(_job_workdir),
+            load_soul_identity=True,
            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
@@ -1029,7 +1064,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        #
        # Uses the agent's built-in activity tracker (updated by
        # _touch_activity() on every tool call, API call, and stream delta).
-        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
+        _raw_cron_timeout = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
+        if _raw_cron_timeout:
+            try:
+                _cron_timeout = float(_raw_cron_timeout)
+            except (ValueError, TypeError):
+                logger.warning(
+                    "Invalid HERMES_CRON_TIMEOUT=%r; using default 600s",
+                    _raw_cron_timeout,
+                )
+                _cron_timeout = 600.0
+        else:
+            _cron_timeout = 600.0
        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
        _POLL_INTERVAL = 5.0
        _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
@@ -1104,6 +1150,21 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
            )

+        # If the agent itself reported failure (e.g. all retries exhausted on
+        # API errors, model abort, mid-run interrupt), do not silently mark the
+        # job as successful. run_agent populates `failed=True`/`completed=False`
+        # on these paths and may put the error into `final_response`, which
+        # would otherwise be delivered as if it were the agent's reply and the
+        # job's `last_status` set to "ok". Raise so the except handler below
+        # builds the proper failure tuple. (issue #17855)
+        if result.get("failed") is True or result.get("completed") is False:
+            _err_text = (
+                result.get("error")
+                or (result.get("final_response") or "").strip()
+                or "agent reported failure"
+            )
+            raise RuntimeError(_err_text)
+
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -1163,6 +1224,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
        # Clean up ContextVar session/delivery state for this job.
        clear_session_vars(_ctx_tokens)
+        for _var_name in _cron_delivery_vars:
+            _VAR_MAP[_var_name].set("")
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -34,6 +34,13 @@ services:
      # uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
      # - API_SERVER_HOST=0.0.0.0
      # - API_SERVER_KEY=${API_SERVER_KEY}
+      # Microsoft Teams — uncomment and fill in to enable Teams gateway.
+      # Register your bot at https://dev.botframework.com/ to get these values.
+      # - TEAMS_CLIENT_ID=${TEAMS_CLIENT_ID}
+      # - TEAMS_CLIENT_SECRET=${TEAMS_CLIENT_SECRET}
+      # - TEAMS_TENANT_ID=${TEAMS_TENANT_ID}
+      # - TEAMS_ALLOWED_USERS=${TEAMS_ALLOWED_USERS}
+      # - TEAMS_PORT=3978
    command: ["gateway", "run"]

  dashboard:
@@ -1,85 +0,0 @@
-"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
-
-This hook is always registered. It silently skips if no BOOT.md exists.
-To activate, create ``~/.hermes/BOOT.md`` with instructions for the
-agent to execute on every gateway restart.
-
-Example BOOT.md::
-
-    # Startup Checklist
-
-    1. Check if any cron jobs failed overnight
-    2. Send a status update to Discord #general
-    3. If there are errors in /opt/app/deploy.log, summarize them
-
-The agent runs in a background thread so it doesn't block gateway
-startup. If nothing needs attention, it replies with [SILENT] to
-suppress delivery.
-"""
-
-import logging
-import threading
-
-logger = logging.getLogger("hooks.boot-md")
-
-from hermes_constants import get_hermes_home
-HERMES_HOME = get_hermes_home()
-BOOT_FILE = HERMES_HOME / "BOOT.md"
-
-
-def _build_boot_prompt(content: str) -> str:
-    """Wrap BOOT.md content in a system-level instruction."""
-    return (
-        "You are running a startup boot checklist. Follow the BOOT.md "
-        "instructions below exactly.\n\n"
-        "---\n"
-        f"{content}\n"
-        "---\n\n"
-        "Execute each instruction. If you need to send a message to a "
-        "platform, use the send_message tool.\n"
-        "If nothing needs attention and there is nothing to report, "
-        "reply with ONLY: [SILENT]"
-    )
-
-
-def _run_boot_agent(content: str) -> None:
-    """Spawn a one-shot agent session to execute the boot instructions."""
-    try:
-        from run_agent import AIAgent
-
-        prompt = _build_boot_prompt(content)
-        agent = AIAgent(
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-            max_iterations=20,
-        )
-        result = agent.run_conversation(prompt)
-        response = result.get("final_response", "")
-        if response and "[SILENT]" not in response:
-            logger.info("boot-md completed: %s", response[:200])
-        else:
-            logger.info("boot-md completed (nothing to report)")
-    except Exception as e:
-        logger.error("boot-md agent failed: %s", e)
-
-
-async def handle(event_type: str, context: dict) -> None:
-    """Gateway startup handler — run BOOT.md if it exists."""
-    if not BOOT_FILE.exists():
-        return
-
-    content = BOOT_FILE.read_text(encoding="utf-8").strip()
-    if not content:
-        return
-
-    logger.info("Running BOOT.md (%d chars)", len(content))
-
-    # Run in a background thread so we don't block gateway startup.
-    thread = threading.Thread(
-        target=_run_boot_agent,
-        args=(content,),
-        name="boot-md",
-        daemon=True,
-    )
-    thread.start()
@@ -86,6 +86,16 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
            continue
        platforms[plat_name] = _build_from_sessions(plat_name)

+    # Include plugin-registered platforms (dynamic enum members aren't in
+    # Platform.__members__, so the loop above misses them).
+    try:
+        from gateway.platform_registry import platform_registry
+        for entry in platform_registry.plugin_entries():
+            if entry.name not in _SKIP_SESSION_DISCOVERY and entry.name not in platforms:
+                platforms[entry.name] = _build_from_sessions(entry.name)
+    except Exception:
+        pass
+
    directory = {
        "updated_at": datetime.now().isoformat(),
        "platforms": platforms,
@@ -13,7 +13,7 @@ import os
 import json
 from pathlib import Path
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Any
+from typing import Dict, List, Optional, Any, Callable
 from enum import Enum

 from hermes_cli.config import get_hermes_home
@@ -45,8 +45,19 @@ def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> st
    return default


+# Module-level cache for bundled platform plugin names (lives outside the
+# enum so it doesn't become an accidental enum member).
+_Platform__bundled_plugin_names: Optional[set] = None
+
+
 class Platform(Enum):
-    """Supported messaging platforms."""
+    """Supported messaging platforms.
+
+    Built-in platforms have explicit members.  Plugin platforms use dynamic
+    members created on-demand by ``_missing_()`` so that
+    ``Platform("irc")`` works without modifying this enum.  Dynamic members
+    are cached in ``_value2member_map_`` for identity-stable comparisons.
+    """
    LOCAL = "local"
    TELEGRAM = "telegram"
    DISCORD = "discord"
@@ -68,6 +79,76 @@ class Platform(Enum):
    BLUEBUBBLES = "bluebubbles"
    QQBOT = "qqbot"
    YUANBAO = "yuanbao"
+    @classmethod
+    def _missing_(cls, value):
+        """Accept unknown platform names only for known plugin adapters.
+
+        Creates a pseudo-member cached in ``_value2member_map_`` so that
+        ``Platform("irc") is Platform("irc")`` holds True (identity-stable).
+        Arbitrary strings are rejected to prevent enum pollution.
+        """
+        if not isinstance(value, str) or not value.strip():
+            return None
+        # Normalise to lowercase to avoid case mismatches in config
+        value = value.strip().lower()
+        # Check cache first (another call may have created it already)
+        if value in cls._value2member_map_:
+            return cls._value2member_map_[value]
+
+        # Only create pseudo-members for bundled plugin platforms (discovered
+        # via filesystem scan) or runtime-registered plugin platforms.
+        global _Platform__bundled_plugin_names
+        if _Platform__bundled_plugin_names is None:
+            _Platform__bundled_plugin_names = cls._scan_bundled_plugin_platforms()
+        if value in _Platform__bundled_plugin_names:
+            pseudo = object.__new__(cls)
+            pseudo._value_ = value
+            pseudo._name_ = value.upper().replace("-", "_").replace(" ", "_")
+            cls._value2member_map_[value] = pseudo
+            cls._member_map_[pseudo._name_] = pseudo
+            return pseudo
+
+        # Runtime-registered plugins (e.g. user-installed, discovered after
+        # the enum was defined).
+        try:
+            from gateway.platform_registry import platform_registry
+            if platform_registry.is_registered(value):
+                pseudo = object.__new__(cls)
+                pseudo._value_ = value
+                pseudo._name_ = value.upper().replace("-", "_").replace(" ", "_")
+                cls._value2member_map_[value] = pseudo
+                cls._member_map_[pseudo._name_] = pseudo
+                return pseudo
+        except Exception:
+            pass
+
+        return None
+
+    @classmethod
+    def _scan_bundled_plugin_platforms(cls) -> set:
+        """Return names of bundled platform plugins under ``plugins/platforms/``."""
+        names: set = set()
+        try:
+            platforms_dir = Path(__file__).parent.parent / "plugins" / "platforms"
+            if platforms_dir.is_dir():
+                for child in platforms_dir.iterdir():
+                    if (
+                        child.is_dir()
+                        and (child / "__init__.py").exists()
+                        and (
+                            (child / "plugin.yaml").exists()
+                            or (child / "plugin.yml").exists()
+                        )
+                    ):
+                        names.add(child.name.lower())
+        except Exception:
+            pass
+        return names
+
+
+# Snapshot of built-in platform values before any dynamic _missing_ lookups.
+# Used to distinguish real platforms from arbitrary strings.
+_BUILTIN_PLATFORM_VALUES = frozenset(m.value for m in Platform.__members__.values())


@dataclass
@@ -231,6 +312,44 @@ class StreamingConfig:
        )


+# -----------------------------------------------------------------------------
+# Built-in platform connection checkers
+# -----------------------------------------------------------------------------
+# Each callable receives a ``PlatformConfig`` and returns ``True`` when the
+# platform is sufficiently configured to be considered "connected".  Platforms
+# that rely on the generic ``token or api_key`` check (Telegram, Discord,
+# Slack, Matrix, Mattermost, HomeAssistant) do not need an entry here.
+_PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] = {
+    Platform.WEIXIN: lambda cfg: bool(
+        cfg.extra.get("account_id") and (cfg.token or cfg.extra.get("token"))
+    ),
+    Platform.WHATSAPP: lambda cfg: True,  # bridge handles auth
+    Platform.SIGNAL: lambda cfg: bool(cfg.extra.get("http_url")),
+    Platform.EMAIL: lambda cfg: bool(cfg.extra.get("address")),
+    Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
+    Platform.API_SERVER: lambda cfg: True,
+    Platform.WEBHOOK: lambda cfg: True,
+    Platform.FEISHU: lambda cfg: bool(cfg.extra.get("app_id")),
+    Platform.WECOM: lambda cfg: bool(cfg.extra.get("bot_id")),
+    Platform.WECOM_CALLBACK: lambda cfg: bool(
+        cfg.extra.get("corp_id") or cfg.extra.get("apps")
+    ),
+    Platform.BLUEBUBBLES: lambda cfg: bool(
+        cfg.extra.get("server_url") and cfg.extra.get("password")
+    ),
+    Platform.QQBOT: lambda cfg: bool(
+        cfg.extra.get("app_id") and cfg.extra.get("client_secret")
+    ),
+    Platform.YUANBAO: lambda cfg: bool(
+        cfg.extra.get("app_id") and cfg.extra.get("app_secret")
+    ),
+    Platform.DINGTALK: lambda cfg: bool(
+        (cfg.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID"))
+        and (cfg.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET"))
+    ),
+}
+
+
@dataclass
 class GatewayConfig:
    """
@@ -284,61 +403,43 @@ class GatewayConfig:
        for platform, config in self.platforms.items():
            if not config.enabled:
                continue
-            # Weixin requires both a token and an account_id
-            if platform == Platform.WEIXIN:
-                if config.extra.get("account_id") and (config.token or config.extra.get("token")):
-                    connected.append(platform)
-                continue
-            # Platforms that use token/api_key auth
-            if config.token or config.api_key:
+            if self._is_platform_connected(platform, config):
                connected.append(platform)
-            # WhatsApp uses enabled flag only (bridge handles auth)
-            elif platform == Platform.WHATSAPP:
-                connected.append(platform)
-            # Signal uses extra dict for config (http_url + account)
-            elif platform == Platform.SIGNAL and config.extra.get("http_url"):
-                connected.append(platform)
-            # Email uses extra dict for config (address + imap_host + smtp_host)
-            elif platform == Platform.EMAIL and config.extra.get("address"):
-                connected.append(platform)
-            # SMS uses api_key (Twilio auth token) — SID checked via env
-            elif platform == Platform.SMS and os.getenv("TWILIO_ACCOUNT_SID"):
-                connected.append(platform)
-            # API Server uses enabled flag only (no token needed)
-            elif platform == Platform.API_SERVER:
-                connected.append(platform)
-            # Webhook uses enabled flag only (secrets are per-route)
-            elif platform == Platform.WEBHOOK:
-                connected.append(platform)
-            # Feishu uses extra dict for app credentials
-            elif platform == Platform.FEISHU and config.extra.get("app_id"):
-                connected.append(platform)
-            # WeCom bot mode uses extra dict for bot credentials
-            elif platform == Platform.WECOM and config.extra.get("bot_id"):
-                connected.append(platform)
-            # WeCom callback mode uses corp_id or apps list
-            elif platform == Platform.WECOM_CALLBACK and (
-                config.extra.get("corp_id") or config.extra.get("apps")
-            ):
-                connected.append(platform)
-            # BlueBubbles uses extra dict for local server config
-            elif platform == Platform.BLUEBUBBLES and config.extra.get("server_url") and config.extra.get("password"):
-                connected.append(platform)
-            # QQBot uses extra dict for app credentials
-            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
-                connected.append(platform)
-            # Yuanbao uses extra dict for app credentials
-            elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
-                connected.append(platform)
-            # DingTalk uses client_id/client_secret from config.extra or env vars
-            elif platform == Platform.DINGTALK and (
-                config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
-            ) and (
-                config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
-            ):
-                connected.append(platform)
-        
        return connected
+
+    def _is_platform_connected(self, platform: Platform, config: PlatformConfig) -> bool:
+        """Check whether a single platform is sufficiently configured."""
+        # Weixin requires both a token and an account_id (checked first so
+        # the generic token branch doesn't let it through without account_id).
+        if platform == Platform.WEIXIN:
+            return bool(
+                config.extra.get("account_id")
+                and (config.token or config.extra.get("token"))
+            )
+
+        # Generic token/api_key auth covers Telegram, Discord, Slack, etc.
+        if config.token or config.api_key:
+            return True
+
+        # Platform-specific check
+        checker = _PLATFORM_CONNECTED_CHECKERS.get(platform)
+        if checker is not None:
+            return checker(config)
+
+        # Plugin-registered platforms
+        try:
+            from gateway.platform_registry import platform_registry
+            entry = platform_registry.get(platform.value)
+            if entry:
+                if entry.is_connected is not None:
+                    return entry.is_connected(config)
+                if entry.validate_config is not None:
+                    return entry.validate_config(config)
+                return True
+        except Exception:
+            pass  # Registry not yet initialised during early import
+
+        return False
    
    def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
        """Get the home channel for a platform."""
@@ -714,11 +815,21 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
-                if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
-                    gac = telegram_cfg["group_allowed_chats"]
-                    if isinstance(gac, list):
-                        gac = ",".join(str(v) for v in gac)
-                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
+                allowed_users = telegram_cfg.get("allow_from")
+                if allowed_users is not None and not os.getenv("TELEGRAM_ALLOWED_USERS"):
+                    if isinstance(allowed_users, list):
+                        allowed_users = ",".join(str(v) for v in allowed_users)
+                    os.environ["TELEGRAM_ALLOWED_USERS"] = str(allowed_users)
+                group_allowed_users = telegram_cfg.get("group_allow_from")
+                if group_allowed_users is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
+                    if isinstance(group_allowed_users, list):
+                        group_allowed_users = ",".join(str(v) for v in group_allowed_users)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(group_allowed_users)
+                group_allowed_chats = telegram_cfg.get("group_allowed_chats")
+                if group_allowed_chats is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_CHATS"):
+                    if isinstance(group_allowed_chats, list):
+                        group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
                if "disable_link_previews" in telegram_cfg:
                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                    if not isinstance(plat_data, dict):
@@ -1371,3 +1482,25 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            config.default_reset_policy.at_hour = int(reset_hour)
        except ValueError:
            pass
+
+    # Registry-driven enable for plugin platforms.  Built-ins have explicit
+    # blocks above; plugins expose check_fn() which is the single source of
+    # truth for "are my env vars set?".  When it returns True, ensure the
+    # platform is enabled so start() will create its adapter.
+    try:
+        from hermes_cli.plugins import discover_plugins
+        discover_plugins()  # idempotent
+        from gateway.platform_registry import platform_registry
+        for entry in platform_registry.plugin_entries():
+            try:
+                if not entry.check_fn():
+                    continue
+            except Exception as e:
+                logger.debug("check_fn for %s raised: %s", entry.name, e)
+                continue
+            platform = Platform(entry.name)
+            if platform not in config.platforms:
+                config.platforms[platform] = PlatformConfig()
+            config.platforms[platform].enabled = True
+    except Exception as e:
+        logger.debug("Plugin platform enable pass failed: %s", e)
@@ -21,6 +21,7 @@ Errors in hooks are caught and logged but never block the main pipeline.

 import asyncio
 import importlib.util
+import sys
 from typing import Any, Callable, Dict, List, Optional

 import yaml
@@ -52,19 +53,13 @@ class HookRegistry:
        return list(self._loaded_hooks)

    def _register_builtin_hooks(self) -> None:
-        """Register built-in hooks that are always active."""
-        try:
-            from gateway.builtin_hooks.boot_md import handle as boot_md_handle
+        """Register built-in hooks that are always active.

-            self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
-            self._loaded_hooks.append({
-                "name": "boot-md",
-                "description": "Run ~/.hermes/BOOT.md on gateway startup",
-                "events": ["gateway:startup"],
-                "path": "(builtin)",
-            })
-        except Exception as e:
-            print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
+        Currently empty — no shipped built-in hooks. Kept as the extension
+        point for future always-on gateway hooks so they drop in without
+        re-plumbing discover_and_load().
+        """
+        return

    def discover_and_load(self) -> None:
        """
@@ -103,16 +98,28 @@ class HookRegistry:
                    print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
                    continue

-                # Dynamically load the handler module
+                # Dynamically load the handler module.
+                # Register in sys.modules BEFORE exec_module so Pydantic /
+                # dataclasses / typing introspection can resolve forward
+                # references (triggered by `from __future__ import annotations`
+                # in the handler). Without this, a handler that declares a
+                # Pydantic BaseModel for webhook/event payloads fails at first
+                # dispatch with "TypeAdapter ... is not fully defined".
+                module_name = f"hermes_hook_{hook_name}"
                spec = importlib.util.spec_from_file_location(
-                    f"hermes_hook_{hook_name}", handler_path
+                    module_name, handler_path
                )
                if spec is None or spec.loader is None:
                    print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
                    continue

                module = importlib.util.module_from_spec(spec)
-                spec.loader.exec_module(module)
+                sys.modules[module_name] = module
+                try:
+                    spec.loader.exec_module(module)
+                except Exception:
+                    sys.modules.pop(module_name, None)
+                    raise

                handle_fn = getattr(module, "handle", None)
                if handle_fn is None:
@@ -28,6 +28,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_dir
+from utils import atomic_replace


 # Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
@@ -59,7 +60,7 @@ def _secure_write(path: Path, data: str) -> None:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, str(path))
+        atomic_replace(tmp_path, path)
        try:
            os.chmod(path, 0o600)
        except OSError:
@@ -0,0 +1,212 @@
+"""
+Platform Adapter Registry
+
+Allows platform adapters (built-in and plugin) to self-register so the gateway
+can discover and instantiate them without hardcoded if/elif chains.
+
+Built-in adapters continue to use the existing if/elif in _create_adapter()
+for now.  Plugin adapters register here via PluginContext.register_platform()
+and are looked up first -- if nothing is found the gateway falls through to
+the legacy code path.
+
+Usage (plugin side):
+
+    from gateway.platform_registry import platform_registry, PlatformEntry
+
+    platform_registry.register(PlatformEntry(
+        name="irc",
+        label="IRC",
+        adapter_factory=lambda cfg: IRCAdapter(cfg),
+        check_fn=check_requirements,
+        validate_config=lambda cfg: bool(cfg.extra.get("server")),
+        required_env=["IRC_SERVER"],
+        install_hint="pip install irc",
+    ))
+
+Usage (gateway side):
+
+    adapter = platform_registry.create_adapter("irc", platform_config)
+"""
+
+import logging
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PlatformEntry:
+    """Metadata and factory for a single platform adapter."""
+
+    # Identifier used in config.yaml (e.g. "irc", "viber").
+    name: str
+
+    # Human-readable label (e.g. "IRC", "Viber").
+    label: str
+
+    # Factory callable: receives a PlatformConfig, returns an adapter instance.
+    # Using a factory instead of a bare class lets plugins do custom init
+    # (e.g. passing extra kwargs, wrapping in try/except).
+    adapter_factory: Callable[[Any], Any]
+
+    # Returns True when the platform's dependencies are available.
+    check_fn: Callable[[], bool]
+
+    # Optional: given a PlatformConfig, is it properly configured?
+    # If None, the registry skips config validation and lets the adapter
+    # fail at connect() time with a descriptive error.
+    validate_config: Optional[Callable[[Any], bool]] = None
+
+    # Optional: given a PlatformConfig, is the platform connected/enabled?
+    # Used by ``GatewayConfig.get_connected_platforms()`` and setup UI status.
+    # If None, falls back to ``validate_config`` or ``check_fn``.
+    is_connected: Optional[Callable[[Any], bool]] = None
+
+    # Env vars this platform needs (for ``hermes setup`` display).
+    required_env: list = field(default_factory=list)
+
+    # Hint shown when check_fn returns False.
+    install_hint: str = ""
+
+    # Optional setup function for interactive configuration.
+    # Signature: () -> None (prompts user, saves env vars).
+    # If None, falls back to _setup_standard_platform (needs token_var + vars)
+    # or a generic "set these env vars" display.
+    setup_fn: Optional[Callable[[], None]] = None
+
+    # "builtin" or "plugin"
+    source: str = "plugin"
+
+    # Name of the plugin manifest that registered this entry (empty for
+    # built-ins).  Used by ``hermes gateway setup`` to auto-enable the
+    # owning plugin when the user configures its platform.
+    plugin_name: str = ""
+
+    # ── Auth env var names (for _is_user_authorized integration) ──
+    # E.g. "IRC_ALLOWED_USERS" — checked for comma-separated user IDs.
+    allowed_users_env: str = ""
+    # E.g. "IRC_ALLOW_ALL_USERS" — if truthy, all users authorized.
+    allow_all_env: str = ""
+
+    # ── Message limits ──
+    # Max message length for smart-chunking.  0 = no limit.
+    max_message_length: int = 0
+
+    # ── Privacy ──
+    # If True, session descriptions redact PII (phone numbers, etc.)
+    pii_safe: bool = False
+
+    # ── Display ──
+    # Emoji for CLI/gateway display (e.g. "💬")
+    emoji: str = "🔌"
+
+    # Whether this platform should appear in _UPDATE_ALLOWED_PLATFORMS
+    # (allows /update command from this platform).
+    allow_update_command: bool = True
+
+    # ── LLM guidance ──
+    # Platform hint injected into the system prompt (e.g. "You are on IRC.
+    # Do not use markdown.").  Empty string = no hint.
+    platform_hint: str = ""
+
+
+class PlatformRegistry:
+    """Central registry of platform adapters.
+
+    Thread-safe for reads (dict lookups are atomic under GIL).
+    Writes happen at startup during sequential discovery.
+    """
+
+    def __init__(self) -> None:
+        self._entries: dict[str, PlatformEntry] = {}
+
+    def register(self, entry: PlatformEntry) -> None:
+        """Register a platform adapter entry.
+
+        If an entry with the same name exists, it is replaced (last writer
+        wins -- this lets plugins override built-in adapters if desired).
+        """
+        if entry.name in self._entries:
+            prev = self._entries[entry.name]
+            logger.info(
+                "Platform '%s' re-registered (was %s, now %s)",
+                entry.name,
+                prev.source,
+                entry.source,
+            )
+        self._entries[entry.name] = entry
+        logger.debug("Registered platform adapter: %s (%s)", entry.name, entry.source)
+
+    def unregister(self, name: str) -> bool:
+        """Remove a platform entry.  Returns True if it existed."""
+        return self._entries.pop(name, None) is not None
+
+    def get(self, name: str) -> Optional[PlatformEntry]:
+        """Look up a platform entry by name."""
+        return self._entries.get(name)
+
+    def all_entries(self) -> list[PlatformEntry]:
+        """Return all registered platform entries."""
+        return list(self._entries.values())
+
+    def plugin_entries(self) -> list[PlatformEntry]:
+        """Return only plugin-registered platform entries."""
+        return [e for e in self._entries.values() if e.source == "plugin"]
+
+    def is_registered(self, name: str) -> bool:
+        return name in self._entries
+
+    def create_adapter(self, name: str, config: Any) -> Optional[Any]:
+        """Create an adapter instance for the given platform name.
+
+        Returns None if:
+        - No entry registered for *name*
+        - check_fn() returns False (missing deps)
+        - validate_config() returns False (misconfigured)
+        - The factory raises an exception
+        """
+        entry = self._entries.get(name)
+        if entry is None:
+            return None
+
+        if not entry.check_fn():
+            hint = f" ({entry.install_hint})" if entry.install_hint else ""
+            logger.warning(
+                "Platform '%s' requirements not met%s",
+                entry.label,
+                hint,
+            )
+            return None
+
+        if entry.validate_config is not None:
+            try:
+                if not entry.validate_config(config):
+                    logger.warning(
+                        "Platform '%s' config validation failed",
+                        entry.label,
+                    )
+                    return None
+            except Exception as e:
+                logger.warning(
+                    "Platform '%s' config validation error: %s",
+                    entry.label,
+                    e,
+                )
+                return None
+
+        try:
+            adapter = entry.adapter_factory(config)
+            return adapter
+        except Exception as e:
+            logger.error(
+                "Failed to create adapter for platform '%s': %s",
+                entry.label,
+                e,
+                exc_info=True,
+            )
+            return None
+
+
+# Module-level singleton
+platform_registry = PlatformRegistry()
@@ -1,9 +1,30 @@
 # Adding a New Messaging Platform

-Checklist for integrating a new messaging platform into the Hermes gateway.
-Use this as a reference when building a new adapter — every item here is a
-real integration point that exists in the codebase. Missing any of them will
-cause broken functionality, missing features, or inconsistent behavior.
+There are two ways to add a platform to the Hermes gateway:
+
+## Plugin Path (Recommended for Community/Third-Party)
+
+Create a plugin directory in `~/.hermes/plugins/` with a `PLUGIN.yaml` and
+`adapter.py`.  The adapter inherits from `BasePlatformAdapter` and registers
+via `ctx.register_platform()` in the `register(ctx)` entry point.  This
+requires **zero changes to core Hermes code**.
+
+The plugin system automatically handles: adapter creation, config parsing,
+user authorization, cron delivery, send_message routing, system prompt hints,
+status display, gateway setup, and more.
+
+See `plugins/platforms/irc/` for a complete reference implementation, and
+`website/docs/developer-guide/adding-platform-adapters.md` for the full
+plugin guide with code examples.
+
+---
+
+## Built-in Path (Core Contributors Only)
+
+Checklist for integrating a platform directly into the Hermes core.
+Use this as a reference when building a built-in adapter — every item here
+is a real integration point. Missing any of them will cause broken
+functionality, missing features, or inconsistent behavior.

 ---

@@ -7,7 +7,9 @@ Exposes an HTTP server with endpoints:
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
 - GET  /v1/models                  — lists hermes-agent as an available model
+- GET  /v1/capabilities            — machine-readable API capabilities for external UIs
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
+- GET  /v1/runs/{run_id}           — retrieve current run status
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
 - POST /v1/runs/{run_id}/stop    — interrupt a running agent
 - GET  /health                     — health check
@@ -590,6 +592,8 @@ class APIServerAdapter(BasePlatformAdapter):
        # Active run agent/task references for stop support
        self._active_run_agents: Dict[str, Any] = {}
        self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
+        # Pollable run status for dashboards and external control-plane UIs.
+        self._run_statuses: Dict[str, Dict[str, Any]] = {}
        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
@@ -808,6 +812,51 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

+    async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
+        """GET /v1/capabilities — advertise the stable API surface.
+
+        External UIs and orchestrators use this endpoint to discover the API
+        server's plugin-safe contract without scraping docs or assuming that
+        every Hermes version exposes the same endpoints.
+        """
+        auth_err = self._check_auth(request)
+        if auth_err:
+            return auth_err
+
+        return web.json_response({
+            "object": "hermes.api_server.capabilities",
+            "platform": "hermes-agent",
+            "model": self._model_name,
+            "auth": {
+                "type": "bearer",
+                "required": bool(self._api_key),
+            },
+            "features": {
+                "chat_completions": True,
+                "chat_completions_streaming": True,
+                "responses_api": True,
+                "responses_streaming": True,
+                "run_submission": True,
+                "run_status": True,
+                "run_events_sse": True,
+                "run_stop": True,
+                "tool_progress_events": True,
+                "session_continuity_header": "X-Hermes-Session-Id",
+                "cors": bool(self._cors_origins),
+            },
+            "endpoints": {
+                "health": {"method": "GET", "path": "/health"},
+                "health_detailed": {"method": "GET", "path": "/health/detailed"},
+                "models": {"method": "GET", "path": "/v1/models"},
+                "chat_completions": {"method": "POST", "path": "/v1/chat/completions"},
+                "responses": {"method": "POST", "path": "/v1/responses"},
+                "runs": {"method": "POST", "path": "/v1/runs"},
+                "run_status": {"method": "GET", "path": "/v1/runs/{run_id}"},
+                "run_events": {"method": "GET", "path": "/v1/runs/{run_id}/events"},
+                "run_stop": {"method": "POST", "path": "/v1/runs/{run_id}/stop"},
+            },
+        })
+
    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
@@ -932,39 +981,62 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is not None:
                    _stream_q.put(delta)

-            def _on_tool_progress(event_type, name, preview, args, **kwargs):
-                """Send tool progress as a separate SSE event.
+            # Track which tool_call_ids we've emitted a "running" lifecycle
+            # event for, so a "completed" event without a matching "running"
+            # (e.g. internal/filtered tools) is silently dropped instead of
+            # producing an orphaned event clients can't correlate.
+            _started_tool_call_ids: set[str] = set()

-                Previously, progress markers like ``⏰ list`` were injected
-                directly into ``delta.content``.  OpenAI-compatible frontends
-                (Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
-                the assistant message and send it back on subsequent requests.
-                After enough turns the model learns to *emit* the markers as
-                plain text instead of issuing real tool calls — silently
-                hallucinating tool results.  See #6972.
+            def _on_tool_start(tool_call_id, function_name, function_args):
+                """Emit ``hermes.tool.progress`` with ``status: running``.

-                The fix: push a tagged tuple ``("__tool_progress__", payload)``
-                onto the stream queue.  The SSE writer emits it as a custom
-                ``event: hermes.tool.progress`` line that compliant frontends
-                can render for UX but will *not* persist into conversation
-                history.  Clients that don't understand the custom event type
-                silently ignore it per the SSE specification.
+                Replaces the old ``tool_progress_callback("tool.started",
+                ...)`` emit so SSE consumers receive a single event per
+                tool start, carrying both the legacy ``tool``/``emoji``/
+                ``label`` payload (for #6972 frontends) and the new
+                ``toolCallId``/``status`` correlation fields (#16588).
+
+                Skips tools whose names start with ``_`` so internal
+                events (``_thinking``, …) stay off the wire — matching
+                the prior ``_on_tool_progress`` filter exactly.
                """
-                if event_type != "tool.started":
+                if not tool_call_id or function_name.startswith("_"):
                    return
-                if name.startswith("_"):
-                    return
-                from agent.display import get_tool_emoji
-                emoji = get_tool_emoji(name)
-                label = preview or name
+                _started_tool_call_ids.add(tool_call_id)
+                from agent.display import build_tool_preview, get_tool_emoji
+                label = build_tool_preview(function_name, function_args) or function_name
                _stream_q.put(("__tool_progress__", {
-                    "tool": name,
-                    "emoji": emoji,
+                    "tool": function_name,
+                    "emoji": get_tool_emoji(function_name),
                    "label": label,
+                    "toolCallId": tool_call_id,
+                    "status": "running",
+                }))
+
+            def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
+                """Emit the matching ``status: completed`` event.
+
+                Dropped if the start was filtered (internal tool, missing
+                id, or never seen) so clients never get an orphaned
+                ``completed`` they can't correlate to a prior ``running``.
+                """
+                if not tool_call_id or tool_call_id not in _started_tool_call_ids:
+                    return
+                _started_tool_call_ids.discard(tool_call_id)
+                _stream_q.put(("__tool_progress__", {
+                    "tool": function_name,
+                    "toolCallId": tool_call_id,
+                    "status": "completed",
                }))

            # Start agent in background.  agent_ref is a mutable container
            # so the SSE writer can interrupt the agent on client disconnect.
+            #
+            # ``tool_progress_callback`` is intentionally not wired here:
+            # it would duplicate every emit because ``run_agent`` fires it
+            # side-by-side with ``tool_start_callback``/``tool_complete_callback``.
+            # The structured callbacks are strictly richer (they carry the
+            # tool_call id), so they own the chat-completions SSE channel.
            agent_ref = [None]
            agent_task = asyncio.ensure_future(self._run_agent(
                user_message=user_message,
@@ -972,7 +1044,8 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
                stream_delta_callback=_on_delta,
-                tool_progress_callback=_on_tool_progress,
+                tool_start_callback=_on_tool_start,
+                tool_complete_callback=_on_tool_complete,
                agent_ref=agent_ref,
            ))

@@ -1087,7 +1160,8 @@ class APIServerAdapter(BasePlatformAdapter):
                Tagged tuples ``("__tool_progress__", payload)`` are sent
                as a custom ``event: hermes.tool.progress`` SSE event so
                frontends can display them without storing the markers in
-                conversation history.  See #6972.
+                conversation history.  See #6972 for the original event,
+                #16588 for the ``toolCallId``/``status`` lifecycle fields.
                """
                if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
                    event_data = json.dumps(item[1])
@@ -2297,10 +2371,31 @@ class APIServerAdapter(BasePlatformAdapter):

    _MAX_CONCURRENT_RUNS = 10  # Prevent unbounded resource allocation
    _RUN_STREAM_TTL = 300  # seconds before orphaned runs are swept
+    _RUN_STATUS_TTL = 3600  # seconds to retain terminal run status for polling
+
+    def _set_run_status(self, run_id: str, status: str, **fields: Any) -> Dict[str, Any]:
+        """Update pollable run status without exposing private agent objects."""
+        now = time.time()
+        current = self._run_statuses.get(run_id, {})
+        current.update({
+            "object": "hermes.run",
+            "run_id": run_id,
+            "status": status,
+            "updated_at": now,
+        })
+        current.setdefault("created_at", fields.pop("created_at", now))
+        current.update(fields)
+        self._run_statuses[run_id] = current
+        return current

    def _make_run_event_callback(self, run_id: str, loop: "asyncio.AbstractEventLoop"):
        """Return a tool_progress_callback that pushes structured events to the run's SSE queue."""
        def _push(event: Dict[str, Any]) -> None:
+            self._set_run_status(
+                run_id,
+                self._run_statuses.get(run_id, {}).get("status", "running"),
+                last_event=event.get("event"),
+            )
            q = self._run_streams.get(run_id)
            if q is None:
                return
@@ -2365,28 +2460,6 @@ class APIServerAdapter(BasePlatformAdapter):
        if not user_message:
            return web.json_response(_openai_error("No user message found in input"), status=400)

-        run_id = f"run_{uuid.uuid4().hex}"
-        loop = asyncio.get_running_loop()
-        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
-        self._run_streams[run_id] = q
-        self._run_streams_created[run_id] = time.time()
-
-        event_cb = self._make_run_event_callback(run_id, loop)
-
-        # Also wire stream_delta_callback so message.delta events flow through
-        def _text_cb(delta: Optional[str]) -> None:
-            if delta is None:
-                return
-            try:
-                loop.call_soon_threadsafe(q.put_nowait, {
-                    "event": "message.delta",
-                    "run_id": run_id,
-                    "timestamp": time.time(),
-                    "delta": delta,
-                })
-            except Exception:
-                pass
-
        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")

@@ -2434,11 +2507,42 @@ class APIServerAdapter(BasePlatformAdapter):
                        )
                    conversation_history.append({"role": msg["role"], "content": str(content)})

+        run_id = f"run_{uuid.uuid4().hex}"
        session_id = body.get("session_id") or stored_session_id or run_id
        ephemeral_system_prompt = instructions
+        loop = asyncio.get_running_loop()
+        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
+        created_at = time.time()
+        self._run_streams[run_id] = q
+        self._run_streams_created[run_id] = created_at
+
+        event_cb = self._make_run_event_callback(run_id, loop)
+
+        # Also wire stream_delta_callback so message.delta events flow through.
+        def _text_cb(delta: Optional[str]) -> None:
+            if delta is None:
+                return
+            try:
+                loop.call_soon_threadsafe(q.put_nowait, {
+                    "event": "message.delta",
+                    "run_id": run_id,
+                    "timestamp": time.time(),
+                    "delta": delta,
+                })
+            except Exception:
+                pass
+
+        self._set_run_status(
+            run_id,
+            "queued",
+            created_at=created_at,
+            session_id=session_id,
+            model=body.get("model", self._model_name),
+        )

        async def _run_and_close():
            try:
+                self._set_run_status(run_id, "running")
                agent = self._create_agent(
                    ephemeral_system_prompt=ephemeral_system_prompt,
                    session_id=session_id,
@@ -2468,8 +2572,36 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output": final_response,
                    "usage": usage,
                })
+                self._set_run_status(
+                    run_id,
+                    "completed",
+                    output=final_response,
+                    usage=usage,
+                    last_event="run.completed",
+                )
+            except asyncio.CancelledError:
+                self._set_run_status(
+                    run_id,
+                    "cancelled",
+                    last_event="run.cancelled",
+                )
+                try:
+                    q.put_nowait({
+                        "event": "run.cancelled",
+                        "run_id": run_id,
+                        "timestamp": time.time(),
+                    })
+                except Exception:
+                    pass
+                raise
            except Exception as exc:
                logger.exception("[api_server] run %s failed", run_id)
+                self._set_run_status(
+                    run_id,
+                    "failed",
+                    error=str(exc),
+                    last_event="run.failed",
+                )
                try:
                    q.put_nowait({
                        "event": "run.failed",
@@ -2499,6 +2631,21 @@ class APIServerAdapter(BasePlatformAdapter):

        return web.json_response({"run_id": run_id, "status": "started"}, status=202)

+    async def _handle_get_run(self, request: "web.Request") -> "web.Response":
+        """GET /v1/runs/{run_id} — return pollable run status for external UIs."""
+        auth_err = self._check_auth(request)
+        if auth_err:
+            return auth_err
+
+        run_id = request.match_info["run_id"]
+        status = self._run_statuses.get(run_id)
+        if status is None:
+            return web.json_response(
+                _openai_error(f"Run not found: {run_id}", code="run_not_found"),
+                status=404,
+            )
+        return web.json_response(status)
+
    async def _handle_run_events(self, request: "web.Request") -> "web.StreamResponse":
        """GET /v1/runs/{run_id}/events — SSE stream of structured agent lifecycle events."""
        auth_err = self._check_auth(request)
@@ -2561,6 +2708,8 @@ class APIServerAdapter(BasePlatformAdapter):
        if agent is None and task is None:
            return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)

+        self._set_run_status(run_id, "stopping", last_event="run.stopping")
+
        if agent is not None:
            try:
                agent.interrupt("Stop requested via API")
@@ -2603,6 +2752,15 @@ class APIServerAdapter(BasePlatformAdapter):
                self._active_run_agents.pop(run_id, None)
                self._active_run_tasks.pop(run_id, None)

+            stale_statuses = [
+                run_id
+                for run_id, status in list(self._run_statuses.items())
+                if status.get("status") in {"completed", "failed", "cancelled"}
+                and now - float(status.get("updated_at", 0) or 0) > self._RUN_STATUS_TTL
+            ]
+            for run_id in stale_statuses:
+                self._run_statuses.pop(run_id, None)
+
    # ------------------------------------------------------------------
    # BasePlatformAdapter interface
    # ------------------------------------------------------------------
@@ -2621,6 +2779,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_get("/health/detailed", self._handle_health_detailed)
            self._app.router.add_get("/v1/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
+            self._app.router.add_get("/v1/capabilities", self._handle_capabilities)
            self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
            self._app.router.add_post("/v1/responses", self._handle_responses)
            self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response)
@@ -2636,6 +2795,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_post("/api/jobs/{job_id}/run", self._handle_run_job)
            # Structured event streaming
            self._app.router.add_post("/v1/runs", self._handle_runs)
+            self._app.router.add_get("/v1/runs/{run_id}", self._handle_get_run)
            self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
            self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
            # Start background sweep to clean up orphaned (unconsumed) run streams
@@ -23,6 +23,45 @@ from utils import normalize_proxy_url

 logger = logging.getLogger(__name__)

+# Audio file extensions Hermes recognizes for native audio delivery.
+# Kept in sync with tools/send_message_tool.py and cron/scheduler.py via
+# should_send_media_as_audio() below.
+_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a', '.flac'})
+# Telegram's Bot API sendAudio only accepts MP3 / M4A. Other audio
+# formats either need to go through sendVoice (Opus/OGG) or must be
+# delivered as a regular document.
+_TELEGRAM_AUDIO_ATTACHMENT_EXTS = frozenset({'.mp3', '.m4a'})
+_TELEGRAM_VOICE_EXTS = frozenset({'.ogg', '.opus'})
+
+
+def _platform_name(platform) -> str:
+    """Normalize a Platform enum / raw string into a lowercase name."""
+    value = getattr(platform, "value", platform)
+    return str(value or "").lower()
+
+
+def should_send_media_as_audio(platform, ext: str, is_voice: bool = False) -> bool:
+    """Return True when a media file should use the platform's audio sender.
+
+    Other platforms: every recognized audio extension routes through the
+    audio sender.
+
+    Telegram: the Bot API only accepts MP3/M4A for sendAudio and
+    Opus/OGG for sendVoice. Opus/OGG is only routed as audio when the
+    caller flagged ``is_voice=True`` (so we don't turn a regular audio
+    attachment into a voice bubble just because the file happens to be
+    Opus). Everything else falls through to document delivery by
+    returning ``False``.
+    """
+    normalized_ext = (ext or "").lower()
+    if normalized_ext not in _AUDIO_EXTS:
+        return False
+    if _platform_name(platform) == "telegram":
+        if normalized_ext in _TELEGRAM_VOICE_EXTS:
+            return is_voice
+        return normalized_ext in _TELEGRAM_AUDIO_ATTACHMENT_EXTS
+    return True
+

 def utf16_len(s: str) -> int:
    """Count UTF-16 code units in *s*.
@@ -307,9 +346,14 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.

    Returns ``(session_kwargs, request_kwargs)`` where:
-      - SOCKS → ``({"connector": ProxyConnector(...)}, {})``
-      - HTTP  → ``({}, {"proxy": url})``
-      - None  → ``({}, {})``
+      - With aiohttp-socks → ``({"connector": ProxyConnector(...)}, {})``
+        for *all* proxy schemes (SOCKS **and** HTTP/HTTPS).
+      - HTTP without aiohttp-socks → ``({}, {"proxy": url})``.
+      - None → ``({}, {})``.
+
+    Prefer the connector path: it works transparently with libraries
+    (like mautrix) that call ``session.request()`` without forwarding
+    per-request ``proxy=`` kwargs.

    Usage::

@@ -320,20 +364,20 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """
    if not proxy_url:
        return {}, {}
-    if proxy_url.lower().startswith("socks"):
-        try:
-            from aiohttp_socks import ProxyConnector
+    try:
+        from aiohttp_socks import ProxyConnector

-            connector = ProxyConnector.from_url(proxy_url, rdns=True)
-            return {"connector": connector}, {}
-        except ImportError:
+        connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return {"connector": connector}, {}
+    except ImportError:
+        if proxy_url.lower().startswith("socks"):
            logger.warning(
                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
                "Run: pip install aiohttp-socks",
                proxy_url,
            )
            return {}, {}
-    return {}, {"proxy": proxy_url}
+        return {}, {"proxy": proxy_url}


 def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
@@ -902,6 +946,41 @@ class MessageEvent:
        return args


+_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
+    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$", re.IGNORECASE),
+    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$", re.IGNORECASE),
+    re.compile(r"^(?:please\s+)?restart\s+hermes[.!?\s]*$", re.IGNORECASE),
+)
+
+
+def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
+    """Rewrite a tiny set of DM plaintext admin phrases into slash commands.
+
+    This keeps high-impact operational phrases like ``restart gateway`` out of
+    the LLM/tool path, where they can trigger a self-restart from inside the
+    currently running agent and leave the gateway stuck in ``draining`` while it
+    waits for that same agent to finish.
+
+    Scope is intentionally narrow: DM text messages only, exact restart-style
+    phrases only. Group chats keep natural-language semantics.
+    """
+    try:
+        if event is None or event.message_type != MessageType.TEXT:
+            return
+        text = (event.text or "").strip()
+        if not text or text.startswith("/"):
+            return
+        source = getattr(event, "source", None)
+        if getattr(source, "chat_type", None) != "dm":
+            return
+        for pattern in _PLAINTEXT_GATEWAY_RESTART_PATTERNS:
+            if pattern.match(text):
+                event.text = "/restart"
+                return
+    except Exception:
+        return
+
+
@dataclass 
 class SendResult:
    """Result of sending a message."""
@@ -1375,6 +1454,41 @@ class BasePlatformAdapter(ABC):
        """
        return False

+    async def send_slash_confirm(
+        self,
+        chat_id: str,
+        title: str,
+        message: str,
+        session_key: str,
+        confirm_id: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a three-option slash-command confirmation prompt.
+
+        Used by the gateway's generic slash-confirm primitive (see
+        ``GatewayRunner._request_slash_confirm``) for commands that have a
+        non-destructive but expensive side effect the user should explicitly
+        acknowledge — the current caller is ``/reload-mcp``, which
+        invalidates the provider prompt cache.
+
+        Platforms with inline-button support (Telegram, Discord, Slack,
+        Matrix, Feishu) should override this to render three buttons:
+        Approve Once / Always Approve / Cancel.  Button callbacks MUST be
+        routed back through the gateway by calling
+        ``GatewayRunner._resolve_slash_confirm(confirm_id, choice)`` where
+        ``choice`` is ``"once"`` / ``"always"`` / ``"cancel"``.
+
+        Platforms without button UIs leave this as the default and fall
+        through to the gateway's text fallback (which sends ``message`` as
+        plain text and intercepts the next ``/approve`` / ``/always`` /
+        ``/cancel`` reply).
+
+        ``confirm_id`` is a short string generated by the gateway; the
+        adapter stores it alongside any platform-specific state needed to
+        route the callback (e.g. Telegram's ``_approval_state`` dict).
+        """
+        return SendResult(success=False, error="Not supported")
+
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
@@ -1391,7 +1505,64 @@ class BasePlatformAdapter(ABC):
        Default is a no-op for platforms with one-shot typing indicators.
        """
        pass
-    
+
+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images.
+
+        Accepts ``http(s)://``, ``file://`` URIs in the first tuple
+        element.
+
+        Default implementation sends each item individually,
+        routing animated GIFs through ``send_animation`` and local
+        files through ``send_image_file``.
+
+        Override in subclasses to bundle into a single native API call
+        (e.g. Signal's multi-attachment RPC)
+        """
+        from urllib.parse import unquote as _unquote
+
+        for image_url, alt_text in images:
+            if human_delay > 0:
+                await asyncio.sleep(human_delay)
+            try:
+                logger.info(
+                    "[%s] Sending image: %s (alt=%s)",
+                    self.name,
+                    safe_url_for_log(image_url),
+                    alt_text[:30] if alt_text else "",
+                )
+                if image_url.startswith("file://"):
+                    img_result = await self.send_image_file(
+                        chat_id=chat_id,
+                        image_path=_unquote(image_url[7:]),
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                elif self._is_animation_url(image_url):
+                    img_result = await self.send_animation(
+                        chat_id=chat_id,
+                        animation_url=image_url,
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                else:
+                    img_result = await self.send_image(
+                        chat_id=chat_id,
+                        image_url=image_url,
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                if not img_result.success:
+                    logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
+            except Exception as img_err:
+                logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
+
    async def send_image(
        self,
        chat_id: str,
@@ -1600,7 +1771,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1740,11 +1911,19 @@ class BasePlatformAdapter(ABC):
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
-                try:
-                    await asyncio.wait_for(stop_event.wait(), timeout=interval)
-                except asyncio.TimeoutError:
-                    continue
-                return
+                loop = asyncio.get_running_loop()
+                deadline = loop.time() + interval
+                while not stop_event.is_set():
+                    remaining = deadline - loop.time()
+                    if remaining <= 0:
+                        break
+                    # Poll instead of wait_for(stop_event.wait()).  Cancelling
+                    # wait_for while it owns the inner Event.wait task can leave
+                    # shutdown paths stuck awaiting the typing task on Python
+                    # 3.11/pytest-asyncio; sleep cancellation is immediate.
+                    await asyncio.sleep(min(0.25, remaining))
+                if stop_event.is_set():
+                    return
        except asyncio.CancelledError:
            pass  # Normal cancellation when handler completes
        finally:
@@ -2077,6 +2256,12 @@ class BasePlatformAdapter(ABC):
        ``release_guard=False`` keeps the adapter-level session guard in place
        so reset-like commands can finish atomically before follow-up messages
        are allowed to start a fresh background task.
+
+        Bounded by a 5s timeout so a wedged finally block in the cancelled
+        task (typing-task cleanup, on_processing_complete hook, etc.) can't
+        stall the calling dispatch coroutine — particularly under pytest-
+        asyncio where the event loop's cancellation-propagation semantics
+        differ subtly from a bare ``asyncio.run`` harness.
        """
        task = self._session_tasks.pop(session_key, None)
        if task is not None and not task.done():
@@ -2088,9 +2273,15 @@ class BasePlatformAdapter(ABC):
            self._expected_cancelled_tasks.add(task)
            task.cancel()
            try:
-                await task
+                await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
            except asyncio.CancelledError:
                pass
+            except asyncio.TimeoutError:
+                logger.warning(
+                    "[%s] Cancelled task for %s did not exit within 5s; "
+                    "unblocking dispatch and letting the task unwind in the background",
+                    self.name, session_key,
+                )
            except Exception:
                logger.debug(
                    "[%s] Session cancellation raised while unwinding %s",
@@ -2188,6 +2379,8 @@ class BasePlatformAdapter(ABC):
        """
        if not self._message_handler:
            return
+
+        coerce_plaintext_gateway_command(event)
        
        session_key = build_session_key(
            event.source,
@@ -2340,6 +2533,16 @@ class BasePlatformAdapter(ABC):
                **_keep_typing_kwargs,
            )
        )
+
+        async def _stop_typing_task() -> None:
+            typing_task.cancel()
+            try:
+                await asyncio.wait_for(asyncio.shield(typing_task), timeout=0.5)
+            except (asyncio.CancelledError, asyncio.TimeoutError):
+                # Cancellation cleanup must not block adapter shutdown.  The
+                # typing task is already cancelled; if the parent task is also
+                # cancelling, let this message-processing task unwind now.
+                pass
        
        try:
            await self._run_processing_hook("on_processing_start", event)
@@ -2441,47 +2644,57 @@ class BasePlatformAdapter(ABC):
                # Send extracted images as native attachments
                if images:
                    logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
-                for image_url, alt_text in images:
-                    if human_delay > 0:
-                        await asyncio.sleep(human_delay)
                    try:
-                        logger.info(
-                            "[%s] Sending image: %s (alt=%s)",
-                            self.name,
-                            safe_url_for_log(image_url),
-                            alt_text[:30] if alt_text else "",
+                        await self.send_multiple_images(
+                            chat_id=event.source.chat_id,
+                            images=images,
+                            metadata=_thread_metadata,
+                            human_delay=human_delay,
                        )
-                        # Route animated GIFs through send_animation for proper playback
-                        if self._is_animation_url(image_url):
-                            img_result = await self.send_animation(
-                                chat_id=event.source.chat_id,
-                                animation_url=image_url,
-                                caption=alt_text if alt_text else None,
-                                metadata=_thread_metadata,
-                            )
-                        else:
-                            img_result = await self.send_image(
-                                chat_id=event.source.chat_id,
-                                image_url=image_url,
-                                caption=alt_text if alt_text else None,
-                                metadata=_thread_metadata,
-                            )
-                        if not img_result.success:
-                            logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
-                    except Exception as img_err:
-                        logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
+                    except Exception as batch_err:
+                        logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
+

                # Send extracted media files — route by file type
-                _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
                _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
                _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

+                # Partition images out of media_files + local_files so they
+                # can be sent as a single batch (Signal RPC)
+                from urllib.parse import quote as _quote
+                _image_paths: list = []
+                _non_image_media: list = []
                for media_path, is_voice in media_files:
+                    _ext = Path(media_path).suffix.lower()
+                    if _ext in _IMAGE_EXTS and not is_voice:
+                        _image_paths.append(media_path)
+                    else:
+                        _non_image_media.append((media_path, is_voice))
+                _non_image_local: list = []
+                for file_path in local_files:
+                    if Path(file_path).suffix.lower() in _IMAGE_EXTS:
+                        _image_paths.append(file_path)
+                    else:
+                        _non_image_local.append(file_path)
+
+                if _image_paths:
+                    try:
+                        _batch = [(f"file://{_quote(p)}", "") for p in _image_paths]
+                        await self.send_multiple_images(
+                            chat_id=event.source.chat_id,
+                            images=_batch,
+                            metadata=_thread_metadata,
+                            human_delay=human_delay,
+                        )
+                    except Exception as batch_err:
+                        logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
+
+                for media_path, is_voice in _non_image_media:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
                        ext = Path(media_path).suffix.lower()
-                        if ext in _AUDIO_EXTS:
+                        if should_send_media_as_audio(self.platform, ext, is_voice=is_voice):
                            media_result = await self.send_voice(
                                chat_id=event.source.chat_id,
                                audio_path=media_path,
@@ -2493,12 +2706,6 @@ class BasePlatformAdapter(ABC):
                                video_path=media_path,
                                metadata=_thread_metadata,
                            )
-                        elif ext in _IMAGE_EXTS:
-                            media_result = await self.send_image_file(
-                                chat_id=event.source.chat_id,
-                                image_path=media_path,
-                                metadata=_thread_metadata,
-                            )
                        else:
                            media_result = await self.send_document(
                                chat_id=event.source.chat_id,
@@ -2511,19 +2718,13 @@ class BasePlatformAdapter(ABC):
                    except Exception as media_err:
                        logger.warning("[%s] Error sending media: %s", self.name, media_err)

-                # Send auto-detected local files as native attachments
-                for file_path in local_files:
+                # Send auto-detected local non-image files as native attachments
+                for file_path in _non_image_local:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
                        ext = Path(file_path).suffix.lower()
-                        if ext in _IMAGE_EXTS:
-                            await self.send_image_file(
-                                chat_id=event.source.chat_id,
-                                image_path=file_path,
-                                metadata=_thread_metadata,
-                            )
-                        elif ext in _VIDEO_EXTS:
+                        if ext in _VIDEO_EXTS:
                            await self.send_video(
                                chat_id=event.source.chat_id,
                                video_path=file_path,
@@ -2562,14 +2763,28 @@ class BasePlatformAdapter(ABC):
                _active = self._active_sessions.get(session_key)
                if _active is not None:
                    _active.clear()
-                typing_task.cancel()
+                await _stop_typing_task()
+                # Spawn a fresh task for the pending message instead of
+                # recursing.  Issue #17758: `await
+                # self._process_message_background(...)` here grew the
+                # call stack one frame per chained follow-up, and under
+                # sustained pending-queue activity the C stack would
+                # exhaust at ~2000 frames and SIGSEGV the process.
+                # Mirror the late-arrival drain pattern below: hand off
+                # to a new task and return so this frame can unwind.
+                drain_task = asyncio.create_task(
+                    self._process_message_background(pending_event, session_key)
+                )
+                # Hand ownership of the session to the drain task so
+                # stale-lock detection keeps working while it runs.
+                self._session_tasks[session_key] = drain_task
                try:
-                    await typing_task
-                except asyncio.CancelledError:
+                    self._background_tasks.add(drain_task)
+                    drain_task.add_done_callback(self._background_tasks.discard)
+                except TypeError:
+                    # Tests stub create_task() with non-hashable sentinels; tolerate.
                    pass
-                # Process pending message in new background task
-                await self._process_message_background(pending_event, session_key)
-                return  # Already cleaned up
+                return  # Drain task owns the session now.
                
        except asyncio.CancelledError:
            current_task = asyncio.current_task()
@@ -2614,11 +2829,7 @@ class BasePlatformAdapter(ABC):
                except Exception:
                    pass
            # Stop typing indicator
-            typing_task.cancel()
-            try:
-                await typing_task
-            except asyncio.CancelledError:
-                pass
+            await _stop_typing_task()
            # Also cancel any platform-level persistent typing tasks (e.g. Discord)
            # that may have been recreated by _keep_typing after the last stop_typing()
            try:
@@ -2635,25 +2846,41 @@ class BasePlatformAdapter(ABC):
            # dropped (user never gets a reply).
            late_pending = self._pending_messages.pop(session_key, None)
            if late_pending is not None:
-                logger.debug(
-                    "[%s] Late-arrival pending message during cleanup — spawning drain task",
-                    self.name,
-                )
-                _active = self._active_sessions.get(session_key)
-                if _active is not None:
-                    _active.clear()
-                drain_task = asyncio.create_task(
-                    self._process_message_background(late_pending, session_key)
-                )
-                # Hand ownership of the session to the drain task so stale-lock
-                # detection keeps working while it runs.
-                self._session_tasks[session_key] = drain_task
-                try:
-                    self._background_tasks.add(drain_task)
-                    drain_task.add_done_callback(self._background_tasks.discard)
-                except TypeError:
-                    # Tests stub create_task() with non-hashable sentinels; tolerate.
-                    pass
+                current_task = asyncio.current_task()
+                existing_task = self._session_tasks.get(session_key)
+                if (
+                    existing_task is not None
+                    and existing_task is not current_task
+                ):
+                    # The in-band drain (or an earlier late-arrival drain)
+                    # already spawned a follow-up task that owns this
+                    # session.  Re-queue the late-arrival event so that
+                    # task picks it up — avoids spawning two concurrent
+                    # _process_message_background tasks for the same key
+                    # (#17758 follow-up: prevents the create_task path
+                    # from racing with itself across the in-band/finally
+                    # boundary).
+                    self._pending_messages[session_key] = late_pending
+                else:
+                    logger.debug(
+                        "[%s] Late-arrival pending message during cleanup — spawning drain task",
+                        self.name,
+                    )
+                    _active = self._active_sessions.get(session_key)
+                    if _active is not None:
+                        _active.clear()
+                    drain_task = asyncio.create_task(
+                        self._process_message_background(late_pending, session_key)
+                    )
+                    # Hand ownership of the session to the drain task so stale-lock
+                    # detection keeps working while it runs.
+                    self._session_tasks[session_key] = drain_task
+                    try:
+                        self._background_tasks.add(drain_task)
+                        drain_task.add_done_callback(self._background_tasks.discard)
+                    except TypeError:
+                        # Tests stub create_task() with non-hashable sentinels; tolerate.
+                        pass
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
@@ -2661,16 +2888,34 @@ class BasePlatformAdapter(ABC):
                # reset-like command that already swapped in its own
                # command_guard (and cancelled us) can't be accidentally
                # cleared by our unwind.  The command owns the session now.
+                #
+                # The owner-check also covers the in-band drain handoff
+                # above: when we spawned a drain_task and transferred
+                # ownership via ``_session_tasks[session_key] = drain_task``,
+                # ``_session_tasks.get(session_key) is current_task`` is
+                # False, so we leave _active_sessions populated.  Without
+                # this guard, the drain task picks up the same
+                # interrupt_event in its own _process_message_background
+                # entry, _release_session_guard's guard-match succeeds,
+                # and we'd delete the entry while the drain task is still
+                # running — letting a concurrent inbound message pass
+                # the Level-1 guard and spawn a second handler for the
+                # same session.
                current_task = asyncio.current_task()
                if current_task is not None and self._session_tasks.get(session_key) is current_task:
                    del self._session_tasks[session_key]
-                self._release_session_guard(session_key, guard=interrupt_event)
+                    self._release_session_guard(session_key, guard=interrupt_event)
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.

        Used during gateway shutdown/replacement so active sessions from the old
        process do not keep running after adapters are being torn down.
+
+        Each cancelled task is awaited with a 5s bound so a wedged finally
+        (typing-task cleanup, on_processing_complete hook) can't stall the
+        whole shutdown path.  Stragglers are released from our tracking and
+        allowed to finish unwinding on their own.
        """
        # Loop until no new tasks appear.  Without this, a message
        # arriving during the `await asyncio.gather` below would spawn
@@ -2689,7 +2934,21 @@ class BasePlatformAdapter(ABC):
            for task in tasks:
                self._expected_cancelled_tasks.add(task)
                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
+            try:
+                await asyncio.wait_for(
+                    asyncio.gather(
+                        *(asyncio.shield(t) for t in tasks),
+                        return_exceptions=True,
+                    ),
+                    timeout=5.0,
+                )
+            except asyncio.TimeoutError:
+                logger.warning(
+                    "[%s] %d background task(s) did not exit within 5s; "
+                    "releasing tracking and letting them unwind in the background",
+                    self.name, len([t for t in tasks if not t.done()]),
+                )
+                break
            # Loop: late-arrival tasks spawned during the gather above
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
@@ -18,7 +18,7 @@ import tempfile
 import threading
 import time
 from collections import defaultdict
-from typing import Callable, Dict, Optional, Any
+from typing import Callable, Dict, List, Optional, Any, Tuple

 logger = logging.getLogger(__name__)

@@ -305,7 +305,7 @@ class VoiceReceiver:
        encrypted = bytes(payload_with_nonce[:-4])

        try:
-            import nacl.secret  # noqa: delayed import – only in voice path
+            import nacl.secret  # noqa: E402 — delayed import, only in voice path
            box = nacl.secret.Aead(self._secret_key)
            decrypted = box.decrypt(encrypted, header, bytes(nonce))
        except Exception as e:
@@ -813,7 +813,14 @@ class DiscordAdapter(BasePlatformAdapter):
                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
                return

-            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
+            # Discord's per-app command-management bucket is ~5 writes / 20 s,
+            # so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
+            # desired = 107 writes) takes several minutes of forced waits.
+            # A flat 30 s budget blew up reliably under bucket pressure and
+            # left slash commands broken for ~60 min until the bucket fully
+            # recovered. Use a wide ceiling; the cap still guards against a
+            # true hang. (#16713)
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
            logger.info(
                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
                self.name,
@@ -825,7 +832,11 @@ class DiscordAdapter(BasePlatformAdapter):
                summary["deleted"],
            )
        except asyncio.TimeoutError:
-            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
+            logger.warning(
+                "[%s] Slash command sync timed out — Discord rate-limit bucket "
+                "may be saturated; will retry on next reconnect",
+                self.name,
+            )
        except asyncio.CancelledError:
            raise
        except Exception as e:  # pragma: no cover - defensive logging
@@ -1332,6 +1343,134 @@ class DiscordAdapter(BasePlatformAdapter):
            msg = await channel.send(content=caption if caption else None, file=file)
        return SendResult(success=True, message_id=str(msg.id))

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images as a single Discord message with multiple attachments.
+
+        Discord permits up to 10 file attachments per message. Batches are
+        chunked accordingly. URL images are downloaded into memory and
+        uploaded as inline attachments (same pattern as ``send_image`` so
+        they render inline, not as bare links). Local files are opened
+        directly. On per-chunk failure the remaining images in that chunk
+        fall back to the base per-image loop.
+        """
+        if not self._client:
+            return
+        if not images:
+            return
+
+        try:
+            import discord as _discord_mod
+            import io as _io
+            from urllib.parse import unquote as _unquote
+        except Exception:  # pragma: no cover
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+            return
+
+        try:
+            channel = self._client.get_channel(int(chat_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(chat_id))
+            if not channel:
+                logger.warning("[%s] Channel %s not found for multi-image send", self.name, chat_id)
+                return
+        except Exception as e:
+            logger.warning("[%s] Failed to resolve channel for multi-image send: %s", self.name, e)
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+            return
+
+        CHUNK = 10
+        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
+
+        for chunk_idx, chunk in enumerate(chunks):
+            if human_delay > 0 and chunk_idx > 0:
+                await asyncio.sleep(human_delay)
+
+            files: List[Any] = []
+            captions: List[str] = []
+            aiohttp_session = None
+            try:
+                for image_url, alt_text in chunk:
+                    if alt_text:
+                        captions.append(alt_text)
+                    if image_url.startswith("file://"):
+                        local_path = _unquote(image_url[7:])
+                        if not os.path.exists(local_path):
+                            logger.warning("[%s] Skipping missing image: %s", self.name, local_path)
+                            continue
+                        files.append(_discord_mod.File(local_path, filename=os.path.basename(local_path)))
+                    else:
+                        if not is_safe_url(image_url):
+                            logger.warning("[%s] Blocked unsafe image URL in batch", self.name)
+                            continue
+                        # Download to BytesIO so it renders inline
+                        try:
+                            import aiohttp as _aiohttp
+                            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+                            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
+                            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
+                            if aiohttp_session is None:
+                                aiohttp_session = _aiohttp.ClientSession(**_sess_kw)
+                            async with aiohttp_session.get(
+                                image_url, timeout=_aiohttp.ClientTimeout(total=30), **_req_kw,
+                            ) as resp:
+                                if resp.status != 200:
+                                    logger.warning(
+                                        "[%s] Failed to download image (HTTP %d) in batch: %s",
+                                        self.name, resp.status, image_url[:80],
+                                    )
+                                    continue
+                                data = await resp.read()
+                                ct = resp.headers.get("content-type", "image/png")
+                                ext = "png"
+                                if "jpeg" in ct or "jpg" in ct:
+                                    ext = "jpg"
+                                elif "gif" in ct:
+                                    ext = "gif"
+                                elif "webp" in ct:
+                                    ext = "webp"
+                                files.append(_discord_mod.File(_io.BytesIO(data), filename=f"image_{len(files)}.{ext}"))
+                        except Exception as dl_err:
+                            logger.warning("[%s] Download failed for %s: %s", self.name, image_url[:80], dl_err)
+                            continue
+
+                if not files:
+                    continue
+
+                # Use the first caption if any (Discord only has one message body for the group)
+                content = captions[0] if captions else None
+                logger.info(
+                    "[%s] Sending %d image(s) as single Discord message (chunk %d/%d)",
+                    self.name, len(files), chunk_idx + 1, len(chunks),
+                )
+
+                if self._is_forum_parent(channel):
+                    await self._forum_post_file(
+                        channel,
+                        content=(content or "").strip(),
+                        files=files,
+                    )
+                else:
+                    await channel.send(content=content, files=files)
+            except Exception as e:
+                logger.warning(
+                    "[%s] Multi-image Discord send failed (chunk %d/%d), falling back to per-image: %s",
+                    self.name, chunk_idx + 1, len(chunks), e,
+                    exc_info=True,
+                )
+                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
+            finally:
+                if aiohttp_session is not None:
+                    try:
+                        await aiohttp_session.close()
+                    except Exception:
+                        pass
+
    async def play_tts(
        self,
        chat_id: str,
@@ -2259,6 +2398,10 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_reload_mcp(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/reload-mcp")

+        @tree.command(name="reload-skills", description="Re-scan ~/.hermes/skills/ for new or removed skills")
+        async def slash_reload_skills(interaction: discord.Interaction):
+            await self._run_simple_slash(interaction, "/reload-skills")
+
        @tree.command(name="voice", description="Toggle voice reply mode")
        @discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
        @discord.app_commands.choices(mode=[
@@ -2895,6 +3038,43 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[dict] = None,
+    ) -> SendResult:
+        """Send a three-button slash-command confirmation prompt."""
+        if not self._client or not DISCORD_AVAILABLE:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            target_id = chat_id
+            if metadata and metadata.get("thread_id"):
+                target_id = metadata["thread_id"]
+
+            channel = self._client.get_channel(int(target_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(target_id))
+
+            # Embed description limit is 4096; message usually fits easily.
+            max_desc = 4088
+            body = message if len(message) <= max_desc else message[: max_desc - 3] + "..."
+            embed = discord.Embed(
+                title=title or "Confirm",
+                description=body,
+                color=discord.Color.orange(),
+            )
+
+            view = SlashConfirmView(
+                session_key=session_key,
+                confirm_id=confirm_id,
+                allowed_user_ids=self._allowed_user_ids,
+            )
+
+            msg = await channel.send(embed=embed, view=view)
+            return SendResult(success=True, message_id=str(msg.id))
+        except Exception as e:
+            return SendResult(success=False, error=str(e))
+
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
@@ -3628,6 +3808,103 @@ if DISCORD_AVAILABLE:
            for child in self.children:
                child.disabled = True

+    class SlashConfirmView(discord.ui.View):
+        """Three-button view for generic slash-command confirmations.
+
+        Used by ``/reload-mcp`` and any future slash command routed through
+        ``GatewayRunner._request_slash_confirm``.  Buttons map to the
+        gateway's three choices:
+
+          * "Approve Once"   → ``choice="once"``
+          * "Always Approve" → ``choice="always"``
+          * "Cancel"         → ``choice="cancel"``
+
+        Clicking calls the module-level
+        ``tools.slash_confirm.resolve(session_key, confirm_id, choice)``
+        which runs the handler the runner stored for this ``session_key``.
+        Only users in the adapter's allowlist can click.  Times out after
+        5 minutes (matches the gateway primitive's timeout).
+        """
+
+        def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
+            super().__init__(timeout=300)
+            self.session_key = session_key
+            self.confirm_id = confirm_id
+            self.allowed_user_ids = allowed_user_ids
+            self.resolved = False
+
+        def _check_auth(self, interaction: discord.Interaction) -> bool:
+            if not self.allowed_user_ids:
+                return True
+            return str(interaction.user.id) in self.allowed_user_ids
+
+        async def _resolve(
+            self, interaction: discord.Interaction, choice: str,
+            color: discord.Color, label: str,
+        ):
+            if self.resolved:
+                await interaction.response.send_message(
+                    "This prompt has already been resolved~", ephemeral=True,
+                )
+                return
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized to answer this prompt~", ephemeral=True,
+                )
+                return
+
+            self.resolved = True
+
+            embed = interaction.message.embeds[0] if interaction.message.embeds else None
+            if embed:
+                embed.color = color
+                embed.set_footer(text=f"{label} by {interaction.user.display_name}")
+
+            for child in self.children:
+                child.disabled = True
+
+            await interaction.response.edit_message(embed=embed, view=self)
+
+            # Resolve via the module-level primitive.  If the handler
+            # returns a follow-up message, post it in the same channel.
+            try:
+                from tools import slash_confirm as _slash_confirm_mod
+                result_text = await _slash_confirm_mod.resolve(
+                    self.session_key, self.confirm_id, choice,
+                )
+                if result_text:
+                    await interaction.followup.send(result_text)
+                logger.info(
+                    "Discord button resolved slash-confirm for session %s "
+                    "(choice=%s, user=%s)",
+                    self.session_key, choice, interaction.user.display_name,
+                )
+            except Exception as exc:
+                logger.error("Discord slash-confirm resolve failed: %s", exc, exc_info=True)
+
+        @discord.ui.button(label="Approve Once", style=discord.ButtonStyle.green)
+        async def approve_once(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            await self._resolve(interaction, "once", discord.Color.green(), "Approved once")
+
+        @discord.ui.button(label="Always Approve", style=discord.ButtonStyle.blurple)
+        async def approve_always(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            await self._resolve(interaction, "always", discord.Color.purple(), "Always approved")
+
+        @discord.ui.button(label="Cancel", style=discord.ButtonStyle.red)
+        async def cancel(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            await self._resolve(interaction, "cancel", discord.Color.greyple(), "Cancelled")
+
+        async def on_timeout(self):
+            self.resolved = True
+            for child in self.children:
+                child.disabled = True
+
    class UpdatePromptView(discord.ui.View):
        """Interactive Yes/No buttons for ``hermes update`` prompts.

@@ -31,7 +31,7 @@ from email.mime.base import MIMEBase
 from email.utils import formatdate
 from email import encoders
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple

 from gateway.platforms.base import (
    BasePlatformAdapter,
@@ -540,6 +540,113 @@ class EmailAdapter(BasePlatformAdapter):
        text += f"\n\nImage: {image_url}"
        return await self.send(chat_id, text.strip(), reply_to)

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images as a single email with multiple MIME attachments.
+
+        Local files are attached directly. URL images have their URL
+        appended to the body (email adapter does not download remote
+        images). No hard cap — email clients handle dozens of
+        attachments fine, subject to SMTP message size limits.
+        """
+        if not images:
+            return
+
+        from urllib.parse import unquote as _unquote
+
+        body_parts: List[str] = []
+        local_paths: List[str] = []
+        for image_url, alt_text in images:
+            if alt_text:
+                body_parts.append(alt_text)
+            if image_url.startswith("file://"):
+                local_path = _unquote(image_url[7:])
+                if Path(local_path).exists():
+                    local_paths.append(local_path)
+                else:
+                    logger.warning("[Email] Skipping missing image: %s", local_path)
+            else:
+                # Remote URLs just get linked in the body (parity with send_image)
+                body_parts.append(f"Image: {image_url}")
+
+        if not local_paths and not body_parts:
+            return
+
+        body = "\n\n".join(body_parts)
+
+        try:
+            loop = asyncio.get_running_loop()
+            await loop.run_in_executor(
+                None,
+                self._send_email_with_attachments,
+                chat_id,
+                body,
+                local_paths,
+            )
+        except Exception as e:
+            logger.error("[Email] Multi-image send failed, falling back: %s", e, exc_info=True)
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+
+    def _send_email_with_attachments(
+        self,
+        to_addr: str,
+        body: str,
+        file_paths: List[str],
+    ) -> str:
+        """Send an email with multiple file attachments via SMTP."""
+        msg = MIMEMultipart()
+        msg["From"] = self._address
+        msg["To"] = to_addr
+
+        ctx = self._thread_context.get(to_addr, {})
+        subject = ctx.get("subject", "Hermes Agent")
+        if not subject.startswith("Re:"):
+            subject = f"Re: {subject}"
+        msg["Subject"] = subject
+
+        original_msg_id = ctx.get("message_id")
+        if original_msg_id:
+            msg["In-Reply-To"] = original_msg_id
+            msg["References"] = original_msg_id
+
+        msg["Date"] = formatdate(localtime=True)
+        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
+        msg["Message-ID"] = msg_id
+
+        if body:
+            msg.attach(MIMEText(body, "plain", "utf-8"))
+
+        for file_path in file_paths:
+            p = Path(file_path)
+            try:
+                with open(p, "rb") as f:
+                    part = MIMEBase("application", "octet-stream")
+                    part.set_payload(f.read())
+                    encoders.encode_base64(part)
+                    part.add_header("Content-Disposition", f"attachment; filename={p.name}")
+                    msg.attach(part)
+            except Exception as e:
+                logger.warning("[Email] Failed to attach %s: %s", file_path, e)
+
+        smtp = smtplib.SMTP(self._smtp_host, self._smtp_port, timeout=30)
+        try:
+            smtp.starttls(context=ssl.create_default_context())
+            smtp.login(self._address, self._password)
+            smtp.send_message(msg)
+        finally:
+            try:
+                smtp.quit()
+            except Exception:
+                smtp.close()
+
+        logger.info("[Email] Sent multi-attachment email to %s (%d files)", to_addr, len(file_paths))
+        return msg_id
+
    async def send_document(
        self,
        chat_id: str,
@@ -974,7 +974,6 @@ def build_whole_comment_prompt(

 def _resolve_model_and_runtime() -> Tuple[str, dict]:
    """Resolve model and provider credentials, same as gateway message handling."""
-    import os
    from gateway.run import _load_gateway_config, _resolve_gateway_model

    user_config = _load_gateway_config()
@@ -11,10 +11,10 @@ import logging
 import re
 import time
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, Optional
+from typing import TYPE_CHECKING, Dict

 if TYPE_CHECKING:
-    from gateway.platforms.base import BasePlatformAdapter, MessageEvent
+    from gateway.platforms.base import MessageEvent

 logger = logging.getLogger(__name__)

@@ -11,6 +11,7 @@ Environment variables:
    MATRIX_PASSWORD             Password (alternative to access token)
    MATRIX_ENCRYPTION           Set "true" to enable E2EE
    MATRIX_DEVICE_ID            Stable device ID for E2EE persistence across restarts
+    MATRIX_PROXY                HTTP(S) or SOCKS proxy URL for Matrix traffic
    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
    MATRIX_REACTIONS        Set "false" to disable processing lifecycle reactions
@@ -18,6 +19,7 @@ Environment variables:
    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
+    MATRIX_DM_AUTO_THREAD       Auto-create threads for DM messages (default: false)
    MATRIX_RECOVERY_KEY         Recovery key for cross-signing verification after device key rotation
    MATRIX_DM_MENTION_THREADS   Create a thread when bot is @mentioned in a DM (default: false)
 """
@@ -30,6 +32,8 @@ import mimetypes
 import os
 import re
 import time
+from dataclasses import dataclass
+
 from html import escape as _html_escape
 from pathlib import Path
 from typing import Any, Dict, Optional, Set
@@ -95,11 +99,25 @@ from gateway.platforms.base import (
    MessageType,
    ProcessingOutcome,
    SendResult,
+    resolve_proxy_url,
+    proxy_kwargs_for_aiohttp,
 )
 from gateway.platforms.helpers import ThreadParticipationTracker

 logger = logging.getLogger(__name__)

+
+@dataclass
+class _MatrixApprovalPrompt:
+    """Tracks a pending Matrix reaction-based exec approval prompt."""
+
+    def __init__(self, session_key: str, chat_id: str, message_id: str, resolved: bool = False):
+        self.session_key = session_key
+        self.chat_id = chat_id
+        self.message_id = message_id
+        self.resolved = resolved
+        self.bot_reaction_events: dict[str, str] = {}  # emoji -> event_id
+
 # Matrix message size limit (4000 chars practical, spec has no hard limit
 # but clients render poorly above this).
 MAX_MESSAGE_LENGTH = 4000
@@ -114,11 +132,85 @@ _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5

+_OUTBOUND_MENTION_RE = re.compile(
+    r"(?<![\w/])(@[0-9A-Za-z._=/-]+:[0-9A-Za-z.-]+(?::\d+)?)"
+)

 _E2EE_INSTALL_HINT = (
    "Install with: pip install 'mautrix[encryption]'  (requires libolm C library)"
 )

+_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
+    ".jpg",
+    ".jpeg",
+    ".png",
+    ".gif",
+    ".webp",
+    ".bmp",
+    ".svg",
+    ".heic",
+    ".heif",
+    ".avif",
+})
+
+
+def _looks_like_matrix_image_filename(text: str) -> bool:
+    """Return True when Matrix image body text is probably just a transport filename.
+
+    Matrix ``m.image`` events commonly populate ``content.body`` with the uploaded
+    filename when the user did not add a caption. Treating that raw filename as
+    user-authored text confuses downstream vision enrichment.
+    """
+    candidate = str(text or "").strip()
+    if not candidate or "\n" in candidate or candidate.endswith("/"):
+        return False
+
+    name = Path(candidate).name
+    if not name or name != candidate:
+        return False
+
+    suffix = Path(name).suffix.lower()
+    if not suffix:
+        return False
+
+    guessed_type, _ = mimetypes.guess_type(name)
+    if guessed_type and guessed_type.startswith("image/"):
+        return True
+    return suffix in _MATRIX_IMAGE_FILENAME_EXTS
+
+
+def _create_matrix_session(proxy_url: str | None):
+    """Create an ``aiohttp.ClientSession`` whose proxy applies to *all* requests.
+
+    mautrix's ``HTTPAPI._send()`` calls ``session.request()`` without forwarding
+    per-request ``proxy=`` kwargs.  For HTTP(S) proxies we use aiohttp's native
+    ``proxy=`` session parameter which sets a default for every request.  For SOCKS
+    we use ``aiohttp_socks.ProxyConnector`` (connector-level).
+    When no proxy is configured we enable ``trust_env`` so standard env vars
+    (``HTTP_PROXY`` / ``HTTPS_PROXY``) are honoured automatically.
+    """
+    import aiohttp
+
+    if not proxy_url:
+        return aiohttp.ClientSession(trust_env=True)
+
+    if proxy_url.split("://")[0].lower().startswith("socks"):
+        try:
+            from aiohttp_socks import ProxyConnector
+
+            return aiohttp.ClientSession(
+                connector=ProxyConnector.from_url(proxy_url, rdns=True),
+            )
+        except ImportError:
+            logger.warning(
+                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+                "Run: pip install aiohttp-socks",
+                proxy_url,
+            )
+            return aiohttp.ClientSession(trust_env=True)
+
+    return aiohttp.ClientSession(proxy=proxy_url)
+

 def _check_e2ee_deps() -> bool:
    """Return True if mautrix E2EE dependencies (python-olm) are available."""
@@ -260,6 +352,9 @@ class MatrixAdapter(BasePlatformAdapter):
            "1",
            "yes",
        )
+        self._dm_auto_thread: bool = os.getenv(
+            "MATRIX_DM_AUTO_THREAD", "false"
+        ).lower() in ("true", "1", "yes")
        self._dm_mention_threads: bool = os.getenv(
            "MATRIX_DM_MENTION_THREADS", "false"
        ).lower() in ("true", "1", "yes")
@@ -270,6 +365,11 @@ class MatrixAdapter(BasePlatformAdapter):
        ).lower() not in ("false", "0", "no")
        self._pending_reactions: dict[tuple[str, str], str] = {}

+        # Proxy support — resolve once at init, reuse for all HTTP traffic.
+        self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY")
+        if self._proxy_url:
+            logger.info("Matrix: proxy configured — %s", self._proxy_url)
+
        # Text batching: merge rapid successive messages (Telegram-style).
        # Matrix clients split long messages around 4000 chars.
        self._text_batch_delay_seconds = float(
@@ -281,6 +381,18 @@ class MatrixAdapter(BasePlatformAdapter):
        self._pending_text_batches: Dict[str, MessageEvent] = {}
        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}

+        # Matrix reaction-based dangerous command approvals.
+        self._approval_reaction_map = {
+            "✅": "once",
+            "❎": "deny",
+        }
+        self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {}
+        self._approval_prompt_by_session: Dict[str, str] = {}
+        allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "")
+        self._allowed_user_ids: Set[str] = {
+            u.strip() for u in allowed_users_raw.split(",") if u.strip()
+        }
+
    def _is_duplicate_event(self, event_id) -> bool:
        """Return True if this event was already processed. Tracks the ID otherwise."""
        if not event_id:
@@ -326,7 +438,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    )
                    return False
        except Exception as exc:
-            logger.error("Matrix: post-upload key verification failed: %s", exc)
+            logger.error("Matrix: post-upload key verification failed: %s", exc, exc_info=True)
            return False
        return True

@@ -342,6 +454,7 @@ class MatrixAdapter(BasePlatformAdapter):
            logger.error(
                "Matrix: cannot verify device keys on server: %s — refusing E2EE",
                exc,
+                exc_info=True,
            )
            return False

@@ -356,7 +469,7 @@ class MatrixAdapter(BasePlatformAdapter):
            try:
                await olm.share_keys()
            except Exception as exc:
-                logger.error("Matrix: failed to re-upload device keys: %s", exc)
+                logger.error("Matrix: failed to re-upload device keys: %s", exc, exc_info=True)
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)

@@ -396,6 +509,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    "Try generating a new access token to get a fresh device.",
                    client.device_id,
                    exc,
+                    exc_info=True,
                )
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)
@@ -420,9 +534,11 @@ class MatrixAdapter(BasePlatformAdapter):
        _STORE_DIR.mkdir(parents=True, exist_ok=True)

        # Create the HTTP API layer.
+        client_session = _create_matrix_session(self._proxy_url)
        api = HTTPAPI(
            base_url=self._homeserver,
            token=self._access_token or "",
+            client_session=client_session,
        )

        # Create the client.
@@ -465,6 +581,7 @@ class MatrixAdapter(BasePlatformAdapter):
                logger.error(
                    "Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER: %s",
                    exc,
+                    exc_info=True,
                )
                await api.session.close()
                return False
@@ -607,6 +724,44 @@ class MatrixAdapter(BasePlatformAdapter):
                        logger.warning(
                            "Matrix: recovery key verification failed: %s", exc
                        )
+                else:
+                    # No recovery key — bootstrap cross-signing if the bot
+                    # has none yet. Without this, Element shows "Encrypted
+                    # by a device not verified by its owner" on every
+                    # message from this bot, indefinitely. mautrix's
+                    # generate_recovery_key does the full flow: generates
+                    # MSK/SSK/USK, uploads private keys to SSSS, publishes
+                    # public keys to the homeserver, and signs the current
+                    # device with the new SSK. Some homeservers require UIA
+                    # for /keys/device_signing/upload — those will need an
+                    # alternate path; Continuwuity and Synapse-with-shared-
+                    # secret accept the unauthenticated upload.
+                    try:
+                        own_xsign = await olm.get_own_cross_signing_public_keys()
+                    except Exception as exc:
+                        own_xsign = None
+                        logger.warning(
+                            "Matrix: cross-signing key lookup failed: %s", exc
+                        )
+                    if own_xsign is None:
+                        try:
+                            new_recovery_key = await olm.generate_recovery_key()
+                            logger.warning(
+                                "Matrix: bootstrapped cross-signing for %s. "
+                                "SAVE THIS RECOVERY KEY — set "
+                                "MATRIX_RECOVERY_KEY for future restarts so "
+                                "the bot can re-sign its device after key "
+                                "rotation: %s",
+                                client.mxid,
+                                new_recovery_key,
+                            )
+                        except Exception as exc:
+                            logger.warning(
+                                "Matrix: cross-signing bootstrap failed "
+                                "(non-fatal — Element will show 'not "
+                                "verified by its owner'): %s",
+                                exc,
+                            )

                client.crypto = olm
                logger.info(
@@ -664,6 +819,7 @@ class MatrixAdapter(BasePlatformAdapter):
                        await asyncio.gather(*tasks)
                except Exception as exc:
                    logger.warning("Matrix: initial sync event dispatch error: %s", exc)
+                await self._join_pending_invites(sync_data)
            else:
                logger.warning(
                    "Matrix: initial sync returned unexpected type %s",
@@ -727,17 +883,8 @@ class MatrixAdapter(BasePlatformAdapter):
        chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)

        last_event_id = None
-        for chunk in chunks:
-            msg_content: Dict[str, Any] = {
-                "msgtype": "m.text",
-                "body": chunk,
-            }
-
-            # Convert markdown to HTML for rich rendering.
-            html = self._markdown_to_html(chunk)
-            if html and html != chunk:
-                msg_content["format"] = "org.matrix.custom.html"
-                msg_content["formatted_body"] = html
+        for i, chunk in enumerate(chunks):
+            msg_content = self._build_text_message_content(chunk)

            # Reply-to support.
            if reply_to:
@@ -844,25 +991,21 @@ class MatrixAdapter(BasePlatformAdapter):
        """Edit an existing message (via m.replace)."""

        formatted = self.format_message(content)
+        new_content = self._build_text_message_content(formatted)
        msg_content: Dict[str, Any] = {
            "msgtype": "m.text",
            "body": f"* {formatted}",
-            "m.new_content": {
-                "msgtype": "m.text",
-                "body": formatted,
-            },
-            "m.relates_to": {
-                "rel_type": "m.replace",
-                "event_id": message_id,
-            },
+            "m.new_content": new_content,
        }
-
-        html = self._markdown_to_html(formatted)
-        if html and html != formatted:
-            msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
-            msg_content["m.new_content"]["formatted_body"] = html
+        if "m.mentions" in new_content:
+            msg_content["m.mentions"] = new_content["m.mentions"]
+        if "formatted_body" in new_content:
            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = f"* {html}"
+            msg_content["formatted_body"] = f'* {new_content["formatted_body"]}'
+        msg_content["m.relates_to"] = {
+            "rel_type": "m.replace",
+            "event_id": message_id,
+        }

        try:
            event_id = await self._client.send_message_event(
@@ -895,10 +1038,12 @@ class MatrixAdapter(BasePlatformAdapter):
            # Try aiohttp first (always available), fall back to httpx
            try:
                import aiohttp as _aiohttp
-
-                async with _aiohttp.ClientSession(trust_env=True) as http:
+                _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url)
+                async with _aiohttp.ClientSession(**_sess_kw) as http:
                    async with http.get(
-                        image_url, timeout=_aiohttp.ClientTimeout(total=30)
+                        image_url,
+                        timeout=_aiohttp.ClientTimeout(total=30),
+                        **_req_kw,
                    ) as resp:
                        resp.raise_for_status()
                        data = await resp.read()
@@ -908,8 +1053,10 @@ class MatrixAdapter(BasePlatformAdapter):
                        )
            except ImportError:
                import httpx
-
-                async with httpx.AsyncClient() as http:
+                _httpx_kw: dict = {}
+                if self._proxy_url:
+                    _httpx_kw["proxy"] = self._proxy_url
+                async with httpx.AsyncClient(**_httpx_kw) as http:
                    resp = await http.get(image_url, follow_redirects=True, timeout=30)
                    resp.raise_for_status()
                    data = resp.content
@@ -984,6 +1131,56 @@ class MatrixAdapter(BasePlatformAdapter):
            chat_id, video_path, "m.video", caption, reply_to, metadata=metadata
        )

+    async def send_exec_approval(
+        self,
+        chat_id: str,
+        command: str,
+        session_key: str,
+        description: str = "dangerous command",
+        metadata: Optional[dict] = None,
+    ) -> SendResult:
+        """Send a reaction-based exec approval prompt for Matrix."""
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+
+        cmd_preview = command[:2000] + "..." if len(command) > 2000 else command
+        text = (
+            "⚠️ **Dangerous command requires approval**\n"
+            f"```\n{cmd_preview}\n```\n"
+            f"Reason: {description}\n\n"
+            "Reply `/approve` to execute, `/approve session` to approve this pattern for the session, "
+            "`/approve always` to approve permanently, or `/deny` to cancel.\n\n"
+            "You can also click the reaction to approve:\n"
+            "✅ = /approve\n"
+            "❎ = /deny"
+        )
+
+        result = await self.send(chat_id, text, metadata=metadata)
+        if not result.success or not result.message_id:
+            return result
+
+        prompt = _MatrixApprovalPrompt(
+            session_key=session_key,
+            chat_id=chat_id,
+            message_id=result.message_id,
+        )
+        old_event = self._approval_prompt_by_session.get(session_key)
+        if old_event:
+            self._approval_prompts_by_event.pop(old_event, None)
+        self._approval_prompts_by_event[result.message_id] = prompt
+        self._approval_prompt_by_session[session_key] = result.message_id
+
+        for emoji in ("✅", "❎"):
+            try:
+                reaction_result = await self._send_reaction(chat_id, result.message_id, emoji)
+                # Save the bot's reaction event_id for later cleanup
+                if reaction_result:
+                    prompt.bot_reaction_events[emoji] = str(reaction_result)
+            except Exception as exc:
+                logger.debug("Matrix: failed to add approval reaction %s: %s", emoji, exc)
+
+        return result
+
    def format_message(self, content: str) -> str:
        """Pass-through — Matrix supports standard Markdown natively."""
        # Strip image markdown; media is uploaded separately.
@@ -1115,9 +1312,15 @@ class MatrixAdapter(BasePlatformAdapter):
        next_batch = await client.sync_store.get_next_batch()
        while not self._closing:
            try:
-                sync_data = await client.sync(
-                    since=next_batch,
-                    timeout=30000,
+                # Wrap in asyncio.wait_for to guard against TCP-level hangs
+                # that the Matrix long-poll timeout cannot catch. Long-poll
+                # is 30s, so 45s gives 15s slack for network drain.
+                sync_data = await asyncio.wait_for(
+                    client.sync(
+                        since=next_batch,
+                        timeout=30000,
+                    ),
+                    timeout=45.0,
                )

                # nio returns SyncError objects (not exceptions) for auth
@@ -1153,6 +1356,7 @@ class MatrixAdapter(BasePlatformAdapter):
                            await asyncio.gather(*tasks)
                    except Exception as exc:
                        logger.warning("Matrix: sync event dispatch error: %s", exc)
+                    await self._join_pending_invites(sync_data)

            except asyncio.CancelledError:
                return
@@ -1239,6 +1443,15 @@ class MatrixAdapter(BasePlatformAdapter):
        room_id = str(getattr(event, "room_id", ""))
        sender = str(getattr(event, "sender", ""))

+        # Diagnostic: confirm the callback is firing at all when DEBUG is on.
+        # Helps users troubleshoot silent inbound issues like #5819, #7914, #12614.
+        logger.debug(
+            "Matrix: callback fired — event %s from %s in %s",
+            getattr(event, "event_id", "?"),
+            sender,
+            room_id,
+        )
+
        # Ignore own messages (case-insensitive; also drops when our own
        # user_id hasn't been resolved yet — see _is_self_sender docstring
        # and issue #15763).
@@ -1350,6 +1563,12 @@ class MatrixAdapter(BasePlatformAdapter):
            in_bot_thread = bool(thread_id and thread_id in self._threads)
            if self._require_mention and not is_free_room and not in_bot_thread:
                if not is_mentioned:
+                    logger.debug(
+                        "Matrix: ignoring message %s in %s — no @mention "
+                        "(set MATRIX_REQUIRE_MENTION=false to disable)",
+                        event_id,
+                        room_id,
+                    )
                    return None

        # DM mention-thread.
@@ -1362,7 +1581,7 @@ class MatrixAdapter(BasePlatformAdapter):
            body = self._strip_mention(body)

        # Auto-thread.
-        if not is_dm and not thread_id and self._auto_thread:
+        if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)):
            thread_id = event_id
            self._threads.mark(thread_id)

@@ -1604,6 +1823,9 @@ class MatrixAdapter(BasePlatformAdapter):
            return
        body, is_dm, chat_type, thread_id, display_name, source = ctx

+        if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
+            body = ""
+
        allow_http_fallback = bool(http_url) and not is_encrypted_media
        media_urls = (
            [cached_path]
@@ -1633,13 +1855,35 @@ class MatrixAdapter(BasePlatformAdapter):
            "Matrix: invited to %s — joining",
            room_id,
        )
+        await self._join_room_by_id(room_id)
+
+    async def _join_room_by_id(self, room_id: str) -> bool:
+        """Join a room by ID and refresh local caches on success."""
+        if not room_id:
+            return False
+        if room_id in self._joined_rooms:
+            return True
        try:
            await self._client.join_room(RoomID(room_id))
            self._joined_rooms.add(room_id)
            logger.info("Matrix: joined %s", room_id)
            await self._refresh_dm_cache()
+            return True
        except Exception as exc:
            logger.warning("Matrix: error joining %s: %s", room_id, exc)
+            return False
+
+    async def _join_pending_invites(self, sync_data: Dict[str, Any]) -> None:
+        """Join rooms still present in rooms.invite after sync processing."""
+        rooms = sync_data.get("rooms", {}) if isinstance(sync_data, dict) else {}
+        invites = rooms.get("invite", {})
+        if not isinstance(invites, dict):
+            return
+        for room_id in invites:
+            if room_id in self._joined_rooms:
+                continue
+            logger.info("Matrix: reconciling pending invite for %s", room_id)
+            await self._join_room_by_id(str(room_id))

    # ------------------------------------------------------------------
    # Reactions (send, receive, processing lifecycle)
@@ -1754,6 +1998,51 @@ class MatrixAdapter(BasePlatformAdapter):
                room_id,
            )

+            # Check if this reaction resolves a pending approval prompt.
+            prompt = self._approval_prompts_by_event.get(reacts_to)
+            if prompt and not prompt.resolved:
+                if room_id != prompt.chat_id:
+                    return
+                if self._allowed_user_ids and sender not in self._allowed_user_ids:
+                    logger.info(
+                        "Matrix: ignoring approval reaction from unauthorized user %s on %s",
+                        sender, reacts_to,
+                    )
+                    return
+                choice = self._approval_reaction_map.get(key)
+                if not choice:
+                    return
+                try:
+                    from tools.approval import resolve_gateway_approval
+
+                    count = resolve_gateway_approval(prompt.session_key, choice)
+                    if count:
+                        prompt.resolved = True
+                        self._approval_prompts_by_event.pop(reacts_to, None)
+                        self._approval_prompt_by_session.pop(prompt.session_key, None)
+                        logger.info(
+                            "Matrix reaction resolved %d approval(s) for session %s "
+                            "(choice=%s, user=%s)",
+                            count, prompt.session_key, choice, sender,
+                        )
+                        # Redact bot's seed reactions, leaving only the user's
+                        await self._redact_bot_approval_reactions(room_id, prompt)
+                except Exception as exc:
+                    logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc)
+
+    async def _redact_bot_approval_reactions(
+        self,
+        room_id: str,
+        prompt: "_MatrixApprovalPrompt",
+    ) -> None:
+        """Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction."""
+        for emoji, evt_id in prompt.bot_reaction_events.items():
+            try:
+                await self.redact_message(room_id, evt_id, "approval resolved")
+                logger.debug("Matrix: redacted bot reaction %s (%s)", emoji, evt_id)
+            except Exception as exc:
+                logger.debug("Matrix: failed to redact bot reaction %s: %s", emoji, exc)
+
    # ------------------------------------------------------------------
    # Text message aggregation (handles Matrix client-side splits)
    # ------------------------------------------------------------------
@@ -1979,11 +2268,7 @@ class MatrixAdapter(BasePlatformAdapter):
        if not self._client or not text:
            return SendResult(success=False, error="No client or empty text")

-        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
-        html = self._markdown_to_html(text)
-        if html and html != text:
-            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = html
+        msg_content = self._build_text_message_content(text, msgtype=msgtype)

        try:
            event_id = await self._client.send_message_event(
@@ -2046,6 +2331,77 @@ class MatrixAdapter(BasePlatformAdapter):
    # Mention detection helpers
    # ------------------------------------------------------------------

+    def _build_text_message_content(self, text: str, msgtype: str = "m.text") -> Dict[str, Any]:
+        """Build Matrix text content with HTML and outbound mention metadata."""
+        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
+        mention_user_ids = self._extract_outbound_mentions(text)
+        if mention_user_ids:
+            msg_content["m.mentions"] = {"user_ids": mention_user_ids}
+
+        html_source = self._inject_outbound_mention_links(text)
+        html = self._markdown_to_html(html_source)
+        if html and html != text:
+            msg_content["format"] = "org.matrix.custom.html"
+            msg_content["formatted_body"] = html
+
+        return msg_content
+
+    def _extract_outbound_mentions(self, text: str) -> list[str]:
+        """Return unique Matrix user IDs mentioned in outbound text."""
+        protected, _ = self._protect_outbound_mention_regions(text)
+        seen: Set[str] = set()
+        mentions: list[str] = []
+        for match in _OUTBOUND_MENTION_RE.finditer(protected):
+            user_id = match.group(1)
+            if user_id not in seen:
+                seen.add(user_id)
+                mentions.append(user_id)
+        return mentions
+
+    def _inject_outbound_mention_links(self, text: str) -> str:
+        """Wrap outbound Matrix mentions in markdown links outside code spans."""
+        if not text:
+            return text
+
+        protected, placeholders = self._protect_outbound_mention_regions(text)
+
+        linked = _OUTBOUND_MENTION_RE.sub(
+            lambda match: f"[{match.group(1)}](https://matrix.to/#/{match.group(1)})",
+            protected,
+        )
+
+        for idx, original in enumerate(placeholders):
+            linked = linked.replace(f"\x00MENTION_PROTECTED{idx}\x00", original)
+
+        return linked
+
+    def _protect_outbound_mention_regions(self, text: str) -> tuple[str, list[str]]:
+        """Protect markdown regions where outbound mentions should stay literal."""
+        placeholders: list[str] = []
+
+        def _protect(fragment: str) -> str:
+            idx = len(placeholders)
+            placeholders.append(fragment)
+            return f"\x00MENTION_PROTECTED{idx}\x00"
+
+        protected = re.sub(
+            r"```[\s\S]*?```",
+            lambda match: _protect(match.group(0)),
+            text or "",
+        )
+        protected = re.sub(
+            r"`[^`\n]+`",
+            lambda match: _protect(match.group(0)),
+            protected,
+        )
+        protected = re.sub(
+            r"\[[^\]]+\]\([^)]+\)",
+            lambda match: _protect(match.group(0)),
+            protected,
+        )
+
+        return protected, placeholders
+
    def _is_bot_mentioned(
        self,
        body: str,
@@ -2080,13 +2436,33 @@ class MatrixAdapter(BasePlatformAdapter):
        return False

    def _strip_mention(self, body: str) -> str:
-        """Strip the bot's full MXID (``@user:server``) from *body*.
+        """Remove explicit bot mentions from message body.

-        The bare localpart is intentionally *not* stripped — it would
-        mangle file paths like ``/home/hermes/media/file.png``.
+        Important: only strip explicit mention tokens (``@user:server`` or
+        ``@localpart``). Do NOT strip bare words matching the bot localpart,
+        otherwise normal phrases like "Hermes Agent" become "Agent".
        """
+        if not body:
+            return ""
+
+        # Strip explicit full MXID mentions.
        if self._user_id:
            body = body.replace(self._user_id, "")
+
+        # Strip explicit @localpart mentions only (not bare localpart words).
+        if self._user_id and ":" in self._user_id:
+            localpart = self._user_id.split(":")[0].lstrip("@")
+            if localpart:
+                body = re.sub(
+                    r'(?<![\w])@' + re.escape(localpart) + r'\b',
+                    '',
+                    body,
+                    flags=re.IGNORECASE,
+                )
+
+        # Normalize spacing after mention removal.
+        body = re.sub(r'[ \t]{2,}', ' ', body)
+        body = re.sub(r'\s+([,.;:!?])', r'\1', body)
        return body.strip()

    async def _get_display_name(self, room_id: str, user_id: str) -> str:
@@ -19,7 +19,7 @@ import logging
 import os
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.helpers import MessageDeduplicator
@@ -412,7 +412,6 @@ class MattermostAdapter(BasePlatformAdapter):

        import aiohttp

-        last_exc = None
        file_data = None
        ct = "application/octet-stream"
        fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
@@ -497,6 +496,100 @@ class MattermostAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Failed to post with file")
        return SendResult(success=True, message_id=data["id"])

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images as a single Mattermost post with multiple attachments.
+
+        Mattermost supports up to 5 ``file_ids`` per post. Each image is
+        uploaded individually (Mattermost's file API is one-at-a-time),
+        then a single post is created referencing all uploaded file_ids
+        at once. Batches larger than 5 are chunked. Falls back to the
+        base per-image loop on total failure.
+        """
+        if not images:
+            return
+
+        import mimetypes
+        import aiohttp
+        from urllib.parse import unquote as _unquote
+
+        CHUNK = 5  # Mattermost post file_ids cap
+        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
+
+        for chunk_idx, chunk in enumerate(chunks):
+            if human_delay > 0 and chunk_idx > 0:
+                await asyncio.sleep(human_delay)
+
+            file_ids: List[str] = []
+            caption_parts: List[str] = []
+            try:
+                for image_url, alt_text in chunk:
+                    if alt_text:
+                        caption_parts.append(alt_text)
+
+                    if image_url.startswith("file://"):
+                        local_path = _unquote(image_url[7:])
+                        p = Path(local_path)
+                        if not p.exists():
+                            logger.warning("Mattermost: skipping missing image %s", local_path)
+                            continue
+                        fname = p.name
+                        ct = mimetypes.guess_type(fname)[0] or "image/png"
+                        file_data = p.read_bytes()
+                    else:
+                        from tools.url_safety import is_safe_url
+                        if not is_safe_url(image_url):
+                            logger.warning("Mattermost: blocked unsafe image URL in batch")
+                            continue
+                        try:
+                            async with self._session.get(
+                                image_url, timeout=aiohttp.ClientTimeout(total=30)
+                            ) as resp:
+                                if resp.status >= 400:
+                                    logger.warning(
+                                        "Mattermost: failed to download image (HTTP %d): %s",
+                                        resp.status, image_url[:80],
+                                    )
+                                    continue
+                                file_data = await resp.read()
+                                ct = resp.content_type or "image/png"
+                        except Exception as dl_err:
+                            logger.warning("Mattermost: download failed for %s: %s", image_url[:80], dl_err)
+                            continue
+                        fname = image_url.rsplit("/", 1)[-1].split("?")[0] or f"image_{len(file_ids)}.png"
+
+                    fid = await self._upload_file(chat_id, file_data, fname, ct)
+                    if fid:
+                        file_ids.append(fid)
+
+                if not file_ids:
+                    continue
+
+                payload: Dict[str, Any] = {
+                    "channel_id": chat_id,
+                    "message": "\n".join(caption_parts),
+                    "file_ids": file_ids,
+                }
+                logger.info(
+                    "Mattermost: sending %d image(s) as single post (chunk %d/%d)",
+                    len(file_ids), chunk_idx + 1, len(chunks),
+                )
+                data = await self._api_post("posts", payload)
+                if not data or "id" not in data:
+                    logger.warning("Mattermost: multi-image post failed, falling back")
+                    await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
+            except Exception as e:
+                logger.warning(
+                    "Mattermost: multi-image send failed (chunk %d/%d), falling back: %s",
+                    chunk_idx + 1, len(chunks), e, exc_info=True,
+                )
+                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
+
    # ------------------------------------------------------------------
    # WebSocket
    # ------------------------------------------------------------------
@@ -976,6 +976,18 @@ class QQAdapter(BasePlatformAdapter):
        if not channel_id:
            return

+        # Apply group_policy ACL — guild channels are group-like contexts.
+        # Without this check any member of any guild the bot is in could
+        # bypass the configured allowlist.
+        guild_id = str(d.get("guild_id", ""))
+        author_id = str(author.get("id", ""))
+        if not self._is_group_allowed(guild_id or channel_id, author_id):
+            logger.debug(
+                "[%s] Guild message blocked by ACL: channel=%s user=%s",
+                self._log_tag, channel_id, author_id,
+            )
+            return
+
        member = d.get("member") if isinstance(d.get("member"), dict) else {}
        nick = str(member.get("nick", "")) or str(author.get("username", ""))

@@ -1032,6 +1044,17 @@ class QQAdapter(BasePlatformAdapter):
        if not guild_id:
            return

+        # Apply dm_policy ACL — guild DMs were previously unauthenticated.
+        # Without this check any member of any guild the bot is in could
+        # bypass the configured allowlist via direct messages.
+        author_id = str(author.get("id", ""))
+        if not self._is_dm_allowed(author_id):
+            logger.debug(
+                "[%s] Guild DM blocked by ACL: guild=%s user=%s",
+                self._log_tag, guild_id, author_id,
+            )
+            return
+
        text = content
        att_result = await self._process_attachments(d.get("attachments"))
        image_urls = att_result["image_urls"]
@@ -1957,7 +1980,7 @@ class QQAdapter(BasePlatformAdapter):
            self, openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a C2C user via REST API."""
-        msg_seq = self._next_msg_seq(reply_to or openid)
+        self._next_msg_seq(reply_to or openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -1970,7 +1993,7 @@ class QQAdapter(BasePlatformAdapter):
            self, group_openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a group via REST API."""
-        msg_seq = self._next_msg_seq(reply_to or group_openid)
+        self._next_msg_seq(reply_to or group_openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -2135,11 +2158,6 @@ class QQAdapter(BasePlatformAdapter):

            # Route
            chat_type = self._guess_chat_type(chat_id)
-            target_path = (
-                f"/v2/users/{chat_id}/files"
-                if chat_type == "c2c"
-                else f"/v2/groups/{chat_id}/files"
-            )

            if chat_type == "guild":
                # Guild channels don't support native media upload in the same way
@@ -21,7 +21,7 @@ import time
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Dict, List, Optional, Any
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import quote, unquote

 import httpx
@@ -31,6 +31,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
@@ -38,6 +39,17 @@ from gateway.platforms.base import (
    cache_image_from_url,
 )
 from gateway.platforms.helpers import redact_phone
+from gateway.platforms.signal_rate_limit import (
+    SIGNAL_BATCH_PACING_NOTICE_THRESHOLD,
+    SIGNAL_MAX_ATTACHMENTS_PER_MSG,
+    SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+    SignalRateLimitError,
+    _extract_retry_after_seconds,
+    _format_wait,
+    _is_signal_rate_limit_error,
+    _signal_send_timeout,
+    get_scheduler,
+)

 logger = logging.getLogger(__name__)

@@ -52,6 +64,7 @@ SSE_RETRY_DELAY_MAX = 60.0
 HEALTH_CHECK_INTERVAL = 30.0  # seconds between health checks
 HEALTH_CHECK_STALE_THRESHOLD = 120.0  # seconds without SSE activity before concern

+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -162,6 +175,10 @@ class SignalAdapter(BasePlatformAdapter):
    """Signal messenger adapter using signal-cli HTTP daemon."""

    platform = Platform.SIGNAL
+    # Signal has no real edit API for already-sent messages. Mark it explicitly
+    # so streaming suppresses the visible cursor instead of leaving a stale tofu
+    # square behind in chat clients when edit attempts fail.
+    SUPPORTS_MESSAGE_EDITING = False

    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.SIGNAL)
@@ -488,6 +505,11 @@ class SignalAdapter(BasePlatformAdapter):
        if text and mentions:
            text = _render_mentions(text, mentions)

+        # Extract quote (reply-to) context from Signal dataMessage
+        quote_data = data_message.get("quote") or {}
+        reply_to_id = str(quote_data.get("id")) if quote_data.get("id") else None
+        reply_to_text = quote_data.get("text")
+
        # Process attachments
        attachments_data = data_message.get("attachments", [])
        media_urls = []
@@ -541,7 +563,9 @@ class SignalAdapter(BasePlatformAdapter):
        else:
            timestamp = datetime.now(tz=timezone.utc)

-        # Build and dispatch event
+        # Build and dispatch event.
+        # Store raw envelope data in raw_message so on_processing_start/complete
+        # can extract targetAuthor + targetTimestamp for sendReaction.
        event = MessageEvent(
            source=source,
            text=text or "",
@@ -549,6 +573,9 @@ class SignalAdapter(BasePlatformAdapter):
            media_urls=media_urls,
            media_types=media_types,
            timestamp=timestamp,
+            raw_message={"sender": sender, "timestamp_ms": ts_ms},
+            reply_to_message_id=reply_to_id,
+            reply_to_text=reply_to_text,
        )

        logger.debug("Signal: message from %s in %s: %s",
@@ -659,6 +686,8 @@ class SignalAdapter(BasePlatformAdapter):
        rpc_id: str = None,
        *,
        log_failures: bool = True,
+        raise_on_rate_limit: bool = False,
+        timeout: float = 30.0,
    ) -> Any:
        """Send a JSON-RPC 2.0 request to signal-cli daemon.

@@ -667,6 +696,11 @@ class SignalAdapter(BasePlatformAdapter):
        repeated NETWORK_FAILURE spam for unreachable recipients while
        still preserving visibility for the first occurrence and for
        unrelated RPCs.
+
+        When ``raise_on_rate_limit=True``, a Signal ``[429]`` /
+        ``RateLimitException`` response raises ``SignalRateLimitError``
+        instead of being swallowed — lets callers (multi-attachment send)
+        opt into backoff-retry without changing default behaviour.
        """
        if not self.client:
            logger.warning("Signal: RPC called but client not connected")
@@ -686,20 +720,28 @@ class SignalAdapter(BasePlatformAdapter):
            resp = await self.client.post(
                f"{self.http_url}/api/v1/rpc",
                json=payload,
-                timeout=30.0,
+                timeout=timeout,
            )
            resp.raise_for_status()
            data = resp.json()

            if "error" in data:
+                err = data["error"]
+                if raise_on_rate_limit:
+                    if _is_signal_rate_limit_error(err):
+                        err_msg = str(err.get("message", "")) if isinstance(err, dict) else str(err)
+                        retry_after = _extract_retry_after_seconds(err)
+                        raise SignalRateLimitError(err_msg, retry_after=retry_after)
                if log_failures:
-                    logger.warning("Signal RPC error (%s): %s", method, data["error"])
+                    logger.warning("Signal RPC error (%s): %s", method, err)
                else:
-                    logger.debug("Signal RPC error (%s): %s", method, data["error"])
+                    logger.debug("Signal RPC error (%s): %s", method, err)
                return None

            return data.get("result")

+        except SignalRateLimitError:
+            raise
        except Exception as e:
            if log_failures:
                logger.warning("Signal RPC %s failed: %s", method, e)
@@ -707,6 +749,159 @@ class SignalAdapter(BasePlatformAdapter):
                logger.debug("Signal RPC %s failed: %s", method, e)
            return None

+    # ------------------------------------------------------------------
+    # Formatting — markdown → Signal body ranges
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _markdown_to_signal(text: str) -> tuple:
+        """Convert markdown to plain text + Signal textStyles list.
+
+        Signal doesn't render markdown.  Instead it uses ``bodyRanges``
+        (exposed by signal-cli as ``textStyle`` / ``textStyles`` params)
+        with the format ``start:length:STYLE``.
+
+        Positions are measured in **UTF-16 code units** (not Python code
+        points) because that's what the Signal protocol uses.
+
+        Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.
+        (Signal's SPOILER style is not currently mapped — no standard
+        markdown syntax for it; would need ``||spoiler||`` parsing.)
+
+        Returns ``(plain_text, styles_list)`` where *styles_list* may be
+        empty if there's nothing to format.
+        """
+        import re
+
+        def _utf16_len(s: str) -> int:
+            """Length of *s* in UTF-16 code units."""
+            return len(s.encode("utf-16-le")) // 2
+
+        # Pre-process: normalize whitespace before any position tracking
+        # so later operations don't invalidate recorded offsets.
+        text = re.sub(r"\n{3,}", "\n\n", text)
+        text = text.strip()
+
+        styles: list = []
+
+        # --- Phase 1: fenced code blocks  ```...``` → MONOSPACE ---
+        _CB = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL)
+        while m := _CB.search(text):
+            inner = m.group(1).rstrip("\n")
+            start = m.start()
+            text = text[: m.start()] + inner + text[m.end() :]
+            styles.append((start, len(inner), "MONOSPACE"))
+
+        # --- Phase 2: heading markers  # Foo → Foo (BOLD) ---
+        _HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
+        new_text = ""
+        last_end = 0
+        for m in _HEADING.finditer(text):
+            new_text += text[last_end : m.start()]
+            last_end = m.end()
+            eol = text.find("\n", m.end())
+            if eol == -1:
+                eol = len(text)
+            heading_text = text[m.end() : eol]
+            start = len(new_text)
+            new_text += heading_text
+            styles.append((start, len(heading_text), "BOLD"))
+            last_end = eol
+        new_text += text[last_end:]
+        text = new_text
+
+        # --- Phase 3: inline patterns (single-pass to avoid offset drift) ---
+        # The old code processed each pattern sequentially, stripping markers
+        # and recording positions per-pass.  Later passes shifted text without
+        # adjusting earlier positions → bold/italic landed mid-word.
+        #
+        # Fix: collect ALL non-overlapping matches first, then strip every
+        # marker in one pass so positions are computed against the final text.
+        _PATTERNS = [
+            (re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
+            (re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
+            (re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
+            (re.compile(r"`(.+?)`"), "MONOSPACE"),
+            (re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
+            (re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
+        ]
+
+        # Collect all non-overlapping matches (earlier patterns win ties).
+        all_matches: list = []  # (start, end, g1_start, g1_end, style)
+        occupied: list = []     # (start, end) intervals already claimed
+        for pat, style in _PATTERNS:
+            for m in pat.finditer(text):
+                ms, me = m.start(), m.end()
+                if not any(ms < oe and me > os for os, oe in occupied):
+                    all_matches.append((ms, me, m.start(1), m.end(1), style))
+                    occupied.append((ms, me))
+        all_matches.sort()
+
+        # Build removal list so we can adjust Phase 1/2 styles.
+        # Each match removes its prefix markers (start..g1_start) and
+        # suffix markers (g1_end..end).
+        removals: list = []  # (position, length) sorted
+        for ms, me, g1s, g1e, _ in all_matches:
+            if g1s > ms:
+                removals.append((ms, g1s - ms))
+            if me > g1e:
+                removals.append((g1e, me - g1e))
+        removals.sort()
+
+        # Adjust Phase 1/2 styles for characters about to be removed.
+        def _adj(pos: int) -> int:
+            shift = 0
+            for rp, rl in removals:
+                if rp < pos:
+                    shift += min(rl, pos - rp)
+                else:
+                    break
+            return pos - shift
+
+        adjusted_prior: list = []
+        for s, l, st in styles:
+            ns = _adj(s)
+            ne = _adj(s + l)
+            if ne > ns:
+                adjusted_prior.append((ns, ne - ns, st))
+
+        # Strip all inline markers in one pass → positions are correct.
+        result = ""
+        last_end = 0
+        inline_styles: list = []
+        for ms, me, g1s, g1e, sty in all_matches:
+            result += text[last_end:ms]
+            pos = len(result)
+            inner = text[g1s:g1e]
+            result += inner
+            inline_styles.append((pos, len(inner), sty))
+            last_end = me
+        result += text[last_end:]
+        text = result
+
+        styles = adjusted_prior + inline_styles
+
+        # Convert code-point offsets → UTF-16 code-unit offsets
+        style_strings = []
+        for cp_start, cp_len, stype in sorted(styles):
+            # Safety: skip any out-of-bounds styles
+            if cp_start < 0 or cp_start + cp_len > len(text):
+                continue
+            u16_start = _utf16_len(text[:cp_start])
+            u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
+            style_strings.append(f"{u16_start}:{u16_len}:{stype}")
+
+        return text, style_strings
+
+    def format_message(self, content: str) -> str:
+        """Strip markdown for plain-text fallback (used by base class).
+
+        The actual rich formatting happens in send() via _markdown_to_signal().
+        """
+        # This is only called if someone uses the base-class send path.
+        # Our send() override bypasses this entirely.
+        return content
+
    # ------------------------------------------------------------------
    # Sending
    # ------------------------------------------------------------------
@@ -718,14 +913,22 @@ class SignalAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        """Send a text message."""
+        """Send a text message with native Signal formatting."""
        await self._stop_typing_indicator(chat_id)

+        plain_text, text_styles = self._markdown_to_signal(content)
+
        params: Dict[str, Any] = {
            "account": self.account,
-            "message": content,
+            "message": plain_text,
        }

+        if text_styles:
+            if len(text_styles) == 1:
+                params["textStyle"] = text_styles[0]
+            else:
+                params["textStyles"] = text_styles
+
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
@@ -735,11 +938,10 @@ class SignalAdapter(BasePlatformAdapter):

        if result is not None:
            self._track_sent_timestamp(result)
-            # Use the timestamp from the RPC result as a pseudo message_id.
-            # Signal doesn't have real message IDs, but the stream consumer
-            # needs a truthy value to follow its edit→fallback path correctly.
-            _msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
-            return SendResult(success=True, message_id=_msg_id or None)
+            # Signal has no editable message identifier. Returning None keeps the
+            # stream consumer on the non-edit fallback path instead of pretending
+            # future edits can remove an in-progress cursor from the chat thread.
+            return SendResult(success=True, message_id=None)
        return SendResult(success=False, error="RPC send failed")

    def _track_sent_timestamp(self, rpc_result) -> None:
@@ -803,6 +1005,178 @@ class SignalAdapter(BasePlatformAdapter):
            self._typing_failures.pop(chat_id, None)
            self._typing_skip_until.pop(chat_id, None)

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images via chunked Signal RPC calls.
+
+        Per-image alt texts are dropped — Signal's send RPC only carries
+        one shared message body. Bad images (download failure, missing
+        file, oversize) are skipped with a warning so one bad URL
+        doesn't lose the rest of the batch. ``human_delay`` is ignored:
+        the rate-limit scheduler handles inter-batch pacing.
+        """
+        if not images:
+            return
+
+        scheduler = get_scheduler()
+        logger.info(
+            "Signal send_multiple_images: received %d image(s) for %s — "
+            "scheduler state: %s",
+            len(images), chat_id[:30], scheduler.state(),
+        )
+
+        await self._stop_typing_indicator(chat_id)
+
+        attachments: List[str] = []
+        skipped_download = 0
+        skipped_missing = 0
+        skipped_oversize = 0
+        for image_url, _alt_text in images:
+            if image_url.startswith("file://"):
+                file_path = unquote(image_url[7:])
+            else:
+                try:
+                    file_path = await cache_image_from_url(image_url)
+                except Exception as e:
+                    logger.warning("Signal: failed to download image %s: %s", image_url, e)
+                    skipped_download += 1
+                    continue
+
+            if not file_path or not Path(file_path).exists():
+                logger.warning("Signal: image file not found for %s", image_url)
+                skipped_missing += 1
+                continue
+
+            file_size = Path(file_path).stat().st_size
+            if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
+                logger.warning(
+                    "Signal: image too large (%d bytes), skipping %s", file_size, image_url
+                )
+                skipped_oversize += 1
+                continue
+
+            attachments.append(file_path)
+
+        if not attachments:
+            logger.error(
+                "Signal: no valid images in batch of %d "
+                "(download=%d missing=%d oversize=%d)",
+                len(images), skipped_download, skipped_missing, skipped_oversize,
+            )
+            return
+
+        logger.info(
+            "Signal send_multiple_images: %d/%d images valid, sending in chunks",
+            len(attachments), len(images),
+        )
+
+        base_params: Dict[str, Any] = {
+            "account": self.account,
+            "message": "",
+        }
+        if chat_id.startswith("group:"):
+            base_params["groupId"] = chat_id[6:]
+        else:
+            base_params["recipient"] = [await self._resolve_recipient(chat_id)]
+
+        att_batches = [
+            attachments[i:i + SIGNAL_MAX_ATTACHMENTS_PER_MSG]
+            for i in range(0, len(attachments), SIGNAL_MAX_ATTACHMENTS_PER_MSG)
+        ]
+
+        for idx, att_batch in enumerate(att_batches):
+            n = len(att_batch)
+            estimated = scheduler.estimate_wait(n)
+            logger.debug(
+                "Signal batch %d/%d: %d attachments, estimated wait=%.1fs",
+                idx + 1, len(att_batches), n, estimated,
+            )
+            if estimated >= SIGNAL_BATCH_PACING_NOTICE_THRESHOLD:
+                await self._notify_batch_pacing(
+                    chat_id, idx + 1, len(att_batches), estimated
+                )
+
+            params = dict(base_params, attachments=att_batch)
+            send_timeout = _signal_send_timeout(n)
+
+            for attempt in range(1, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS + 1):
+                await scheduler.acquire(n)
+                try:
+                    _rpc_t0 = time.monotonic()
+                    result = await self._rpc(
+                        "send", params, raise_on_rate_limit=True, timeout=send_timeout,
+                    )
+                    _rpc_duration = time.monotonic() - _rpc_t0
+                    if result is not None:
+                        self._track_sent_timestamp(result)
+                        await scheduler.report_rpc_duration(_rpc_duration, n)
+                        logger.info(
+                            "Signal batch %d/%d: %d attachments sent in %.1fs "
+                            "(attempt %d/%d)",
+                            idx + 1, len(att_batches), n, _rpc_duration,
+                            attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                        )
+                    else:
+                        # Assume the server didn't accept the batch, don't deduce tokens
+                        logger.error(
+                            "Signal: RPC send failed for batch %d/%d (%d attachments, "
+                            "attempt %d/%d, rpc_duration=%.1fs)",
+                            idx + 1, len(att_batches), n,
+                            attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                            _rpc_duration,
+                        )
+                        # Retry transient (non-rate-limit) failures once
+                        if attempt < SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
+                            backoff = 2.0 ** attempt
+                            logger.info(
+                                "Signal: retrying batch %d/%d after %.1fs backoff",
+                                idx + 1, len(att_batches), backoff,
+                            )
+                            await asyncio.sleep(backoff)
+                            continue
+                    break
+                except SignalRateLimitError as e:
+                    scheduler.feedback(e.retry_after, n)
+                    if attempt >= SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
+                        logger.error(
+                            "Signal: rate-limit retries exhausted on batch %d/%d "
+                            "(%d attachments lost, server retry_after=%s)",
+                            idx + 1, len(att_batches), n,
+                            f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
+                        )
+                        break
+                    logger.warning(
+                        "Signal: rate-limited on batch %d/%d "
+                        "(attempt %d/%d, server retry_after=%s); "
+                        "scheduler will pace the retry",
+                        idx + 1, len(att_batches),
+                        attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                        f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
+                    )
+
+    async def _notify_batch_pacing(
+        self,
+        chat_id: str,
+        next_batch_idx: int,
+        total_batches: int,
+        wait_s: float,
+    ) -> None:
+        """Inform the user when an inter-batch pacing wait crosses the
+        notice threshold. Best-effort; logs and continues on failure."""
+        try:
+            await self.send(
+                chat_id,
+                f"(More images coming — pausing ~{_format_wait(wait_s)} "
+                f"for Signal rate limit, batch {next_batch_idx}/{total_batches}.)",
+            )
+        except Exception as e:
+            logger.warning("Signal: failed to send pacing notice: %s", e)
+
    async def send_image(
        self,
        chat_id: str,
@@ -963,6 +1337,110 @@ class SignalAdapter(BasePlatformAdapter):
        _keep_typing finally block to clean up platform-level typing tasks."""
        await self._stop_typing_indicator(chat_id)

+    # ------------------------------------------------------------------
+    # Reactions
+    # ------------------------------------------------------------------
+
+    async def send_reaction(
+        self,
+        chat_id: str,
+        emoji: str,
+        target_author: str,
+        target_timestamp: int,
+    ) -> bool:
+        """Send a reaction emoji to a specific message via signal-cli RPC.
+
+        Args:
+            chat_id: The chat (phone number or "group:<id>")
+            emoji: Reaction emoji string (e.g. "👀", "✅")
+            target_author: Phone number / UUID of the message author
+            target_timestamp: Signal timestamp (ms) of the message to react to
+        """
+        params: Dict[str, Any] = {
+            "account": self.account,
+            "emoji": emoji,
+            "targetAuthor": target_author,
+            "targetTimestamp": target_timestamp,
+        }
+
+        if chat_id.startswith("group:"):
+            params["groupId"] = chat_id[6:]
+        else:
+            params["recipient"] = [chat_id]
+
+        result = await self._rpc("sendReaction", params)
+        if result is not None:
+            return True
+        logger.debug("Signal: sendReaction failed (chat=%s, emoji=%s)", chat_id[:20], emoji)
+        return False
+
+    async def remove_reaction(
+        self,
+        chat_id: str,
+        target_author: str,
+        target_timestamp: int,
+    ) -> bool:
+        """Remove a reaction by sending an empty-string emoji."""
+        params: Dict[str, Any] = {
+            "account": self.account,
+            "emoji": "",
+            "targetAuthor": target_author,
+            "targetTimestamp": target_timestamp,
+            "remove": True,
+        }
+
+        if chat_id.startswith("group:"):
+            params["groupId"] = chat_id[6:]
+        else:
+            params["recipient"] = [chat_id]
+
+        result = await self._rpc("sendReaction", params)
+        return result is not None
+
+    # ------------------------------------------------------------------
+    # Processing Lifecycle Hooks (reactions as progress indicators)
+    # ------------------------------------------------------------------
+
+    def _extract_reaction_target(self, event: MessageEvent) -> Optional[tuple]:
+        """Extract (target_author, target_timestamp) from a MessageEvent.
+
+        Returns None if the event doesn't carry the raw Signal envelope data
+        needed for sendReaction.
+        """
+        raw = event.raw_message
+        if not isinstance(raw, dict):
+            return None
+        author = raw.get("sender")
+        ts = raw.get("timestamp_ms")
+        if not author or not ts:
+            return None
+        return (author, ts)
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """React with 👀 when processing begins."""
+        target = self._extract_reaction_target(event)
+        if target:
+            await self.send_reaction(event.source.chat_id, "👀", *target)
+
+    async def on_processing_complete(self, event: MessageEvent, outcome: "ProcessingOutcome") -> None:
+        """Swap the 👀 reaction for ✅ (success) or ❌ (failure).
+
+        On CANCELLED we leave the 👀 in place — no terminal outcome means
+        the reaction should keep reflecting "in progress" (matches Telegram).
+        """
+        if outcome == ProcessingOutcome.CANCELLED:
+            return
+        target = self._extract_reaction_target(event)
+        if not target:
+            return
+        chat_id = event.source.chat_id
+        # Remove the in-progress reaction, then add the final one
+        await self.remove_reaction(chat_id, *target)
+        if outcome == ProcessingOutcome.SUCCESS:
+            await self.send_reaction(chat_id, "✅", *target)
+        elif outcome == ProcessingOutcome.FAILURE:
+            await self.send_reaction(chat_id, "❌", *target)
+
    # ------------------------------------------------------------------
    # Chat Info
    # ------------------------------------------------------------------
@@ -0,0 +1,369 @@
+"""
+Signal attachment rate-limit scheduler.
+
+Process-wide token-bucket simulator that mirrors the per-account
+attachment rate limit signal-cli/Signal-Server enforce. Producers
+(``SignalAdapter.send_multiple_images`` and the ``send_message`` tool's
+Signal path) call ``acquire(n)`` before an attachment send; on a 429
+they call ``feedback(retry_after, n)`` so the model recalibrates from
+the server's authoritative hint.
+
+The scheduler serializes concurrent calls through an ``asyncio.Lock``,
+giving FIFO fairness across agent sessions sharing one signal-cli
+daemon.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+import time
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+SIGNAL_MAX_ATTACHMENTS_PER_MSG = 32  # per-message attachment cap (source: Signal-{Android,Desktop} source code)
+SIGNAL_RATE_LIMIT_BUCKET_CAPACITY = 50  # server-side token-bucket capacity for attachments rate limiting
+SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER = 4  # fallback token refill interval for signal-cli < v0.14.3
+SIGNAL_RATE_LIMIT_MAX_ATTEMPTS = 2  # initial attempt + 1 retry
+SIGNAL_BATCH_PACING_NOTICE_THRESHOLD = 10.0  # if estimated waiting time > 10s, notify the user about the delay
+SIGNAL_RPC_ERROR_RATELIMIT = -5  # signal-cli (v0.14.3+) JSON-RPC error code for RateLimitException
+
+
+# ---------------------------------------------------------------------------
+# Errors
+# ---------------------------------------------------------------------------
+
+class SignalRateLimitError(Exception):
+    """
+    Raised by ``SignalAdapter._rpc`` for rate-limit responses when the
+    caller has opted in via ``raise_on_rate_limit=True``.
+
+    Carries the server-supplied per-token Retry-After (in seconds) on
+    signal-cli ≥ v0.14.3
+    ``retry_after`` is None when the version doesn't expose it.
+    """
+
+    def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
+        super().__init__(message)
+        self.retry_after = retry_after
+
+
+class SignalSchedulerError(Exception):
+    pass
+
+# ---------------------------------------------------------------------------
+# Detection helpers — used to fish a 429 out of signal-cli's various error
+# shapes (typed code, [429] substring, libsignal-net RetryLaterException
+# leaked through AttachmentInvalidException).
+# ---------------------------------------------------------------------------
+
+# "Retry after 4 seconds" / "retry after 4 second" — libsignal-net's
+# RetryLaterException string form, surfaced when 429s hit during
+# attachment upload (signal-cli wraps these as AttachmentInvalidException
+# rather than RateLimitException, so the typed path doesn't fire).
+_RETRY_AFTER_RE = re.compile(r"Retry after (\d+(?:\.\d+)?)\s*second", re.IGNORECASE)
+
+
+def _extract_retry_after_seconds(err: Any) -> Optional[float]:
+    """Pull the per-token Retry-After window from a signal-cli rate-limit error.
+
+    Tries two sources, in order:
+    1. ``error.data.response.results[*].retryAfterSeconds`` — the
+       structured field signal-cli ≥ v0.14.3 surfaces for plain
+       RateLimitException.
+    2. ``"Retry after N seconds"`` parsed out of the message — covers
+       libsignal-net's RetryLaterException that gets wrapped as
+       AttachmentInvalidException during attachment upload, where the
+       structured field stays null.
+
+    Returns None when neither yields a value.
+    """
+    msg = ""
+    if isinstance(err, dict):
+        data = err.get("data") or {}
+        response = data.get("response") or {}
+        results = response.get("results") or []
+        candidates = [
+            r.get("retryAfterSeconds") for r in results
+            if isinstance(r, dict) and r.get("retryAfterSeconds")
+        ]
+        if candidates:
+            return float(max(candidates))
+        msg = str(err.get("message", ""))
+    else:
+        msg = str(err)
+    match = _RETRY_AFTER_RE.search(msg)
+    return float(match.group(1)) if match else None
+
+
+def _is_signal_rate_limit_error(err: Any) -> bool:
+    """True if a signal-cli RPC error reflects a rate-limit failure.
+
+    Matches three layers:
+    - typed ``RATELIMIT_ERROR`` code (signal-cli ≥ v0.14.3, plain
+      RateLimitException)
+    - legacy ``[429] / RateLimitException`` substrings
+    - libsignal-net's ``RetryLaterException`` / ``Retry after N seconds``
+      surfaced inside ``AttachmentInvalidException`` when the rate
+      limit is hit during attachment upload — signal-cli never re-tags
+      these as RateLimitException, so substring is the only signal.
+    """
+    if isinstance(err, dict) and err.get("code") == SIGNAL_RPC_ERROR_RATELIMIT:
+        return True
+
+    message = (
+        str(err.get("message", ""))
+        if isinstance(err, dict)
+        else str(err)
+    )
+    msg_lower = message.lower()
+    return (
+        "[429]" in message
+        or "ratelimit" in msg_lower
+        or "retrylaterexception" in msg_lower
+        or "retry after" in msg_lower
+    )
+
+
+# ---------------------------------------------------------------------------
+# Misc helpers
+# ---------------------------------------------------------------------------
+
+def _format_wait(seconds: float) -> str:
+    """Human-friendly wait label for user-facing pacing notices."""
+    s = max(0.0, seconds)
+    if s < 90:
+        return f"{int(round(s))}s"
+    return f"{max(1, int(round(s / 60)))} min"
+
+
+def _signal_send_timeout(num_attachments: int) -> float:
+    """HTTP timeout for a Signal ``send`` RPC.
+
+    signal-cli uploads attachments serially during the call, so the
+    server-side time scales with batch size. Default 30s is fine for
+    text-only sends but truncates large attachment batches mid-upload —
+    we then log a phantom failure even though signal-cli completes the
+    send a few seconds later. Scale at 5s/attachment with a 60s floor.
+    """
+    if num_attachments <= 0:
+        return 30.0
+    return max(60.0, 5.0 * num_attachments)
+
+
+# ---------------------------------------------------------------------------
+# Scheduler
+# ---------------------------------------------------------------------------
+
+class SignalAttachmentScheduler:
+    """Process-wide token-bucket simulator for Signal attachment sends.
+
+    The bucket holds up to ``capacity`` tokens (default 50, matching
+    Signal's server-side rate-limit bucket size). Each attachment consumes one
+    token. Tokens refill at ``refill_rate`` tokens/second, calibrated
+    from the per-token Retry-After hint we get from the server when a
+    429 fires. Until we've observed one, we use the documented default
+    (1 token / 4 seconds).
+
+    Concurrent ``acquire(n)`` calls serialize through an
+    ``asyncio.Lock`` — natural FIFO across agent sessions hitting the
+    same daemon.
+    """
+
+    def __init__(
+        self,
+        capacity: float = float(SIGNAL_RATE_LIMIT_BUCKET_CAPACITY),
+        default_retry_after: float = float(SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER),
+    ) -> None:
+        self.capacity = float(capacity)
+        self.tokens = float(capacity)
+        self.refill_rate = 1.0 / float(default_retry_after)
+        self.last_refill = time.monotonic()
+        self._lock = asyncio.Lock()
+
+    # ------------------------------------------------------------------
+    # Internals
+    # ------------------------------------------------------------------
+
+    def _refill(self) -> None:
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        if elapsed > 0 and self.tokens < self.capacity:
+            self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_rate)
+        self.last_refill = now
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def estimate_wait(self, n: int) -> float:
+        """Best-effort estimate of the seconds until ``n`` tokens would
+        be available. Used to decide whether to emit a user-facing
+        pacing notice *before* committing to an ``acquire`` that may
+        block silently. Lock-free; small races vs. concurrent acquires
+        are benign for an informational notice.
+        """
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        projected = self.tokens
+        if elapsed > 0 and projected < self.capacity:
+            projected = min(self.capacity, projected + elapsed * self.refill_rate)
+        deficit = n - projected
+        if deficit <= 0:
+            return 0.0
+        return deficit / self.refill_rate
+
+    async def acquire(self, n: int) -> float:
+        """Block until at least ``n`` tokens are available, return the
+        seconds slept.
+
+        Does **not** deduct tokens — the bucket is a read-only model of
+        server-side capacity.  Call ``report_rpc_duration()`` after the
+        RPC to synchronise the model with the server timeline.
+
+        Not perfect in case lots of coroutines try to acquire for big
+        uploads (``report_rpc_duration`` will take a long time to get hit)
+        but this is just a simulation. Signal server is ground truth and
+        will raise rate-limit exceptions triggering requeues.
+
+        The lock is released during ``asyncio.sleep`` so other callers
+        can interleave.  A retry loop re-checks after each sleep in
+        case the deadline was pessimistic.
+        """
+        if n <= 0:
+            return 0.0
+        if n > self.capacity:
+            raise SignalSchedulerError(
+                f"Signal scheduler was called requesting {n} tokens "
+                f"(max is {self.capacity})",
+            )
+
+        total_slept = 0.0
+        first_pass = True
+        while True:
+            async with self._lock:
+                self._refill()
+                if self.tokens >= n:
+                    if not first_pass or total_slept > 0:
+                        logger.debug(
+                            "Signal scheduler: tokens sufficient for %d "
+                            "(remaining=%.1f, total_slept=%.1fs)",
+                            n, self.tokens, total_slept,
+                        )
+                    return total_slept
+                deficit = n - self.tokens
+            wait = deficit / self.refill_rate
+            if first_pass:
+                logger.info(
+                    "Signal scheduler: pausing %.1fs for %d tokens "
+                    "(available=%.1f, deficit=%.1f, refill=%.4f/s ≈ %.1fs/token)",
+                    wait, n, self.tokens, deficit,
+                    self.refill_rate, 1.0 / self.refill_rate,
+                )
+                first_pass = False
+            await asyncio.sleep(wait)
+            total_slept += wait
+
+    async def report_rpc_duration(self, rpc_duration: float, n_attachments: int) -> None:
+        """Record an attachment-send RPC that just completed.
+
+        Deducts ``n_attachments`` tokens without crediting refill during
+        the upload window. Signal's server checks the bucket at RPC start
+        and does *not* refill during request processing — refill resumes
+        after the response. Crediting upload-time refill causes cumulative
+        drift that eventually triggers 429s.
+
+        Advances ``last_refill`` so the next ``acquire`` / ``_refill``
+        starts counting from this point.
+        """
+        if n_attachments <= 0:
+            return
+
+        async with self._lock:
+            now = time.monotonic()
+            token_before = self.tokens
+            self.tokens = max(0.0, token_before - float(n_attachments))
+            self.last_refill = now
+        logger.log(
+            logging.INFO if rpc_duration > 10 and n_attachments > 5 else logging.DEBUG,
+            "Signal scheduler: RPC for %d att took %.1fs — "
+            "tokens %.1f → %.1f (deducted=%d, no upload refill credited, refill=%.4fs⁻¹)",
+            n_attachments, rpc_duration,
+            token_before, self.tokens,
+            n_attachments, self.refill_rate,
+        )
+
+    def feedback(self, retry_after: Optional[float], n_attempted: int) -> None:
+        """Apply server feedback after a 429.
+
+        ``retry_after`` is the per-*token* refill window the server
+        reports (None when signal-cli is older than v0.14.3 and didn't
+        surface it).
+
+        When present we calibrate ``refill_rate`` from it:
+        the server is authoritative.
+        """
+        if retry_after and retry_after > 0:
+            new_rate = 1.0 / float(retry_after)
+            if new_rate != self.refill_rate:
+                logger.info(
+                    "Signal scheduler: calibrating refill_rate to %.4f tokens/sec "
+                    "(server retry_after=%.1fs per token)",
+                    new_rate, retry_after,
+                )
+                self.refill_rate = new_rate
+        self.tokens = 0.0
+        self.last_refill = time.monotonic()
+
+    def state(self) -> dict:
+        """Return current scheduler state for diagnostic logging (read-only).
+
+        Does not advance ``last_refill`` — safe to call from logging paths
+        without perturbing the bucket.
+        """
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        projected = self.tokens
+        if elapsed > 0 and projected < self.capacity:
+            projected = min(self.capacity, projected + elapsed * self.refill_rate)
+        return {
+            "tokens": round(projected, 1),
+            "capacity": int(self.capacity),
+            "refill_rate": round(self.refill_rate, 4),
+            "refill_seconds_per_token": round(1.0 / self.refill_rate, 1) if self.refill_rate > 0 else float("inf"),
+        }
+
+
+# ---------------------------------------------------------------------------
+# Process-wide singleton
+# ---------------------------------------------------------------------------
+
+_scheduler: Optional[SignalAttachmentScheduler] = None
+
+
+def get_scheduler() -> SignalAttachmentScheduler:
+    """Return the process-wide scheduler, creating it on first access."""
+    global _scheduler
+    if _scheduler is None:
+        _scheduler = SignalAttachmentScheduler()
+        logger.info(
+            "Signal scheduler: created (capacity=%d tokens, refill=%.4f/s ≈ %.1fs/token)",
+            int(_scheduler.capacity),
+            _scheduler.refill_rate,
+            1.0 / _scheduler.refill_rate,
+        )
+    return _scheduler
+
+
+def _reset_scheduler() -> None:
+    """Drop the cached scheduler so the next ``get_scheduler`` call
+    builds a fresh one. Test-only — never call from production paths."""
+    global _scheduler
+    _scheduler = None
@@ -514,6 +514,15 @@ class SlackAdapter(BasePlatformAdapter):
            ):
                self._app.action(_action_id)(self._handle_approval_action)

+            # Register Block Kit action handlers for slash-confirm buttons
+            # (generic three-option prompts; see tools/slash_confirm.py).
+            for _action_id in (
+                "hermes_confirm_once",
+                "hermes_confirm_always",
+                "hermes_confirm_cancel",
+            ):
+                self._app.action(_action_id)(self._handle_slash_confirm_action)
+
            # Start Socket Mode handler in background
            self._handler = AsyncSocketModeHandler(self._app, app_token, proxy=proxy_url)
            _apply_slack_proxy(self._handler.client, proxy_url)
@@ -783,6 +792,111 @@ class SlackAdapter(BasePlatformAdapter):

        raise last_exc

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images as a single Slack message with multiple file uploads.
+
+        Uses ``files_upload_v2`` with its ``file_uploads`` parameter so all
+        images show up attached to one ``initial_comment`` message instead
+        of N separate messages. Falls back to the base per-image loop on
+        any failure.
+
+        The batch limit is 10 file uploads per call (Slack server-side cap).
+        """
+        if not self._app:
+            return
+        if not images:
+            return
+
+        try:
+            import httpx as _httpx
+            from urllib.parse import unquote as _unquote
+            from tools.url_safety import is_safe_url as _is_safe_url
+        except Exception:
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+            return
+
+        thread_ts = self._resolve_thread_ts(None, metadata)
+
+        CHUNK = 10
+        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
+
+        for chunk_idx, chunk in enumerate(chunks):
+            if human_delay > 0 and chunk_idx > 0:
+                await asyncio.sleep(human_delay)
+
+            file_uploads: List[Dict[str, Any]] = []
+            initial_comment_parts: List[str] = []
+            try:
+                async with _httpx.AsyncClient(timeout=30.0, follow_redirects=True) as http_client:
+                    for image_url, alt_text in chunk:
+                        if alt_text:
+                            initial_comment_parts.append(alt_text)
+
+                        if image_url.startswith("file://"):
+                            local_path = _unquote(image_url[7:])
+                            if not os.path.exists(local_path):
+                                logger.warning("[Slack] Skipping missing image: %s", local_path)
+                                continue
+                            file_uploads.append({
+                                "file": local_path,
+                                "filename": os.path.basename(local_path),
+                            })
+                        else:
+                            if not _is_safe_url(image_url):
+                                logger.warning("[Slack] Blocked unsafe image URL in batch")
+                                continue
+                            try:
+                                response = await http_client.get(image_url)
+                                response.raise_for_status()
+                                ext = "png"
+                                ct = response.headers.get("content-type", "")
+                                if "jpeg" in ct or "jpg" in ct:
+                                    ext = "jpg"
+                                elif "gif" in ct:
+                                    ext = "gif"
+                                elif "webp" in ct:
+                                    ext = "webp"
+                                file_uploads.append({
+                                    "content": response.content,
+                                    "filename": f"image_{len(file_uploads)}.{ext}",
+                                })
+                            except Exception as dl_err:
+                                logger.warning(
+                                    "[Slack] Download failed for %s: %s",
+                                    safe_url_for_log(image_url), dl_err,
+                                )
+                                continue
+
+                if not file_uploads:
+                    continue
+
+                initial_comment = "\n".join(initial_comment_parts) if initial_comment_parts else ""
+                logger.info(
+                    "[Slack] Sending %d image(s) in single files_upload_v2 (chunk %d/%d)",
+                    len(file_uploads), chunk_idx + 1, len(chunks),
+                )
+                result = await self._get_client(chat_id).files_upload_v2(
+                    channel=chat_id,
+                    file_uploads=file_uploads,
+                    initial_comment=initial_comment,
+                    thread_ts=thread_ts,
+                )
+                self._record_uploaded_file_thread(chat_id, thread_ts)
+                _ = result
+            except Exception as e:
+                logger.warning(
+                    "[Slack] Multi-image files_upload_v2 failed (chunk %d/%d), falling back to per-image: %s",
+                    chunk_idx + 1, len(chunks), e,
+                    exc_info=True,
+                )
+                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
+
    def _record_uploaded_file_thread(self, chat_id: str, thread_ts: Optional[str]) -> None:
        """Treat successful file uploads as bot participation in a thread."""
        if not thread_ts:
@@ -1931,6 +2045,168 @@ class SlackAdapter(BasePlatformAdapter):
            logger.error("[Slack] send_exec_approval failed: %s", e, exc_info=True)
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a Block Kit three-option slash-command confirmation prompt."""
+        if not self._app:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            body = message[:2900] + "..." if len(message) > 2900 else message
+            thread_ts = self._resolve_thread_ts(None, metadata)
+            # Encode session_key and confirm_id into the button value so the
+            # callback handler can resolve without extra bookkeeping.
+            value = f"{session_key}|{confirm_id}"
+
+            blocks = [
+                {
+                    "type": "section",
+                    "text": {
+                        "type": "mrkdwn",
+                        "text": f"*{title or 'Confirm'}*\n\n{body}",
+                    },
+                },
+                {
+                    "type": "actions",
+                    "elements": [
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Approve Once"},
+                            "style": "primary",
+                            "action_id": "hermes_confirm_once",
+                            "value": value,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Always Approve"},
+                            "action_id": "hermes_confirm_always",
+                            "value": value,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Cancel"},
+                            "style": "danger",
+                            "action_id": "hermes_confirm_cancel",
+                            "value": value,
+                        },
+                    ],
+                },
+            ]
+
+            kwargs: Dict[str, Any] = {
+                "channel": chat_id,
+                "text": f"{title or 'Confirm'}: {body[:100]}",
+                "blocks": blocks,
+            }
+            if thread_ts:
+                kwargs["thread_ts"] = thread_ts
+
+            result = await self._get_client(chat_id).chat_postMessage(**kwargs)
+            return SendResult(success=True, message_id=result.get("ts", ""), raw_response=result)
+        except Exception as e:
+            logger.error("[Slack] send_slash_confirm failed: %s", e, exc_info=True)
+            return SendResult(success=False, error=str(e))
+
+    async def _handle_slash_confirm_action(self, ack, body, action) -> None:
+        """Handle a slash-confirm button click from Block Kit."""
+        await ack()
+
+        action_id = action.get("action_id", "")
+        value = action.get("value", "")
+        message = body.get("message", {})
+        msg_ts = message.get("ts", "")
+        channel_id = body.get("channel", {}).get("id", "")
+        user_name = body.get("user", {}).get("name", "unknown")
+        user_id = body.get("user", {}).get("id", "")
+
+        # Authorization — reuse the exec-approval allowlist.
+        allowed_csv = os.getenv("SLACK_ALLOWED_USERS", "").strip()
+        if allowed_csv:
+            allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
+            if "*" not in allowed_ids and user_id not in allowed_ids:
+                logger.warning(
+                    "[Slack] Unauthorized slash-confirm click by %s (%s) — ignoring",
+                    user_name, user_id,
+                )
+                return
+
+        # Parse session_key|confirm_id back out
+        if "|" not in value:
+            logger.warning("[Slack] Malformed slash-confirm value: %s", value)
+            return
+        session_key, confirm_id = value.split("|", 1)
+
+        choice_map = {
+            "hermes_confirm_once": "once",
+            "hermes_confirm_always": "always",
+            "hermes_confirm_cancel": "cancel",
+        }
+        choice = choice_map.get(action_id, "cancel")
+
+        label_map = {
+            "once": f"✅ Approved once by {user_name}",
+            "always": f"🔒 Always approved by {user_name}",
+            "cancel": f"❌ Cancelled by {user_name}",
+        }
+        decision_text = label_map.get(choice, f"Resolved by {user_name}")
+
+        # Pull original prompt body out of the section block so we can show
+        # the decision inline without losing context.
+        original_text = ""
+        for block in message.get("blocks", []):
+            if block.get("type") == "section":
+                original_text = block.get("text", {}).get("text", "")
+                break
+
+        updated_blocks = [
+            {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": original_text or "Confirmation prompt",
+                },
+            },
+            {
+                "type": "context",
+                "elements": [
+                    {"type": "mrkdwn", "text": decision_text},
+                ],
+            },
+        ]
+
+        try:
+            await self._get_client(channel_id).chat_update(
+                channel=channel_id,
+                ts=msg_ts,
+                text=decision_text,
+                blocks=updated_blocks,
+            )
+        except Exception as e:
+            logger.warning("[Slack] Failed to update slash-confirm message: %s", e)
+
+        # Resolve via the module-level primitive and post any follow-up.
+        try:
+            from tools import slash_confirm as _slash_confirm_mod
+            result_text = await _slash_confirm_mod.resolve(session_key, confirm_id, choice)
+            if result_text:
+                post_kwargs: Dict[str, Any] = {
+                    "channel": channel_id,
+                    "text": result_text,
+                }
+                # Inherit the thread so the reply stays in the same place.
+                thread_ts = message.get("thread_ts") or msg_ts
+                if thread_ts:
+                    post_kwargs["thread_ts"] = thread_ts
+                await self._get_client(channel_id).chat_postMessage(**post_kwargs)
+            logger.info(
+                "Slack button resolved slash-confirm for session %s (choice=%s, user=%s)",
+                session_key, choice, user_name,
+            )
+        except Exception as exc:
+            logger.error("Failed to resolve slash-confirm from Slack button: %s", exc, exc_info=True)
+
    async def _handle_approval_action(self, ack, body, action) -> None:
        """Handle an approval button click from Block Kit."""
        await ack()
@@ -84,6 +84,7 @@ from gateway.platforms.telegram_network import (
    discover_fallback_ips,
    parse_fallback_ip_env,
 )
+from utils import atomic_replace


 def check_telegram_requirements() -> bool:
@@ -122,12 +123,12 @@ def _strip_mdv2(text: str) -> str:


 # ---------------------------------------------------------------------------
-# Markdown table → code block conversion
+# Markdown table → Telegram-friendly row groups
 # ---------------------------------------------------------------------------
 # Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
 # so pipe tables render as noisy backslash-pipe text with no alignment.
-# Wrapping the table in a fenced code block makes Telegram render it as
-# monospace preformatted text with columns intact.
+# Reformating each row into a bold heading plus bullet list keeps the content
+# readable on mobile clients while preserving the source data.

 # Matches a GFM table delimiter row: optional outer pipes, cells containing
 # only dashes (with optional leading/trailing colons for alignment) separated
@@ -144,13 +145,49 @@ def _is_table_row(line: str) -> bool:
    return bool(stripped) and '|' in stripped


+def _split_markdown_table_row(line: str) -> list[str]:
+    """Split a simple GFM table row into stripped cell values."""
+    stripped = line.strip()
+    if stripped.startswith("|"):
+        stripped = stripped[1:]
+    if stripped.endswith("|"):
+        stripped = stripped[:-1]
+    return [cell.strip() for cell in stripped.split("|")]
+
+
+def _render_table_block_for_telegram(table_block: list[str]) -> str:
+    """Render a detected GFM table as Telegram-friendly row groups."""
+    if len(table_block) < 3:
+        return "\n".join(table_block)
+
+    headers = _split_markdown_table_row(table_block[0])
+    if len(headers) < 2:
+        return "\n".join(table_block)
+
+    rendered_rows: list[str] = []
+    for index, row in enumerate(table_block[2:], start=1):
+        cells = _split_markdown_table_row(row)
+        if len(cells) < len(headers):
+            cells.extend([""] * (len(headers) - len(cells)))
+        elif len(cells) > len(headers):
+            cells = cells[: len(headers)]
+
+        heading = next((cell for cell in cells if cell), f"Row {index}")
+        rendered_rows.append(f"**{heading}**")
+        rendered_rows.extend(
+            f"• {header}: {value}" for header, value in zip(headers, cells)
+        )
+
+    return "\n\n".join(rendered_rows)
+
+
 def _wrap_markdown_tables(text: str) -> str:
-    """Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
+    """Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.

    Detected by a row containing '|' immediately followed by a delimiter
    row matching :data:`_TABLE_SEPARATOR_RE`.  Subsequent pipe-containing
-    non-blank lines are consumed as the table body and included in the
-    wrapped block.  Tables inside existing fenced code blocks are left
+    non-blank lines are consumed as the table body and rewritten as
+    per-row bullet groups. Tables inside existing fenced code blocks are left
    alone.
    """
    if '|' not in text or '-' not in text:
@@ -187,9 +224,7 @@ def _wrap_markdown_tables(text: str) -> str:
            while j < len(lines) and _is_table_row(lines[j]):
                table_block.append(lines[j])
                j += 1
-            out.append('```')
-            out.extend(table_block)
-            out.append('```')
+            out.append(_render_table_block_for_telegram(table_block))
            i = j
            continue

@@ -202,14 +237,14 @@ def _wrap_markdown_tables(text: str) -> str:
 class TelegramAdapter(BasePlatformAdapter):
    """
    Telegram bot adapter.
-    
+
    Handles:
    - Receiving messages from users and groups
    - Sending responses with Telegram markdown
    - Forum topics (thread_id support)
    - Media messages
    """
-    
+
    # Telegram message limits
    MAX_MESSAGE_LENGTH = 4096
    # Threshold for detecting Telegram client-side message splits.
@@ -217,7 +252,7 @@ class TelegramAdapter(BasePlatformAdapter):
    _SPLIT_THRESHOLD = 4000
    MEDIA_GROUP_WAIT_SECONDS = 0.8
    _GENERAL_TOPIC_THREAD_ID = "1"
-    
+
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.TELEGRAM)
        self._app: Optional[Application] = None
@@ -251,6 +286,9 @@ class TelegramAdapter(BasePlatformAdapter):
        self._model_picker_state: Dict[str, dict] = {}
        # Approval button state: message_id → session_key
        self._approval_state: Dict[int, str] = {}
+        # Slash-confirm button state: confirm_id → session_key (for /reload-mcp
+        # and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
+        self._slash_confirm_state: Dict[str, str] = {}

    @staticmethod
    def _is_callback_user_authorized(user_id: str) -> bool:
@@ -334,6 +372,49 @@ class TelegramAdapter(BasePlatformAdapter):
            return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
        return {"disable_web_page_preview": True}

+    async def _drain_polling_connections(self) -> None:
+        """Reset the httpx connection pool used for getUpdates polling.
+
+        Network errors (especially through proxies like sing-box) can leave
+        httpx connections in a half-closed state that still occupy pool slots.
+        After enough reconnect cycles the pool fills up entirely, causing
+        ``Pool timeout: All connections in the connection pool are occupied.``
+
+        We reset ONLY ``_request[0]`` (the getUpdates request) — the general
+        request (``_request[1]``) is left untouched so concurrent
+        ``send_message`` / ``edit_message`` calls are never interrupted.
+
+        Implementation note: accesses ``Bot._request[0]`` which is the
+        get-updates ``BaseRequest`` in the PTB 22.x internal tuple
+        ``(get_updates_request, general_request)``.  There is no public
+        accessor for the polling request; review if upgrading to PTB 23+.
+        """
+        if not (self._app and self._app.bot):
+            return
+        try:
+            # PTB 22.x: _request is a (get_updates, general) tuple;
+            # no public accessor exists for the polling request.
+            polling_req = self._app.bot._request[0]  # noqa: SLF001
+        except Exception:
+            return
+        try:
+            await polling_req.shutdown()
+        except Exception:
+            logger.debug(
+                "[%s] Polling request shutdown failed (non-fatal)",
+                self.name, exc_info=True,
+            )
+        try:
+            await polling_req.initialize()
+            logger.debug(
+                "[%s] Polling request pool drained before reconnect", self.name
+            )
+        except Exception:
+            logger.debug(
+                "[%s] Polling request re-initialize failed (non-fatal)",
+                self.name, exc_info=True,
+            )
+
    async def _handle_polling_network_error(self, error: Exception) -> None:
        """Reconnect polling after a transient network interruption.

@@ -379,6 +460,8 @@ class TelegramAdapter(BasePlatformAdapter):
        except Exception:
            pass

+        await self._drain_polling_connections()
+
        try:
            await self._app.updater.start_polling(
                allowed_updates=Update.ALL_TYPES,
@@ -426,6 +509,7 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception:
                pass
            await asyncio.sleep(RETRY_DELAY)
+            await self._drain_polling_connections()
            try:
                await self._app.updater.start_polling(
                    allowed_updates=Update.ALL_TYPES,
@@ -554,7 +638,7 @@ class TelegramAdapter(BasePlatformAdapter):
                        _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
                        f.flush()
                        os.fsync(f.fileno())
-                    os.replace(tmp_path, config_path)
+                    atomic_replace(tmp_path, config_path)
                except BaseException:
                    try:
                        os.unlink(tmp_path)
@@ -913,7 +997,7 @@ class TelegramAdapter(BasePlatformAdapter):
            self._set_fatal_error("telegram_connect_error", message, retryable=True)
            logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
            return False
-    
+
    async def disconnect(self) -> None:
        """Stop polling/webhook, cancel pending album flushes, and disconnect."""
        pending_media_group_tasks = list(self._media_group_tasks.values())
@@ -1330,6 +1414,48 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.warning("[%s] send_exec_approval failed: %s", self.name, e)
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Render a three-button slash-command confirmation prompt."""
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            # Message body: render as plain text (message already contains
+            # markdown formatting from the gateway primitive).
+            preview = message if len(message) <= 3800 else message[:3800] + "..."
+
+            keyboard = InlineKeyboardMarkup([
+                [
+                    InlineKeyboardButton("✅ Approve Once", callback_data=f"sc:once:{confirm_id}"),
+                    InlineKeyboardButton("🔒 Always Approve", callback_data=f"sc:always:{confirm_id}"),
+                ],
+                [
+                    InlineKeyboardButton("❌ Cancel", callback_data=f"sc:cancel:{confirm_id}"),
+                ],
+            ])
+
+            thread_id = self._metadata_thread_id(metadata)
+            kwargs: Dict[str, Any] = {
+                "chat_id": int(chat_id),
+                "text": preview,
+                "parse_mode": ParseMode.MARKDOWN,
+                "reply_markup": keyboard,
+                **self._link_preview_kwargs(),
+            }
+            message_thread_id = self._message_thread_id_for_send(thread_id)
+            if message_thread_id is not None:
+                kwargs["message_thread_id"] = message_thread_id
+
+            msg = await self._bot.send_message(**kwargs)
+            self._slash_confirm_state[confirm_id] = session_key
+            return SendResult(success=True, message_id=str(msg.message_id))
+        except Exception as e:
+            logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
    async def send_model_picker(
        self,
        chat_id: str,
@@ -1698,6 +1824,68 @@ class TelegramAdapter(BasePlatformAdapter):
                    logger.error("Failed to resolve gateway approval from Telegram button: %s", exc)
            return

+        # --- Slash-confirm callbacks (sc:choice:confirm_id) ---
+        if data.startswith("sc:"):
+            parts = data.split(":", 2)
+            if len(parts) == 3:
+                choice = parts[1]  # once, always, cancel
+                confirm_id = parts[2]
+
+                caller_id = str(getattr(query.from_user, "id", "")) 
+                if not self._is_callback_user_authorized(caller_id):
+                    await query.answer(text="⛔ You are not authorized to answer this prompt.")
+                    return
+
+                session_key = self._slash_confirm_state.pop(confirm_id, None)
+                if not session_key:
+                    await query.answer(text="This prompt has already been resolved.")
+                    return
+
+                label_map = {
+                    "once": "✅ Approved once",
+                    "always": "🔒 Always approve",
+                    "cancel": "❌ Cancelled",
+                }
+                user_display = getattr(query.from_user, "first_name", "User")
+                label = label_map.get(choice, "Resolved")
+
+                await query.answer(text=label)
+
+                try:
+                    await query.edit_message_text(
+                        text=f"{label} by {user_display}",
+                        parse_mode=ParseMode.MARKDOWN,
+                        reply_markup=None,
+                    )
+                except Exception:
+                    pass
+
+                # Resolve via the module-level primitive.  The runner stored
+                # a handler keyed by session_key; we run it on the event
+                # loop and (if it returns a string) send it as a follow-up
+                # message in the same chat.
+                try:
+                    from tools import slash_confirm as _slash_confirm_mod
+                    result_text = await _slash_confirm_mod.resolve(
+                        session_key, confirm_id, choice,
+                    )
+                    if result_text and query.message:
+                        # Inherit the prompt message's thread so the reply
+                        # lands in the same supergroup topic / reply chain.
+                        thread_id = getattr(query.message, "message_thread_id", None)
+                        send_kwargs: Dict[str, Any] = {
+                            "chat_id": int(query.message.chat_id),
+                            "text": result_text,
+                            "parse_mode": ParseMode.MARKDOWN,
+                            **self._link_preview_kwargs(),
+                        }
+                        if thread_id is not None:
+                            send_kwargs["message_thread_id"] = thread_id
+                        await self._bot.send_message(**send_kwargs)
+                except Exception as exc:
+                    logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
+            return
+
        # --- Update prompt callbacks ---
        if not data.startswith("update_prompt:"):
            return
@@ -1763,8 +1951,9 @@ class TelegramAdapter(BasePlatformAdapter):
                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
            
            with open(audio_path, "rb") as audio_file:
-                # .ogg files -> send as voice (round playable bubble)
-                if audio_path.endswith((".ogg", ".opus")):
+                ext = os.path.splitext(audio_path)[1].lower()
+                # .ogg / .opus files -> send as voice (round playable bubble)
+                if ext in (".ogg", ".opus"):
                    _voice_thread = self._metadata_thread_id(metadata)
                    msg = await self._bot.send_voice(
                        chat_id=int(chat_id),
@@ -1773,8 +1962,8 @@ class TelegramAdapter(BasePlatformAdapter):
                        reply_to_message_id=int(reply_to) if reply_to else None,
                        message_thread_id=self._message_thread_id_for_send(_voice_thread),
                    )
-                else:
-                    # .mp3 and others -> send as audio file
+                elif ext in (".mp3", ".m4a"):
+                    # Telegram's Bot API sendAudio only accepts MP3 / M4A.
                    _audio_thread = self._metadata_thread_id(metadata)
                    msg = await self._bot.send_audio(
                        chat_id=int(chat_id),
@@ -1783,6 +1972,16 @@ class TelegramAdapter(BasePlatformAdapter):
                        reply_to_message_id=int(reply_to) if reply_to else None,
                        message_thread_id=self._message_thread_id_for_send(_audio_thread),
                    )
+                else:
+                    # Formats Telegram can't play natively (.wav, .flac, ...)
+                    # — fall back to document delivery instead of raising.
+                    return await self.send_document(
+                        chat_id=chat_id,
+                        file_path=audio_path,
+                        caption=caption,
+                        reply_to=reply_to,
+                        metadata=metadata,
+                    )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
            logger.error(
@@ -1792,7 +1991,118 @@ class TelegramAdapter(BasePlatformAdapter):
                exc_info=True,
            )
            return await super().send_voice(chat_id, audio_path, caption, reply_to)
-    
+
+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[tuple],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images natively via Telegram's media group API.
+
+        Telegram's ``send_media_group`` bundles up to 10 photos/videos into
+        a single album. Larger batches are chunked. Animated GIFs cannot
+        go into a media group (they require ``send_animation``), so they
+        are peeled off and sent individually via the base default path.
+
+        URL-based photos go into the group directly; local files are
+        opened as byte streams. On failure the whole batch falls back to
+        the base adapter's per-image loop.
+        """
+        if not self._bot:
+            return
+        if not images:
+            return
+
+        try:
+            from telegram import InputMediaPhoto
+        except Exception as exc:  # pragma: no cover - missing SDK
+            logger.warning(
+                "[%s] InputMediaPhoto unavailable, falling back to per-image send: %s",
+                self.name, exc,
+            )
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+            return
+
+        # Peel off animations — they need send_animation, not send_media_group
+        animations: List[tuple] = []
+        photos: List[tuple] = []
+        for image_url, alt_text in images:
+            if not image_url.startswith("file://") and self._is_animation_url(image_url):
+                animations.append((image_url, alt_text))
+            else:
+                photos.append((image_url, alt_text))
+
+        # Animations: route through the base default (per-image send_animation)
+        if animations:
+            await super().send_multiple_images(
+                chat_id, animations, metadata, human_delay=human_delay,
+            )
+
+        if not photos:
+            return
+
+        from urllib.parse import unquote as _unquote
+        _thread = self._metadata_thread_id(metadata)
+        _thread_id = self._message_thread_id_for_send(_thread)
+
+        # Chunk into groups of 10 (Telegram's album limit)
+        CHUNK = 10
+        chunks = [photos[i:i + CHUNK] for i in range(0, len(photos), CHUNK)]
+
+        for chunk_idx, chunk in enumerate(chunks):
+            if human_delay > 0 and chunk_idx > 0:
+                await asyncio.sleep(human_delay)
+
+            media: List[Any] = []
+            opened_files: List[Any] = []
+            try:
+                for image_url, alt_text in chunk:
+                    caption = alt_text[:1024] if alt_text else None
+                    if image_url.startswith("file://"):
+                        local_path = _unquote(image_url[7:])
+                        if not os.path.exists(local_path):
+                            logger.warning(
+                                "[%s] Skipping missing image in media group: %s",
+                                self.name, local_path,
+                            )
+                            continue
+                        fh = open(local_path, "rb")
+                        opened_files.append(fh)
+                        media.append(InputMediaPhoto(media=fh, caption=caption))
+                    else:
+                        media.append(InputMediaPhoto(media=image_url, caption=caption))
+
+                if not media:
+                    continue
+
+                logger.info(
+                    "[%s] Sending media group of %d photo(s) (chunk %d/%d)",
+                    self.name, len(media), chunk_idx + 1, len(chunks),
+                )
+                await self._bot.send_media_group(
+                    chat_id=int(chat_id),
+                    media=media,
+                    message_thread_id=_thread_id,
+                )
+            except Exception as e:
+                logger.warning(
+                    "[%s] send_media_group failed (chunk %d/%d), falling back to per-image: %s",
+                    self.name, chunk_idx + 1, len(chunks), e,
+                    exc_info=True,
+                )
+                # Fallback: send each photo in this chunk individually
+                await super().send_multiple_images(
+                    chat_id, chunk, metadata, human_delay=human_delay,
+                )
+            finally:
+                for fh in opened_files:
+                    try:
+                        fh.close()
+                    except Exception:
+                        pass
+
    async def send_image_file(
        self,
        chat_id: str,
@@ -1959,7 +2269,7 @@ class TelegramAdapter(BasePlatformAdapter):
                )
                # Final fallback: send URL as text
                return await super().send_image(chat_id, image_url, caption, reply_to)
-    
+
    async def send_animation(
        self,
        chat_id: str,
@@ -2021,7 +2331,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    e,
                    exc_info=True,
                )
-    
+
    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
        """Get information about a Telegram chat."""
        if not self._bot:
@@ -2055,7 +2365,7 @@ class TelegramAdapter(BasePlatformAdapter):
                exc_info=True,
            )
            return {"name": str(chat_id), "type": "dm", "error": str(e)}
-    
+
    def format_message(self, content: str) -> str:
        """
        Convert standard markdown to Telegram MarkdownV2 format.
@@ -2080,10 +2390,8 @@ class TelegramAdapter(BasePlatformAdapter):

        text = content

-        # 0) Pre-wrap GFM-style pipe tables in ``` fences.  Telegram can't
-        #    render tables natively, but fenced code blocks render as
-        #    monospace preformatted text with columns intact.  The wrapped
-        #    tables then flow through step (1) below as protected regions.
+        # 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
+        #    before the normal MarkdownV2 conversions run.
        text = _wrap_markdown_tables(text)

        # 1) Protect fenced code blocks (``` ... ```)
@@ -2229,7 +2537,7 @@ class TelegramAdapter(BasePlatformAdapter):
        text = ''.join(_safe_parts)

        return text
-    
+
    # ── Group mention gating ──────────────────────────────────────────────

    def _telegram_require_mention(self) -> bool:
@@ -2444,7 +2752,7 @@ class TelegramAdapter(BasePlatformAdapter):
        event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
        event.text = self._clean_bot_trigger_text(event.text)
        self._enqueue_text_event(event)
-    
+
    async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming command messages."""
        if not update.message or not update.message.text:
@@ -2454,7 +2762,7 @@ class TelegramAdapter(BasePlatformAdapter):
        
        event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
        await self.handle_message(event)
-    
+
    async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming location/venue pin messages."""
        if not update.message:
@@ -2812,7 +3120,7 @@ class TelegramAdapter(BasePlatformAdapter):
            return

        await self.handle_message(event)
-    
+
    async def _queue_media_group_event(self, media_group_id: str, event: MessageEvent) -> None:
        """Buffer Telegram media-group items so albums arrive as one logical event.

@@ -202,26 +202,22 @@ class WebhookAdapter(BasePlatformAdapter):
        if deliver_type == "github_comment":
            return await self._deliver_github_comment(content, delivery)

-        # Cross-platform delivery — any platform with a gateway adapter
-        if self.gateway_runner and deliver_type in (
-            "telegram",
-            "discord",
-            "slack",
-            "signal",
-            "sms",
-            "whatsapp",
-            "matrix",
-            "mattermost",
-            "homeassistant",
-            "email",
-            "dingtalk",
-            "feishu",
-            "wecom",
-            "wecom_callback",
-            "weixin",
-            "bluebubbles",
-            "qqbot",
-        ):
+        # Cross-platform delivery — any platform with a gateway adapter.
+        # Check both built-in names and plugin-registered platforms.
+        _BUILTIN_DELIVER_PLATFORMS = {
+            "telegram", "discord", "slack", "signal", "sms", "whatsapp",
+            "matrix", "mattermost", "homeassistant", "email", "dingtalk",
+            "feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
+            "qqbot", "yuanbao",
+        }
+        _is_known_platform = deliver_type in _BUILTIN_DELIVER_PLATFORMS
+        if not _is_known_platform:
+            try:
+                from gateway.platform_registry import platform_registry
+                _is_known_platform = platform_registry.is_registered(deliver_type)
+            except Exception:
+                pass
+        if self.gateway_runner and _is_known_platform:
            return await self._deliver_cross_platform(
                deliver_type, content, delivery
            )
@@ -89,8 +89,21 @@ MAX_CONSECUTIVE_FAILURES = 3
 RETRY_DELAY_SECONDS = 2
 BACKOFF_DELAY_SECONDS = 30
 SESSION_EXPIRED_ERRCODE = -14
+RATE_LIMIT_ERRCODE = -2  # iLink frequency limit — backoff and retry
 MESSAGE_DEDUP_TTL_SECONDS = 300

+
+def _is_stale_session_ret(
+    ret: "Optional[int]", errcode: "Optional[int]", errmsg: "Optional[str]",
+) -> bool:
+    """True when iLink returns ret=-2 / errcode=-2 with 'unknown error',
+    which is a stale-session signal (same as errcode=-14) rather than
+    a genuine rate limit."""
+    if ret != RATE_LIMIT_ERRCODE and errcode != RATE_LIMIT_ERRCODE:
+        return False
+    return (errmsg or "").lower() == "unknown error"
+
+
 MEDIA_IMAGE = 1
 MEDIA_VIDEO = 2
 MEDIA_FILE = 3
@@ -1113,7 +1126,7 @@ async def qr_login(
 class WeixinAdapter(BasePlatformAdapter):
    """Native Hermes adapter for Weixin personal accounts."""

-    MAX_MESSAGE_LENGTH = 4000
+    MAX_MESSAGE_LENGTH = 2000

    # WeChat does not support editing sent messages — streaming must use the
    # fallback "send-final-only" path so the cursor (▉) is never left visible.
@@ -1138,10 +1151,10 @@ class WeixinAdapter(BasePlatformAdapter):
            extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
        ).strip().rstrip("/")
        self._send_chunk_delay_seconds = float(
-            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
+            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
        )
        self._send_chunk_retries = int(
-            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
+            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
        )
        self._send_chunk_retry_delay_seconds = float(
            extra.get("send_chunk_retry_delay_seconds")
@@ -1209,6 +1222,17 @@ class WeixinAdapter(BasePlatformAdapter):
        self._mark_connected()
        _LIVE_ADAPTERS[self._token] = self
        logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
+        if self._group_policy != "disabled":
+            logger.warning(
+                "[%s] WEIXIN_GROUP_POLICY=%s is set, but QR-login connects an iLink bot "
+                "identity (e.g. ...@im.bot) which typically cannot be invited into ordinary "
+                "WeChat groups. iLink usually does not deliver ordinary-group events for "
+                "these accounts, so group messages may never reach Hermes regardless of this "
+                "policy. If group delivery doesn't work, the limitation is on the iLink side, "
+                "not in Hermes.",
+                self.name,
+                self._group_policy,
+            )
        return True

    async def disconnect(self) -> None:
@@ -1253,7 +1277,8 @@ class WeixinAdapter(BasePlatformAdapter):
                ret = response.get("ret", 0)
                errcode = response.get("errcode", 0)
                if ret not in (0, None) or errcode not in (0, None):
-                    if ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE:
+                    if (ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE
+                            or _is_stale_session_ret(ret, errcode, response.get("errmsg"))):
                        logger.error("[%s] Session expired; pausing for 10 minutes", self.name)
                        await asyncio.sleep(600)
                        consecutive_failures = 0
@@ -1518,6 +1543,7 @@ class WeixinAdapter(BasePlatformAdapter):
                        is_session_expired = (
                            ret == SESSION_EXPIRED_ERRCODE
                            or errcode == SESSION_EXPIRED_ERRCODE
+                            or _is_stale_session_ret(ret, errcode, resp.get("errmsg"))
                        )
                        # Session expired — strip token and retry once
                        if is_session_expired and not retried_without_token and context_token:
@@ -1531,6 +1557,28 @@ class WeixinAdapter(BasePlatformAdapter):
                                self.name, _safe_id(chat_id),
                            )
                            continue
+                        # Rate limit (-2) — backoff and retry
+                        is_rate_limited = (
+                            ret == RATE_LIMIT_ERRCODE
+                            or errcode == RATE_LIMIT_ERRCODE
+                        )
+                        if is_rate_limited:
+                            errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
+                            # Record the error so we raise a descriptive
+                            # RuntimeError (instead of AssertionError) if the
+                            # loop exhausts with the server still rate-limiting.
+                            last_error = RuntimeError(
+                                f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
+                            )
+                            if attempt >= self._send_chunk_retries:
+                                break
+                            wait = self._send_chunk_retry_delay_seconds * 3  # 3x backoff for rate limit
+                            logger.warning(
+                                "[%s] rate limited for %s; backing off %.1fs before retry",
+                                self.name, _safe_id(chat_id), wait,
+                            )
+                            await asyncio.sleep(wait)
+                            continue
                        errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
                        raise RuntimeError(
                            f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
@@ -1572,7 +1620,7 @@ class WeixinAdapter(BasePlatformAdapter):
        _, image_cleaned = self.extract_images(cleaned_content)
        local_files, final_content = self.extract_local_files(image_cleaned)

-        _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
+        _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}
        _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
        _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}

@@ -90,7 +90,7 @@ from gateway.platforms.yuanbao_proto import (
    encode_get_group_member_list,
    next_seq_no,
 )
-from gateway.session import SessionSource, build_session_key
+from gateway.session import build_session_key

 logger = logging.getLogger(__name__)

@@ -1897,7 +1897,7 @@ class OwnerCommandMiddleware(InboundMiddleware):
            return None, None, False

        # Sender identity check: bot owner <-> push.from_account == push.bot_owner_id
-        owner_id = (push or {}).get("bot_owner_id") or ""
+        # owner_id = (push or {}).get("bot_owner_id") or ""
        # is_owner = bool(owner_id) and owner_id == from_account
        is_owner = True
        return cmd, cmd_line, is_owner
@@ -21,12 +21,10 @@ import hashlib
 import hmac
 import logging
 import os
-import re
 import secrets
 import struct
 import time
 import urllib.parse
-from datetime import datetime, timezone, timedelta
 from typing import Optional, Any

 import httpx
@@ -19,9 +19,8 @@ yuanbao_proto.py - Yuanbao WebSocket 协议编解码（纯 Python 实现）
 from __future__ import annotations

 import logging
-import struct
 import threading
-from typing import Optional, Union
+from typing import Optional

 logger = logging.getLogger(__name__)

@@ -0,0 +1,150 @@
+"""Gateway runtime-metadata footer.
+
+Renders a compact footer showing runtime state (model, context %, cwd) and
+appends it to the FINAL message of an agent turn when enabled.  Off by default
+to keep replies minimal.
+
+Config (``~/.hermes/config.yaml``)::
+
+    display:
+      runtime_footer:
+        enabled: true                       # off by default
+        fields: [model, context_pct, cwd]   # order shown; drop any to hide
+
+Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
+Users can toggle the global setting with ``/footer on|off`` from both the CLI
+and any gateway platform.
+
+The footer is appended to the final response text in ``gateway/run.py`` right
+before returning the response to the adapter send path — so it only lands on
+the final message a user sees, not on tool-progress updates or streaming
+partials.  When streaming is on and the final text has already been delivered
+piecemeal, the footer is sent as a separate trailing message via
+``send_trailing_footer()``.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
+_SEP = " · "
+
+
+def _home_relative_cwd(cwd: str) -> str:
+    """Return *cwd* with ``$HOME`` collapsed to ``~``.  Empty string if unset."""
+    if not cwd:
+        return ""
+    try:
+        home = os.path.expanduser("~")
+        p = os.path.abspath(cwd)
+        if home and (p == home or p.startswith(home + os.sep)):
+            return "~" + p[len(home):]
+        return p
+    except Exception:
+        return cwd
+
+
+def _model_short(model: Optional[str]) -> str:
+    """Drop ``vendor/`` prefix for readability (``openai/gpt-5.4`` → ``gpt-5.4``)."""
+    if not model:
+        return ""
+    return model.rsplit("/", 1)[-1]
+
+
+def resolve_footer_config(
+    user_config: dict[str, Any] | None,
+    platform_key: str | None = None,
+) -> dict[str, Any]:
+    """Resolve effective runtime-footer config for *platform_key*.
+
+    Merge order (later wins):
+        1. Built-in defaults (enabled=False)
+        2. ``display.runtime_footer``
+        3. ``display.platforms.<platform_key>.runtime_footer``
+    """
+    resolved = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
+    cfg = (user_config or {}).get("display") or {}
+
+    global_cfg = cfg.get("runtime_footer")
+    if isinstance(global_cfg, dict):
+        if "enabled" in global_cfg:
+            resolved["enabled"] = bool(global_cfg.get("enabled"))
+        if isinstance(global_cfg.get("fields"), list) and global_cfg["fields"]:
+            resolved["fields"] = [str(f) for f in global_cfg["fields"]]
+
+    if platform_key:
+        platforms = cfg.get("platforms") or {}
+        plat_cfg = platforms.get(platform_key)
+        if isinstance(plat_cfg, dict):
+            plat_footer = plat_cfg.get("runtime_footer")
+            if isinstance(plat_footer, dict):
+                if "enabled" in plat_footer:
+                    resolved["enabled"] = bool(plat_footer.get("enabled"))
+                if isinstance(plat_footer.get("fields"), list) and plat_footer["fields"]:
+                    resolved["fields"] = [str(f) for f in plat_footer["fields"]]
+
+    return resolved
+
+
+def format_runtime_footer(
+    *,
+    model: Optional[str],
+    context_tokens: int,
+    context_length: Optional[int],
+    cwd: Optional[str] = None,
+    fields: Iterable[str] = _DEFAULT_FIELDS,
+) -> str:
+    """Render the footer line, or return "" if no fields have data.
+
+    Fields are skipped silently when their underlying data is missing — a
+    partially-populated footer is better than a line with ``?%`` or empty slots.
+    """
+    parts: list[str] = []
+    for field in fields:
+        if field == "model":
+            m = _model_short(model)
+            if m:
+                parts.append(m)
+        elif field == "context_pct":
+            if context_length and context_length > 0 and context_tokens >= 0:
+                pct = max(0, min(100, round((context_tokens / context_length) * 100)))
+                parts.append(f"{pct}%")
+        elif field == "cwd":
+            rel = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
+            if rel:
+                parts.append(rel)
+        # Unknown field names are silently ignored.
+
+    if not parts:
+        return ""
+    return _SEP.join(parts)
+
+
+def build_footer_line(
+    *,
+    user_config: dict[str, Any] | None,
+    platform_key: str | None,
+    model: Optional[str],
+    context_tokens: int,
+    context_length: Optional[int],
+    cwd: Optional[str] = None,
+) -> str:
+    """Top-level entry point used by gateway/run.py.
+
+    Returns the footer text (empty string when disabled or no data).  Callers
+    append this to the final response themselves, preserving a single blank
+    line of separation.
+    """
+    cfg = resolve_footer_config(user_config, platform_key)
+    if not cfg.get("enabled"):
+        return ""
+    return format_runtime_footer(
+        model=model,
+        context_tokens=context_tokens,
+        context_length=context_length,
+        cwd=cwd,
+        fields=cfg.get("fields") or _DEFAULT_FIELDS,
+    )
@@ -62,8 +62,9 @@ from .config import (
 )
 from .whatsapp_identity import (
    canonical_whatsapp_identifier,
-    normalize_whatsapp_identifier,
+    normalize_whatsapp_identifier,  # noqa: F401 - re-exported for gateway.session callers
 )
+from utils import atomic_replace


@dataclass
@@ -234,7 +235,7 @@ def build_session_context_prompt(
 ) -> str:
    """
    Build the dynamic system prompt section that tells the agent about its context.
-    
+
    This is injected into the system prompt so the agent knows:
    - Where messages are coming from
    - What platforms are connected
@@ -246,13 +247,23 @@ def build_session_context_prompt(
    Platforms like Discord are excluded because mentions need real IDs.
    Routing still uses the original values (they stay in SessionSource).
    """
-    # Only apply redaction on platforms where IDs aren't needed for mentions
-    redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
+    # Only apply redaction on platforms where IDs aren't needed for mentions.
+    # Check both the hardcoded set (builtins) and the plugin registry.
+    _is_pii_safe = context.source.platform in _PII_SAFE_PLATFORMS
+    if not _is_pii_safe:
+        try:
+            from gateway.platform_registry import platform_registry
+            entry = platform_registry.get(context.source.platform.value)
+            if entry and entry.pii_safe:
+                _is_pii_safe = True
+        except Exception:
+            pass
+    redact_pii = redact_pii and _is_pii_safe
    lines = [
        "## Current Session Context",
        "",
    ]
-    
+
    # Source info
    platform_name = context.source.platform.value.title()
    if context.source.platform == Platform.LOCAL:
@@ -277,7 +288,7 @@ def build_session_context_prompt(
        else:
            desc = src.description
        lines.append(f"**Source:** {platform_name} ({desc})")
-    
+
    # Channel topic (if available - provides context about the channel's purpose)
    if context.source.chat_topic:
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")
@@ -302,7 +313,7 @@ def build_session_context_prompt(
        if redact_pii:
            uid = _hash_sender_id(uid)
        lines.append(f"**User ID:** {uid}")
-    
+
    # Platform-specific behavioral notes
    if context.source.platform == Platform.SLACK:
        lines.append("")
@@ -368,9 +379,9 @@ def build_session_context_prompt(
    for p in context.connected_platforms:
        if p != Platform.LOCAL:
            platforms_list.append(f"{p.value}: Connected ✓")
-    
+
    lines.append(f"**Connected Platforms:** {', '.join(platforms_list)}")
-    
+
    # Home channels
    if context.home_channels:
        lines.append("")
@@ -378,11 +389,11 @@ def build_session_context_prompt(
        for platform, home in context.home_channels.items():
            hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
            lines.append(f"  - {platform.value}: {home.name} (ID: {hc_id})")
-    
+
    # Delivery options for scheduled tasks
    lines.append("")
    lines.append("**Delivery options for scheduled tasks:**")
-    
+
    from hermes_constants import display_hermes_home

    # Origin delivery
@@ -398,15 +409,15 @@ def build_session_context_prompt(
    lines.append(
        f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
    )
-    
+
    # Platform home channels
    for platform, home in context.home_channels.items():
        lines.append(f"- `\"{platform.value}\"` → Home channel ({home.name})")
-    
+
    # Note about explicit targeting
    lines.append("")
    lines.append("*For explicit targeting, use `\"platform:chat_id\"` format if the user provides a specific chat ID.*")
-    
+
    return "\n".join(lines)


@@ -705,7 +716,7 @@ class SessionStore:
                json.dump(data, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
-            os.replace(tmp_path, sessions_file)
+            atomic_replace(tmp_path, sessions_file)
        except BaseException:
            try:
                os.unlink(tmp_path)
@@ -1257,25 +1268,11 @@ class SessionStore:
        Used by /retry, /undo, and /compress to persist modified conversation history.
        Rewrites both SQLite and legacy JSONL storage.
        """
-        # SQLite: clear old messages and re-insert
+        # SQLite: replace atomically so a mid-rewrite failure doesn't leave
+        # the session half-empty in the DB while JSONL still has history.
        if self._db:
            try:
-                self._db.clear_messages(session_id)
-                for msg in messages:
-                    role = msg.get("role", "unknown")
-                    self._db.append_message(
-                        session_id=session_id,
-                        role=role,
-                        content=msg.get("content"),
-                        tool_name=msg.get("tool_name"),
-                        tool_calls=msg.get("tool_calls"),
-                        tool_call_id=msg.get("tool_call_id"),
-                        reasoning=msg.get("reasoning") if role == "assistant" else None,
-                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
-                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
-                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
-                        codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
-                    )
+                self._db.replace_messages(session_id, messages)
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
        
@@ -91,11 +91,20 @@ class GatewayStreamConsumer:
        chat_id: str,
        config: Optional[StreamConsumerConfig] = None,
        metadata: Optional[dict] = None,
+        on_new_message: Optional[callable] = None,
    ):
        self.adapter = adapter
        self.chat_id = chat_id
        self.cfg = config or StreamConsumerConfig()
        self.metadata = metadata
+        # Fired whenever a fresh content bubble is created on the platform
+        # (first-send of a new message, commentary, overflow chunk, or
+        # fallback continuation). The gateway uses this to linearize the
+        # tool-progress bubble: when content resumes after a tool batch,
+        # the next tool.started should open a NEW progress bubble below
+        # the content, not edit the old bubble above it.
+        # Called with no arguments. Exceptions are swallowed.
+        self._on_new_message = on_new_message
        self._queue: queue.Queue = queue.Queue()
        self._accumulated = ""
        self._message_id: Optional[str] = None
@@ -146,6 +155,16 @@ class GatewayStreamConsumer:
        if text:
            self._queue.put((_COMMENTARY, text))

+    def _notify_new_message(self) -> None:
+        """Fire the on_new_message callback, swallowing any errors."""
+        cb = self._on_new_message
+        if cb is None:
+            return
+        try:
+            cb()
+        except Exception:
+            logger.debug("on_new_message callback error", exc_info=True)
+
    def _reset_segment_state(self, *, preserve_no_edit: bool = False) -> None:
        if preserve_no_edit and self._message_id == "__no_edit__":
            return
@@ -529,6 +548,9 @@ class GatewayStreamConsumer:
                self._message_id = str(result.message_id)
                self._already_sent = True
                self._last_sent_text = text
+                # Fresh content bubble — close off any stale tool bubble
+                # above so the next tool starts a new bubble below.
+                self._notify_new_message()
                return str(result.message_id)
            else:
                self._edit_supported = False
@@ -661,6 +683,9 @@ class GatewayStreamConsumer:
            sent_any_chunk = True
            last_successful_chunk = chunk
            last_message_id = result.message_id or last_message_id
+            # Each fallback chunk is a fresh platform message — notify
+            # so any stale tool-progress bubble gets closed off.
+            self._notify_new_message()

        self._message_id = last_message_id
        self._already_sent = True
@@ -744,6 +769,11 @@ class GatewayStreamConsumer:
            # tool..."), not the final response. Setting already_sent would cause
            # the final response to be incorrectly suppressed when there are
            # multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
+            if result.success:
+                # Commentary counts as fresh content — close off any
+                # stale tool bubble above it so the next tool starts a
+                # new bubble below.
+                self._notify_new_message()
            return result.success
        except Exception as e:
            logger.error("Commentary send error: %s", e)
@@ -973,6 +1003,11 @@ class GatewayStreamConsumer:
                        # every delta/tool boundary when platforms accept a
                        # message but do not return an editable message id.
                        self._message_id = "__no_edit__"
+                    # Notify the gateway that a fresh content bubble was
+                    # created so any accumulated tool-progress bubble above
+                    # gets closed off — the next tool fires into a new
+                    # bubble below, preserving chronological order.
+                    self._notify_new_message()
                    return True
                else:
                    # Initial send failed — disable streaming for this session
@@ -0,0 +1,373 @@
+"""
+Top-level argparse construction for the hermes CLI.
+
+Lives in its own module so other modules (e.g. ``relaunch.py``) can
+introspect the parser to discover which flags exist without running the
+``main`` fn.
+
+Only the top-level parser and the ``chat`` subparser live here. Every other
+subparser (model, gateway, sessions, …) is built inline in ``main.py``
+because its dispatch is tightly coupled to module-level ``cmd_*`` functions.
+"""
+
+import argparse
+
+
+# `--profile` / `-p` is consumed by ``main._apply_profile_override`` before
+# argparse runs (it sets ``HERMES_HOME`` and strips itself from ``sys.argv``),
+# so it isn't on the parser. Listed here so all "carry over on relaunch"
+# metadata lives in one file.
+PRE_ARGPARSE_INHERITED_FLAGS: list[tuple[str, bool]] = [
+    ("--profile", True),
+    ("-p", True),
+]
+
+
+def _inherited_flag(parser, *args, **kwargs):
+    """Register a flag that ``hermes_cli.relaunch`` should carry over when
+    the CLI re-execs itself (e.g. after ``sessions browse`` picks a session,
+    or after the setup wizard launches chat).
+
+    Equivalent to ``parser.add_argument(...)`` plus tagging the resulting
+    Action with ``inherit_on_relaunch = True`` so the relaunch table builder
+    can find it via introspection.
+    """
+    action = parser.add_argument(*args, **kwargs)
+    action.inherit_on_relaunch = True
+    return action
+
+
+_EPILOGUE = """
+Examples:
+    hermes                        Start interactive chat
+    hermes chat -q "Hello"        Single query mode
+    hermes -c                     Resume the most recent session
+    hermes -c "my project"        Resume a session by name (latest in lineage)
+    hermes --resume <session_id>  Resume a specific session by ID
+    hermes setup                  Run setup wizard
+    hermes logout                 Clear stored authentication
+    hermes auth add <provider>    Add a pooled credential
+    hermes auth list              List pooled credentials
+    hermes auth remove <p> <t>    Remove pooled credential by index, id, or label
+    hermes auth reset <provider>  Clear exhaustion status for a provider
+    hermes model                  Select default model
+    hermes fallback [list]        Show fallback provider chain
+    hermes fallback add           Add a fallback provider (same picker as `hermes model`)
+    hermes fallback remove        Remove a fallback provider from the chain
+    hermes config                 View configuration
+    hermes config edit            Edit config in $EDITOR
+    hermes config set model gpt-4 Set a config value
+    hermes gateway                Run messaging gateway
+    hermes -s hermes-agent-dev,github-auth
+    hermes -w                     Start in isolated git worktree
+    hermes gateway install        Install gateway background service
+    hermes sessions list          List past sessions
+    hermes sessions browse        Interactive session picker
+    hermes sessions rename ID T   Rename/title a session
+    hermes logs                   View agent.log (last 50 lines)
+    hermes logs -f                Follow agent.log in real time
+    hermes logs errors            View errors.log
+    hermes logs --since 1h        Lines from the last hour
+    hermes debug share             Upload debug report for support
+    hermes update                 Update to latest version
+
+For more help on a command:
+    hermes <command> --help
+"""
+
+
+def build_top_level_parser():
+    """Build the top-level parser, the subparsers action, and the ``chat`` subparser.
+
+    Returns ``(parser, subparsers, chat_parser)``. The caller wires
+    ``chat_parser.set_defaults(func=cmd_chat)`` and continues registering
+    other subparsers via ``subparsers.add_parser(...)``.
+    """
+    parser = argparse.ArgumentParser(
+        prog="hermes",
+        description="Hermes Agent - AI assistant with tool-calling capabilities",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=_EPILOGUE,
+    )
+
+    parser.add_argument(
+        "--version", "-V", action="store_true", help="Show version and exit"
+    )
+    parser.add_argument(
+        "-z",
+        "--oneshot",
+        metavar="PROMPT",
+        default=None,
+        help=(
+            "One-shot mode: send a single prompt and print ONLY the final "
+            "response text to stdout. No banner, no spinner, no tool "
+            "previews, no session_id line. Tools, memory, rules, and "
+            "AGENTS.md in the CWD are loaded as normal; approvals are "
+            "auto-bypassed. Intended for scripts / pipes."
+        ),
+    )
+    # --model / --provider are accepted at the top level so they can pair
+    # with -z without needing the `chat` subcommand.  If neither -z nor a
+    # subcommand consumes them, they fall through harmlessly as None.
+    # Mirrors `hermes chat --model ... --provider ...` semantics.
+    _inherited_flag(
+        parser,
+        "-m",
+        "--model",
+        default=None,
+        help=(
+            "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
+        ),
+    )
+    _inherited_flag(
+        parser,
+        "--provider",
+        default=None,
+        help=(
+            "Provider override for this invocation (e.g. openrouter, anthropic). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
+        ),
+    )
+    parser.add_argument(
+        "-t",
+        "--toolsets",
+        default=None,
+        help="Comma-separated toolsets to enable for this invocation. Applies to -z/--oneshot and --tui.",
+    )
+    parser.add_argument(
+        "--resume",
+        "-r",
+        metavar="SESSION",
+        default=None,
+        help="Resume a previous session by ID or title",
+    )
+    parser.add_argument(
+        "--continue",
+        "-c",
+        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=None,
+        metavar="SESSION_NAME",
+        help="Resume a session by name, or the most recent if no name given",
+    )
+    parser.add_argument(
+        "--worktree",
+        "-w",
+        action="store_true",
+        default=False,
+        help="Run in an isolated git worktree (for parallel agents)",
+    )
+    _inherited_flag(
+        parser,
+        "--accept-hooks",
+        action="store_true",
+        default=False,
+        help=(
+            "Auto-approve any unseen shell hooks declared in config.yaml "
+            "without a TTY prompt.  Equivalent to HERMES_ACCEPT_HOOKS=1 or "
+            "hooks_auto_accept: true in config.yaml.  Use on CI / headless "
+            "runs that can't prompt."
+        ),
+    )
+    _inherited_flag(
+        parser,
+        "--skills",
+        "-s",
+        action="append",
+        default=None,
+        help="Preload one or more skills for the session (repeat flag or comma-separate)",
+    )
+    _inherited_flag(
+        parser,
+        "--yolo",
+        action="store_true",
+        default=False,
+        help="Bypass all dangerous command approval prompts (use at your own risk)",
+    )
+    _inherited_flag(
+        parser,
+        "--pass-session-id",
+        action="store_true",
+        default=False,
+        help="Include the session ID in the agent's system prompt",
+    )
+    _inherited_flag(
+        parser,
+        "--ignore-user-config",
+        action="store_true",
+        default=False,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
+    )
+    _inherited_flag(
+        parser,
+        "--ignore-rules",
+        action="store_true",
+        default=False,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
+    )
+    _inherited_flag(
+        parser,
+        "--tui",
+        action="store_true",
+        default=False,
+        help="Launch the modern TUI instead of the classic REPL",
+    )
+    _inherited_flag(
+        parser,
+        "--dev",
+        dest="tui_dev",
+        action="store_true",
+        default=False,
+        help="With --tui: run TypeScript sources via tsx (skip dist build)",
+    )
+
+    subparsers = parser.add_subparsers(dest="command", help="Command to run")
+
+    # =========================================================================
+    # chat command
+    # =========================================================================
+    chat_parser = subparsers.add_parser(
+        "chat",
+        help="Interactive chat with the agent",
+        description="Start an interactive chat session with Hermes Agent",
+    )
+    chat_parser.add_argument(
+        "-q", "--query", help="Single query (non-interactive mode)"
+    )
+    chat_parser.add_argument(
+        "--image", help="Optional local image path to attach to a single query"
+    )
+    _inherited_flag(
+        chat_parser,
+        "-m", "--model", help="Model to use (e.g., anthropic/claude-sonnet-4)",
+    )
+    chat_parser.add_argument(
+        "-t", "--toolsets", help="Comma-separated toolsets to enable"
+    )
+    _inherited_flag(
+        chat_parser,
+        "-s",
+        "--skills",
+        action="append",
+        default=argparse.SUPPRESS,
+        help="Preload one or more skills for the session (repeat flag or comma-separate)",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--provider",
+        # No `choices=` here: user-defined providers from config.yaml `providers:`
+        # are also valid values, and runtime resolution (resolve_runtime_provider)
+        # handles validation/error reporting consistently with the top-level
+        # `--provider` flag.
+        default=None,
+        help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
+    )
+    chat_parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Verbose output"
+    )
+    chat_parser.add_argument(
+        "-Q",
+        "--quiet",
+        action="store_true",
+        help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info.",
+    )
+    chat_parser.add_argument(
+        "--resume",
+        "-r",
+        metavar="SESSION_ID",
+        default=argparse.SUPPRESS,
+        help="Resume a previous session by ID (shown on exit)",
+    )
+    chat_parser.add_argument(
+        "--continue",
+        "-c",
+        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=argparse.SUPPRESS,
+        metavar="SESSION_NAME",
+        help="Resume a session by name, or the most recent if no name given",
+    )
+    chat_parser.add_argument(
+        "--worktree",
+        "-w",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Run in an isolated git worktree (for parallel agents on the same repo)",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--accept-hooks",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help=(
+            "Auto-approve any unseen shell hooks declared in config.yaml "
+            "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
+            "hooks_auto_accept: in config.yaml)."
+        ),
+    )
+    chat_parser.add_argument(
+        "--checkpoints",
+        action="store_true",
+        default=False,
+        help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)",
+    )
+    chat_parser.add_argument(
+        "--max-turns",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--yolo",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Bypass all dangerous command approval prompts (use at your own risk)",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--pass-session-id",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Include the session ID in the agent's system prompt",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--ignore-user-config",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--ignore-rules",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
+    )
+    chat_parser.add_argument(
+        "--source",
+        default=None,
+        help="Session source tag for filtering (default: cli). Use 'tool' for third-party integrations that should not appear in user session lists.",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--tui",
+        action="store_true",
+        default=False,
+        help="Launch the modern TUI instead of the classic REPL",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--dev",
+        dest="tui_dev",
+        action="store_true",
+        default=False,
+        help="With --tui: run TypeScript sources via tsx (skip dist build)",
+    )
+
+    return parser, subparsers, chat_parser
@@ -43,6 +43,7 @@ import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -71,6 +72,14 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
 DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113"
+MINIMAX_OAUTH_SCOPE = "group_id profile model.completion"
+MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code"
+MINIMAX_OAUTH_GLOBAL_BASE = "https://api.minimax.io"
+MINIMAX_OAUTH_CN_BASE = "https://api.minimaxi.com"
+MINIMAX_OAUTH_GLOBAL_INFERENCE = "https://api.minimax.io/anthropic"
+MINIMAX_OAUTH_CN_INFERENCE = "https://api.minimaxi.com/anthropic"
+MINIMAX_OAUTH_REFRESH_SKEW_SECONDS = 60
 DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
@@ -109,6 +118,12 @@ SERVICE_PROVIDER_NAMES: Dict[str, str] = {
 DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
 GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60  # refresh 60s before expiry

+# LM Studio's default no-auth mode still requires *some* non-empty bearer for
+# the API-key code paths (auxiliary_client, runtime resolver) to treat the
+# provider as configured. This sentinel is sent only to LM Studio, never to
+# any remote service.
+LMSTUDIO_NOAUTH_PLACEHOLDER = "dummy-lm-api-key"
+

 # =============================================================================
 # Provider Registry
@@ -119,7 +134,7 @@ class ProviderConfig:
    """Describes a known inference provider."""
    id: str
    name: str
-    auth_type: str  # "oauth_device_code", "oauth_external", or "api_key"
+    auth_type: str  # "oauth_device_code", "oauth_external", "oauth_minimax", or "api_key"
    portal_base_url: str = ""
    inference_base_url: str = ""
    client_id: str = ""
@@ -159,6 +174,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="oauth_external",
        inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
    ),
+    "lmstudio": ProviderConfig(
+        id="lmstudio",
+        name="LM Studio",
+        auth_type="api_key",
+        inference_base_url="http://127.0.0.1:1234/v1",
+        api_key_env_vars=("LM_API_KEY",),
+        base_url_env_var="LM_BASE_URL",
+    ),
    "copilot": ProviderConfig(
        id="copilot",
        name="GitHub Copilot",
@@ -240,6 +263,17 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("MINIMAX_API_KEY",),
        base_url_env_var="MINIMAX_BASE_URL",
    ),
+    "minimax-oauth": ProviderConfig(
+        id="minimax-oauth",
+        name="MiniMax (OAuth \u00b7 minimax.io)",
+        auth_type="oauth_minimax",
+        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
+        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
+        client_id=MINIMAX_OAUTH_CLIENT_ID,
+        scope=MINIMAX_OAUTH_SCOPE,
+        extra={"region": "global", "cn_portal_base_url": MINIMAX_OAUTH_CN_BASE,
+               "cn_inference_base_url": MINIMAX_OAUTH_CN_INFERENCE},
+    ),
    "anthropic": ProviderConfig(
        id="anthropic",
        name="Anthropic",
@@ -348,6 +382,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XIAOMI_API_KEY",),
        base_url_env_var="XIAOMI_BASE_URL",
    ),
+    "tencent-tokenhub": ProviderConfig(
+        id="tencent-tokenhub",
+        name="Tencent TokenHub",
+        auth_type="api_key",
+        inference_base_url="https://tokenhub.tencentmaas.com/v1",
+        api_key_env_vars=("TOKENHUB_API_KEY",),
+        base_url_env_var="TOKENHUB_BASE_URL",
+    ),
    "ollama-cloud": ProviderConfig(
        id="ollama-cloud",
        name="Ollama Cloud",
@@ -820,7 +862,7 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
            handle.write(payload)
            handle.flush()
            os.fsync(handle.fileno())
-        os.replace(tmp_path, auth_file)
+        atomic_replace(tmp_path, auth_file)
        try:
            dir_fd = os.open(str(auth_file.parent), os.O_RDONLY)
        except OSError:
@@ -1130,6 +1172,7 @@ def resolve_provider(
        "arcee-ai": "arcee", "arceeai": "arcee",
        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
+        "minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -1141,11 +1184,13 @@ def resolve_provider(
        "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
+        "tencent": "tencent-tokenhub", "tokenhub": "tencent-tokenhub",
+        "tencent-cloud": "tencent-tokenhub", "tencentmaas": "tencent-tokenhub",
        "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
+        "lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio",
        # Local server aliases — route through the generic custom provider
-        "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
        "ollama": "custom", "ollama_cloud": "ollama-cloud",
        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
@@ -1192,8 +1237,11 @@ def resolve_provider(
            continue
        # GitHub tokens are commonly present for repo/tool access but should not
        # hijack inference auto-selection unless the user explicitly chooses
-        # Copilot/GitHub Models as the provider.
-        if pid == "copilot":
+        # Copilot/GitHub Models as the provider. LM Studio is a local server
+        # whose availability isn't implied by LM_API_KEY presence (it may be
+        # offline, and the no-auth setup uses a placeholder value), so it
+        # also requires explicit selection.
+        if pid in ("copilot", "lmstudio"):
            continue
        for env_var in pconfig.api_key_env_vars:
            if has_usable_secret(os.getenv(env_var, "")):
@@ -3471,6 +3519,13 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    key_source = ""
    api_key, key_source = _resolve_api_key_provider_secret(provider_id, pconfig)

+    # No-auth LM Studio: substitute a placeholder so runtime / auxiliary_client
+    # see the local server as configured. doctor still reports unconfigured
+    # because get_api_key_provider_status uses the raw secret resolver.
+    if not api_key and provider_id == "lmstudio":
+        api_key = LMSTUDIO_NOAUTH_PLACEHOLDER
+        key_source = key_source or "default"
+
    env_url = ""
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()
@@ -4081,6 +4136,326 @@ def _codex_device_code_login() -> Dict[str, Any]:
    }


+# ==================== MiniMax Portal OAuth ====================
+
+def _minimax_pkce_pair() -> tuple:
+    """Generate (code_verifier, code_challenge_S256, state) for MiniMax OAuth."""
+    import secrets
+    verifier = secrets.token_urlsafe(64)[:96]
+    challenge = base64.urlsafe_b64encode(
+        hashlib.sha256(verifier.encode()).digest()
+    ).decode().rstrip("=")
+    state = secrets.token_urlsafe(16)
+    return verifier, challenge, state
+
+
+def _minimax_request_user_code(
+    client: httpx.Client, *, portal_base_url: str, client_id: str,
+    code_challenge: str, state: str,
+) -> Dict[str, Any]:
+    response = client.post(
+        f"{portal_base_url}/oauth/code",
+        data={
+            "response_type": "code",
+            "client_id": client_id,
+            "scope": MINIMAX_OAUTH_SCOPE,
+            "code_challenge": code_challenge,
+            "code_challenge_method": "S256",
+            "state": state,
+        },
+        headers={
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Accept": "application/json",
+            "x-request-id": str(uuid.uuid4()),
+        },
+    )
+    if response.status_code != 200:
+        raise AuthError(
+            f"MiniMax OAuth authorization failed: {response.text or response.reason_phrase}",
+            provider="minimax-oauth", code="authorization_failed",
+        )
+    payload = response.json()
+    for field in ("user_code", "verification_uri", "expired_in"):
+        if field not in payload:
+            raise AuthError(
+                f"MiniMax OAuth response missing field: {field}",
+                provider="minimax-oauth", code="authorization_incomplete",
+            )
+    if payload.get("state") != state:
+        raise AuthError(
+            "MiniMax OAuth state mismatch (possible CSRF).",
+            provider="minimax-oauth", code="state_mismatch",
+        )
+    return payload
+
+
+def _minimax_poll_token(
+    client: httpx.Client, *, portal_base_url: str, client_id: str,
+    user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
+) -> Dict[str, Any]:
+    # OpenClaw treats expired_in as a unix-ms timestamp (Date.now() < expireTimeMs).
+    # Defensive parsing: if it's small enough to be a duration, treat as seconds.
+    import time as _time
+    now_ms = int(_time.time() * 1000)
+    if expired_in > now_ms // 2:
+        # Looks like a unix-ms timestamp.
+        deadline = expired_in / 1000.0
+    else:
+        # Treat as duration in seconds from now.
+        deadline = _time.time() + max(1, expired_in)
+    interval = max(2.0, (interval_ms or 2000) / 1000.0)
+
+    while _time.time() < deadline:
+        response = client.post(
+            f"{portal_base_url}/oauth/token",
+            data={
+                "grant_type": MINIMAX_OAUTH_GRANT_TYPE,
+                "client_id": client_id,
+                "user_code": user_code,
+                "code_verifier": code_verifier,
+            },
+            headers={
+                "Content-Type": "application/x-www-form-urlencoded",
+                "Accept": "application/json",
+            },
+        )
+        try:
+            payload = response.json() if response.text else {}
+        except Exception:
+            payload = {}
+
+        if response.status_code != 200:
+            msg = (payload.get("base_resp", {}) or {}).get("status_msg") or response.text
+            raise AuthError(
+                f"MiniMax OAuth error: {msg or 'unknown'}",
+                provider="minimax-oauth", code="token_exchange_failed",
+            )
+
+        status = payload.get("status")
+        if status == "error":
+            raise AuthError(
+                "MiniMax OAuth reported an error. Please try again later.",
+                provider="minimax-oauth", code="authorization_denied",
+            )
+        if status == "success":
+            if not all(payload.get(k) for k in ("access_token", "refresh_token", "expired_in")):
+                raise AuthError(
+                    "MiniMax OAuth success payload missing required token fields.",
+                    provider="minimax-oauth", code="token_incomplete",
+                )
+            return payload
+        # "pending" or any other status -> keep polling
+        _time.sleep(interval)
+
+    raise AuthError(
+        "MiniMax OAuth timed out before authorization completed.",
+        provider="minimax-oauth", code="timeout",
+    )
+
+
+def _minimax_save_auth_state(auth_state: Dict[str, Any]) -> None:
+    """Persist MiniMax OAuth state to Hermes auth store (~/.hermes/auth.json)."""
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        _save_provider_state(auth_store, "minimax-oauth", auth_state)
+        _save_auth_store(auth_store)
+
+
+def _minimax_oauth_login(
+    *, region: str = "global", open_browser: bool = True,
+    timeout_seconds: float = 15.0,
+) -> Dict[str, Any]:
+    """Run MiniMax OAuth flow, persist tokens, return auth state dict."""
+    pconfig = PROVIDER_REGISTRY["minimax-oauth"]
+    if region == "cn":
+        portal_base_url = pconfig.extra["cn_portal_base_url"]
+        inference_base_url = pconfig.extra["cn_inference_base_url"]
+    else:
+        portal_base_url = pconfig.portal_base_url
+        inference_base_url = pconfig.inference_base_url
+
+    verifier, challenge, state = _minimax_pkce_pair()
+
+    if _is_remote_session():
+        open_browser = False
+
+    print(f"Starting Hermes login via MiniMax ({region}) OAuth...")
+    print(f"Portal: {portal_base_url}")
+
+    with httpx.Client(timeout=httpx.Timeout(timeout_seconds),
+                      headers={"Accept": "application/json"}) as client:
+        code_data = _minimax_request_user_code(
+            client, portal_base_url=portal_base_url,
+            client_id=pconfig.client_id,
+            code_challenge=challenge, state=state,
+        )
+        verification_url = str(code_data["verification_uri"])
+        user_code = str(code_data["user_code"])
+
+        print()
+        print("To continue:")
+        print(f"  1. Open: {verification_url}")
+        print(f"  2. If prompted, enter code: {user_code}")
+        if open_browser:
+            if webbrowser.open(verification_url):
+                print("  (Opened browser for verification)")
+            else:
+                print("  Could not open browser automatically -- use the URL above.")
+
+        interval_raw = code_data.get("interval")
+        interval_ms = int(interval_raw) if interval_raw is not None else None
+        print("Waiting for approval...")
+
+        token_data = _minimax_poll_token(
+            client, portal_base_url=portal_base_url,
+            client_id=pconfig.client_id,
+            user_code=user_code, code_verifier=verifier,
+            expired_in=int(code_data["expired_in"]),
+            interval_ms=interval_ms,
+        )
+
+    now = datetime.now(timezone.utc)
+    expires_in_s = int(token_data["expired_in"])
+    expires_at = now.timestamp() + expires_in_s
+
+    auth_state = {
+        "provider": "minimax-oauth",
+        "region": region,
+        "portal_base_url": portal_base_url,
+        "inference_base_url": inference_base_url,
+        "client_id": pconfig.client_id,
+        "scope": MINIMAX_OAUTH_SCOPE,
+        "token_type": token_data.get("token_type", "Bearer"),
+        "access_token": token_data["access_token"],
+        "refresh_token": token_data["refresh_token"],
+        "resource_url": token_data.get("resource_url"),
+        "obtained_at": now.isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
+        "expires_in": expires_in_s,
+    }
+
+    _minimax_save_auth_state(auth_state)
+    print("\u2713 MiniMax OAuth login successful.")
+    if msg := token_data.get("notification_message"):
+        print(f"Note from MiniMax: {msg}")
+    return auth_state
+
+
+def _refresh_minimax_oauth_state(
+    state: Dict[str, Any], *, timeout_seconds: float = 15.0,
+    force: bool = False,
+) -> Dict[str, Any]:
+    """Refresh MiniMax OAuth access token if close to expiry (or forced)."""
+    if not state.get("refresh_token"):
+        raise AuthError(
+            "MiniMax OAuth state has no refresh_token; please re-login.",
+            provider="minimax-oauth", code="no_refresh_token", relogin_required=True,
+        )
+    try:
+        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+    except Exception:
+        expires_at = 0.0
+    now = time.time()
+    if not force and (expires_at - now) > MINIMAX_OAUTH_REFRESH_SKEW_SECONDS:
+        return state
+
+    portal_base_url = state["portal_base_url"]
+    with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client:
+        response = client.post(
+            f"{portal_base_url}/oauth/token",
+            data={
+                "grant_type": "refresh_token",
+                "client_id": state["client_id"],
+                "refresh_token": state["refresh_token"],
+            },
+            headers={
+                "Content-Type": "application/x-www-form-urlencoded",
+                "Accept": "application/json",
+            },
+        )
+    if response.status_code != 200:
+        body = response.text.lower()
+        relogin = any(m in body for m in
+                      ("invalid_grant", "refresh_token_reused", "invalid_refresh_token"))
+        raise AuthError(
+            f"MiniMax OAuth refresh failed: {response.text or response.reason_phrase}",
+            provider="minimax-oauth", code="refresh_failed",
+            relogin_required=relogin,
+        )
+    payload = response.json()
+    if payload.get("status") != "success":
+        raise AuthError(
+            "MiniMax OAuth refresh did not return success.",
+            provider="minimax-oauth", code="refresh_failed",
+            relogin_required=True,
+        )
+    now_dt = datetime.now(timezone.utc)
+    expires_in_s = int(payload["expired_in"])
+    new_state = dict(state)
+    new_state.update({
+        "access_token": payload["access_token"],
+        "refresh_token": payload.get("refresh_token", state["refresh_token"]),
+        "obtained_at": now_dt.isoformat(),
+        "expires_at": datetime.fromtimestamp(now_dt.timestamp() + expires_in_s,
+                                             tz=timezone.utc).isoformat(),
+        "expires_in": expires_in_s,
+    })
+    _minimax_save_auth_state(new_state)
+    return new_state
+
+
+def resolve_minimax_oauth_runtime_credentials(
+    *, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
+) -> Dict[str, Any]:
+    """Return {provider, api_key, base_url, source} for minimax-oauth."""
+    state = get_provider_auth_state("minimax-oauth")
+    if not state or not state.get("access_token"):
+        raise AuthError(
+            "Not logged into MiniMax OAuth. Run `hermes model` and select "
+            "MiniMax (OAuth).",
+            provider="minimax-oauth", code="not_logged_in", relogin_required=True,
+        )
+    state = _refresh_minimax_oauth_state(state)
+    return {
+        "provider": "minimax-oauth",
+        "api_key": state["access_token"],
+        "base_url": state["inference_base_url"].rstrip("/"),
+        "source": "oauth",
+    }
+
+
+def get_minimax_oauth_auth_status() -> Dict[str, Any]:
+    """Return auth status dict for MiniMax OAuth provider."""
+    state = get_provider_auth_state("minimax-oauth")
+    if not state or not state.get("access_token"):
+        return {"logged_in": False, "provider": "minimax-oauth"}
+    try:
+        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+        token_valid = (expires_at - time.time()) > 0
+    except Exception:
+        token_valid = bool(state.get("access_token"))
+    return {
+        "logged_in": token_valid,
+        "provider": "minimax-oauth",
+        "region": state.get("region", "global"),
+        "expires_at": state.get("expires_at"),
+    }
+
+
+def _login_minimax_oauth(args, pconfig: ProviderConfig) -> None:
+    """CLI entry for MiniMax OAuth login."""
+    region = getattr(args, "region", None) or "global"
+    open_browser = not getattr(args, "no_browser", False)
+    timeout = getattr(args, "timeout", None) or 15.0
+    try:
+        _minimax_oauth_login(
+            region=region, open_browser=open_browser, timeout_seconds=timeout,
+        )
+    except AuthError as exc:
+        print(format_auth_error(exc))
+        raise SystemExit(1)
+
+
 def _nous_device_code_login(
    *,
    portal_base_url: Optional[str] = None,
@@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL


 # Providers that support OAuth login in addition to API keys.
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}


 def _get_custom_provider_names() -> list:
@@ -170,7 +170,7 @@ def auth_add_command(args) -> None:
        if provider.startswith(CUSTOM_POOL_PREFIX):
            requested_type = AUTH_TYPE_API_KEY
        else:
-            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY
+            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY

    pool = load_pool(provider)

@@ -333,6 +333,27 @@ def auth_add_command(args) -> None:
        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
        return

+    if provider == "minimax-oauth":
+        from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
+        creds = resolve_minimax_oauth_runtime_credentials()
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["api_key"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:minimax_oauth",
+            access_token=creds["api_key"],
+            base_url=creds.get("base_url"),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
    raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")


@@ -34,7 +34,7 @@ from dataclasses import dataclass, field
 from typing import Optional
 from urllib import request as urllib_request
 from urllib.error import HTTPError, URLError
-from urllib.parse import urlparse, urlunparse
+from urllib.parse import urlparse

 logger = logging.getLogger(__name__)

@@ -696,6 +696,78 @@ def run_quick_backup(args) -> None:
        print("No state files found to snapshot.")


+# ---------------------------------------------------------------------------
+# Shared full-zip backup helper
+# ---------------------------------------------------------------------------
+
+def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
+    """Write a full zip snapshot of ``hermes_root`` to ``out_path``.
+
+    Uses the same exclusion rules and SQLite safe-copy as :func:`run_backup`.
+    Returns the output path on success, None on failure (nothing to back up,
+    or write error — caller should surface the outcome but not raise).
+    """
+    files_to_add: list[tuple[Path, Path]] = []
+    try:
+        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
+            dp = Path(dirpath)
+            # Prune excluded directories in-place so os.walk doesn't descend
+            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
+
+            for fname in filenames:
+                fpath = dp / fname
+                try:
+                    rel = fpath.relative_to(hermes_root)
+                except ValueError:
+                    continue
+
+                if _should_exclude(rel):
+                    continue
+
+                # Skip the output zip itself if it already exists inside root.
+                try:
+                    if fpath.resolve() == out_path.resolve():
+                        continue
+                except (OSError, ValueError):
+                    pass
+
+                files_to_add.append((fpath, rel))
+    except OSError as exc:
+        logger.warning("Full-zip backup: walk failed: %s", exc)
+        return None
+
+    if not files_to_add:
+        return None
+
+    try:
+        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
+            for abs_path, rel_path in files_to_add:
+                try:
+                    if abs_path.suffix == ".db":
+                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                            tmp_db = Path(tmp.name)
+                        try:
+                            if _safe_copy_db(abs_path, tmp_db):
+                                zf.write(tmp_db, arcname=str(rel_path))
+                        finally:
+                            tmp_db.unlink(missing_ok=True)
+                    else:
+                        zf.write(abs_path, arcname=str(rel_path))
+                except (PermissionError, OSError, ValueError) as exc:
+                    logger.debug("Skipping %s in zip backup: %s", rel_path, exc)
+                    continue
+    except OSError as exc:
+        logger.warning("Full-zip backup: zip write failed: %s", exc)
+        # Best-effort cleanup of partial file
+        try:
+            out_path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        return None
+
+    return out_path
+
+
 # ---------------------------------------------------------------------------
 # Pre-update auto-backup
 # ---------------------------------------------------------------------------
@@ -768,64 +840,87 @@ def create_pre_update_backup(
    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
    out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"

-    # Collect files (same logic as run_backup, minus the chatty progress prints)
-    files_to_add: list[tuple[Path, Path]] = []
-    try:
-        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
-            dp = Path(dirpath)
-            # Prune excluded directories in-place so os.walk doesn't descend
-            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
-
-            for fname in filenames:
-                fpath = dp / fname
-                try:
-                    rel = fpath.relative_to(hermes_root)
-                except ValueError:
-                    continue
-
-                if _should_exclude(rel):
-                    continue
-
-                # Skip the output zip itself if it already exists
-                try:
-                    if fpath.resolve() == out_path.resolve():
-                        continue
-                except (OSError, ValueError):
-                    pass
-
-                files_to_add.append((fpath, rel))
-    except OSError as exc:
-        logger.warning("Pre-update backup: walk failed: %s", exc)
-        return None
-
-    if not files_to_add:
-        return None
-
-    try:
-        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
-            for abs_path, rel_path in files_to_add:
-                try:
-                    if abs_path.suffix == ".db":
-                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
-                            tmp_db = Path(tmp.name)
-                        try:
-                            if _safe_copy_db(abs_path, tmp_db):
-                                zf.write(tmp_db, arcname=str(rel_path))
-                        finally:
-                            tmp_db.unlink(missing_ok=True)
-                    else:
-                        zf.write(abs_path, arcname=str(rel_path))
-                except (PermissionError, OSError, ValueError) as exc:
-                    logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
-                    continue
-    except OSError as exc:
-        logger.warning("Pre-update backup: zip write failed: %s", exc)
-        # Best-effort cleanup of partial file
-        try:
-            out_path.unlink(missing_ok=True)
-        except OSError:
-            pass
+    result = _write_full_zip_backup(out_path, hermes_root)
+    if result is None:
        return None

    _prune_pre_update_backups(backup_dir, keep=keep)
    return out_path
+
+
+# ---------------------------------------------------------------------------
+# Pre-migration auto-backup (used by `hermes claw migrate`)
+# ---------------------------------------------------------------------------
+
+_PRE_MIGRATION_PREFIX = "pre-migration-"
+_PRE_MIGRATION_DEFAULT_KEEP = 5
+
+
+def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
+    """Remove oldest pre-migration backups beyond the keep limit.
+
+    Only touches files matching ``pre-migration-*.zip`` so other backups in
+    the same directory are never touched.
+    """
+    if keep < 0:
+        keep = 0
+    if not backup_dir.exists():
+        return 0
+
+    backups = sorted(
+        (p for p in backup_dir.iterdir()
+         if p.is_file() and p.name.startswith(_PRE_MIGRATION_PREFIX) and p.suffix.lower() == ".zip"),
+        key=lambda p: p.name,
+        reverse=True,
+    )
+
+    deleted = 0
+    for p in backups[keep:]:
+        try:
+            p.unlink()
+            deleted += 1
+        except OSError as exc:
+            logger.warning("Failed to prune pre-migration backup %s: %s", p.name, exc)
+
+    return deleted
+
+
+def create_pre_migration_backup(
+    hermes_home: Optional[Path] = None,
+    keep: int = _PRE_MIGRATION_DEFAULT_KEEP,
+) -> Optional[Path]:
+    """Create a full zip backup of HERMES_HOME under ``backups/`` before a
+    ``hermes claw migrate`` apply.
+
+    Shares implementation with :func:`create_pre_update_backup` via
+    ``_write_full_zip_backup`` — same exclusions, same SQLite safe-copy,
+    restorable with ``hermes import <archive>``.  Writes to
+    ``<HERMES_HOME>/backups/pre-migration-<timestamp>.zip`` and auto-prunes
+    old pre-migration backups.
+
+    Returns the path to the created zip, or ``None`` if nothing was found
+    to back up (fresh install) or the write failed.  Never raises — the
+    caller decides whether to abort or proceed.
+    """
+    hermes_root = hermes_home or get_default_hermes_root()
+    if not hermes_root.is_dir():
+        return None
+
+    # Reuses the shared backups/ directory so `hermes import` and the
+    # update-backup listing pick up pre-migration archives too.
+    backup_dir = _pre_update_backup_dir(hermes_root)
+    try:
+        backup_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as exc:
+        logger.warning("Could not create pre-migration backup dir %s: %s", backup_dir, exc)
+        return None
+
+    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    out_path = backup_dir / f"{_PRE_MIGRATION_PREFIX}{stamp}.zip"
+
+    result = _write_full_zip_backup(out_path, hermes_root)
+    if result is None:
+        return None
+
+    _prune_pre_migration_backups(backup_dir, keep=keep)
+    return out_path
@@ -5,6 +5,7 @@ Pure display functions with no HermesCLI state dependency.

 import json
 import logging
+import os
 import shutil
 import subprocess
 import threading
@@ -122,35 +123,36 @@ def get_available_skills() -> Dict[str, List[str]]:
 # Cache update check results for 6 hours to avoid repeated git fetches
 _UPDATE_CHECK_CACHE_SECONDS = 6 * 3600

+# Sentinel returned when we know an update exists but can't count commits
+# (e.g. nix-built hermes — no local git history to count against).
+UPDATE_AVAILABLE_NO_COUNT = -1

-def check_for_updates() -> Optional[int]:
-    """Check how many commits behind origin/main the local repo is.
+_UPSTREAM_REPO_URL = "https://github.com/NousResearch/hermes-agent.git"

-    Does a ``git fetch`` at most once every 6 hours (cached to
-    ``~/.hermes/.update_check``).  Returns the number of commits behind,
-    or ``None`` if the check fails or isn't applicable.
+
+def _check_via_rev(local_rev: str) -> Optional[int]:
+    """Compare an embedded git revision to upstream main via ls-remote.
+
+    Returns 0 if up-to-date, ``UPDATE_AVAILABLE_NO_COUNT`` if behind,
+    or ``None`` on failure.
    """
-    hermes_home = get_hermes_home()
-    repo_dir = hermes_home / "hermes-agent"
-    cache_file = hermes_home / ".update_check"
-
-    # Must be a git repo — fall back to project root for dev installs
-    if not (repo_dir / ".git").exists():
-        repo_dir = Path(__file__).parent.parent.resolve()
-    if not (repo_dir / ".git").exists():
-        return None
-
-    # Read cache
-    now = time.time()
    try:
-        if cache_file.exists():
-            cached = json.loads(cache_file.read_text())
-            if now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS:
-                return cached.get("behind")
+        result = subprocess.run(
+            ["git", "ls-remote", _UPSTREAM_REPO_URL, "refs/heads/main"],
+            capture_output=True, text=True, timeout=10,
+        )
    except Exception:
-        pass
+        return None
+    if result.returncode != 0 or not result.stdout:
+        return None
+    upstream_rev = result.stdout.split()[0]
+    if not upstream_rev:
+        return None
+    return 0 if upstream_rev == local_rev else UPDATE_AVAILABLE_NO_COUNT

-    # Fetch latest refs (fast — only downloads ref metadata, no files)
+
+def _check_via_local_git(repo_dir: Path) -> Optional[int]:
+    """Count commits behind origin/main in a local checkout."""
    try:
        subprocess.run(
            ["git", "fetch", "origin", "--quiet"],
@@ -160,7 +162,6 @@ def check_for_updates() -> Optional[int]:
    except Exception:
        pass  # Offline or timeout — use stale refs, that's fine

-    # Count commits behind
    try:
        result = subprocess.run(
            ["git", "rev-list", "--count", "HEAD..origin/main"],
@@ -168,15 +169,52 @@ def check_for_updates() -> Optional[int]:
            cwd=str(repo_dir),
        )
        if result.returncode == 0:
-            behind = int(result.stdout.strip())
-        else:
-            behind = None
+            return int(result.stdout.strip())
    except Exception:
-        behind = None
+        pass
+    return None

-    # Write cache
+
+def check_for_updates() -> Optional[int]:
+    """Check whether a Hermes update is available.
+
+    Two paths: if ``HERMES_REVISION`` is set (nix builds embed it), compare
+    it to upstream main via ``git ls-remote``. Otherwise look for a local
+    git checkout and count commits behind ``origin/main``.
+
+    Returns the number of commits behind, ``UPDATE_AVAILABLE_NO_COUNT`` (-1)
+    if behind but the count is unknown, ``0`` if up-to-date, or ``None`` if
+    the check failed or doesn't apply. Cached for 6 hours.
+    """
+    hermes_home = get_hermes_home()
+    cache_file = hermes_home / ".update_check"
+    embedded_rev = os.environ.get("HERMES_REVISION") or None
+
+    # Read cache — invalidate if the embedded rev has changed since last check
+    now = time.time()
    try:
-        cache_file.write_text(json.dumps({"ts": now, "behind": behind}))
+        if cache_file.exists():
+            cached = json.loads(cache_file.read_text())
+            if (
+                now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS
+                and cached.get("rev") == embedded_rev
+            ):
+                return cached.get("behind")
+    except Exception:
+        pass
+
+    if embedded_rev:
+        behind = _check_via_rev(embedded_rev)
+    else:
+        repo_dir = hermes_home / "hermes-agent"
+        if not (repo_dir / ".git").exists():
+            repo_dir = Path(__file__).parent.parent.resolve()
+        if not (repo_dir / ".git").exists():
+            return None
+        behind = _check_via_local_git(repo_dir)
+
+    try:
+        cache_file.write_text(json.dumps({"ts": now, "behind": behind, "rev": embedded_rev}))
    except Exception:
        pass

@@ -549,20 +587,29 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    # Update check — use prefetched result if available
    try:
        behind = get_update_result(timeout=0.5)
-        if behind and behind > 0:
-            from hermes_cli.config import recommended_update_command
-            commits_word = "commit" if behind == 1 else "commits"
-            right_lines.append(
-                f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
-                f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
-            )
+        if behind is not None and behind != 0:
+            from hermes_cli.config import get_managed_update_command, recommended_update_command
+            if behind > 0:
+                commits_word = "commit" if behind == 1 else "commits"
+                right_lines.append(
+                    f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
+                    f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
+                )
+            else:
+                # UPDATE_AVAILABLE_NO_COUNT: nix-built hermes; we know an update
+                # exists but not by how much, and we don't know how the user
+                # installed it (nix run, profile, system flake, home-manager).
+                managed_cmd = get_managed_update_command()
+                line = "[bold yellow]⚠ update available[/]"
+                if managed_cmd:
+                    line += f"[dim yellow] — run [bold]{managed_cmd}[/bold][/]"
+                right_lines.append(line)
    except Exception:
        pass  # Never break the banner over an update check

    right_content = "\n".join(right_lines)
    layout_table.add_row(left_content, right_content)

-    agent_name = _skin_branding("agent_name", "Hermes Agent")
    title_color = _skin_color("banner_title", "#FFD700")
    border_color = _skin_color("banner_border", "#CD7F32")
    version_label = format_banner_version_label()
@@ -0,0 +1,138 @@
+"""Shared helpers for attaching Hermes to a local Chrome CDP port."""
+
+from __future__ import annotations
+
+import os
+import platform
+import shlex
+import shutil
+import subprocess
+
+from hermes_constants import get_hermes_home
+
+
+DEFAULT_BROWSER_CDP_PORT = 9222
+DEFAULT_BROWSER_CDP_URL = f"http://127.0.0.1:{DEFAULT_BROWSER_CDP_PORT}"
+
+_DARWIN_APPS = (
+    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+    "/Applications/Chromium.app/Contents/MacOS/Chromium",
+    "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
+    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
+)
+
+_WINDOWS_INSTALL_PARTS = (
+    ("Google", "Chrome", "Application", "chrome.exe"),
+    ("Chromium", "Application", "chrome.exe"),
+    ("Chromium", "Application", "chromium.exe"),
+    ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
+    ("Microsoft", "Edge", "Application", "msedge.exe"),
+)
+
+_LINUX_BIN_NAMES = (
+    "google-chrome", "google-chrome-stable", "chromium-browser",
+    "chromium", "brave-browser", "microsoft-edge",
+)
+
+_WINDOWS_BIN_NAMES = (
+    "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
+    "chrome", "msedge", "brave", "chromium",
+)
+
+
+def get_chrome_debug_candidates(system: str) -> list[str]:
+    candidates: list[str] = []
+    seen: set[str] = set()
+
+    def add(path: str | None) -> None:
+        if not path:
+            return
+        normalized = os.path.normcase(os.path.normpath(path))
+        if normalized in seen or not os.path.isfile(path):
+            return
+        candidates.append(path)
+        seen.add(normalized)
+
+    def add_install_paths(bases: tuple[str | None, ...]) -> None:
+        for base in filter(None, bases):
+            for parts in _WINDOWS_INSTALL_PARTS:
+                add(os.path.join(base, *parts))
+
+    if system == "Darwin":
+        for app in _DARWIN_APPS:
+            add(app)
+        return candidates
+
+    if system == "Windows":
+        for name in _WINDOWS_BIN_NAMES:
+            add(shutil.which(name))
+        add_install_paths((
+            os.environ.get("ProgramFiles"),
+            os.environ.get("ProgramFiles(x86)"),
+            os.environ.get("LOCALAPPDATA"),
+        ))
+        return candidates
+
+    for name in _LINUX_BIN_NAMES:
+        add(shutil.which(name))
+    add_install_paths(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"))
+    return candidates
+
+
+def chrome_debug_data_dir() -> str:
+    return str(get_hermes_home() / "chrome-debug")
+
+
+def _chrome_debug_args(port: int) -> list[str]:
+    return [
+        f"--remote-debugging-port={port}",
+        f"--user-data-dir={chrome_debug_data_dir()}",
+        "--no-first-run",
+        "--no-default-browser-check",
+    ]
+
+
+def manual_chrome_debug_command(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> str | None:
+    system = system or platform.system()
+    candidates = get_chrome_debug_candidates(system)
+
+    if candidates:
+        argv = [candidates[0], *_chrome_debug_args(port)]
+        return subprocess.list2cmdline(argv) if system == "Windows" else shlex.join(argv)
+
+    if system == "Darwin":
+        data_dir = chrome_debug_data_dir()
+        return (
+            f'open -a "Google Chrome" --args --remote-debugging-port={port} '
+            f'--user-data-dir="{data_dir}" --no-first-run --no-default-browser-check'
+        )
+
+    return None
+
+
+def _detach_kwargs(system: str) -> dict:
+    if system != "Windows":
+        return {"start_new_session": True}
+    flags = getattr(subprocess, "DETACHED_PROCESS", 0) | getattr(
+        subprocess, "CREATE_NEW_PROCESS_GROUP", 0
+    )
+    return {"creationflags": flags} if flags else {}
+
+
+def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
+    system = system or platform.system()
+    candidates = get_chrome_debug_candidates(system)
+    if not candidates:
+        return False
+
+    os.makedirs(chrome_debug_data_dir(), exist_ok=True)
+    try:
+        subprocess.Popen(
+            [candidates[0], *_chrome_debug_args(port)],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            **_detach_kwargs(system),
+        )
+        return True
+    except Exception:
+        return False
@@ -4,7 +4,8 @@ Usage:
    hermes claw migrate              # Preview then migrate (always shows preview first)
    hermes claw migrate --dry-run    # Preview only, no changes
    hermes claw migrate --yes        # Skip confirmation prompt
-    hermes claw migrate --preset full --overwrite  # Full migration, overwrite conflicts
+    hermes claw migrate --preset full --overwrite --migrate-secrets  # Full run w/ secrets
+    hermes claw migrate --no-backup  # Skip pre-migration snapshot
    hermes claw cleanup              # Archive leftover OpenClaw directories
    hermes claw cleanup --dry-run    # Preview what would be archived
 """
@@ -15,6 +16,7 @@ import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
+from typing import Optional

 from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
 from hermes_constants import get_optional_skills_dir
@@ -321,10 +323,13 @@ def _cmd_migrate(args):
    migrate_secrets = getattr(args, "migrate_secrets", False)
    workspace_target = getattr(args, "workspace_target", None)
    skill_conflict = getattr(args, "skill_conflict", "skip")
+    no_backup = getattr(args, "no_backup", False)

-    # If using the "full" preset, secrets are included by default
-    if preset == "full":
-        migrate_secrets = True
+    # Secrets are never included implicitly — they must be explicitly requested
+    # via --migrate-secrets, even under --preset full.  This mirrors OpenClaw's
+    # migrate-hermes posture (two-phase: run once without secrets, rerun with
+    # --include-secrets) and prevents a --preset full invocation from silently
+    # importing API keys that the user may not have intended to copy.

    print()
    print(
@@ -431,15 +436,24 @@ def _cmd_migrate(args):

    preview_summary = preview_report.get("summary", {})
    preview_count = preview_summary.get("migrated", 0)
+    preview_conflicts = preview_summary.get("conflict", 0)

-    if preview_count == 0:
+    # "Nothing to migrate" means nothing migrated AND nothing blocked by
+    # conflicts.  If there are conflicts, we still want to show the plan and
+    # surface the refusal/--overwrite guidance instead of silently bailing.
+    if preview_count == 0 and preview_conflicts == 0:
        print()
        print_info("Nothing to migrate from OpenClaw.")
        _print_migration_report(preview_report, dry_run=True)
        return

    print()
-    print_header(f"Migration Preview — {preview_count} item(s) would be imported")
+    if preview_count > 0:
+        print_header(f"Migration Preview — {preview_count} item(s) would be imported")
+    else:
+        print_header(
+            f"Migration Preview — {preview_conflicts} conflict(s), nothing would be imported"
+        )
    print_info("No changes have been made yet. Review the list below:")
    _print_migration_report(preview_report, dry_run=True)

@@ -447,6 +461,24 @@ def _cmd_migrate(args):
    if dry_run:
        return

+    # ── Phase 1b: Refuse if the plan has conflicts and --overwrite is not set ─
+    # Modelled on OpenClaw's assertConflictFreePlan() — apply is a safe no-op
+    # on conflicts unless the user explicitly opts in to overwriting.  Without
+    # this guard, the user would answer "yes, proceed" and silently end up
+    # with a migration that skipped every conflicting item.
+    if preview_conflicts > 0 and not overwrite:
+        print()
+        print_error(
+            f"Plan has {preview_conflicts} conflict(s). Refusing to apply."
+        )
+        print_info(
+            "Each conflict is an item whose target already exists in ~/.hermes/. "
+            "Re-run with --overwrite to replace conflicting targets (item-level "
+            "backups are written to the migration report directory)."
+        )
+        print_info("Or re-run with --dry-run to review the full plan.")
+        return
+
    # ── Phase 2: Confirm and execute ───────────────────────────
    print()
    if not auto_yes:
@@ -458,6 +490,32 @@ def _cmd_migrate(args):
            print_info("Migration cancelled.")
            return

+    # ── Phase 2b: Pre-apply backup of the Hermes home ─────────
+    # Delegates to hermes_cli.backup.create_pre_migration_backup(), which
+    # shares implementation with the pre-update backup (same exclusion
+    # rules, same SQLite safe-copy, zip format) so the archive is
+    # restorable with `hermes import`.  Mirrors OpenClaw's
+    # createPreMigrationBackup posture — one atomic restore point before
+    # any mutation, auto-pruned to the last 5 pre-migration zips.
+    backup_archive: Optional[Path] = None
+    if not no_backup:
+        try:
+            from hermes_cli.backup import create_pre_migration_backup, _format_size
+            backup_archive = create_pre_migration_backup(hermes_home=hermes_home)
+            if backup_archive:
+                size_str = _format_size(backup_archive.stat().st_size)
+                print()
+                print_success(f"Pre-migration backup: {backup_archive} ({size_str})")
+                print_info(f"Restore with: hermes import {backup_archive.name}")
+        except Exception as e:
+            print()
+            print_error(f"Could not create pre-migration backup: {e}")
+            print_info(
+                "Re-run with --no-backup to skip, or free up disk space under the Hermes home."
+            )
+            logger.debug("Pre-migration backup error", exc_info=True)
+            return
+
    try:
        migrator = mod.Migrator(
            source_root=source_dir.resolve(),
@@ -476,6 +534,9 @@ def _cmd_migrate(args):
        print()
        print_error(f"Migration failed: {e}")
        logger.debug("OpenClaw migration error", exc_info=True)
+        if backup_archive:
+            print_info(f"A pre-migration backup is available at: {backup_archive}")
+            print_info(f"Restore with: hermes import {backup_archive.name}")
        return

    # Print results
@@ -115,6 +115,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
               "Configuration", cli_only=True,
               gateway_config_gate="display.tool_progress_command"),
+    CommandDef("footer", "Toggle gateway runtime-metadata footer on final replies",
+               "Configuration", args_hint="[on|off|status]",
+               subcommands=("on", "off", "status")),
    CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
               "Configuration"),
    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
@@ -125,6 +128,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
               subcommands=("normal", "fast", "status", "on", "off")),
    CommandDef("skin", "Show or change the display skin/theme", "Configuration",
               cli_only=True, args_hint="[name]"),
+    CommandDef("indicator", "Pick the TUI busy-indicator style", "Configuration",
+               cli_only=True, args_hint="[kaomoji|emoji|unicode|ascii]",
+               subcommands=("kaomoji", "emoji", "unicode", "ascii")),
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
@@ -142,10 +148,15 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
+    CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
+               "Tools & Skills", args_hint="[subcommand]",
+               subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
               cli_only=True),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
               aliases=("reload_mcp",)),
+    CommandDef("reload-skills", "Re-scan ~/.hermes/skills/ for newly installed or removed skills",
+               "Tools & Skills", aliases=("reload_skills",)),
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
               cli_only=True, args_hint="[connect|disconnect|status]",
               subcommands=("connect", "disconnect", "status")),
@@ -943,6 +954,42 @@ def slack_subcommand_map() -> dict[str, str]:
 # Autocomplete
 # ---------------------------------------------------------------------------

+
+# Per-process cache for /model<space> LM Studio autocomplete. Probing on
+# every keystroke would block the UI; a short TTL keeps it live without
+# hammering the server.
+_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None
+
+
+def _lmstudio_completion_models() -> list[str]:
+    """Locally-loaded LM Studio models for /model autocomplete (cached, gated)."""
+    global _LMSTUDIO_COMPLETION_CACHE
+    # Gate: don't probe 127.0.0.1 on every keystroke for users who don't use LM Studio.
+    if not (os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL")):
+        try:
+            from hermes_cli.auth import _load_auth_store
+            store = _load_auth_store() or {}
+            if "lmstudio" not in (store.get("providers") or {}) \
+               and "lmstudio" not in (store.get("credential_pool") or {}):
+                return []
+        except Exception:
+            return []
+    now = time.time()
+    if _LMSTUDIO_COMPLETION_CACHE and (now - _LMSTUDIO_COMPLETION_CACHE[0]) < 30.0:
+        return _LMSTUDIO_COMPLETION_CACHE[1]
+    try:
+        from hermes_cli.models import fetch_lmstudio_models
+        models = fetch_lmstudio_models(
+            api_key=os.environ.get("LM_API_KEY", ""),
+            base_url=os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1",
+            timeout=0.8,
+        )
+    except Exception:
+        models = []
+    _LMSTUDIO_COMPLETION_CACHE = (now, models)
+    return models
+
+
 class SlashCommandCompleter(Completer):
    """Autocomplete for built-in slash commands, subcommands, and skill commands."""

@@ -1366,6 +1413,19 @@ class SlashCommandCompleter(Completer):
                    )
        except Exception:
            pass
+        # LM Studio: surface locally-loaded models. Gated on the user actually
+        # having LM Studio configured (env var or auth-store entry) so we
+        # don't probe 127.0.0.1 on every keystroke for users who don't use it.
+        for name in _lmstudio_completion_models():
+            if name in seen:
+                continue
+            if name.startswith(sub_lower) and name != sub_lower:
+                yield Completion(
+                    name,
+                    start_position=-len(sub_text),
+                    display=name,
+                    display_meta="LM Studio",
+                )

    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
@@ -30,34 +30,69 @@ logger = logging.getLogger(__name__)
 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 _LAST_EXPANDED_CONFIG_BY_PATH: Dict[str, Any] = {}
+# (path, mtime_ns, size) -> cached expanded config dict.
+# load_config() returns a deepcopy of the cached value when the file
+# hasn't changed since the last load, skipping yaml.safe_load +
+# _deep_merge + _normalize_* + _expand_env_vars (~13 ms/call).
+# save_config() + migrate_config() write via atomic_yaml_write which
+# produces a fresh inode, so stat() sees a new mtime_ns and the next
+# load repopulates automatically — no explicit invalidation hook.
+_LOAD_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
+# (path, mtime_ns, size) -> cached raw yaml dict. Same pattern as
+# _LOAD_CONFIG_CACHE but for read_raw_config() — used when callers want
+# the user's on-disk values without defaults merged in.
+_RAW_CONFIG_CACHE: Dict[str, Tuple[int, int, Dict[str, Any]]] = {}
 # Env var names written to .env that aren't in OPTIONAL_ENV_VARS
 # (managed by setup/provider flows directly).
 _EXTRA_ENV_KEYS = frozenset({
    "OPENAI_API_KEY", "OPENAI_BASE_URL",
    "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
-    "DISCORD_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL",
+    "DISCORD_HOME_CHANNEL", "DISCORD_HOME_CHANNEL_NAME",
+    "TELEGRAM_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL_NAME",
+    "SLACK_HOME_CHANNEL", "SLACK_HOME_CHANNEL_NAME",
    "SIGNAL_ACCOUNT", "SIGNAL_HTTP_URL",
    "SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
+    "SIGNAL_HOME_CHANNEL", "SIGNAL_HOME_CHANNEL_NAME",
+    "SMS_HOME_CHANNEL", "SMS_HOME_CHANNEL_NAME",
    "DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
+    "DINGTALK_HOME_CHANNEL", "DINGTALK_HOME_CHANNEL_NAME",
    "FEISHU_APP_ID", "FEISHU_APP_SECRET", "FEISHU_ENCRYPT_KEY", "FEISHU_VERIFICATION_TOKEN",
+    "FEISHU_HOME_CHANNEL", "FEISHU_HOME_CHANNEL_NAME",
+    "YUANBAO_HOME_CHANNEL", "YUANBAO_HOME_CHANNEL_NAME",
    "WECOM_BOT_ID", "WECOM_SECRET",
    "WECOM_CALLBACK_CORP_ID", "WECOM_CALLBACK_CORP_SECRET", "WECOM_CALLBACK_AGENT_ID",
    "WECOM_CALLBACK_TOKEN", "WECOM_CALLBACK_ENCODING_AES_KEY",
    "WECOM_CALLBACK_HOST", "WECOM_CALLBACK_PORT",
+    "WECOM_HOME_CHANNEL", "WECOM_HOME_CHANNEL_NAME",
    "WEIXIN_ACCOUNT_ID", "WEIXIN_TOKEN", "WEIXIN_BASE_URL", "WEIXIN_CDN_BASE_URL",
    "WEIXIN_HOME_CHANNEL", "WEIXIN_HOME_CHANNEL_NAME", "WEIXIN_DM_POLICY", "WEIXIN_GROUP_POLICY",
    "WEIXIN_ALLOWED_USERS", "WEIXIN_GROUP_ALLOWED_USERS", "WEIXIN_ALLOW_ALL_USERS",
    "BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_PASSWORD",
+    "BLUEBUBBLES_HOME_CHANNEL", "BLUEBUBBLES_HOME_CHANNEL_NAME",
    "QQ_APP_ID", "QQ_CLIENT_SECRET", "QQBOT_HOME_CHANNEL", "QQBOT_HOME_CHANNEL_NAME",
    "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",  # legacy aliases (pre-rename, still read for back-compat)
    "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
    "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
+    "IRC_SERVER", "IRC_PORT", "IRC_NICKNAME", "IRC_CHANNEL",
+    "IRC_USE_TLS", "IRC_SERVER_PASSWORD", "IRC_NICKSERV_PASSWORD",
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
-    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
+    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_HOME_CHANNEL_NAME", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM",
-    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
+    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD", "MATRIX_DM_AUTO_THREAD",
    "MATRIX_RECOVERY_KEY",
+    # Langfuse observability plugin — optional tuning keys + standard SDK vars.
+    # Activation is via plugins.enabled (opt-in through `hermes plugins enable
+    # observability/langfuse` or `hermes tools → Langfuse`); credentials gate
+    # the plugin at runtime.
+    "HERMES_LANGFUSE_ENV",
+    "HERMES_LANGFUSE_RELEASE",
+    "HERMES_LANGFUSE_SAMPLE_RATE",
+    "HERMES_LANGFUSE_MAX_CHARS",
+    "HERMES_LANGFUSE_DEBUG",
+    "LANGFUSE_PUBLIC_KEY",
+    "LANGFUSE_SECRET_KEY",
+    "LANGFUSE_BASE_URL",
 })
 import yaml

@@ -206,6 +241,7 @@ def get_container_exec_info() -> Optional[dict]:

 # Re-export from hermes_constants — canonical definition lives there.
 from hermes_constants import get_hermes_home  # noqa: F811,E402
+from utils import atomic_replace

 def get_config_path() -> Path:
    """Get the main config file path."""
@@ -314,7 +350,7 @@ def ensure_hermes_home():
    else:
        home.mkdir(parents=True, exist_ok=True)
        _secure_dir(home)
-        for subdir in ("cron", "sessions", "logs", "memories"):
+        for subdir in ("cron", "sessions", "logs", "logs/curator", "memories"):
            d = home / subdir
            d.mkdir(parents=True, exist_ok=True)
            _secure_dir(d)
@@ -335,6 +371,10 @@ def _ensure_hermes_home_managed(home: Path):
                f"{d} does not exist. "
                "Run 'sudo nixos-rebuild switch' first."
            )
+    # Curator reports dir is a sub-path of logs/; create it if missing.
+    # In managed mode the activation script may not know about this subdir,
+    # so we mkdir it ourselves (it's inside an already-secured logs/ dir).
+    (home / "logs" / "curator").mkdir(parents=True, exist_ok=True)
    # Inside umask(0o007) scope — SOUL.md will be created as 0660
    _ensure_default_soul_md(home)

@@ -389,6 +429,20 @@ DEFAULT_CONFIG = {
        # (60+ tool iterations with tiny output) before users assume the
        # bot is dead and /restart.
        "gateway_notify_interval": 180,
+        # Freshness window for the gateway auto-continue note (seconds).
+        # After a gateway crash/restart/SIGTERM mid-run, the next user
+        # message gets a "[System note: your previous turn was
+        # interrupted — process the unfinished tool result(s) first]"
+        # prepended so the model picks up where it left off.  That's the
+        # right behaviour while the interruption is fresh, but stale
+        # markers (transcript last touched hours or days ago) can revive
+        # an unrelated old task when the user's next message starts new
+        # work.  This window is the max age of the last persisted
+        # transcript row for which we still inject the continue note.
+        # Default 3600s comfortably covers a long turn (gateway_timeout
+        # default is 1800s) plus runtime slack.  Set to 0 to disable the
+        # gate and restore pre-fix behaviour (always inject).
+        "gateway_auto_continue_freshness": 3600,
        # How user-attached images are presented to the main model on each turn.
        #   "auto"   — attach natively when the active model reports
        #              supports_vision=True AND the user hasn't explicitly
@@ -451,7 +505,8 @@ DEFAULT_CONFIG = {
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
-        # Container resource limits (docker, singularity, modal, daytona — ignored for local/ssh)
+        "vercel_runtime": "node24",
+        # Container resource limits (docker, singularity, modal, daytona, vercel_sandbox — ignored for local/ssh)
        "container_cpu": 1,
        "container_memory": 5120,       # MB (default 5GB)
        "container_disk": 51200,        # MB (default 50GB)
@@ -467,6 +522,16 @@ DEFAULT_CONFIG = {
        # Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
        # Default off because passing host directories into a sandbox weakens isolation.
        "docker_mount_cwd_to_workspace": False,
+        # Explicit opt-in: run the Docker container as the host user's uid:gid
+        # (via `--user`).  When enabled, files written into bind-mounted dirs
+        # (docker_volumes, the persistent workspace, or the auto-mounted cwd)
+        # are owned by your host user instead of root, which avoids needing
+        # `sudo chown` after container runs. Default off to preserve behavior
+        # for images whose entrypoints expect to start as root (e.g. the
+        # bundled Hermes image, which drops to the `hermes` user via gosu).
+        # When on, SETUID/SETGID caps are omitted from the container since
+        # no privilege drop is needed.
+        "docker_run_as_host_user": False,
        # Persistent shell — keep a long-lived bash shell across execute() calls
        # so cwd/env vars/shell variables survive between commands.
        # Enabled by default for non-local backends (SSH); local is always opt-in
@@ -546,7 +611,7 @@ DEFAULT_CONFIG = {
        "threshold": 0.50,            # compress when context usage exceeds this ratio
        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
-
+        "hygiene_hard_message_limit": 400,  # gateway session-hygiene force-compress threshold by message count
    },

    # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
@@ -648,6 +713,19 @@ DEFAULT_CONFIG = {
            "timeout": 30,
            "extra_body": {},
        },
+        # Curator — skill-usage review fork. Timeout is generous because the
+        # review pass can take several minutes on reasoning models (umbrella
+        # building over hundreds of candidate skills). "auto" = use main chat
+        # model; override via `hermes model` → auxiliary → Curator to route
+        # to a cheaper aux model (e.g. openrouter google/gemini-3-flash-preview).
+        "curator": {
+            "provider": "auto",
+            "model": "",
+            "base_url": "",
+            "api_key": "",
+            "timeout": 600,
+            "extra_body": {},
+        },
    },
    
    "display": {
@@ -655,6 +733,11 @@ DEFAULT_CONFIG = {
        "personality": "kawaii",
        "resume_display": "full",
        "busy_input_mode": "interrupt",  # interrupt | queue | steer
+        # When true, `hermes --tui` auto-resumes the most recent human-
+        # facing session on launch instead of forging a fresh one.
+        # Mirrors `hermes -c` muscle memory.  Default off so existing
+        # users aren't surprised.  HERMES_TUI_RESUME=<id> always wins.
+        "tui_auto_resume_recent": False,
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
@@ -662,6 +745,9 @@ DEFAULT_CONFIG = {
        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
+        # TUI busy indicator style: kaomoji (default), emoji, unicode (braille
+        # spinner), or ascii.  Live-swappable via `/indicator <style>`.
+        "tui_status_indicator": "kaomoji",
        "user_message_preview": {  # CLI: how many submitted user-message lines to echo back in scrollback
            "first_lines": 2,
            "last_lines": 2,
@@ -671,6 +757,14 @@ DEFAULT_CONFIG = {
        "tool_progress_overrides": {},  # DEPRECATED — use display.platforms instead
        "tool_preview_length": 0,  # Max chars for tool call previews (0 = no limit, show full paths/commands)
        "platforms": {},  # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
+        # Gateway runtime-metadata footer appended to the FINAL message of a turn
+        # (disabled by default to keep replies minimal). When enabled, renders
+        # e.g. `model · 68% · ~/projects/hermes`. Per-platform overrides go under
+        # display.platforms.<platform>.runtime_footer.
+        "runtime_footer": {
+            "enabled": False,
+            "fields": ["model", "context_pct", "cwd"],  # Order shown; drop any to hide
+        },
    },

    # Web dashboard settings
@@ -689,7 +783,7 @@ DEFAULT_CONFIG = {
    # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
    # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
    "tts": {
-        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
+        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "gemini" | "neutts" (local) | "kittentts" (local) | "piper" (local)
        "edge": {
            "voice": "en-US-AriaNeural",
            # Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@@ -719,6 +813,19 @@ DEFAULT_CONFIG = {
            "model": "neuphonic/neutts-air-q4-gguf",  # HuggingFace model repo
            "device": "cpu",  # cpu, cuda, or mps
        },
+        "piper": {
+            # Voice name (e.g. "en_US-lessac-medium") downloaded on first
+            # use, OR an absolute path to a pre-downloaded .onnx file.
+            # Full voice list: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/VOICES.md
+            "voice": "en_US-lessac-medium",
+            # "voices_dir": "",        # Override voice cache dir; default = ~/.hermes/cache/piper-voices/
+            # "use_cuda": False,       # Requires onnxruntime-gpu
+            # "length_scale": 1.0,     # 2.0 = twice as slow
+            # "noise_scale": 0.667,
+            # "noise_w_scale": 0.8,
+            # "volume": 1.0,
+            # "normalize_audio": True,
+        },
    },
    
    "stt": {
@@ -851,6 +958,29 @@ DEFAULT_CONFIG = {
        "guard_agent_created": False,
    },

+    # Curator — background skill maintenance.
+    #
+    # Periodically reviews AGENT-CREATED skills (never bundled or
+    # hub-installed) and keeps the collection tidy: marks long-unused skills
+    # as stale, archives genuinely obsolete ones (archive only, never
+    # deletes), and spawns a forked aux-model agent to consolidate overlaps
+    # and patch drift. Runs inactivity-triggered from session start — no
+    # cron daemon.
+    #
+    # See `hermes curator status` for the last run summary.
+    "curator": {
+        "enabled": True,
+        # How long to wait between curator runs (hours).  Default: 7 days.
+        "interval_hours": 24 * 7,
+        # Only run when the agent has been idle at least this long (hours).
+        "min_idle_hours": 2,
+        # Mark a skill as "stale" after this many days without use.
+        "stale_after_days": 30,
+        # Archive a skill (move to skills/.archive/) after this many days
+        # without use. Archived skills are recoverable — no auto-deletion.
+        "archive_after_days": 90,
+    },
+
    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
    # This section is only needed for hermes-specific overrides; everything else
    # (apiKey, workspace, peerName, sessions, enabled) comes from the global config.
@@ -888,6 +1018,7 @@ DEFAULT_CONFIG = {

    # Telegram platform settings (gateway mode)
    "telegram": {
+        "reactions": False,            # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-chat/topic ephemeral system prompts (topics inherit from parent group)
    },

@@ -913,6 +1044,14 @@ DEFAULT_CONFIG = {
        "mode": "manual",
        "timeout": 60,
        "cron_mode": "deny",
+        # When true, /reload-mcp asks the user to confirm before rebuilding
+        # the MCP tool set for the active session.  Reloading invalidates
+        # the provider prompt cache (tool schemas are baked into the system
+        # prompt), so the next message re-sends full input tokens — this can
+        # be expensive on long-context or high-reasoning models.  Users click
+        # "Always Approve" to silence the prompt permanently; that flips
+        # this key to false.
+        "mcp_reload_confirm": True,
    },

    # Permanently allowed dangerous command patterns (added via "always" approval)
@@ -942,7 +1081,7 @@ DEFAULT_CONFIG = {
    # Pre-exec security scanning via tirith
    "security": {
        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
-        "redact_secrets": True,
+        "redact_secrets": False,
        "tirith_enabled": True,
        "tirith_path": "tirith",
        "tirith_timeout": 5,
@@ -1066,7 +1205,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 22,
+    "_config_version": 23,
 }

 # =============================================================================
@@ -1166,6 +1305,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "LM_API_KEY": {
+        "description": "LM Studio bearer token for auth-enabled local servers",
+        "prompt": "LM Studio API key / bearer token",
+        "url": None,
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "LM_BASE_URL": {
+        "description": "LM Studio base URL override",
+        "prompt": "LM Studio base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "GLM_API_KEY": {
        "description": "Z.AI / GLM API key (also recognized as ZAI_API_KEY / Z_AI_API_KEY)",
        "prompt": "Z.AI / GLM API key",
@@ -1692,6 +1847,30 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
    },

+    # ── Langfuse observability ──
+    "HERMES_LANGFUSE_PUBLIC_KEY": {
+        "description": "Langfuse project public key (pk-lf-...)",
+        "prompt": "Langfuse public key",
+        "url": "https://cloud.langfuse.com",
+        "password": False,
+        "category": "tool",
+    },
+    "HERMES_LANGFUSE_SECRET_KEY": {
+        "description": "Langfuse project secret key (sk-lf-...)",
+        "prompt": "Langfuse secret key",
+        "url": "https://cloud.langfuse.com",
+        "password": True,
+        "category": "tool",
+    },
+    "HERMES_LANGFUSE_BASE_URL": {
+        "description": "Langfuse server URL (default: https://cloud.langfuse.com)",
+        "prompt": "Langfuse server URL (leave empty for cloud.langfuse.com)",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
+
    # ── Messaging platforms ──
    "TELEGRAM_BOT_TOKEN": {
        "description": "Telegram bot token from @BotFather",
@@ -1839,6 +2018,14 @@ OPTIONAL_ENV_VARS = {
        "category": "messaging",
        "advanced": True,
    },
+    "MATRIX_DM_AUTO_THREAD": {
+        "description": "Auto-create threads for DM messages in Matrix (default: false)",
+        "prompt": "Auto-create threads in DMs (true/false)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+        "advanced": True,
+    },
    "MATRIX_DEVICE_ID": {
        "description": "Stable Matrix device ID for E2EE persistence across restarts (e.g. HERMES_BOT)",
        "prompt": "Matrix device ID (stable across restarts)",
@@ -1923,6 +2110,43 @@ OPTIONAL_ENV_VARS = {
        "prompt": "QQ Sandbox Mode",
        "category": "messaging",
    },
+    "IRC_SERVER": {
+        "description": "IRC server hostname (e.g. irc.libera.chat)",
+        "prompt": "IRC server",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
+    "IRC_CHANNEL": {
+        "description": "IRC channel to join (e.g. #hermes)",
+        "prompt": "IRC channel",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
+    "IRC_NICKNAME": {
+        "description": "Bot nickname on IRC (default: hermes-bot)",
+        "prompt": "IRC nickname",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
+    "IRC_SERVER_PASSWORD": {
+        "description": "IRC server password (if required)",
+        "prompt": "IRC server password",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+        "advanced": True,
+    },
+    "IRC_NICKSERV_PASSWORD": {
+        "description": "NickServ password for nick identification",
+        "prompt": "NickServ password",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+        "advanced": True,
+    },
    "GATEWAY_ALLOW_ALL_USERS": {
        "description": "Allow all users to interact with messaging bots (true/false). Default: false.",
        "prompt": "Allow all users (true/false)",
@@ -2085,19 +2309,55 @@ def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
    return missing


-def _set_nested(config: dict, dotted_key: str, value):
+def _set_nested(config, dotted_key: str, value):
    """Set a value at an arbitrarily nested dotted key path.

-    Creates intermediate dicts as needed, e.g. ``_set_nested(c, "a.b.c", 1)``
-    ensures ``c["a"]["b"]["c"] == 1``.
+    Supports both dict and list navigation:
+      _set_nested(c, "a.b.c", 1)     → c["a"]["b"]["c"] = 1
+      _set_nested(c, "a.0.b", 1)     → c["a"][0]["b"] = 1
+      _set_nested(c, "providers.1", "x") → c["providers"][1] = "x"
+
+    Intermediate dicts are created on demand.  List indices are parsed
+    from numeric path segments; the referenced index must already exist
+    (we do not grow lists — the user is navigating into structure they
+    wrote themselves).  If a segment targets a non-container leaf
+    (scalar), the leaf is replaced with a fresh dict so the write can
+    proceed — this preserves the pre-existing behavior for bare scalar
+    overrides (e.g. setting ``a.b.c`` where ``a.b`` was previously a
+    string).
+
+    Guards against #17876: before this fix the code unconditionally
+    replaced any non-dict value (including lists) with ``{}``, silently
+    destroying list-typed config like ``custom_providers`` whenever a
+    caller used an indexed path.
    """
    parts = dotted_key.split(".")
    current = config
    for part in parts[:-1]:
-        if part not in current or not isinstance(current.get(part), dict):
-            current[part] = {}
-        current = current[part]
-    current[parts[-1]] = value
+        if isinstance(current, list):
+            try:
+                idx = int(part)
+            except (TypeError, ValueError):
+                raise TypeError(
+                    f"Cannot navigate into list at key {dotted_key!r}: "
+                    f"segment {part!r} is not a numeric index"
+                )
+            current = current[idx]
+        elif isinstance(current, dict):
+            existing = current.get(part)
+            # Preserve dicts and lists; replace missing/scalar with a fresh dict.
+            if part not in current or not isinstance(existing, (dict, list)):
+                current[part] = {}
+            current = current[part]
+        else:
+            raise TypeError(
+                f"Cannot navigate into {type(current).__name__} at key {dotted_key!r}"
+            )
+    last = parts[-1]
+    if isinstance(current, list):
+        current[int(last)] = value
+    else:
+        current[last] = value


 def get_missing_config_fields() -> List[Dict[str, Any]]:
@@ -2180,14 +2440,21 @@ def _normalize_custom_provider_entry(
        "baseUrl": "base_url",
        "apiMode": "api_mode",
        "keyEnv": "key_env",
+        "apiKeyEnv": "key_env",  # alias — OpenClaw-compatible + docs variant
        "defaultModel": "default_model",
        "contextLength": "context_length",
        "rateLimitDelay": "rate_limit_delay",
    }
+    # api_key_env is a documented snake_case alias for key_env (see
+    # website/docs/guides/azure-foundry.md).  Normalize it up front so the
+    # rest of the normalizer treats it as the canonical field.
+    if "api_key_env" in entry and "key_env" not in entry:
+        entry["key_env"] = entry["api_key_env"]
    _KNOWN_KEYS = {
-        "name", "api", "url", "base_url", "api_key", "key_env",
+        "name", "api", "url", "base_url", "api_key", "key_env", "api_key_env",
        "api_mode", "transport", "model", "default_model", "models",
        "context_length", "rate_limit_delay",
+        "request_timeout_seconds", "stale_timeout_seconds",
    }
    for camel, snake in _CAMEL_ALIASES.items():
        if camel in entry and snake not in entry:
@@ -2439,6 +2706,9 @@ _KNOWN_ROOT_KEYS = {
 _VALID_CUSTOM_PROVIDER_FIELDS = {
    "name", "base_url", "api_key", "api_mode", "model", "models",
    "context_length", "rate_limit_delay",
+    # key_env is read at runtime by runtime_provider.py and auxiliary_client.py
+    # — include it here so the set accurately describes the supported schema.
+    "key_env",
 }

 # Fields that look like they should be inside custom_providers, not at root
@@ -2515,10 +2785,32 @@ def validate_config_structure(config: Optional[Dict[str, Any]] = None) -> List["
                        "Add the API endpoint URL, e.g.: base_url: https://api.example.com/v1",
                    ))

-    # ── fallback_model must be a top-level dict with provider + model ────
+    # ── fallback_model: single dict OR list of dicts (chain) ─────────────
    fb = config.get("fallback_model")
    if fb is not None:
-        if not isinstance(fb, dict):
+        if isinstance(fb, list):
+            # Chain fallback — validate each entry
+            for i, entry in enumerate(fb):
+                if not isinstance(entry, dict):
+                    issues.append(ConfigIssue(
+                        "error",
+                        f"fallback_model[{i}] should be a dict, got {type(entry).__name__}",
+                        "Each entry needs provider + model",
+                    ))
+                else:
+                    if not entry.get("provider"):
+                        issues.append(ConfigIssue(
+                            "warning",
+                            f"fallback_model[{i}] is missing 'provider' field",
+                            "Add: provider: openrouter (or another provider)",
+                        ))
+                    if not entry.get("model"):
+                        issues.append(ConfigIssue(
+                            "warning",
+                            f"fallback_model[{i}] is missing 'model' field",
+                            "Add: model: <model-name>",
+                        ))
+        elif not isinstance(fb, dict):
            issues.append(ConfigIssue(
                "error",
                f"fallback_model should be a dict with 'provider' and 'model', got {type(fb).__name__}",
@@ -2986,6 +3278,90 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                        "Use `hermes plugins enable <name>` to activate."
                    )

+    # ── Version 22 → 23: seed curator defaults + create logs/curator/ ──
+    # The curator (background skill maintenance) was added in PR #16049, but
+    # existing configs from before that PR (or before the April 2026
+    # unification under `auxiliary.curator`) never wrote the curator section
+    # to disk. The runtime deep-merge in `load_config()` fills defaults at
+    # read time, so the curator *functions*; but users can't see/edit the
+    # settings in their `config.yaml`, and `hermes curator status` has no
+    # stable logs dir to point at until the first run mkdir's it.
+    #
+    # This migration:
+    #   1. Writes the `curator` top-level section to config.yaml (enabled,
+    #      interval_hours, min_idle_hours, stale_after_days, archive_after_days)
+    #      — only keys the user hasn't already overridden.
+    #   2. Writes the `auxiliary.curator` aux-task slot (provider, model,
+    #      base_url, api_key, timeout, extra_body) — canonical slot for
+    #      routing the curator fork to a cheaper aux model.
+    #   3. Creates `~/.hermes/logs/curator/` if missing (belt-and-suspenders
+    #      on top of ensure_hermes_home() — old profiles that predate this
+    #      migration still benefit).
+    if current_ver < 23:
+        try:
+            curator_dir = get_hermes_home() / "logs" / "curator"
+            curator_dir.mkdir(parents=True, exist_ok=True)
+        except Exception as e:
+            results["warnings"].append(f"Could not create {curator_dir}: {e}")
+
+        config = read_raw_config()
+        touched = False
+
+        # (1) Top-level curator section — only add missing keys
+        _curator_defaults = DEFAULT_CONFIG.get("curator", {})
+        raw_curator = config.get("curator")
+        if not isinstance(raw_curator, dict):
+            raw_curator = {}
+        added_curator: List[str] = []
+        for k, v in _curator_defaults.items():
+            if k not in raw_curator:
+                raw_curator[k] = copy.deepcopy(v)
+                added_curator.append(k)
+        if added_curator:
+            config["curator"] = raw_curator
+            touched = True
+
+        # (2) auxiliary.curator task slot
+        _aux_curator_defaults = (
+            DEFAULT_CONFIG.get("auxiliary", {}).get("curator", {})
+        )
+        raw_aux = config.get("auxiliary")
+        if not isinstance(raw_aux, dict):
+            raw_aux = {}
+        raw_aux_curator = raw_aux.get("curator")
+        if not isinstance(raw_aux_curator, dict):
+            raw_aux_curator = {}
+        added_aux: List[str] = []
+        for k, v in _aux_curator_defaults.items():
+            if k not in raw_aux_curator:
+                raw_aux_curator[k] = copy.deepcopy(v)
+                added_aux.append(k)
+        if added_aux:
+            raw_aux["curator"] = raw_aux_curator
+            config["auxiliary"] = raw_aux
+            touched = True
+
+        if touched:
+            save_config(config)
+            if added_curator:
+                results["config_added"].append(
+                    f"curator ({len(added_curator)} default key(s))"
+                )
+                if not quiet:
+                    print(
+                        "  ✓ Seeded curator defaults in config.yaml: "
+                        f"{', '.join(added_curator)}"
+                    )
+            if added_aux:
+                results["config_added"].append(
+                    f"auxiliary.curator ({len(added_aux)} default key(s))"
+                )
+                if not quiet:
+                    print(
+                        "  ✓ Seeded auxiliary.curator defaults in config.yaml: "
+                        f"{', '.join(added_aux)}"
+                    )
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -3258,17 +3634,17 @@ def _preserve_env_ref_templates(current, raw, loaded_expanded=None):


 def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
-    """Move stale root-level provider/base_url into model section.
+    """Move stale root-level provider/base_url/context_length into model section.

-    Some users (or older code) placed ``provider:`` and ``base_url:`` at the
-    config root instead of inside ``model:``.  These root-level keys are only
-    used as a fallback when the corresponding ``model.*`` key is empty — they
-    never override an existing ``model.provider`` or ``model.base_url``.
+    Some users (or older code) placed ``provider:``, ``base_url:``, or
+    ``context_length:`` at the config root instead of inside ``model:``.
+    These root-level keys are only used as a fallback when the corresponding
+    ``model.*`` key is empty — they never override an existing value.
    After migration the root-level keys are removed so they can't cause
    confusion on subsequent loads.
    """
    # Only act if there are root-level keys to migrate
-    has_root = any(config.get(k) for k in ("provider", "base_url"))
+    has_root = any(config.get(k) for k in ("provider", "base_url", "context_length"))
    if not has_root:
        return config

@@ -3278,7 +3654,7 @@ def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
        model = {"default": model} if model else {}
        config["model"] = model

-    for key in ("provider", "base_url"):
+    for key in ("provider", "base_url", "context_length"):
        root_val = config.get(key)
        if root_val and not model.get(key):
            model[key] = root_val
@@ -3303,6 +3679,52 @@ def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    return config


+def cfg_get(cfg: Optional[Dict[str, Any]], *keys: str, default: Any = None) -> Any:
+    """Traverse nested dict keys safely, returning ``default`` on any miss.
+
+    Canonical helper for the ``cfg.get("X", {}).get("Y", default)`` pattern
+    that appears 50+ times across the codebase. Handles three common gotchas
+    in one place:
+
+      1. Missing intermediate keys (returns ``default``, no KeyError).
+      2. An intermediate value that's not a dict (e.g. a user wrote a string
+         where a section was expected). Returns ``default`` instead of
+         AttributeError on ``.get()``.
+      3. ``cfg is None`` (callers sometimes pass ``load_config() or None``).
+
+    Named ``cfg_get`` rather than ``cfg_path`` to avoid shadowing the
+    ubiquitous ``cfg_path = _hermes_home / "config.yaml"`` local variable
+    that appears in gateway/run.py, cron/scheduler.py, main.py, etc.
+
+    Explicit ``None`` values are returned as-is (matches ``dict.get(key,
+    default)`` semantics — ``default`` is only returned when the key is
+    *absent*, not when it's present but set to ``None``).
+
+    Examples:
+        >>> cfg_get({"agent": {"reasoning_effort": "high"}}, "agent", "reasoning_effort")
+        'high'
+        >>> cfg_get({}, "agent", "reasoning_effort", default="medium")
+        'medium'
+        >>> cfg_get({"agent": "oops_a_string"}, "agent", "reasoning_effort", default="low")
+        'low'
+        >>> cfg_get(None, "anything", default=42)
+        42
+        >>> cfg_get({"a": {"b": None}}, "a", "b", default="def")  # explicit None preserved
+        >>> cfg_get({"a": {"b": False}}, "a", "b", default=True)  # falsy values preserved
+        False
+    """
+    if not isinstance(cfg, dict):
+        return default
+    node: Any = cfg
+    for key in keys:
+        if not isinstance(node, dict):
+            return default
+        if key not in node:
+            return default
+        node = node[key]
+    return node
+
+

 def read_raw_config() -> Dict[str, Any]:
    """Read ~/.hermes/config.yaml as-is, without merging defaults or migrating.
@@ -3311,25 +3733,62 @@ def read_raw_config() -> Dict[str, Any]:
    be parsed.  Use this for lightweight config reads where you just need a
    single value and don't want the overhead of ``load_config()``'s deep-merge
    + migration pipeline.
+
+    Cached on the config file's (mtime_ns, size) — same strategy as
+    ``load_config()``. Returns a deepcopy on every call since some callers
+    mutate the result before passing to ``save_config()``.
    """
    try:
        config_path = get_config_path()
-        if config_path.exists():
-            with open(config_path, encoding="utf-8") as f:
-                return yaml.safe_load(f) or {}
+        st = config_path.stat()
+        cache_key = (st.st_mtime_ns, st.st_size)
+    except (FileNotFoundError, OSError):
+        return {}
+
+    path_key = str(config_path)
+    cached = _RAW_CONFIG_CACHE.get(path_key)
+    if cached is not None and cached[:2] == cache_key:
+        return copy.deepcopy(cached[2])
+
+    try:
+        with open(config_path, encoding="utf-8") as f:
+            data = yaml.safe_load(f) or {}
    except Exception:
-        pass
-    return {}
+        return {}
+
+    if not isinstance(data, dict):
+        data = {}
+    _RAW_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(data))
+    return data


 def load_config() -> Dict[str, Any]:
-    """Load configuration from ~/.hermes/config.yaml."""
+    """Load configuration from ~/.hermes/config.yaml.
+
+    Cached on the config file's (mtime_ns, size). Returns a deepcopy of
+    the cached value when unchanged, since most call sites mutate the
+    result (e.g. ``cfg["model"]["default"] = ...`` before ``save_config``).
+    The cache is keyed on ``str(config_path)`` so profile switches
+    (which change ``HERMES_HOME`` and therefore ``get_config_path()``)
+    don't collide.
+    """
    ensure_hermes_home()
    config_path = get_config_path()
-    
+    path_key = str(config_path)
+
+    try:
+        st = config_path.stat()
+        cache_key: Optional[Tuple[int, int]] = (st.st_mtime_ns, st.st_size)
+    except FileNotFoundError:
+        cache_key = None
+
+    cached = _LOAD_CONFIG_CACHE.get(path_key)
+    if cached is not None and cache_key is not None and cached[:2] == cache_key:
+        return copy.deepcopy(cached[2])
+
    config = copy.deepcopy(DEFAULT_CONFIG)
-    
-    if config_path.exists():
+
+    if cache_key is not None:
        try:
            with open(config_path, encoding="utf-8") as f:
                user_config = yaml.safe_load(f) or {}
@@ -3347,20 +3806,26 @@ def load_config() -> Dict[str, Any]:

    normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
    expanded = _expand_env_vars(normalized)
-    _LAST_EXPANDED_CONFIG_BY_PATH[str(config_path)] = copy.deepcopy(expanded)
+    _LAST_EXPANDED_CONFIG_BY_PATH[path_key] = copy.deepcopy(expanded)
+    if cache_key is not None:
+        _LOAD_CONFIG_CACHE[path_key] = (cache_key[0], cache_key[1], copy.deepcopy(expanded))
+    else:
+        _LOAD_CONFIG_CACHE.pop(path_key, None)
    return expanded


 _SECURITY_COMMENT = """
 # ── Security ──────────────────────────────────────────────────────────
-# API keys, tokens, and passwords are redacted from tool output by default.
-# Set to false to see full values (useful for debugging auth issues).
+# Secret redaction is OFF by default — tool output (terminal stdout,
+# read_file results, web content) passes through unmodified. Set
+# redact_secrets to true to mask strings that look like API keys, tokens,
+# and passwords before they enter the model context and logs.
 # tirith pre-exec scanning is enabled by default when the tirith binary
 # is available. Configure via security.tirith_* keys or env vars
 # (TIRITH_ENABLED, TIRITH_BIN, TIRITH_TIMEOUT, TIRITH_FAIL_OPEN).
 #
 # security:
-#   redact_secrets: false
+#   redact_secrets: true
 #   tirith_enabled: true
 #   tirith_path: "tirith"
 #   tirith_timeout: 5
@@ -3393,11 +3858,11 @@ _FALLBACK_COMMENT = """

 _COMMENTED_SECTIONS = """
 # ── Security ──────────────────────────────────────────────────────────
-# API keys, tokens, and passwords are redacted from tool output by default.
-# Set to false to see full values (useful for debugging auth issues).
+# Secret redaction is OFF by default. Set to true to mask strings that
+# look like API keys, tokens, and passwords in tool output and logs.
 #
 # security:
-#   redact_secrets: false
+#   redact_secrets: true

 # ── Fallback Model ────────────────────────────────────────────────────
 # Automatic provider failover when primary is unavailable.
@@ -3448,7 +3913,12 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
+    fb_is_valid = False
+    if isinstance(fb, list):
+        fb_is_valid = any(isinstance(e, dict) and e.get("provider") and e.get("model") for e in fb)
+    elif isinstance(fb, dict):
+        fb_is_valid = bool(fb.get("provider") and fb.get("model"))
+    if not fb_is_valid:
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -3517,18 +3987,27 @@ def _sanitize_env_lines(lines: list) -> list:

        # Detect concatenated KEY=VALUE pairs on one line.
        # Search for known KEY= patterns at any position in the line.
-        split_positions = []
+        # We collect full needle ranges so we can drop matches that are
+        # fully contained within a longer overlapping needle. Without this,
+        # suffix collisions corrupt the file: e.g. LM_API_KEY= inside
+        # GLM_API_KEY= would otherwise split the line into "G\nLM_API_KEY=...".
+        match_ranges: list[tuple[int, int]] = []
        for key_name in known_keys:
            needle = key_name + "="
            idx = stripped.find(needle)
            while idx >= 0:
-                split_positions.append(idx)
+                match_ranges.append((idx, idx + len(needle)))
                idx = stripped.find(needle, idx + len(needle))

+        split_positions = sorted({
+            s for s, e in match_ranges
+            if not any(
+                s2 <= s and e2 >= e and (s2, e2) != (s, e)
+                for s2, e2 in match_ranges
+            )
+        })
+
        if len(split_positions) > 1:
-            split_positions.sort()
-            # Deduplicate (shouldn't happen, but be safe)
-            split_positions = sorted(set(split_positions))
            for i, pos in enumerate(split_positions):
                end = split_positions[i + 1] if i + 1 < len(split_positions) else len(stripped)
                part = stripped[pos:end].strip()
@@ -3574,7 +4053,7 @@ def sanitize_env_file() -> int:
            f.writelines(sanitized)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, env_path)
+        atomic_replace(tmp_path, env_path)
    except BaseException:
        try:
            os.unlink(tmp_path)
@@ -3637,7 +4116,7 @@ def save_env_value(key: str, value: str):
    value = _check_non_ascii_credential(key, value)
    ensure_hermes_home()
    env_path = get_env_path()
-    
+
    # On Windows, open() defaults to the system locale (cp1252) which can
    # cause OSError errno 22 on UTF-8 .env files.
    read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
@@ -3649,7 +4128,7 @@ def save_env_value(key: str, value: str):
            lines = f.readlines()
        # Sanitize on every read: split concatenated keys, drop stale placeholders
        lines = _sanitize_env_lines(lines)
-    
+
    # Find and update or append
    found = False
    for i, line in enumerate(lines):
@@ -3657,7 +4136,7 @@ def save_env_value(key: str, value: str):
            lines[i] = f"{key}={value}\n"
            found = True
            break
-    
+
    if not found:
        # Ensure there's a newline at the end of the file before appending
        if lines and not lines[-1].endswith("\n"):
@@ -3677,7 +4156,7 @@ def save_env_value(key: str, value: str):
            f.writelines(lines)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, env_path)
+        atomic_replace(tmp_path, env_path)
        # Restore original permissions before _secure_file may tighten them.
        if original_mode is not None:
            try:
@@ -3733,7 +4212,7 @@ def remove_env_value(key: str) -> bool:
                f.writelines(new_lines)
                f.flush()
                os.fsync(f.fileno())
-            os.replace(tmp_path, env_path)
+            atomic_replace(tmp_path, env_path)
            if original_mode is not None:
                try:
                    os.chmod(env_path, original_mode)
@@ -3820,12 +4299,13 @@ def get_env_value(key: str) -> Optional[str]:
 # =============================================================================

 def redact_key(key: str) -> str:
-    """Redact an API key for display."""
-    if not key:
-        return color("(not set)", Colors.DIM)
-    if len(key) < 12:
-        return "***"
-    return key[:4] + "..." + key[-4:]
+    """Redact an API key for display.
+
+    Thin wrapper over :func:`agent.redact.mask_secret` — preserves the
+    "(not set)" placeholder in dim color for the empty case.
+    """
+    from agent.redact import mask_secret
+    return mask_secret(key, empty=color("(not set)", Colors.DIM))


 def show_config():
@@ -3905,6 +4385,9 @@ def show_config():
        print(f"  Daytona image: {terminal.get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')}")
        daytona_key = get_env_value('DAYTONA_API_KEY')
        print(f"  API key:      {'configured' if daytona_key else '(not set)'}")
+    elif terminal.get('backend') == 'vercel_sandbox':
+        print(f"  Vercel runtime: {terminal.get('vercel_runtime', 'node24')}")
+        print(f"  Vercel auth:    {'configured' if get_env_value('VERCEL_OIDC_TOKEN') or (get_env_value('VERCEL_TOKEN') and get_env_value('VERCEL_PROJECT_ID') and get_env_value('VERCEL_TEAM_ID')) else '(not set)'}")
    elif terminal.get('backend') == 'ssh':
        ssh_host = get_env_value('TERMINAL_SSH_HOST')
        ssh_user = get_env_value('TERMINAL_SSH_USER')
@@ -4062,15 +4545,11 @@ def set_config_value(key: str, value: str):
        except Exception:
            user_config = {}
    
-    # Handle nested keys (e.g., "tts.provider")
-    parts = key.split('.')
-    current = user_config
-    
-    for part in parts[:-1]:
-        if part not in current or not isinstance(current.get(part), dict):
-            current[part] = {}
-        current = current[part]
-    
+    # Handle nested keys (e.g., "tts.provider") including numeric list
+    # indices (e.g., "custom_providers.0.api_key").  Delegates to
+    # _set_nested which preserves list-typed nodes; before #17876 the
+    # inline navigation here silently overwrote lists with dicts.
+
    # Convert value to appropriate type
    if value.lower() in ('true', 'yes', 'on'):
        value = True
@@ -4080,8 +4559,8 @@ def set_config_value(key: str, value: str):
        value = int(value)
    elif value.replace('.', '', 1).isdigit():
        value = float(value)
-    
-    current[parts[-1]] = value
+
+    _set_nested(user_config, key, value)
    
    # Write only user config back (not the full merged defaults)
    ensure_hermes_home()
@@ -4097,7 +4576,9 @@ def set_config_value(key: str, value: str):
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
+        "terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
        "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
+        "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
        "terminal.cwd": "TERMINAL_CWD",
        "terminal.timeout": "TERMINAL_TIMEOUT",
        "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
@@ -0,0 +1,244 @@
+"""CLI subcommand: `hermes curator <subcommand>`.
+
+Thin shell around agent/curator.py and tools/skill_usage.py. Renders a status
+table, triggers a run, pauses/resumes, and pins/unpins skills.
+
+This module intentionally has no side effects at import time — main.py wires
+the argparse subparsers on demand.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from datetime import datetime, timezone
+from typing import Optional
+
+
+def _fmt_ts(ts: Optional[str]) -> str:
+    if not ts:
+        return "never"
+    try:
+        dt = datetime.fromisoformat(ts)
+    except (TypeError, ValueError):
+        return str(ts)
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    delta = datetime.now(timezone.utc) - dt
+    secs = int(delta.total_seconds())
+    if secs < 60:
+        return f"{secs}s ago"
+    if secs < 3600:
+        return f"{secs // 60}m ago"
+    if secs < 86400:
+        return f"{secs // 3600}h ago"
+    return f"{secs // 86400}d ago"
+
+
+def _cmd_status(args) -> int:
+    from agent import curator
+    from tools import skill_usage
+
+    state = curator.load_state()
+    enabled = curator.is_enabled()
+    paused = state.get("paused", False)
+    last_run = state.get("last_run_at")
+    summary = state.get("last_run_summary") or "(none)"
+    runs = state.get("run_count", 0)
+
+    status_line = (
+        "ENABLED" if enabled and not paused else
+        "PAUSED" if paused else
+        "DISABLED"
+    )
+    print(f"curator: {status_line}")
+    print(f"  runs:           {runs}")
+    print(f"  last run:       {_fmt_ts(last_run)}")
+    print(f"  last summary:   {summary}")
+    _report = state.get("last_report_path")
+    if _report:
+        print(f"  last report:    {_report}")
+    _ih = curator.get_interval_hours()
+    _interval_label = (
+        f"{_ih // 24}d" if _ih % 24 == 0 and _ih >= 24
+        else f"{_ih}h"
+    )
+    print(f"  interval:       every {_interval_label}")
+    print(f"  stale after:    {curator.get_stale_after_days()}d unused")
+    print(f"  archive after:  {curator.get_archive_after_days()}d unused")
+
+    rows = skill_usage.agent_created_report()
+    if not rows:
+        print("\nno agent-created skills")
+        return 0
+
+    by_state = {"active": [], "stale": [], "archived": []}
+    pinned = []
+    for r in rows:
+        state_name = r.get("state", "active")
+        by_state.setdefault(state_name, []).append(r)
+        if r.get("pinned"):
+            pinned.append(r["name"])
+
+    print(f"\nagent-created skills: {len(rows)} total")
+    for state_name in ("active", "stale", "archived"):
+        bucket = by_state.get(state_name, [])
+        print(f"  {state_name:10s} {len(bucket)}")
+
+    if pinned:
+        print(f"\npinned ({len(pinned)}): {', '.join(pinned)}")
+
+    # Show top 5 least-recently-active skills. Views and edits are activity too:
+    # curator should not report a skill as "never used" right after skill_view()
+    # or skill_manage() touched it.
+    active = sorted(
+        by_state.get("active", []),
+        key=lambda r: r.get("last_activity_at") or r.get("created_at") or "",
+    )[:5]
+    if active:
+        print("\nleast recently active (top 5):")
+        for r in active:
+            last = _fmt_ts(r.get("last_activity_at"))
+            print(
+                f"  {r['name']:40s}  "
+                f"activity={r.get('activity_count', 0):3d}  "
+                f"use={r.get('use_count', 0):3d}  "
+                f"view={r.get('view_count', 0):3d}  "
+                f"patches={r.get('patch_count', 0):3d}  "
+                f"last_activity={last}"
+            )
+
+    return 0
+
+
+def _cmd_run(args) -> int:
+    from agent import curator
+    if not curator.is_enabled():
+        print("curator: disabled via config; enable with `curator.enabled: true`")
+        return 1
+
+    print("curator: running review pass...")
+
+    def _on_summary(msg: str) -> None:
+        print(msg)
+
+    result = curator.run_curator_review(
+        on_summary=_on_summary,
+        synchronous=bool(args.synchronous),
+    )
+    auto = result.get("auto_transitions", {})
+    if auto:
+        print(
+            f"auto: checked={auto.get('checked', 0)} "
+            f"stale={auto.get('marked_stale', 0)} "
+            f"archived={auto.get('archived', 0)} "
+            f"reactivated={auto.get('reactivated', 0)}"
+        )
+    if not args.synchronous:
+        print("llm pass running in background — check `hermes curator status` later")
+    return 0
+
+
+def _cmd_pause(args) -> int:
+    from agent import curator
+    curator.set_paused(True)
+    print("curator: paused")
+    return 0
+
+
+def _cmd_resume(args) -> int:
+    from agent import curator
+    curator.set_paused(False)
+    print("curator: resumed")
+    return 0
+
+
+def _cmd_pin(args) -> int:
+    from tools import skill_usage
+    if not skill_usage.is_agent_created(args.skill):
+        print(
+            f"curator: '{args.skill}' is bundled or hub-installed — cannot pin "
+            "(only agent-created skills participate in curation)"
+        )
+        return 1
+    skill_usage.set_pinned(args.skill, True)
+    print(f"curator: pinned '{args.skill}' (will bypass auto-transitions)")
+    return 0
+
+
+def _cmd_unpin(args) -> int:
+    from tools import skill_usage
+    if not skill_usage.is_agent_created(args.skill):
+        print(
+            f"curator: '{args.skill}' is bundled or hub-installed — "
+            "there's nothing to unpin (curator only tracks agent-created skills)"
+        )
+        return 1
+    skill_usage.set_pinned(args.skill, False)
+    print(f"curator: unpinned '{args.skill}'")
+    return 0
+
+
+def _cmd_restore(args) -> int:
+    from tools import skill_usage
+    ok, msg = skill_usage.restore_skill(args.skill)
+    print(f"curator: {msg}")
+    return 0 if ok else 1
+
+
+# ---------------------------------------------------------------------------
+# argparse wiring (called from hermes_cli.main)
+# ---------------------------------------------------------------------------
+
+def register_cli(parent: argparse.ArgumentParser) -> None:
+    """Attach `curator` subcommands to *parent*.
+
+    main.py calls this with the ArgumentParser returned by
+    ``subparsers.add_parser("curator", ...)``.
+    """
+    parent.set_defaults(func=lambda a: (parent.print_help(), 0)[1])
+    subs = parent.add_subparsers(dest="curator_command")
+
+    p_status = subs.add_parser("status", help="Show curator status and skill stats")
+    p_status.set_defaults(func=_cmd_status)
+
+    p_run = subs.add_parser("run", help="Trigger a curator review now")
+    p_run.add_argument(
+        "--sync", "--synchronous", dest="synchronous", action="store_true",
+        help="Wait for the LLM review pass to finish (default: background thread)",
+    )
+    p_run.set_defaults(func=_cmd_run)
+
+    p_pause = subs.add_parser("pause", help="Pause the curator until resumed")
+    p_pause.set_defaults(func=_cmd_pause)
+
+    p_resume = subs.add_parser("resume", help="Resume a paused curator")
+    p_resume.set_defaults(func=_cmd_resume)
+
+    p_pin = subs.add_parser("pin", help="Pin a skill so the curator never auto-transitions it")
+    p_pin.add_argument("skill", help="Skill name")
+    p_pin.set_defaults(func=_cmd_pin)
+
+    p_unpin = subs.add_parser("unpin", help="Unpin a skill")
+    p_unpin.add_argument("skill", help="Skill name")
+    p_unpin.set_defaults(func=_cmd_unpin)
+
+    p_restore = subs.add_parser("restore", help="Restore an archived skill")
+    p_restore.add_argument("skill", help="Skill name")
+    p_restore.set_defaults(func=_cmd_restore)
+
+
+def cli_main(argv=None) -> int:
+    """Standalone entry (also usable by hermes_cli.main fallthrough)."""
+    parser = argparse.ArgumentParser(prog="hermes curator")
+    register_cli(parser)
+    args = parser.parse_args(argv)
+    fn = getattr(args, "func", None)
+    if fn is None:
+        parser.print_help()
+        return 0
+    return int(fn(args) or 0)
+
+
+if __name__ == "__main__":  # pragma: no cover
+    sys.exit(cli_main())
@@ -7,7 +7,6 @@ Currently supports:

 import io
 import json
-import os
 import sys
 import time
 import urllib.error
@@ -18,6 +17,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_home
+from utils import atomic_replace


 # ---------------------------------------------------------------------------
@@ -79,7 +79,7 @@ def _save_pending(entries: list[dict]) -> None:
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(".json.tmp")
        tmp.write_text(json.dumps(entries, indent=2), encoding="utf-8")
-        os.replace(tmp, path)
+        atomic_replace(tmp, path)
    except OSError:
        # Non-fatal — worst case the user has to run ``hermes debug delete``
        # manually.
@@ -13,7 +13,6 @@ automatically.

 from __future__ import annotations

-import io
 import os
 import sys
 import time
@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+import importlib.util
 from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
@@ -30,6 +31,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

 from hermes_cli.colors import Colors, color
 from hermes_cli.models import _HERMES_USER_AGENT
+from hermes_cli.vercel_auth import describe_vercel_auth
 from hermes_constants import OPENROUTER_MODELS_URL
 from utils import base_url_host_matches

@@ -57,6 +59,7 @@ _PROVIDER_ENV_HINTS = (
    "OPENCODE_ZEN_API_KEY",
    "OPENCODE_GO_API_KEY",
    "XIAOMI_API_KEY",
+    "TOKENHUB_API_KEY",
 )


@@ -75,6 +78,14 @@ def _system_package_install_cmd(pkg: str) -> str:
    return f"sudo apt install {pkg}"


+def _safe_which(cmd: str) -> str | None:
+    """shutil.which wrapper resilient to platform monkeypatching in tests."""
+    try:
+        return shutil.which(cmd)
+    except Exception:
+        return None
+
+
 def _termux_browser_setup_steps(node_installed: bool) -> list[str]:
    steps: list[str] = []
    step = 1
@@ -292,15 +303,23 @@ def run_doctor(args):

            known_providers: set = set()
            try:
-                from hermes_cli.auth import PROVIDER_REGISTRY
+                from hermes_cli.auth import (
+                    PROVIDER_REGISTRY,
+                    resolve_provider as _resolve_auth_provider,
+                )
                known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"}
            except Exception:
+                _resolve_auth_provider = None
                pass
            try:
                from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
-                from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
+                from hermes_cli.providers import (
+                    normalize_provider as _normalize_catalog_provider,
+                    resolve_provider_full as _resolve_provider_full,
+                )
            except Exception:
                _compatible_custom_providers = None
+                _normalize_catalog_provider = None
                _resolve_provider_full = None

            custom_providers = []
@@ -320,17 +339,43 @@ def run_doctor(args):
                if name:
                    known_providers.add("custom:" + name.lower().replace(" ", "-"))

-            canonical_provider = provider
+            valid_provider_ids = set(known_providers)
+            provider_ids_to_accept = {provider} if provider else set()
+            if _normalize_catalog_provider is not None:
+                for known_provider in known_providers:
+                    try:
+                        valid_provider_ids.add(_normalize_catalog_provider(known_provider))
+                    except Exception:
+                        continue
+
+            runtime_provider = provider
+            if (
+                provider
+                and _resolve_auth_provider is not None
+                and provider not in ("auto", "custom")
+            ):
+                try:
+                    runtime_provider = _resolve_auth_provider(provider)
+                    provider_ids_to_accept.add(runtime_provider)
+                except Exception:
+                    runtime_provider = provider
+
+            catalog_provider = provider
            if (
                provider
                and _resolve_provider_full is not None
                and provider not in ("auto", "custom")
            ):
                provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
-                canonical_provider = provider_def.id if provider_def is not None else None
+                catalog_provider = provider_def.id if provider_def is not None else None
+                if catalog_provider is not None:
+                    provider_ids_to_accept.add(catalog_provider)

            if provider and provider != "auto":
-                if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
+                if catalog_provider is None or (
+                    known_providers
+                    and not (provider_ids_to_accept & valid_provider_ids)
+                ):
                    known_list = ", ".join(sorted(known_providers)) if known_providers else "(unavailable)"
                    check_fail(
                        f"model.provider '{provider_raw}' is not a recognised provider",
@@ -343,7 +388,24 @@ def run_doctor(args):
                    )

            # Warn if model is set to a provider-prefixed name on a provider that doesn't use them
-            if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous"):
+            provider_for_policy = runtime_provider or catalog_provider
+            providers_accepting_vendor_slugs = {
+                "openrouter",
+                "custom",
+                "auto",
+                "ai-gateway",
+                "kilocode",
+                "opencode-zen",
+                "huggingface",
+                "lmstudio",
+                "nous",
+            }
+            if (
+                default_model
+                and "/" in default_model
+                and provider_for_policy
+                and provider_for_policy not in providers_accepting_vendor_slugs
+            ):
                check_warn(
                    f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'",
                    "(vendor-prefixed slugs belong to aggregators like openrouter)",
@@ -359,20 +421,24 @@ def run_doctor(args):
            # own env-var checks elsewhere in doctor, and get_auth_status()
            # returns a bare {logged_in: False} for anything it doesn't
            # explicitly dispatch, which would produce false positives.
-            if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"):
+            if runtime_provider and runtime_provider not in ("auto", "custom", "openrouter"):
                try:
                    from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
-                    pconfig = PROVIDER_REGISTRY.get(canonical_provider)
+                    pconfig = PROVIDER_REGISTRY.get(runtime_provider)
                    if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
-                        status = get_auth_status(canonical_provider) or {}
-                        configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key"))
+                        status = get_auth_status(runtime_provider) or {}
+                        configured = bool(
+                            status.get("configured")
+                            or status.get("logged_in")
+                            or status.get("api_key")
+                        )
                        if not configured:
                            check_fail(
-                                f"model.provider '{canonical_provider}' is set but no API key is configured",
+                                f"model.provider '{runtime_provider}' is set but no API key is configured",
                                "(check ~/.hermes/.env or run 'hermes setup')",
                            )
                            issues.append(
-                                f"No credentials found for provider '{canonical_provider}'. "
+                                f"No credentials found for provider '{runtime_provider}'. "
                                f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
                                f"or switch providers with 'hermes config set model.provider <name>'"
                            )
@@ -481,6 +547,7 @@ def run_doctor(args):
            get_nous_auth_status,
            get_codex_auth_status,
            get_gemini_oauth_auth_status,
+            get_minimax_oauth_auth_status,
        )

        nous_status = get_nous_auth_status()
@@ -510,13 +577,27 @@ def run_doctor(args):
            check_ok("Google Gemini OAuth", f"(logged in{suffix})")
        else:
            check_warn("Google Gemini OAuth", "(not logged in)")
+
+        minimax_status = get_minimax_oauth_auth_status()
+        if minimax_status.get("logged_in"):
+            region = minimax_status.get("region", "global")
+            check_ok("MiniMax OAuth", f"(logged in, region={region})")
+        else:
+            check_warn("MiniMax OAuth", "(not logged in)")
    except Exception as e:
        check_warn("Auth provider status", f"(could not check: {e})")

-    if shutil.which("codex"):
+    if _safe_which("codex"):
        check_ok("codex CLI")
    else:
-        check_warn("codex CLI not found", "(required for openai-codex login)")
+        # Native OAuth uses Hermes' own device-code flow — the Codex CLI is
+        # only needed if you want to import existing tokens from
+        # ~/.codex/auth.json.  Downgrade to info so users running
+        # `hermes auth openai-codex` aren't told they're missing something.
+        check_info(
+            "codex CLI not installed "
+            "(optional — only required to import tokens from an existing Codex CLI login)"
+        )

    # =========================================================================
    # Check: Directory structure
@@ -724,13 +805,13 @@ def run_doctor(args):
    print(color("◆ External Tools", Colors.CYAN, Colors.BOLD))
    
    # Git
-    if shutil.which("git"):
+    if _safe_which("git"):
        check_ok("git")
    else:
        check_warn("git not found", "(optional)")
    
    # ripgrep (optional, for faster file search)
-    if shutil.which("rg"):
+    if _safe_which("rg"):
        check_ok("ripgrep (rg)", "(faster file search)")
    else:
        check_warn("ripgrep (rg) not found", "(file search uses grep fallback)")
@@ -739,7 +820,7 @@ def run_doctor(args):
    # Docker (optional)
    terminal_env = os.getenv("TERMINAL_ENV", "local")
    if terminal_env == "docker":
-        if shutil.which("docker"):
+        if _safe_which("docker"):
            # Check if docker daemon is running
            try:
                result = subprocess.run(["docker", "info"], capture_output=True, timeout=10)
@@ -754,7 +835,7 @@ def run_doctor(args):
            check_fail("docker not found", "(required for TERMINAL_ENV=docker)")
            issues.append("Install Docker or change TERMINAL_ENV")
    else:
-        if shutil.which("docker"):
+        if _safe_which("docker"):
            check_ok("docker", "(optional)")
        else:
            if _is_termux():
@@ -800,8 +881,52 @@ def run_doctor(args):
            check_fail("daytona SDK not installed", "(pip install daytona)")
            issues.append("Install daytona SDK: pip install daytona")

+    # Vercel Sandbox (if using vercel_sandbox backend)
+    if terminal_env == "vercel_sandbox":
+        runtime = os.getenv("TERMINAL_VERCEL_RUNTIME", "node24").strip() or "node24"
+        from tools.terminal_tool import _SUPPORTED_VERCEL_RUNTIMES
+        if runtime in _SUPPORTED_VERCEL_RUNTIMES:
+            check_ok("Vercel runtime", f"({runtime})")
+        else:
+            supported = ", ".join(_SUPPORTED_VERCEL_RUNTIMES)
+            check_fail("Vercel runtime unsupported", f"({runtime}; use {supported})")
+            issues.append(f"Set TERMINAL_VERCEL_RUNTIME to one of: {supported}")
+
+        disk = os.getenv("TERMINAL_CONTAINER_DISK", "51200").strip()
+        if disk in ("", "0", "51200"):
+            check_ok("Vercel disk setting", "(uses platform default)")
+        else:
+            check_fail("Vercel custom disk unsupported", "(reset terminal.container_disk to 51200)")
+            issues.append("Vercel Sandbox does not support custom container_disk; use the shared default 51200")
+
+        if importlib.util.find_spec("vercel") is not None:
+            check_ok("vercel SDK", "(installed)")
+        else:
+            check_fail("vercel SDK not installed", "(pip install 'hermes-agent[vercel]')")
+            issues.append("Install the Vercel optional dependency: pip install 'hermes-agent[vercel]'")
+
+        auth_status = describe_vercel_auth()
+        if auth_status.ok:
+            check_ok("Vercel auth", f"({auth_status.label})")
+        elif auth_status.label.startswith("partial"):
+            check_fail("Vercel auth incomplete", f"({auth_status.label})")
+            issues.append("Set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID together")
+        else:
+            check_fail("Vercel auth not configured", f"({auth_status.label})")
+            issues.append(
+                "Configure Vercel Sandbox auth with VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID"
+            )
+        for line in auth_status.detail_lines:
+            check_info(f"Vercel auth {line}")
+
+        persistent = os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("1", "true", "yes", "on")
+        if persistent:
+            check_info("Vercel persistence: snapshot filesystem only; live processes do not survive sandbox recreation")
+        else:
+            check_info("Vercel persistence: ephemeral filesystem")
+
    # Node.js + agent-browser (for browser automation tools)
-    if shutil.which("node"):
+    if _safe_which("node"):
        check_ok("Node.js")
        # Check if agent-browser is installed
        agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
@@ -827,7 +952,7 @@ def run_doctor(args):
            check_warn("Node.js not found", "(optional, needed for browser tools)")
    
    # npm audit for all Node.js packages
-    if shutil.which("npm"):
+    if _safe_which("npm"):
        npm_dirs = [
            (PROJECT_ROOT, "Browser tools (agent-browser)"),
            (PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"),
@@ -906,10 +1031,16 @@ def run_doctor(args):
        print("  Checking Anthropic API...", end="", flush=True)
        try:
            import httpx
-            from agent.anthropic_adapter import _is_oauth_token, _COMMON_BETAS, _OAUTH_ONLY_BETAS
+            from agent.anthropic_adapter import (
+                _is_oauth_token,
+                _COMMON_BETAS,
+                _OAUTH_ONLY_BETAS,
+                _CONTEXT_1M_BETA,
+            )

            headers = {"anthropic-version": "2023-06-01"}
-            if _is_oauth_token(anthropic_key):
+            is_oauth = _is_oauth_token(anthropic_key)
+            if is_oauth:
                headers["Authorization"] = f"Bearer {anthropic_key}"
                headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
            else:
@@ -919,6 +1050,25 @@ def run_doctor(args):
                headers=headers,
                timeout=10
            )
+            # Reactive recovery: OAuth subscriptions that don't include 1M
+            # context reject the request with 400 "long context beta is not
+            # yet available for this subscription". Retry once with that
+            # beta stripped so the doctor check doesn't falsely report the
+            # Anthropic API as unreachable for those users.
+            if (
+                is_oauth
+                and response.status_code == 400
+                and "long context beta" in response.text.lower()
+                and "not yet available" in response.text.lower()
+            ):
+                headers["anthropic-beta"] = ",".join(
+                    [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS)
+                )
+                response = httpx.get(
+                    "https://api.anthropic.com/v1/models",
+                    headers=headers,
+                    timeout=10,
+                )
            if response.status_code == 200:
                print(f"\r  {color('✓', Colors.GREEN)} Anthropic API                           ")
            elif response.status_code == 401:
@@ -33,12 +33,14 @@ def _get_git_commit(project_root: Path) -> str:


 def _redact(value: str) -> str:
-    """Redact all but first 4 and last 4 chars."""
-    if not value:
-        return ""
-    if len(value) < 12:
-        return "***"
-    return value[:4] + "..." + value[-4:]
+    """Redact all but first 4 and last 4 chars.
+
+    Thin wrapper over :func:`agent.redact.mask_secret`. Returns ``""`` for
+    an empty value (matches the historical behavior of this helper —
+    ``hermes dump`` formats empty values as blank, not as ``"(not set)"``).
+    """
+    from agent.redact import mask_secret
+    return mask_secret(value)


 def _gateway_status() -> str:
@@ -7,6 +7,7 @@ import sys
 from pathlib import Path

 from dotenv import load_dotenv
+from utils import atomic_replace


 # Env var name suffixes that indicate credential values.  These are the
@@ -127,7 +128,7 @@ def _sanitize_env_file_if_needed(path: Path) -> None:
                    f.writelines(sanitized)
                    f.flush()
                    os.fsync(f.fileno())
-                os.replace(tmp, path)
+                atomic_replace(tmp, path)
            except BaseException:
                try:
                    os.unlink(tmp)
@@ -279,9 +279,11 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
                ["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"],
                capture_output=True,
                text=True,
+                encoding="utf-8",
+                errors="ignore",
                timeout=10,
            )
-            if result.returncode != 0:
+            if result.returncode != 0 or result.stdout is None:
                return []
            current_cmd = ""
            for line in result.stdout.split("\n"):
@@ -830,6 +832,22 @@ def _user_dbus_socket_path() -> Path:
    return Path(xdg) / "bus"


+def _user_systemd_private_socket_path() -> Path:
+    """Return the per-user systemd private socket path (regardless of existence)."""
+    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
+    return Path(xdg) / "systemd" / "private"
+
+
+def _user_systemd_socket_ready() -> bool:
+    """Return True when user-scope systemd has a reachable control socket.
+
+    Some distros expose only the per-user systemd private socket even when the
+    D-Bus session bus socket is absent. ``systemctl --user`` can still work in
+    that configuration, so preflight checks must treat either socket as valid.
+    """
+    return _user_dbus_socket_path().exists() or _user_systemd_private_socket_path().exists()
+
+
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -853,28 +871,29 @@ def _ensure_user_systemd_env() -> None:


 def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
-    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
+    """Poll for the user systemd runtime socket(s), up to ``timeout`` seconds.

-    Linger-enabled user@.service can take a second or two to spawn the socket
-    after ``loginctl enable-linger`` runs.  Returns True once the socket exists.
+    Linger-enabled user@.service can take a second or two to spawn its control
+    socket(s) after ``loginctl enable-linger`` runs. Returns True once either
+    the user D-Bus socket or the per-user systemd private socket exists.
    """
    import time

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
-        if _user_dbus_socket_path().exists():
+        if _user_systemd_socket_ready():
            _ensure_user_systemd_env()
            return True
        time.sleep(0.2)
-    return _user_dbus_socket_path().exists()
+    return _user_systemd_socket_ready()


 def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
-    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
+    """Ensure ``systemctl --user`` will reach the user-scope systemd instance.

-    No-op when the bus socket is already there (the common case on desktops
-    and linger-enabled servers).  On fresh SSH sessions where the socket is
-    missing:
+    No-op when the user D-Bus socket or per-user systemd private socket is
+    already there (the common case on desktops and linger-enabled servers). On
+    fresh SSH sessions where both are missing:

    * If linger is already enabled, wait briefly for user@.service to spawn
      the socket.
@@ -888,8 +907,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
    systemd operations and surface the message to the user.
    """
    _ensure_user_systemd_env()
-    bus_path = _user_dbus_socket_path()
-    if bus_path.exists():
+    if _user_systemd_socket_ready():
        return

    import getpass
@@ -903,7 +921,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
        # Linger is on but socket still missing — unusual; fall through to error.
        _raise_user_systemd_unavailable(
            username,
-            reason="User D-Bus socket is missing even though linger is enabled.",
+            reason="User systemd control sockets are missing even though linger is enabled.",
            fix_hint=(
                f"  systemctl start user@{os.getuid()}.service\n"
                "  (may require sudo; try again after the command succeeds)"
@@ -2350,7 +2368,11 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
    # Exit with code 1 if gateway fails to connect any platform,
    # so systemd Restart=on-failure will retry on transient errors
    verbosity = None if quiet else verbose
-    success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
+    try:
+        success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
+    except KeyboardInterrupt:
+        print("\nGateway stopped.")
+        return
    if not success:
        sys.exit(1)

@@ -2743,15 +2765,77 @@ _PLATFORMS = [
        ],
    },
 ]
+def _all_platforms() -> list[dict]:
+    """Return the full list of platforms for setup menus.
+
+    Combines the built-in ``_PLATFORMS`` with plugin platforms registered via
+    ``platform_registry``. Plugins are discovered on first call so bundled
+    platforms (like IRC, which auto-load via ``kind: platform``) appear in
+    ``hermes setup gateway`` without needing the gateway to be running.
+    Built-ins keep their dict shape; plugin entries are adapted to the same
+    shape with ``_registry_entry`` holding the source.
+    """
+    # Populate the registry so plugin platforms are visible. Idempotent.
+    # Bundled platform plugins (``kind: platform``) auto-load unconditionally,
+    # so every shipped messaging channel appears in the setup menu by default.
+    # User-installed platform plugins under ~/.hermes/plugins/ still require
+    # opt-in via ``plugins.enabled`` (untrusted code).
+    try:
+        from hermes_cli.plugins import discover_plugins
+        discover_plugins()
+    except Exception as e:
+        logger.debug("plugin discovery failed during platform enumeration: %s", e)
+
+    platforms = [dict(p) for p in _PLATFORMS]
+    by_key = {p["key"]: p for p in platforms}
+
+    try:
+        from gateway.platform_registry import platform_registry
+    except Exception:
+        return platforms
+
+    for entry in platform_registry.all_entries():
+        if entry.name in by_key:
+            continue  # built-in already covers it
+        platforms.append({
+            "key": entry.name,
+            "label": entry.label,
+            "emoji": entry.emoji,
+            "token_var": entry.required_env[0] if entry.required_env else "",
+            "install_hint": entry.install_hint,
+            "_registry_entry": entry,
+        })
+    return platforms


 def _platform_status(platform: dict) -> str:
    """Return a plain-text status string for a platform.

    Returns uncolored text so it can safely be embedded in
-    simple_term_menu items (ANSI codes break width calculation).
+    curses menu items (ANSI codes break width calculation).
    """
-    token_var = platform["token_var"]
+    entry = platform.get("_registry_entry")
+    if entry is not None:
+        configured = False
+        # Prefer is_connected (checks both env and config.yaml) over
+        # check_fn (typically just dependency / env presence).
+        if entry.is_connected is not None:
+            try:
+                from gateway.config import PlatformConfig
+                synthetic = PlatformConfig(enabled=True)
+                configured = bool(entry.is_connected(synthetic))
+            except Exception:
+                configured = False
+        if not configured:
+            try:
+                configured = bool(entry.check_fn())
+            except Exception:
+                configured = False
+        return "configured" if configured else "not configured"
+
+    token_var = platform.get("token_var", "")
+    if not token_var:
+        return "not configured"
    val = get_env_value(token_var)
    if token_var == "WHATSAPP_ENABLED":
        if val and val.lower() == "true":
@@ -2953,7 +3037,7 @@ def _setup_sms():
 def _setup_dingtalk():
    """Configure DingTalk — QR scan (recommended) or manual credential entry."""
    from hermes_cli.setup import (
-        prompt_choice, prompt_yes_no, print_info, print_success, print_warning,
+        prompt_choice, prompt_yes_no, print_success, print_warning,
    )

    dingtalk_platform = next(p for p in _PLATFORMS if p["key"] == "dingtalk")
@@ -3277,6 +3361,12 @@ def _setup_weixin():
        print_warning("  Direct messages disabled.")

    print()
+    print_info("  Note: QR login connects an iLink bot identity (e.g. ...@im.bot), not a")
+    print_info("  scriptable personal WeChat account. Ordinary WeChat groups typically cannot")
+    print_info("  invite an @im.bot identity, and iLink does not deliver ordinary-group events")
+    print_info("  to most bot accounts. The settings below only apply when iLink actually")
+    print_info("  delivers group events for your account type — otherwise DM remains the only")
+    print_info("  working channel regardless of this choice.")
    group_choices = [
        "Disable group chats (recommended)",
        "Allow all group chats",
@@ -3290,12 +3380,12 @@ def _setup_weixin():
    elif group_idx == 1:
        save_env_value("WEIXIN_GROUP_POLICY", "open")
        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
-        print_warning("  All group chats enabled.")
+        print_warning("  All group chats enabled (only takes effect if iLink delivers group events).")
    else:
-        allow_groups = prompt("  Allowed group chat IDs (comma-separated)", "", password=False).replace(" ", "")
+        allow_groups = prompt("  Allowed group chat IDs (comma-separated, not member user IDs)", "", password=False).replace(" ", "")
        save_env_value("WEIXIN_GROUP_POLICY", "allowlist")
        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", allow_groups)
-        print_success("  Group allowlist saved.")
+        print_success("  Group allowlist saved (only takes effect if iLink delivers group events).")

    if user_id:
        print()
@@ -3504,7 +3594,6 @@ def _setup_qqbot():
    method_idx = prompt_choice("  How would you like to set up QQ Bot?", method_choices, 0)

    credentials = None
-    used_qr = False

    if method_idx == 0:
        # ── QR scan-to-configure ──
@@ -3515,8 +3604,6 @@ def _setup_qqbot():
            print()
            print_warning("  QQ Bot setup cancelled.")
            return
-        if credentials:
-            used_qr = True
        if not credentials:
            print_info("  QR setup did not complete. Continuing with manual input.")

@@ -3706,6 +3793,71 @@ def _setup_signal():
    print_info(f"  Groups: {'enabled' if get_env_value('SIGNAL_GROUP_ALLOWED_USERS') else 'disabled'}")


+def _builtin_setup_fn(key: str):
+    """Resolve the interactive setup function for a built-in platform key.
+
+    Late-bound to avoid a circular import with ``hermes_cli.setup`` (which
+    imports from this module for the remaining bespoke flows).
+    """
+    from hermes_cli import setup as _s
+    return {
+        "telegram": _s._setup_telegram,
+        "discord": _s._setup_discord,
+        "slack": _s._setup_slack,
+        "matrix": _s._setup_matrix,
+        "mattermost": _s._setup_mattermost,
+        "bluebubbles": _s._setup_bluebubbles,
+        "webhooks": _s._setup_webhooks,
+        "signal": _setup_signal,
+        "whatsapp": _setup_whatsapp,
+        "weixin": _setup_weixin,
+        "dingtalk": _setup_dingtalk,
+        "feishu": _setup_feishu,
+        "wecom": _setup_wecom,
+        "qqbot": _setup_qqbot,
+    }.get(key)
+def _configure_platform(platform: dict) -> None:
+    """Run the interactive setup flow for a single platform.
+
+    Dispatch order:
+      1. Plugin-provided ``setup_fn`` on the registry entry.
+      2. Built-in setup function matched by platform key.
+      3. ``_setup_standard_platform`` when the entry has a ``vars`` schema.
+      4. Env-var hint fallback for plugins that offer no setup helper.
+
+    Bundled platform plugins (e.g. IRC) auto-load, so no plugin enable step
+    is needed here. User-installed platform plugins under ~/.hermes/plugins/
+    must already be in ``plugins.enabled`` before they appear in this menu.
+    """
+    entry = platform.get("_registry_entry")
+
+    if entry is not None and entry.setup_fn is not None:
+        entry.setup_fn()
+        return
+
+    fn = _builtin_setup_fn(platform["key"])
+    if fn is not None:
+        fn()
+        return
+
+    if platform.get("vars"):
+        _setup_standard_platform(platform)
+        return
+
+    # Plugin with no setup helper — show env-var instructions.
+    label = platform.get("label", platform["key"])
+    emoji = platform.get("emoji", "🔌")
+    print()
+    print(color(f"  ─── {emoji} {label} Setup ───", Colors.CYAN))
+    required = entry.required_env if entry else []
+    if required:
+        print_info(f"  Set these env vars in ~/.hermes/.env: {', '.join(required)}")
+    else:
+        print_info(f"  Configure {label} in config.yaml under gateway.platforms.{platform['key']}")
+    if platform.get("install_hint"):
+        print_info(f"  {platform['install_hint']}")
+
+
 def gateway_setup():
    """Interactive setup for messaging platforms + gateway service."""
    if is_managed():
@@ -3758,42 +3910,36 @@ def gateway_setup():
        print()
        print_header("Messaging Platforms")

-        menu_items = []
-        for plat in _PLATFORMS:
-            status = _platform_status(plat)
-            menu_items.append(f"{plat['label']}  ({status})")
+        platforms = _all_platforms()
+
+        menu_items = [
+            f"{p['emoji']} {p['label']}  ({_platform_status(p)})"
+            for p in platforms
+        ]
        menu_items.append("Done")

        choice = prompt_choice("Select a platform to configure:", menu_items, len(menu_items) - 1)
-
-        if choice == len(_PLATFORMS):
+        if choice == len(platforms):
            break

-        platform = _PLATFORMS[choice]
-
-        if platform["key"] == "whatsapp":
-            _setup_whatsapp()
-        elif platform["key"] == "signal":
-            _setup_signal()
-        elif platform["key"] == "weixin":
-            _setup_weixin()
-        elif platform["key"] == "dingtalk":
-            _setup_dingtalk()
-        elif platform["key"] == "feishu":
-            _setup_feishu()
-        elif platform["key"] == "qqbot":
-            _setup_qqbot()
-        elif platform["key"] == "wecom":
-            _setup_wecom()
-        else:
-            _setup_standard_platform(platform)
+        _configure_platform(platforms[choice])

    # ── Post-setup: offer to install/restart gateway ──
+    # Consider any platform (built-in or plugin) where the user has made
+    # meaningful progress.  ``_platform_status`` already handles plugin
+    # entries via their check_fn and per-platform dual-states like
+    # WhatsApp's "enabled, not paired".
+    def _is_progress(status: str) -> bool:
+        s = status.lower()
+        return not (
+            s == "not configured"
+            or s.startswith("partially")
+            or s.startswith("plugin disabled")
+        )
+
    any_configured = any(
-        bool(get_env_value(p["token_var"]))
-        for p in _PLATFORMS
-        if p["key"] != "whatsapp"
-    ) or (get_env_value("WHATSAPP_ENABLED") or "").lower() == "true"
+        _is_progress(_platform_status(p)) for p in _all_platforms()
+    )

    if any_configured:
        print()
@@ -4231,4 +4377,4 @@ def _gateway_command_inner(args):
        if not supports_systemd_services() and not is_macos():
            print("Legacy unit migration only applies to systemd-based Linux hosts.")
            return
-        remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)
+        remove_legacy_hermes_units(interactive=not yes, dry_run=dry_run)
@@ -19,9 +19,8 @@ format) lives there.
 from __future__ import annotations

 import json
-import os
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List


 def hooks_command(args) -> None:
@@ -114,6 +114,12 @@ def _apply_profile_override() -> None:
            consume = 1
            break

+    # 1.5 If HERMES_HOME is already set and no explicit flag was given, trust it.
+    # This lets child processes (relaunch, subprocess) inherit the parent's
+    # profile choice without having to pass --profile again.
+    if profile_name is None and os.environ.get("HERMES_HOME"):
+        return
+
    # 2. If no flag, check active_profile in the hermes root
    if profile_name is None:
        try:
@@ -830,6 +836,16 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti


 _NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"})
+_TUI_PREBUILT_MARKER = ".hermes-prebuilt-tui"
+
+
+def _tui_prebuilt_ready(root: Path) -> bool:
+    return (
+        (root / _TUI_PREBUILT_MARKER).is_file()
+        and (root / "dist" / "entry.js").is_file()
+        and (root / "node_modules" / "@hermes" / "ink" / "package.json").is_file()
+        and (root / "packages" / "hermes-ink" / "dist" / "ink-bundle.js").is_file()
+    )


 def _tui_need_npm_install(root: Path) -> bool:
@@ -852,6 +868,9 @@ def _tui_need_npm_install(root: Path) -> bool:
    we'd rather not force a reinstall for them. Falls back to mtime
    comparison if either lockfile is unparseable.
    """
+    if _tui_prebuilt_ready(root):
+        return False
+
    ink = root / "node_modules" / "@hermes" / "ink" / "package.json"
    if not ink.is_file():
        return True
@@ -905,6 +924,8 @@ def _find_bundled_tui(tui_dir: Path) -> Optional[Path]:


 def _tui_build_needed(tui_dir: Path) -> bool:
+    if _tui_prebuilt_ready(tui_dir):
+        return False
    if _hermes_ink_bundle_stale(tui_dir):
        return True
    entry = tui_dir / "dist" / "entry.js"
@@ -1094,11 +1115,36 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
    return [node, str(root / "dist" / "entry.js")], root


+def _normalize_tui_toolsets(toolsets: object) -> list[str]:
+    """Normalize argparse/Fire-style toolset input for the TUI subprocess."""
+    try:
+        from hermes_cli.oneshot import _normalize_toolsets
+
+        return _normalize_toolsets(toolsets) or []
+    except (AttributeError, ImportError):
+        if not toolsets:
+            return []
+
+        raw_items = [toolsets] if isinstance(toolsets, str) else toolsets
+        if not isinstance(raw_items, (list, tuple)):
+            raw_items = [raw_items]
+
+        normalized: list[str] = []
+        for item in raw_items:
+            if isinstance(item, str):
+                normalized.extend(part.strip() for part in item.split(","))
+            else:
+                normalized.append(str(item).strip())
+
+        return [item for item in normalized if item]
+
+
 def _launch_tui(
    resume_session_id: Optional[str] = None,
    tui_dev: bool = False,
    model: Optional[str] = None,
    provider: Optional[str] = None,
+    toolsets: object = None,
 ):
    """Replace current process with the TUI."""
    tui_dir = PROJECT_ROOT / "ui-tui"
@@ -1123,6 +1169,9 @@ def _launch_tui(
    if provider:
        env["HERMES_TUI_PROVIDER"] = provider
        env["HERMES_INFERENCE_PROVIDER"] = provider
+    tui_toolsets = _normalize_tui_toolsets(toolsets)
+    if tui_toolsets:
+        env["HERMES_TUI_TOOLSETS"] = ",".join(tui_toolsets)
    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
    # large transcripts / reasoning blobs. Token-level merge: respect any
@@ -1270,6 +1319,7 @@ def cmd_chat(args):
            tui_dev=getattr(args, "tui_dev", False),
            model=getattr(args, "model", None),
            provider=getattr(args, "provider", None),
+            toolsets=getattr(args, "toolsets", None),
        )

    # Import and run the CLI
@@ -1770,6 +1820,8 @@ def select_provider_and_model(args=None):
        _model_flow_openai_codex(config, current_model)
    elif selected_provider == "qwen-oauth":
        _model_flow_qwen_oauth(config, current_model)
+    elif selected_provider == "minimax-oauth":
+        _model_flow_minimax_oauth(config, current_model, args=args)
    elif selected_provider == "google-gemini-cli":
        _model_flow_google_gemini_cli(config, current_model)
    elif selected_provider == "copilot-acp":
@@ -1820,6 +1872,8 @@ def select_provider_and_model(args=None):
        "gmi",
        "nvidia",
        "ollama-cloud",
+        "tencent-tokenhub",
+        "lmstudio",
    ):
        _model_flow_api_key_provider(config, selected_provider, current_model)

@@ -1888,6 +1942,7 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
    ("mcp",              "MCP",              "MCP tool reasoning"),
    ("title_generation", "Title generation", "session titles"),
    ("skills_hub",       "Skills hub",       "skills search/install"),
+    ("curator",          "Curator",          "skill-usage review pass"),
 ]


@@ -2046,7 +2101,11 @@ def _aux_select_for_task(task: str) -> None:

    # Gather authenticated providers (has credentials + curated model list)
    try:
-        providers = list_authenticated_providers(current_provider=current_provider)
+        providers = list_authenticated_providers(
+            current_provider=current_provider,
+            current_model=current_model,
+            current_base_url=current_base_url,
+        )
    except Exception as exc:
        print(f"Could not detect authenticated providers: {exc}")
        providers = []
@@ -2652,6 +2711,53 @@ def _model_flow_qwen_oauth(_config, current_model=""):
        print("No change.")


+def _model_flow_minimax_oauth(config, current_model="", args=None):
+    """MiniMax OAuth provider: ensure logged in, then pick model."""
+    from hermes_cli.auth import (
+        get_provider_auth_state,
+        _prompt_model_selection,
+        _save_model_choice,
+        _update_config_for_provider,
+        resolve_minimax_oauth_runtime_credentials,
+        AuthError,
+        format_auth_error,
+        _login_minimax_oauth,
+        PROVIDER_REGISTRY,
+    )
+    state = get_provider_auth_state("minimax-oauth")
+    if not state or not state.get("access_token"):
+        print("Not logged into MiniMax. Starting OAuth login...")
+        print()
+        try:
+            mock_args = argparse.Namespace(
+                region=getattr(args, "region", None) or "global",
+                no_browser=bool(getattr(args, "no_browser", False)),
+                timeout=getattr(args, "timeout", None) or 15.0,
+            )
+            _login_minimax_oauth(mock_args, PROVIDER_REGISTRY["minimax-oauth"])
+        except SystemExit:
+            print("Login cancelled or failed.")
+            return
+        except Exception as exc:
+            print(f"Login failed: {exc}")
+            return
+
+    try:
+        creds = resolve_minimax_oauth_runtime_credentials()
+    except AuthError as exc:
+        print(format_auth_error(exc))
+        return
+
+    from hermes_cli.models import _PROVIDER_MODELS
+    model_ids = _PROVIDER_MODELS.get("minimax-oauth", [])
+    selected = _prompt_model_selection(model_ids, current_model)
+    if not selected:
+        return
+    _save_model_choice(selected)
+    _update_config_for_provider("minimax-oauth", creds["base_url"])
+    print(f"\u2713 Using MiniMax model: {selected}")
+
+
 def _model_flow_google_gemini_cli(_config, current_model=""):
    """Google Gemini OAuth (PKCE) via Cloud Code Assist — supports free AND paid tiers.

@@ -4376,6 +4482,7 @@ def _model_flow_bedrock(config, current_model=""):
 def _model_flow_api_key_provider(config, provider_id, current_model=""):
    """Generic flow for API-key providers (z.ai, MiniMax, OpenCode, etc.)."""
    from hermes_cli.auth import (
+        LMSTUDIO_NOAUTH_PLACEHOLDER,
        PROVIDER_REGISTRY,
        _prompt_model_selection,
        _save_model_choice,
@@ -4410,13 +4517,20 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
            try:
                import getpass

-                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
+                if provider_id == "lmstudio":
+                    prompt = f"{key_env} (Enter for no-auth default {LMSTUDIO_NOAUTH_PLACEHOLDER!r}): "
+                else:
+                    prompt = f"{key_env} (or Enter to cancel): "
+                new_key = getpass.getpass(prompt).strip()
            except (KeyboardInterrupt, EOFError):
                print()
                return
            if not new_key:
-                print("Cancelled.")
-                return
+                if provider_id == "lmstudio":
+                    new_key = LMSTUDIO_NOAUTH_PLACEHOLDER
+                else:
+                    print("Cancelled.")
+                    return
            save_env_value(key_env, new_key)
            existing_key = new_key
            print("API key saved.")
@@ -4483,10 +4597,21 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
                print("  Tier check: could not verify (proceeding anyway).")
            print()

-    # Optional base URL override
+    # Optional base URL override.
+    # Precedence: env var → config.yaml model.base_url → registry default.
+    # Reading config.yaml prevents silently overwriting a saved remote URL
+    # (e.g. a remote LM Studio endpoint) with localhost when the user just
+    # presses Enter at the prompt below.
    current_base = ""
    if base_url_env:
        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
+    if not current_base:
+        try:
+            _m = load_config().get("model") or {}
+            if str(_m.get("provider") or "").strip().lower() == provider_id:
+                current_base = str(_m.get("base_url") or "").strip()
+        except Exception:
+            pass
    effective_base = current_base or pconfig.inference_base_url

    try:
@@ -4508,8 +4633,22 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
    #   2. Curated static fallback list (offline insurance)
    #   3. Live /models endpoint probe (small providers without models.dev data)
    #
-    # Ollama Cloud: dedicated merged discovery (live API + models.dev + disk cache)
-    if provider_id == "ollama-cloud":
+    # LM Studio: live /api/v1/models probe (no models.dev catalog).
+    # Ollama Cloud: merged discovery (live API + models.dev + disk cache).
+    if provider_id == "lmstudio":
+        from hermes_cli.auth import AuthError
+        from hermes_cli.models import fetch_lmstudio_models
+
+        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
+        try:
+            model_list = fetch_lmstudio_models(api_key=api_key_for_probe, base_url=effective_base)
+        except AuthError as exc:
+            print(f"  LM Studio rejected the request: {exc}")
+            print("  Set LM_API_KEY (or update it) to match the server's bearer token.")
+            model_list = []
+        if model_list:
+            print(f"  Found {len(model_list)} model(s) from LM Studio")
+    elif provider_id == "ollama-cloud":
        from hermes_cli.models import fetch_ollama_cloud_models

        api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
@@ -4731,7 +4870,6 @@ def _model_flow_anthropic(config, current_model=""):
            read_claude_code_credentials,
            is_claude_code_token_valid,
            _is_oauth_token,
-            _resolve_claude_code_token_from_credentials,
        )

        cc_creds = read_claude_code_credentials()
@@ -5213,6 +5351,197 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
    return True


+def _find_stale_dashboard_pids() -> list[int]:
+    """Return PIDs of ``hermes dashboard`` processes other than ourselves.
+
+    ``hermes dashboard`` is a long-lived server process commonly started and
+    forgotten.  When ``hermes update`` replaces files on disk, the running
+    process keeps the old Python backend in memory while the JS bundle on
+    disk is updated, causing a silent frontend/backend mismatch (e.g. new
+    auth headers the old backend doesn't recognise → every API call 401s).
+
+    The dashboard has no service manager (systemd / launchd), no PID file,
+    and we can't know the original launch args — so the only sane action
+    after an update is to kill the stale process and let the user restart
+    it.  This helper is just the detection step; see
+    ``_kill_stale_dashboard_processes`` for the kill.
+
+    Returns an empty list on any scan error (missing ps/wmic, timeout, etc.).
+    """
+    patterns = [
+        "hermes dashboard",
+        "hermes_cli.main dashboard",
+        "hermes_cli/main.py dashboard",
+    ]
+    self_pid = os.getpid()
+    dashboard_pids: list[int] = []
+
+    try:
+        if sys.platform == "win32":
+            # wmic may emit text in the system code page (for example cp936
+            # on zh-CN systems), not UTF-8. In text mode, subprocess output
+            # decoding depends on Python's configuration (locale-dependent
+            # by default, or UTF-8 in UTF-8 mode). The important protection
+            # here is errors="ignore": it prevents a reader-thread
+            # UnicodeDecodeError from leaving result.stdout=None and turning
+            # the later .split() into an AttributeError (#17049).
+            result = subprocess.run(
+                ["wmic", "process", "get", "ProcessId,CommandLine",
+                 "/FORMAT:LIST"],
+                capture_output=True, text=True, timeout=10,
+                encoding="utf-8", errors="ignore",
+            )
+            if result.returncode != 0 or result.stdout is None:
+                return []
+            current_cmd = ""
+            for line in result.stdout.split("\n"):
+                line = line.strip()
+                if line.startswith("CommandLine="):
+                    current_cmd = line[len("CommandLine="):]
+                elif line.startswith("ProcessId="):
+                    pid_str = line[len("ProcessId="):]
+                    if (any(p in current_cmd for p in patterns)
+                            and int(pid_str) != self_pid):
+                        try:
+                            dashboard_pids.append(int(pid_str))
+                        except ValueError:
+                            pass
+        else:
+            # Linux / macOS: scan the process table via ps and match against
+            # the same explicit patterns list used on Windows.  Using ps
+            # (rather than `pgrep -f "hermes.*dashboard"`) keeps us consistent
+            # with `hermes_cli.gateway._scan_gateway_pids` and avoids the
+            # greedy regex matching unrelated cmdlines that merely contain
+            # both words (e.g. a chat session discussing "dashboard").
+            result = subprocess.run(
+                ["ps", "-A", "-o", "pid=,command="],
+                capture_output=True, text=True, timeout=10,
+            )
+            if result.returncode == 0:
+                for line in getattr(result, "stdout", "").split("\n"):
+                    stripped = line.strip()
+                    if not stripped or "grep" in stripped:
+                        continue
+                    parts = stripped.split(None, 1)
+                    if len(parts) != 2:
+                        continue
+                    try:
+                        pid = int(parts[0])
+                    except ValueError:
+                        continue
+                    command = parts[1]
+                    if (any(p in command for p in patterns)
+                            and pid != self_pid):
+                        dashboard_pids.append(pid)
+    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
+        return []
+
+    return dashboard_pids
+
+
+def _kill_stale_dashboard_processes(
+    reason: str = "the running backend no longer matches the updated frontend",
+) -> None:
+    """Kill running ``hermes dashboard`` processes.
+
+    Called at the end of ``hermes update`` (default ``reason``) and also
+    from ``hermes dashboard --stop`` (which overrides ``reason``).  The
+    dashboard has no service manager, so after a code update the running
+    process is guaranteed to be serving stale Python against a
+    freshly-updated JS bundle.  Leaving it alive produces silent
+    frontend/backend mismatches (new auth headers the old backend doesn't
+    recognise → every API call 401s).
+
+    POSIX: SIGTERM, wait up to ~3s for graceful exit, SIGKILL any survivors.
+    Windows: ``taskkill /PID <pid> /F`` since there's no clean SIGTERM
+    equivalent for background console apps.
+
+    The dashboard isn't auto-restarted because we don't know the original
+    launch args (--host, --port, --insecure, --tui, --no-open).  The user
+    restarts it manually; a hint is printed.
+    """
+    pids = _find_stale_dashboard_pids()
+    if not pids:
+        return
+
+    print()
+    print(f"⟲ Stopping {len(pids)} dashboard process(es) ({reason})")
+
+    killed: list[int] = []
+    failed: list[tuple[int, str]] = []
+
+    if sys.platform == "win32":
+        for pid in pids:
+            try:
+                result = subprocess.run(
+                    ["taskkill", "/PID", str(pid), "/F"],
+                    capture_output=True, text=True, timeout=10,
+                )
+                if result.returncode == 0:
+                    killed.append(pid)
+                else:
+                    failed.append((pid, (result.stderr or result.stdout or "").strip()))
+            except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as e:
+                failed.append((pid, str(e)))
+    else:
+        import signal as _signal
+        import time as _time
+
+        # SIGTERM first — give each process a chance to shut down cleanly
+        # (uvicorn closes its socket, flushes logs, etc.).
+        for pid in pids:
+            try:
+                os.kill(pid, _signal.SIGTERM)
+            except ProcessLookupError:
+                # Already gone — count as killed.
+                killed.append(pid)
+            except (PermissionError, OSError) as e:
+                failed.append((pid, str(e)))
+
+        # Poll for exit up to ~3s total.
+        deadline = _time.monotonic() + 3.0
+        pending = [p for p in pids if p not in killed
+                   and p not in {f[0] for f in failed}]
+        while pending and _time.monotonic() < deadline:
+            _time.sleep(0.1)
+            still_pending = []
+            for pid in pending:
+                try:
+                    os.kill(pid, 0)  # probe
+                except ProcessLookupError:
+                    killed.append(pid)
+                except (PermissionError, OSError):
+                    # Can't probe — assume still there.
+                    still_pending.append(pid)
+                else:
+                    still_pending.append(pid)
+            pending = still_pending
+
+        # SIGKILL any survivors.
+        for pid in pending:
+            try:
+                os.kill(pid, _signal.SIGKILL)
+                killed.append(pid)
+            except ProcessLookupError:
+                killed.append(pid)
+            except (PermissionError, OSError) as e:
+                failed.append((pid, str(e)))
+
+    for pid in killed:
+        print(f"    ✓ stopped PID {pid}")
+    for pid, reason in failed:
+        print(f"    ✗ failed to stop PID {pid}: {reason}")
+
+    if killed:
+        print("  Restart the dashboard when you're ready:")
+        print("    hermes dashboard --port <port>")
+
+
+# Back-compat alias: some tests and any external callers may import the old
+# warn-only name.  The new behaviour (kill stale processes) replaces it.
+_warn_stale_dashboard_processes = _kill_stale_dashboard_processes
+
+
 def _update_via_zip(args):
    """Update Hermes Agent by downloading a ZIP archive.

@@ -5347,6 +5676,7 @@ def _update_via_zip(args):

    print()
    print("✓ Update complete!")
+    _kill_stale_dashboard_processes()


 def _stash_local_changes_if_needed(git_cmd: list[str], cwd: Path) -> Optional[str]:
@@ -7048,7 +7378,7 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                    print(
                                        f"  ⚠ {svc_name} died after restart, retrying..."
                                    )
-                                    retry = subprocess.run(
+                                    subprocess.run(
                                        scope_cmd + ["restart", svc_name],
                                        capture_output=True,
                                        text=True,
@@ -7163,6 +7493,13 @@ def _cmd_update_impl(args, gateway_mode: bool):
        except Exception as e:
            logger.debug("Legacy unit check during update failed: %s", e)

+        # Kill stale dashboard processes — the dashboard has no service
+        # manager, so leaving it alive after a code update produces a
+        # silent frontend/backend mismatch.  We can't auto-restart it
+        # (no saved launch args) but we can stop it, and a hint is
+        # printed for the user to re-launch.
+        _kill_stale_dashboard_processes()
+
        print()
        print("Tip: You can now select a provider and model:")
        print("  hermes model              # Select provider and model")
@@ -7552,8 +7889,59 @@ def cmd_profile(args):
            sys.exit(1)


+def _report_dashboard_status() -> int:
+    """Print ``hermes dashboard`` PIDs and return the count.
+
+    Uses the same detection logic as ``_find_stale_dashboard_pids`` (the
+    current process is excluded, but since ``hermes dashboard --status``
+    runs in a short-lived CLI process that never matches the pattern,
+    the exclusion is irrelevant here).
+    """
+    pids = _find_stale_dashboard_pids()
+    if not pids:
+        print("No hermes dashboard processes running.")
+        return 0
+
+    print(f"{len(pids)} hermes dashboard process(es) running:")
+    for pid in pids:
+        # Best-effort: show the full cmdline so users can tell profiles apart.
+        cmdline = ""
+        try:
+            if sys.platform != "win32":
+                cmdline_path = f"/proc/{pid}/cmdline"
+                if os.path.exists(cmdline_path):
+                    with open(cmdline_path, "rb") as f:
+                        cmdline = f.read().replace(b"\x00", b" ").decode(
+                            "utf-8", errors="replace").strip()
+        except (OSError, ValueError):
+            pass
+        if cmdline:
+            print(f"    PID {pid}: {cmdline}")
+        else:
+            print(f"    PID {pid}")
+    return len(pids)
+
+
 def cmd_dashboard(args):
-    """Start the web UI server."""
+    """Start the web UI server, or (with --stop/--status) manage running ones."""
+    # --status: report running dashboards and exit, no deps needed.
+    if getattr(args, "status", False):
+        count = _report_dashboard_status()
+        sys.exit(0 if count == 0 else 0)  # status is informational, always 0
+
+    # --stop: kill any running dashboards and exit, no deps needed.
+    if getattr(args, "stop", False):
+        pids = _find_stale_dashboard_pids()
+        if not pids:
+            print("No hermes dashboard processes running.")
+            sys.exit(0)
+        # Reuse the same SIGTERM-grace-SIGKILL path used after `hermes update`.
+        _kill_stale_dashboard_processes(reason="requested via --stop")
+        # _kill_stale_dashboard_processes prints outcomes itself.  Exit 0 if
+        # we killed at least one, 1 if they were all unkillable.
+        remaining = _find_stale_dashboard_pids()
+        sys.exit(1 if remaining else 0)
+
    try:
        import fastapi  # noqa: F401
        import uvicorn  # noqa: F401
@@ -7620,322 +8008,9 @@ def cmd_logs(args):

 def main():
    """Main entry point for hermes CLI."""
-    parser = argparse.ArgumentParser(
-        prog="hermes",
-        description="Hermes Agent - AI assistant with tool-calling capabilities",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-    hermes                        Start interactive chat
-    hermes chat -q "Hello"        Single query mode
-    hermes -c                     Resume the most recent session
-    hermes -c "my project"        Resume a session by name (latest in lineage)
-    hermes --resume <session_id>  Resume a specific session by ID
-    hermes setup                  Run setup wizard
-    hermes logout                 Clear stored authentication
-    hermes auth add <provider>    Add a pooled credential
-    hermes auth list              List pooled credentials
-    hermes auth remove <p> <t>    Remove pooled credential by index, id, or label
-    hermes auth reset <provider>  Clear exhaustion status for a provider
-    hermes model                  Select default model
-    hermes fallback [list]        Show fallback provider chain
-    hermes fallback add           Add a fallback provider (same picker as `hermes model`)
-    hermes fallback remove        Remove a fallback provider from the chain
-    hermes config                 View configuration
-    hermes config edit            Edit config in $EDITOR
-    hermes config set model gpt-4 Set a config value
-    hermes gateway                Run messaging gateway
-    hermes -s hermes-agent-dev,github-auth
-    hermes -w                     Start in isolated git worktree
-    hermes gateway install        Install gateway background service
-    hermes sessions list          List past sessions
-    hermes sessions browse        Interactive session picker
-    hermes sessions rename ID T   Rename/title a session
-    hermes logs                   View agent.log (last 50 lines)
-    hermes logs -f                Follow agent.log in real time
-    hermes logs errors            View errors.log
-    hermes logs --since 1h        Lines from the last hour
-    hermes debug share             Upload debug report for support
-    hermes update                 Update to latest version
+    from hermes_cli._parser import build_top_level_parser

-For more help on a command:
-    hermes <command> --help
-""",
-    )
-
-    parser.add_argument(
-        "--version", "-V", action="store_true", help="Show version and exit"
-    )
-    parser.add_argument(
-        "-z",
-        "--oneshot",
-        metavar="PROMPT",
-        default=None,
-        help=(
-            "One-shot mode: send a single prompt and print ONLY the final "
-            "response text to stdout. No banner, no spinner, no tool "
-            "previews, no session_id line. Tools, memory, rules, and "
-            "AGENTS.md in the CWD are loaded as normal; approvals are "
-            "auto-bypassed. Intended for scripts / pipes."
-        ),
-    )
-    # --model / --provider are accepted at the top level so they can pair
-    # with -z without needing the `chat` subcommand.  If neither -z nor a
-    # subcommand consumes them, they fall through harmlessly as None.
-    # Mirrors `hermes chat --model ... --provider ...` semantics.
-    parser.add_argument(
-        "-m",
-        "--model",
-        default=None,
-        help=(
-            "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
-            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
-        ),
-    )
-    parser.add_argument(
-        "--provider",
-        default=None,
-        help=(
-            "Provider override for this invocation (e.g. openrouter, anthropic). "
-            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
-        ),
-    )
-    parser.add_argument(
-        "--resume",
-        "-r",
-        metavar="SESSION",
-        default=None,
-        help="Resume a previous session by ID or title",
-    )
-    parser.add_argument(
-        "--continue",
-        "-c",
-        dest="continue_last",
-        nargs="?",
-        const=True,
-        default=None,
-        metavar="SESSION_NAME",
-        help="Resume a session by name, or the most recent if no name given",
-    )
-    parser.add_argument(
-        "--worktree",
-        "-w",
-        action="store_true",
-        default=False,
-        help="Run in an isolated git worktree (for parallel agents)",
-    )
-    parser.add_argument(
-        "--accept-hooks",
-        action="store_true",
-        default=False,
-        help=(
-            "Auto-approve any unseen shell hooks declared in config.yaml "
-            "without a TTY prompt.  Equivalent to HERMES_ACCEPT_HOOKS=1 or "
-            "hooks_auto_accept: true in config.yaml.  Use on CI / headless "
-            "runs that can't prompt."
-        ),
-    )
-    parser.add_argument(
-        "--skills",
-        "-s",
-        action="append",
-        default=None,
-        help="Preload one or more skills for the session (repeat flag or comma-separate)",
-    )
-    parser.add_argument(
-        "--yolo",
-        action="store_true",
-        default=False,
-        help="Bypass all dangerous command approval prompts (use at your own risk)",
-    )
-    parser.add_argument(
-        "--pass-session-id",
-        action="store_true",
-        default=False,
-        help="Include the session ID in the agent's system prompt",
-    )
-    parser.add_argument(
-        "--ignore-user-config",
-        action="store_true",
-        default=False,
-        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
-    )
-    parser.add_argument(
-        "--ignore-rules",
-        action="store_true",
-        default=False,
-        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
-    )
-    parser.add_argument(
-        "--tui",
-        action="store_true",
-        default=False,
-        help="Launch the modern TUI instead of the classic REPL",
-    )
-    parser.add_argument(
-        "--dev",
-        dest="tui_dev",
-        action="store_true",
-        default=False,
-        help="With --tui: run TypeScript sources via tsx (skip dist build)",
-    )
-
-    subparsers = parser.add_subparsers(dest="command", help="Command to run")
-
-    # =========================================================================
-    # chat command
-    # =========================================================================
-    chat_parser = subparsers.add_parser(
-        "chat",
-        help="Interactive chat with the agent",
-        description="Start an interactive chat session with Hermes Agent",
-    )
-    chat_parser.add_argument(
-        "-q", "--query", help="Single query (non-interactive mode)"
-    )
-    chat_parser.add_argument(
-        "--image", help="Optional local image path to attach to a single query"
-    )
-    chat_parser.add_argument(
-        "-m", "--model", help="Model to use (e.g., anthropic/claude-sonnet-4)"
-    )
-    chat_parser.add_argument(
-        "-t", "--toolsets", help="Comma-separated toolsets to enable"
-    )
-    chat_parser.add_argument(
-        "-s",
-        "--skills",
-        action="append",
-        default=argparse.SUPPRESS,
-        help="Preload one or more skills for the session (repeat flag or comma-separate)",
-    )
-    chat_parser.add_argument(
-        "--provider",
-        choices=[
-            "auto",
-            "openrouter",
-            "nous",
-            "openai-codex",
-            "copilot-acp",
-            "copilot",
-            "anthropic",
-            "gemini",
-            "xai",
-            "ollama-cloud",
-            "huggingface",
-            "zai",
-            "kimi-coding",
-            "kimi-coding-cn",
-            "stepfun",
-            "minimax",
-            "minimax-cn",
-            "kilocode",
-            "xiaomi",
-            "arcee",
-            "gmi",
-            "nvidia",
-        ],
-        default=None,
-        help="Inference provider (default: auto)",
-    )
-    chat_parser.add_argument(
-        "-v", "--verbose", action="store_true", help="Verbose output"
-    )
-    chat_parser.add_argument(
-        "-Q",
-        "--quiet",
-        action="store_true",
-        help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info.",
-    )
-    chat_parser.add_argument(
-        "--resume",
-        "-r",
-        metavar="SESSION_ID",
-        default=argparse.SUPPRESS,
-        help="Resume a previous session by ID (shown on exit)",
-    )
-    chat_parser.add_argument(
-        "--continue",
-        "-c",
-        dest="continue_last",
-        nargs="?",
-        const=True,
-        default=argparse.SUPPRESS,
-        metavar="SESSION_NAME",
-        help="Resume a session by name, or the most recent if no name given",
-    )
-    chat_parser.add_argument(
-        "--worktree",
-        "-w",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Run in an isolated git worktree (for parallel agents on the same repo)",
-    )
-    chat_parser.add_argument(
-        "--accept-hooks",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help=(
-            "Auto-approve any unseen shell hooks declared in config.yaml "
-            "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
-            "hooks_auto_accept: in config.yaml)."
-        ),
-    )
-    chat_parser.add_argument(
-        "--checkpoints",
-        action="store_true",
-        default=False,
-        help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)",
-    )
-    chat_parser.add_argument(
-        "--max-turns",
-        type=int,
-        default=None,
-        metavar="N",
-        help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)",
-    )
-    chat_parser.add_argument(
-        "--yolo",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Bypass all dangerous command approval prompts (use at your own risk)",
-    )
-    chat_parser.add_argument(
-        "--pass-session-id",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Include the session ID in the agent's system prompt",
-    )
-    chat_parser.add_argument(
-        "--ignore-user-config",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
-    )
-    chat_parser.add_argument(
-        "--ignore-rules",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
-    )
-    chat_parser.add_argument(
-        "--source",
-        default=None,
-        help="Session source tag for filtering (default: cli). Use 'tool' for third-party integrations that should not appear in user session lists.",
-    )
-    chat_parser.add_argument(
-        "--tui",
-        action="store_true",
-        default=False,
-        help="Launch the modern TUI instead of the classic REPL",
-    )
-    chat_parser.add_argument(
-        "--dev",
-        dest="tui_dev",
-        action="store_true",
-        default=False,
-        help="With --tui: run TypeScript sources via tsx (skip dist build)",
-    )
+    parser, subparsers, chat_parser = build_top_level_parser()
    chat_parser.set_defaults(func=cmd_chat)

    # =========================================================================
@@ -9120,6 +9195,26 @@ Examples:
    except Exception as _exc:
        logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)

+    # =========================================================================
+    # curator command — background skill maintenance
+    # =========================================================================
+    curator_parser = subparsers.add_parser(
+        "curator",
+        help="Background skill maintenance (curator) — status, run, pause, pin",
+        description=(
+            "The curator is an auxiliary-model background task that "
+            "periodically reviews agent-created skills, prunes stale ones, "
+            "consolidates overlaps, and archives obsolete skills. "
+            "Bundled and hub-installed skills are never touched. "
+            "Archives are recoverable; auto-deletion never happens."
+        ),
+    )
+    try:
+        from hermes_cli.curator import register_cli as _register_curator_cli
+        _register_curator_cli(curator_parser)
+    except Exception as _exc:
+        logging.getLogger(__name__).debug("curator CLI wiring failed: %s", _exc)
+
    # =========================================================================
    # memory command
    # =========================================================================
@@ -9585,15 +9680,8 @@ Examples:

            # Launch hermes --resume <id> by replacing the current process
            print(f"Resuming session: {selected_id}")
-            hermes_bin = shutil.which("hermes")
-            if hermes_bin:
-                os.execvp(hermes_bin, ["hermes", "--resume", selected_id])
-            else:
-                # Fallback: re-invoke via python -m
-                os.execvp(
-                    sys.executable,
-                    [sys.executable, "-m", "hermes_cli.main", "--resume", selected_id],
-                )
+            from hermes_cli.relaunch import relaunch
+            relaunch(["--resume", selected_id])
            return  # won't reach here after execvp

        elif action == "stats":
@@ -9676,17 +9764,26 @@ Examples:
        "--preset",
        choices=["user-data", "full"],
        default="full",
-        help="Migration preset (default: full). 'user-data' excludes secrets",
+        help="Migration preset (default: full). Neither preset imports secrets — "
+        "pass --migrate-secrets to include API keys.",
    )
    claw_migrate.add_argument(
        "--overwrite",
        action="store_true",
-        help="Overwrite existing files (default: skip conflicts)",
+        help="Overwrite existing files (default: refuse to apply when the plan has conflicts)",
    )
    claw_migrate.add_argument(
        "--migrate-secrets",
        action="store_true",
-        help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.)",
+        help="Include allowlisted secrets (TELEGRAM_BOT_TOKEN, API keys, etc.). "
+        "Required even under --preset full.",
+    )
+    claw_migrate.add_argument(
+        "--no-backup",
+        action="store_true",
+        help="Skip the pre-migration zip snapshot of ~/.hermes/ (by default a "
+        "single restore-point archive is written to ~/.hermes/backups/ "
+        "before apply; restorable with 'hermes import').",
    )
    claw_migrate.add_argument(
        "--workspace-target", help="Absolute path to copy workspace instructions into"
@@ -9942,6 +10039,22 @@ Examples:
            "Alternatively set HERMES_DASHBOARD_TUI=1."
        ),
    )
+    # Lifecycle flags — mutually exclusive with each other and with the
+    # start-a-server flags above (if both are passed, --stop / --status win
+    # because they exit before the server is started).  The dashboard has
+    # no service manager and no PID file, so these scan the process table
+    # for `hermes dashboard` cmdlines and SIGTERM them directly — the same
+    # path `hermes update` uses to clean up stale dashboards.
+    dashboard_parser.add_argument(
+        "--stop",
+        action="store_true",
+        help="Stop all running hermes dashboard processes and exit",
+    )
+    dashboard_parser.add_argument(
+        "--status",
+        action="store_true",
+        help="List running hermes dashboard processes and exit",
+    )
    dashboard_parser.set_defaults(func=cmd_dashboard)

    # =========================================================================
@@ -10101,6 +10214,17 @@ Examples:
            logger.debug(
                "plugin discovery failed at CLI startup", exc_info=True,
            )
+        try:
+            # MCP tool discovery — no event loop running in CLI/TUI startup,
+            # so inline is safe.  Moved here from model_tools.py module scope
+            # to avoid freezing the gateway's event loop on its first message
+            # via the same lazy import path (#16856).
+            from tools.mcp_tool import discover_mcp_tools
+            discover_mcp_tools()
+        except Exception:
+            logger.debug(
+                "MCP tool discovery failed at CLI startup", exc_info=True,
+            )
        try:
            from hermes_cli.config import load_config
            from agent.shell_hooks import register_from_config
@@ -10120,6 +10244,7 @@ Examples:
            args.oneshot,
            model=getattr(args, "model", None),
            provider=getattr(args, "provider", None),
+            toolsets=getattr(args, "toolsets", None),
        ))

    # Handle top-level --resume / --continue as shortcut to chat
@@ -16,6 +16,7 @@ import time
 from typing import Any, Dict, List, Optional, Tuple

 from hermes_cli.config import (
+    cfg_get,
    load_config,
    save_config,
    get_env_value,
@@ -716,7 +717,7 @@ def cmd_mcp_configure(args):

    # Update config
    config = load_config()
-    server_entry = config.get("mcp_servers", {}).get(name, {})
+    server_entry = cfg_get(config, "mcp_servers", name, default={})

    if len(chosen) == total:
        # All selected → remove include/exclude (register all)
@@ -46,7 +46,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import urllib.error
 import urllib.request
@@ -54,6 +53,7 @@ from pathlib import Path
 from typing import Any

 from hermes_cli import __version__ as _HERMES_VERSION
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -190,7 +190,7 @@ def _write_disk_cache(data: dict[str, Any]) -> None:
        with open(tmp, "w") as fh:
            json.dump(data, fh, indent=2)
            fh.write("\n")
-        os.replace(tmp, path)
+        atomic_replace(tmp, path)
    except OSError as exc:
        logger.info("model catalog cache write failed: %s", exc)

@@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "kimi-coding",
    "kimi-coding-cn",
    "minimax",
+    "minimax-oauth",
    "minimax-cn",
    "alibaba",
    "qwen-oauth",
@@ -213,10 +213,15 @@ def _load_direct_aliases() -> dict[str, DirectAlias]:


 def _ensure_direct_aliases() -> None:
-    """Lazy-load direct aliases on first use."""
-    global DIRECT_ALIASES
+    """Lazy-load direct aliases on first use.
+
+    Mutates the existing DIRECT_ALIASES dict in place rather than rebinding
+    the module attribute. This keeps `from hermes_cli.model_switch import
+    DIRECT_ALIASES` references valid in callers — rebinding would leave them
+    pointing at a stale empty dict.
+    """
    if not DIRECT_ALIASES:
-        DIRECT_ALIASES = _load_direct_aliases()
+        DIRECT_ALIASES.update(_load_direct_aliases())


 # ---------------------------------------------------------------------------
@@ -534,6 +539,7 @@ def resolve_display_context_length(
    api_key: str = "",
    model_info: Optional[ModelInfo] = None,
    custom_providers: list | None = None,
+    config_context_length: int | None = None,
 ) -> Optional[int]:
    """Resolve the context length to show in /model output.

@@ -560,6 +566,7 @@ def resolve_display_context_length(
            api_key=api_key or "",
            provider=provider or None,
            custom_providers=custom_providers,
+            config_context_length=config_context_length,
        )
        if ctx:
            return int(ctx)
@@ -979,6 +986,7 @@ def list_authenticated_providers(
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
+    current_model: str = "",
 ) -> List[dict]:
    """Detect which providers have credentials and list their curated models.

@@ -1012,6 +1020,37 @@ def list_authenticated_providers(
    results: List[dict] = []
    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
    seen_mdev_ids: set = set()  # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
+    # Effective base URLs of every built-in row we emit (normalized lower+rstrip).
+    # Section 4 uses this to hide ``custom_providers`` entries that point at the
+    # same endpoint as a built-in (e.g. a user-defined "my-dashscope" on
+    # https://coding-intl.dashscope.aliyuncs.com/v1 collides with the built-in
+    # alibaba-coding-plan row when DASHSCOPE_API_KEY is present). Fixes #16970.
+    _builtin_endpoints: set = set()
+
+    def _norm_url(url: str) -> str:
+        return str(url or "").strip().rstrip("/").lower()
+
+    def _record_builtin_endpoint(slug: str) -> None:
+        """Record the effective base URL for a built-in provider row.
+
+        Prefers the live env-override (e.g. DASHSCOPE_BASE_URL) over the
+        static inference_base_url so the dedup matches what a user typing
+        that URL into custom_providers would actually hit."""
+        try:
+            from hermes_cli.auth import PROVIDER_REGISTRY as _reg
+        except Exception:
+            return
+        pcfg = _reg.get(slug)
+        if not pcfg:
+            return
+        url = ""
+        if getattr(pcfg, "base_url_env_var", ""):
+            url = os.environ.get(pcfg.base_url_env_var, "") or ""
+        if not url:
+            url = getattr(pcfg, "inference_base_url", "") or ""
+        normed = _norm_url(url)
+        if normed:
+            _builtin_endpoints.add(normed)

    data = fetch_models_dev()

@@ -1025,6 +1064,34 @@ def list_authenticated_providers(
    if "ollama-cloud" not in curated:
        from hermes_cli.models import fetch_ollama_cloud_models
        curated["ollama-cloud"] = fetch_ollama_cloud_models()
+    # LM Studio has no static catalog — probe its native /api/v1/models
+    # endpoint live so the picker reflects whatever the user has loaded.
+    # Base URL precedence: LM_BASE_URL env var > active config's base_url
+    # (when current provider is lmstudio) > 127.0.0.1 default.
+    # On auth rejection or unreachable server, fall back to the caller-supplied
+    # current model so the picker still shows something when offline / mis-keyed.
+    if "lmstudio" not in curated and (
+        os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL") or current_provider.strip().lower() == "lmstudio"
+    ):
+        from hermes_cli.models import fetch_lmstudio_models
+        from hermes_cli.auth import AuthError
+        is_current_lmstudio = current_provider.strip().lower() == "lmstudio"
+        lm_base = (
+            os.environ.get("LM_BASE_URL")
+            or (current_base_url if is_current_lmstudio and current_base_url else None)
+            or "http://127.0.0.1:1234/v1"
+        )
+        try:
+            live = fetch_lmstudio_models(
+                api_key=os.environ.get("LM_API_KEY", ""),
+                base_url=lm_base,
+                timeout=1.5, # Smaller timeout for picker
+            )
+        except AuthError:
+            live = []
+        if not live and is_current_lmstudio and current_model:
+            live = [current_model]
+        curated["lmstudio"] = live

    # --- 1. Check Hermes-mapped providers ---
    for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
@@ -1090,6 +1157,7 @@ def list_authenticated_providers(
        })
        seen_slugs.add(slug.lower())
        seen_mdev_ids.add(mdev_id)
+        _record_builtin_endpoint(slug)

    # --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
    from hermes_cli.providers import HERMES_OVERLAYS
@@ -1175,6 +1243,15 @@ def list_authenticated_providers(

        if hermes_slug in {"copilot", "copilot-acp"}:
            model_ids = provider_model_ids(hermes_slug)
+        # For aws_sdk providers (bedrock), use live discovery so the list
+        # reflects the active region (eu.*, ap.*) not the static us.* list.
+        elif overlay.auth_type == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import bedrock_model_ids_or_none
+                _ids = bedrock_model_ids_or_none()
+                model_ids = _ids if _ids is not None else (curated.get(hermes_slug, []) or curated.get(pid, []))
+            except Exception:
+                model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
        else:
            # Use curated list — look up by Hermes slug, fall back to overlay key
            model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
@@ -1195,6 +1272,7 @@ def list_authenticated_providers(
        })
        seen_slugs.add(pid.lower())
        seen_slugs.add(hermes_slug.lower())
+        _record_builtin_endpoint(hermes_slug)

    # --- 2b. Cross-check canonical provider list ---
    # Catches providers that are in CANONICAL_PROVIDERS but weren't found
@@ -1237,10 +1315,30 @@ def list_authenticated_providers(
            except Exception:
                pass

+        # Special case: aws_sdk auth (bedrock) — no API key env vars,
+        # credentials come from the boto3 credential chain (env vars,
+        # ~/.aws/credentials, instance roles, etc.)
+        if not _cp_has_creds and _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import has_aws_credentials
+                _cp_has_creds = has_aws_credentials()
+            except Exception:
+                pass
+
        if not _cp_has_creds:
            continue

-        _cp_model_ids = curated.get(_cp.slug, [])
+        # For bedrock, use live discovery so the list reflects the active
+        # region (eu.*, us.*, ap.*) instead of the hardcoded us.* static list.
+        if _cp_config and getattr(_cp_config, "auth_type", "") == "aws_sdk":
+            try:
+                from agent.bedrock_adapter import bedrock_model_ids_or_none
+                _ids = bedrock_model_ids_or_none()
+                _cp_model_ids = _ids if _ids is not None else curated.get(_cp.slug, [])
+            except Exception:
+                _cp_model_ids = curated.get(_cp.slug, [])
+        else:
+            _cp_model_ids = curated.get(_cp.slug, [])
        _cp_total = len(_cp_model_ids)
        _cp_top = _cp_model_ids[:max_models]

@@ -1254,6 +1352,7 @@ def list_authenticated_providers(
            "source": "canonical",
        })
        seen_slugs.add(_cp.slug.lower())
+        _record_builtin_endpoint(_cp.slug)

    # --- 3. User-defined endpoints from config ---
    # Track (name, base_url) of what section 3 emits so section 4 can skip
@@ -1312,8 +1411,23 @@ def list_authenticated_providers(
                    if fb:
                        models_list = list(fb)

-            # Try to probe /v1/models if URL is set (but don't block on it)
-            # For now just show what we know from config
+            # Prefer the endpoint's live /models list when credentials are
+            # available. This keeps OpenAI-compatible relays (for example CRS)
+            # in sync when the server catalog changes without requiring the
+            # user to mirror every model into config.yaml.
+            api_key = str(ep_cfg.get("api_key", "") or "").strip()
+            if not api_key:
+                key_env = str(ep_cfg.get("key_env", "") or "").strip()
+                api_key = os.environ.get(key_env, "").strip() if key_env else ""
+            if api_url and api_key:
+                try:
+                    from hermes_cli.models import fetch_api_models
+                    live_models = fetch_api_models(api_key, api_url)
+                    if live_models:
+                        models_list = live_models
+                except Exception:
+                    pass
+
            results.append({
                "slug": ep_name,
                "name": display_name,
@@ -1389,7 +1503,14 @@ def list_authenticated_providers(
                    current_base_url
                    and api_url == current_base_url.strip().rstrip("/")
                ):
-                    slug = current_provider or custom_provider_slug(display_name)
+                    # Guard against bare "custom" slug left by a prior
+                    # failed switch — always resolve to the canonical
+                    # custom:<name> form.  (GH #17478)
+                    slug = (
+                        current_provider
+                        if current_provider and current_provider != "custom"
+                        else custom_provider_slug(display_name)
+                    )
                else:
                    slug = custom_provider_slug(display_name)
                groups[group_key] = {
@@ -1448,6 +1569,15 @@ def list_authenticated_providers(
            )
            if _pair_key[0] and _pair_key[1] and _pair_key in _section3_emitted_pairs:
                continue
+            # Skip if a built-in row (sections 1/2/2b) already represents this
+            # endpoint. Fixes #16970: a user-defined "my-dashscope" pointing at
+            # https://coding-intl.dashscope.aliyuncs.com/v1 duplicates the
+            # built-in alibaba-coding-plan row whenever DASHSCOPE_API_KEY is
+            # set. The built-in row carries the curated model list, correct
+            # auth wiring, and canonical slug — keep it and hide the shadow.
+            _grp_url_norm = _pair_key[1]
+            if _grp_url_norm and _grp_url_norm in _builtin_endpoints:
+                continue
            results.append({
                "slug": slug,
                "name": grp["name"],
@@ -44,6 +44,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openai/gpt-5.4-mini",             ""),
    ("xiaomi/mimo-v2.5-pro",             ""),
    ("xiaomi/mimo-v2.5",                 ""),
+    ("tencent/hy3-preview:free",         "free"),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -106,11 +107,57 @@ def _codex_curated_models() -> list[str]:
    return _add_forward_compat_models(list(DEFAULT_CODEX_MODELS))


+# Static fallback for xAI when the models.dev disk cache is empty (fresh
+# install, offline first run, etc.). Mirrors the xAI-direct model IDs from
+# $HERMES_HOME/models_dev_cache.json as of 2026-04-28. Whenever xAI renames
+# or retires a model, the disk cache picks it up on the next refresh and the
+# fallback here only matters until that refresh lands.
+_XAI_STATIC_FALLBACK: list[str] = [
+    "grok-4.20-0309-reasoning",
+    "grok-4.20-0309-non-reasoning",
+    "grok-4.20-multi-agent-0309",
+    "grok-4-1-fast",
+    "grok-4-1-fast-non-reasoning",
+    "grok-4-fast",
+    "grok-4-fast-non-reasoning",
+    "grok-4",
+    "grok-code-fast-1",
+]
+
+
+def _xai_curated_models() -> list[str]:
+    """Derive the xAI-direct curated list from models.dev disk cache.
+
+    Reads $HERMES_HOME/models_dev_cache.json directly (no network) so this
+    runs at import time without blocking. Falls back to ``_XAI_STATIC_FALLBACK``
+    when the cache is empty or unreadable. Hermes refreshes the cache from
+    https://models.dev/api.json on normal use, so this list self-heals as
+    xAI renames models.
+
+    Mirrors ``_codex_curated_models()``'s role for openai-codex.
+    """
+    try:
+        from agent.models_dev import _load_disk_cache
+        data = _load_disk_cache()
+        xai = data.get("xai") if isinstance(data, dict) else None
+        models = xai.get("models") if isinstance(xai, dict) else None
+        if isinstance(models, dict) and models:
+            ids = [mid for mid in models.keys() if isinstance(mid, str)]
+            if ids:
+                return sorted(ids)
+    except Exception:
+        # Any failure (missing file, malformed JSON, import error)
+        # falls through to the static list.
+        pass
+    return list(_XAI_STATIC_FALLBACK)
+
+
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
        "xiaomi/mimo-v2.5-pro",
        "xiaomi/mimo-v2.5",
+        "tencent/hy3-preview",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
@@ -193,10 +240,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "glm-4.5",
        "glm-4.5-flash",
    ],
-    "xai": [
-        "grok-4.20-reasoning",
-        "grok-4-1-fast-reasoning",
-    ],
+    "xai": _xai_curated_models(),
    "nvidia": [
        # NVIDIA flagship reasoning models
        "nvidia/nemotron-3-super-120b-a12b",
@@ -244,6 +288,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "MiniMax-M2.1",
        "MiniMax-M2",
    ],
+    "minimax-oauth": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
+    ],
    "minimax-cn": [
        "MiniMax-M2.7",
        "MiniMax-M2.5",
@@ -273,6 +321,9 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "mimo-v2-flash",
    ],
+    "tencent-tokenhub": [
+        "hy3-preview",
+    ],
    "arcee": [
        "trinity-large-thinking",
        "trinity-large-preview",
@@ -350,6 +401,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    # to https://dashscope-intl.aliyuncs.com/compatible-mode/v1 (OpenAI-compat)
    # or https://dashscope-intl.aliyuncs.com/apps/anthropic (Anthropic-compat).
    "alibaba": [
+        "qwen3.6-plus",
        "kimi-k2.5",
        "qwen3.5-plus",
        "qwen3-coder-plus",
@@ -720,10 +772,12 @@ class ProviderEntry(NamedTuple):
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
+    ProviderEntry("lmstudio",       "LM Studio",                "LM Studio (local desktop app with built-in model server)"),
    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
+    ProviderEntry("tencent-tokenhub", "Tencent TokenHub",       "Tencent TokenHub (Hy3 Preview — direct API via tokenhub.tencentmaas.com)"),
    ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
@@ -738,6 +792,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
    ProviderEntry("stepfun",        "StepFun Step Plan",       "StepFun Step Plan (agent/coding models via Step Plan API)"),
    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
+    ProviderEntry("minimax-oauth",  "MiniMax (OAuth)",          "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
@@ -781,6 +836,9 @@ _PROVIDER_ALIASES = {
    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
+    "minimax-portal": "minimax-oauth",
+    "minimax-global": "minimax-oauth",
+    "minimax_oauth": "minimax-oauth",
    "claude": "anthropic",
    "claude-code": "anthropic",
    "deep-seek": "deepseek",
@@ -806,6 +864,10 @@ _PROVIDER_ALIASES = {
    "huggingface-hub": "huggingface",
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
+    "tencent": "tencent-tokenhub",
+    "tokenhub": "tencent-tokenhub",
+    "tencent-cloud": "tencent-tokenhub",
+    "tencentmaas": "tencent-tokenhub",
    "aws": "bedrock",
    "aws-bedrock": "bedrock",
    "amazon-bedrock": "bedrock",
@@ -817,6 +879,9 @@ _PROVIDER_ALIASES = {
    "nvidia-nim": "nvidia",
    "build-nvidia": "nvidia",
    "nemotron": "nvidia",
+    "lmstudio": "lmstudio",
+    "lm-studio": "lmstudio",
+    "lm_studio": "lmstudio",
    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "ollama_cloud": "ollama-cloud",
 }
@@ -1623,31 +1688,41 @@ def provider_label(provider: Optional[str]) -> str:

 # Models that support OpenAI Priority Processing (service_tier="priority").
 # See https://openai.com/api-priority-processing/ for the canonical list.
-# Only the bare model slug is stored (no vendor prefix).
-_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
-    "gpt-5.4",
-    "gpt-5.4-mini",
-    "gpt-5.2",
-    "gpt-5.1",
-    "gpt-5",
-    "gpt-5-mini",
-    "gpt-4.1",
-    "gpt-4.1-mini",
-    "gpt-4.1-nano",
-    "gpt-4o",
-    "gpt-4o-mini",
+#
+# Pattern-based matching — any OpenAI flagship model (gpt-*, o1*, o3*, o4*)
+# is assumed to support Priority Processing. service_tier=priority is silently
+# ignored by non-OpenAI endpoints (OpenRouter/Copilot/opencode-zen proxies
+# strip the field), so false positives are harmless. Codex-series models
+# (gpt-5-codex, gpt-5.3-codex, etc.) are excluded — they don't expose the
+# service_tier parameter through the Codex Responses API.
+_OPENAI_FAST_MODE_PREFIXES: tuple[str, ...] = (
+    "gpt-",
+    "o1",
    "o3",
-    "o4-mini",
-})
+    "o4",
+)
+
+
+def _is_openai_fast_model(model_id: Optional[str]) -> bool:
+    """Return True if the model is an OpenAI flagship eligible for Priority Processing."""
+    raw = _strip_vendor_prefix(str(model_id or ""))
+    base = raw.split(":")[0]
+    if not base:
+        return False
+    # Exclude Codex-series — they route through the Codex Responses API
+    # which doesn't accept service_tier.
+    if "codex" in base:
+        return False
+    return any(base.startswith(prefix) for prefix in _OPENAI_FAST_MODE_PREFIXES)
+

 # Models that support Anthropic Fast Mode (speed="fast").
 # See https://platform.claude.com/docs/en/build-with-claude/fast-mode
-# Currently only Claude Opus 4.6.  Both hyphen and dot variants are stored
-# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
-_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
-    "claude-opus-4-6",
-    "claude-opus-4.6",
-})
+#
+# Pattern-based matching — any claude-* model is eligible. The anthropic
+# adapter gates speed=fast on native Anthropic endpoints only (see
+# _is_third_party_anthropic_endpoint in agent/anthropic_adapter.py), so
+# third-party proxies that would reject the beta header are protected.


 def _strip_vendor_prefix(model_id: str) -> str:
@@ -1660,20 +1735,14 @@ def _strip_vendor_prefix(model_id: str) -> str:

 def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model."""
-    raw = _strip_vendor_prefix(str(model_id or ""))
-    if raw in _PRIORITY_PROCESSING_MODELS:
-        return True
-    # Anthropic fast mode — strip date suffixes (e.g. claude-opus-4-6-20260401)
-    # and OpenRouter variant tags (:fast, :beta) for matching.
-    base = raw.split(":")[0]
-    return base in _ANTHROPIC_FAST_MODE_MODELS
+    return _is_anthropic_fast_model(model_id) or _is_openai_fast_model(model_id)


 def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
-    """Return True if the model supports Anthropic's fast mode (speed='fast')."""
+    """Return True if the model is a Claude model eligible for Anthropic Fast Mode."""
    raw = _strip_vendor_prefix(str(model_id or ""))
    base = raw.split(":")[0]
-    return base in _ANTHROPIC_FAST_MODE_MODELS
+    return base.startswith("claude-")


 def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
@@ -1695,14 +1764,61 @@ def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | Non


 def _resolve_copilot_catalog_api_key() -> str:
-    """Best-effort GitHub token for fetching the Copilot model catalog."""
+    """Best-effort GitHub token for fetching the Copilot model catalog.
+
+    Resolution order:
+      1. ``resolve_api_key_provider_credentials("copilot")`` — env vars
+         (``COPILOT_GITHUB_TOKEN`` / ``GH_TOKEN`` / ``GITHUB_TOKEN``) plus
+         the ``gh auth token`` CLI fallback.
+      2. ``read_credential_pool("copilot")`` — a token (typically a
+         ``gho_*`` from device-code login, or a fine-grained PAT) stored in
+         ``auth.json`` under ``credential_pool.copilot[]``. The pool is
+         populated by ``hermes auth add copilot`` and by ``_seed_from_env``
+         when the env var is set in ``~/.hermes/.env``.
+
+    Without (2), users whose only Copilot credential is in the pool see
+    the ``/model`` picker fall back to a stale hardcoded list because the
+    live catalog fetch silently 401s. To avoid wedging on a malformed pool
+    entry, each candidate is exchanged via ``exchange_copilot_token`` —
+    only entries that actually exchange successfully are returned, so a
+    later valid entry is reachable when an earlier one is unsupported.
+    """
    try:
        from hermes_cli.auth import resolve_api_key_provider_credentials

        creds = resolve_api_key_provider_credentials("copilot")
-        return str(creds.get("api_key") or "").strip()
+        api_key = str(creds.get("api_key") or "").strip()
+        if api_key:
+            return api_key
    except Exception:
-        return ""
+        pass
+
+    try:
+        from hermes_cli.auth import read_credential_pool
+        from hermes_cli.copilot_auth import (
+            exchange_copilot_token,
+            validate_copilot_token,
+        )
+
+        for entry in read_credential_pool("copilot"):
+            if not isinstance(entry, dict):
+                continue
+            raw = str(entry.get("access_token") or "").strip()
+            if not raw:
+                continue
+            valid, _ = validate_copilot_token(raw)
+            if not valid:
+                continue
+            try:
+                api_token, _expires_at = exchange_copilot_token(raw)
+            except Exception:
+                continue
+            if api_token:
+                return api_token
+    except Exception:
+        pass
+
+    return ""


 # Providers where models.dev is treated as authoritative: curated static
@@ -1884,6 +2000,18 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
+    # Bedrock uses live discovery keyed by the resolved AWS region so that
+    # EU/AP users see eu.*/ap.* model IDs instead of the static us.* list.
+    # Note: early return intentionally skips _MODELS_DEV_PREFERRED merge
+    # below — bedrock is not expected to appear in that table.
+    if normalized == "bedrock":
+        try:
+            from agent.bedrock_adapter import bedrock_model_ids_or_none
+            ids = bedrock_model_ids_or_none()
+            if ids is not None:
+                return ids
+        except Exception:
+            pass
    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
    if normalized in _MODELS_DEV_PREFERRED:
        return _merge_with_models_dev(normalized, curated_static)
@@ -1906,28 +2034,56 @@ def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
        return None

    headers: dict[str, str] = {"anthropic-version": "2023-06-01"}
-    if _is_oauth_token(token):
+    is_oauth = _is_oauth_token(token)
+    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
-        from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS
+        from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS, _CONTEXT_1M_BETA
        headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
    else:
        headers["x-api-key"] = token

-    req = urllib.request.Request(
-        "https://api.anthropic.com/v1/models",
-        headers=headers,
-    )
-    try:
+    def _do_request(h: dict[str, str]):
+        req = urllib.request.Request(
+            "https://api.anthropic.com/v1/models",
+            headers=h,
+        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            data = json.loads(resp.read().decode())
-            models = [m["id"] for m in data.get("data", []) if m.get("id")]
-            # Sort: latest/largest first (opus > sonnet > haiku, higher version first)
-            return sorted(models, key=lambda m: (
-                "opus" not in m,      # opus first
-                "sonnet" not in m,    # then sonnet
-                "haiku" not in m,     # then haiku
-                m,                    # alphabetical within tier
-            ))
+            return json.loads(resp.read().decode())
+
+    try:
+        try:
+            data = _do_request(headers)
+        except urllib.error.HTTPError as http_err:
+            # Reactive recovery for OAuth subscriptions that reject the 1M
+            # context beta with 400 "long context beta is not yet available
+            # for this subscription". Retry once without the beta; re-raise
+            # anything else so the outer except logs it.
+            if (
+                is_oauth
+                and http_err.code == 400
+            ):
+                try:
+                    body_text = http_err.read().decode(errors="ignore").lower()
+                except Exception:
+                    body_text = ""
+                if "long context beta" in body_text and "not yet available" in body_text:
+                    headers["anthropic-beta"] = ",".join(
+                        [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
+                        + list(_OAUTH_ONLY_BETAS)
+                    )
+                    data = _do_request(headers)
+                else:
+                    raise
+            else:
+                raise
+        models = [m["id"] for m in data.get("data", []) if m.get("id")]
+        # Sort: latest/largest first (opus > sonnet > haiku, higher version first)
+        return sorted(models, key=lambda m: (
+            "opus" not in m,      # opus first
+            "sonnet" not in m,    # then sonnet
+            "haiku" not in m,     # then haiku
+            m,                    # alphabetical within tier
+        ))
    except Exception as e:
        import logging
        logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)
@@ -2079,6 +2235,228 @@ def _is_github_models_base_url(base_url: Optional[str]) -> bool:
    )


+def _lmstudio_server_root(base_url: Optional[str]) -> Optional[str]:
+    """Strip ``/v1`` suffix from an LM Studio base URL to get the native API root.
+
+    Returns ``None`` when the base URL is empty/invalid.
+    """
+    root = (base_url or "").strip().rstrip("/")
+    if root.endswith("/v1"):
+        root = root[:-3].rstrip("/")
+    return root or None
+
+
+def _lmstudio_request_headers(api_key: Optional[str] = None) -> dict:
+    """Build HTTP headers for LM Studio native API requests."""
+    headers = {"User-Agent": _HERMES_USER_AGENT}
+    token = str(api_key or "").strip()
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+    return headers
+
+
+def _lmstudio_fetch_raw_models(
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    timeout: float = 5.0,
+) -> Optional[list[dict]]:
+    """Fetch the raw model list from LM Studio's ``/api/v1/models``.
+
+    Returns the ``models`` list of dicts on success, ``None`` on network
+    errors or malformed responses.  Raises ``AuthError`` on HTTP 401/403.
+    """
+    server_root = _lmstudio_server_root(base_url)
+    if not server_root:
+        return None
+
+    headers = _lmstudio_request_headers(api_key)
+    request = urllib.request.Request(server_root + "/api/v1/models", headers=headers)
+    try:
+        with urllib.request.urlopen(request, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except urllib.error.HTTPError as exc:
+        if exc.code in (401, 403):
+            from hermes_cli.auth import AuthError
+            raise AuthError(
+                f"LM Studio rejected the request with HTTP {exc.code}.",
+                provider="lmstudio",
+                code="auth_rejected",
+            ) from exc
+        import logging
+        logging.getLogger(__name__).debug(
+            "LM Studio probe at %s failed with HTTP %s", server_root, exc.code,
+        )
+        return None
+    except Exception as exc:
+        import logging
+        logging.getLogger(__name__).debug(
+            "LM Studio probe at %s failed: %s", server_root, exc,
+        )
+        return None
+
+    raw_models = payload.get("models") if isinstance(payload, dict) else None
+    if not isinstance(raw_models, list):
+        import logging
+        logging.getLogger(__name__).debug(
+            "LM Studio probe at %s returned malformed payload (no `models` list)",
+            server_root,
+        )
+        return None
+    return raw_models
+
+
+def probe_lmstudio_models(
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    timeout: float = 5.0,
+) -> Optional[list[str]]:
+    """Probe LM Studio's model listing.
+
+    Returns chat-capable model keys on success, including the valid empty-list
+    case when the server is reachable but has no non-embedding models.
+    Returns ``None`` on network errors, malformed responses, or empty/invalid
+    base URLs.
+
+    Raises ``AuthError`` on HTTP 401/403 so callers can surface token issues
+    separately from reachability problems.
+    """
+    raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
+    if raw_models is None:
+        return None
+
+    keys: list[str] = []
+    for raw in raw_models:
+        if not isinstance(raw, dict):
+            continue
+        if str(raw.get("type") or "").strip().lower() == "embedding":
+            continue
+        key = str(raw.get("key") or raw.get("id") or "").strip()
+        if key and key not in keys:
+            keys.append(key)
+    return keys
+
+
+def fetch_lmstudio_models(
+    api_key: Optional[str] = None,
+    base_url: Optional[str] = None,
+    timeout: float = 5.0,
+) -> list[str]:
+    """Fetch LM Studio chat-capable model keys from native ``/api/v1/models``.
+
+    Returns a list of model keys (e.g. ``publisher/model-name``) with embedding
+    models filtered out. Returns an empty list on network errors, malformed
+    responses, or empty/invalid base URLs.
+
+    Raises ``AuthError`` on HTTP 401/403 so callers can distinguish a missing
+    or wrong ``LM_API_KEY`` from an unreachable server — the most common
+    LM Studio support case once auth-enabled mode is turned on.
+    """
+    models = probe_lmstudio_models(api_key=api_key, base_url=base_url, timeout=timeout)
+    return models or []
+
+
+def ensure_lmstudio_model_loaded(
+    model: str,
+    base_url: Optional[str],
+    api_key: Optional[str],
+    target_context_length: int,
+    timeout: float = 120.0,
+) -> Optional[int]:
+    """Ensure LM Studio has ``model`` loaded with at least ``target_context_length``.
+
+    No-op when an instance is already loaded with sufficient context. Otherwise
+    POSTs ``/api/v1/models/load`` to (re)load with the target context, capped
+    at the model's ``max_context_length``. Returns the resolved loaded context
+    length, or ``None`` when the probe / load failed.
+    """
+    server_root = _lmstudio_server_root(base_url)
+    if not server_root:
+        return None
+
+    headers = _lmstudio_request_headers(api_key)
+
+    try:
+        raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=10)
+    except Exception:
+        raw_models = None
+    if raw_models is None:
+        return None
+
+    target_entry = None
+    for raw in raw_models:
+        if not isinstance(raw, dict):
+            continue
+        if raw.get("key") == model or raw.get("id") == model:
+            target_entry = raw
+            break
+    if target_entry is None:
+        return None
+
+    max_ctx = target_entry.get("max_context_length")
+    if isinstance(max_ctx, int) and max_ctx > 0:
+        target_context_length = min(target_context_length, max_ctx)
+
+    for inst in target_entry.get("loaded_instances") or []:
+        cfg = inst.get("config") if isinstance(inst, dict) else None
+        loaded_ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
+        if isinstance(loaded_ctx, int) and loaded_ctx >= target_context_length:
+            return loaded_ctx
+
+    body = json.dumps({
+        "model": model,
+        "context_length": target_context_length,
+    }).encode()
+    load_headers = dict(headers)
+    load_headers["Content-Type"] = "application/json"
+    try:
+        with urllib.request.urlopen(
+            urllib.request.Request(
+                server_root + "/api/v1/models/load",
+                data=body,
+                headers=load_headers,
+                method="POST",
+            ),
+            timeout=timeout,
+        ) as resp:
+            resp.read()
+    except Exception:
+        return None
+    return target_context_length
+
+
+def lmstudio_model_reasoning_options(
+    model: str,
+    base_url: Optional[str],
+    api_key: Optional[str] = None,
+    timeout: float = 5.0,
+) -> list[str]:
+    """Return the reasoning ``allowed_options`` LM Studio publishes for ``model``.
+
+    Pulls ``capabilities.reasoning.allowed_options`` from ``/api/v1/models``.
+    Returns ``[]`` when the model is unknown, the endpoint is unreachable,
+    or the model does not declare a reasoning capability.
+    """
+    try:
+        raw_models = _lmstudio_fetch_raw_models(api_key=api_key, base_url=base_url, timeout=timeout)
+    except Exception:
+        raw_models = None
+    if not raw_models:
+        return []
+
+    for raw in raw_models:
+        if not isinstance(raw, dict):
+            continue
+        if raw.get("key") != model and raw.get("id") != model:
+            continue
+        caps = raw.get("capabilities")
+        reasoning = caps.get("reasoning") if isinstance(caps, dict) else None
+        opts = reasoning.get("allowed_options") if isinstance(reasoning, dict) else None
+        if isinstance(opts, list):
+            return [str(o).strip().lower() for o in opts if isinstance(o, str)]
+        return []
+    return []
+
+
 def _fetch_github_models(api_key: Optional[str] = None, timeout: float = 5.0) -> Optional[list[str]]:
    catalog = fetch_github_model_catalog(api_key=api_key, timeout=timeout)
    if not catalog:
@@ -2674,6 +3052,40 @@ def validate_requested_model(
            "message": "Model names cannot contain spaces.",
        }

+    if normalized == "lmstudio":
+        from hermes_cli.auth import AuthError
+        # Use probe_lmstudio_models so we can distinguish None (unreachable
+        # / malformed response) from [] (reachable, but no chat-capable models
+        # are loaded). fetch_lmstudio_models collapses both to [].
+        try:
+            models = probe_lmstudio_models(api_key=api_key, base_url=base_url)
+        except AuthError as exc:
+            return {
+                "accepted": False, "persist": False, "recognized": False,
+                "message": (
+                    f"{exc} Set `LM_API_KEY` (or update it) to match the server's bearer token."
+                ),
+            }
+        if models is None:
+            return {
+                "accepted": False, "persist": False, "recognized": False,
+                "message": f"Could not reach LM Studio's `/api/v1/models` to validate `{requested}`.",
+            }
+        if not models:
+            return {
+                "accepted": False, "persist": False, "recognized": False,
+                "message": (
+                    f"LM Studio is reachable but no chat-capable models are loaded. "
+                    f"Load `{requested}` in LM Studio (Developer tab → Load Model) and try again."
+                ),
+            }
+        if requested_for_lookup in set(models):
+            return {"accepted": True, "persist": True, "recognized": True, "message": None}
+        return {
+            "accepted": False, "persist": False, "recognized": False,
+            "message": f"Model `{requested}` was not found in LM Studio's model listing.",
+        }
+
    if normalized == "custom":
        # Try probing with correct auth for the api_mode.
        if api_mode == "anthropic_messages":
@@ -3,7 +3,8 @@
 Bypasses cli.py entirely.  No banner, no spinner, no session_id line,
 no stderr chatter.  Just the agent's final text to stdout.

-Toolsets = whatever the user has configured for "cli" in `hermes tools`.
+Toolsets = explicit --toolsets when provided, otherwise whatever the user has
+configured for "cli" in `hermes tools`.
 Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
 Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
 Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
@@ -28,10 +29,103 @@ from contextlib import redirect_stderr, redirect_stdout
 from typing import Optional


+def _normalize_toolsets(toolsets: object = None) -> list[str] | None:
+    if not toolsets:
+        return None
+
+    raw_items = [toolsets] if isinstance(toolsets, str) else toolsets
+    if not isinstance(raw_items, (list, tuple)):
+        raw_items = [raw_items]
+
+    normalized: list[str] = []
+    for item in raw_items:
+        if isinstance(item, str):
+            normalized.extend(part.strip() for part in item.split(","))
+        else:
+            normalized.append(str(item).strip())
+
+    return [item for item in normalized if item] or None
+
+
+def _validate_explicit_toolsets(toolsets: object = None) -> tuple[list[str] | None, str | None]:
+    normalized = _normalize_toolsets(toolsets)
+    if normalized is None:
+        return None, None
+
+    try:
+        from toolsets import validate_toolset
+    except Exception as exc:
+        return None, f"hermes -z: failed to validate --toolsets: {exc}\n"
+
+    built_in = [name for name in normalized if validate_toolset(name)]
+    unresolved = [name for name in normalized if name not in built_in]
+
+    if unresolved:
+        try:
+            from hermes_cli.plugins import discover_plugins
+
+            discover_plugins()
+            plugin_valid = [name for name in unresolved if validate_toolset(name)]
+        except Exception:
+            plugin_valid = []
+
+        if plugin_valid:
+            built_in.extend(plugin_valid)
+            unresolved = [name for name in unresolved if name not in plugin_valid]
+
+    if any(name in {"all", "*"} for name in built_in):
+        ignored = [name for name in normalized if name not in {"all", "*"}]
+        if ignored:
+            sys.stderr.write(
+                "hermes -z: --toolsets all enables every toolset; "
+                f"ignoring additional entries: {', '.join(ignored)}\n"
+            )
+        return None, None
+
+    mcp_names: set[str] = set()
+    mcp_disabled: set[str] = set()
+    if unresolved:
+        try:
+            from hermes_cli.config import read_raw_config
+            from hermes_cli.tools_config import _parse_enabled_flag
+
+            cfg = read_raw_config()
+            mcp_servers = cfg.get("mcp_servers") if isinstance(cfg.get("mcp_servers"), dict) else {}
+            for name, server_cfg in mcp_servers.items():
+                if not isinstance(server_cfg, dict):
+                    continue
+                if _parse_enabled_flag(server_cfg.get("enabled", True), default=True):
+                    mcp_names.add(str(name))
+                else:
+                    mcp_disabled.add(str(name))
+        except Exception:
+            mcp_names = set()
+            mcp_disabled = set()
+
+    mcp_valid = [name for name in unresolved if name in mcp_names]
+    disabled = [name for name in unresolved if name in mcp_disabled]
+    unknown = [name for name in unresolved if name not in mcp_names and name not in mcp_disabled]
+    valid = built_in + mcp_valid
+
+    if unknown:
+        sys.stderr.write(f"hermes -z: ignoring unknown --toolsets entries: {', '.join(unknown)}\n")
+    if disabled:
+        sys.stderr.write(
+            "hermes -z: ignoring disabled MCP servers (set enabled: true in config.yaml to use): "
+            f"{', '.join(disabled)}\n"
+        )
+
+    if not valid:
+        return None, "hermes -z: --toolsets did not contain any valid toolsets.\n"
+
+    return valid, None
+
+
 def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
+    toolsets: object = None,
 ) -> int:
    """Execute a single prompt and print only the final content block.

@@ -42,6 +136,7 @@ def run_oneshot(
        provider: Optional provider override. Falls back to
            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
            then "auto".
+        toolsets: Optional comma-separated string or iterable of toolsets.

    Returns the exit code.  Caller should sys.exit() with the return.
    """
@@ -65,6 +160,12 @@ def run_oneshot(
        )
        return 2

+    explicit_toolsets, toolsets_error = _validate_explicit_toolsets(toolsets)
+    if toolsets_error:
+        sys.stderr.write(toolsets_error)
+        return 2
+    use_config_toolsets = _normalize_toolsets(toolsets) is None
+
    # Auto-approve any shell / tool approvals.  Non-interactive by
    # definition — a prompt would hang forever.
    os.environ["HERMES_YOLO_MODE"] = "1"
@@ -77,7 +178,13 @@ def run_oneshot(

    try:
        with redirect_stdout(devnull), redirect_stderr(devnull):
-            response = _run_agent(prompt, model=model, provider=provider)
+            response = _run_agent(
+                prompt,
+                model=model,
+                provider=provider,
+                toolsets=explicit_toolsets,
+                use_config_toolsets=use_config_toolsets,
+            )
    finally:
        try:
            devnull.close()
@@ -96,6 +203,8 @@ def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
+    toolsets: object = None,
+    use_config_toolsets: bool = True,
 ) -> str:
    """Build an AIAgent exactly like a normal CLI chat turn would, then
    run a single conversation.  Returns the final response string."""
@@ -128,32 +237,52 @@ def _run_agent(
    # the user's configured default provider, which may not host the model
    # the caller just asked for.
    effective_provider = (provider or "").strip() or None
+    explicit_base_url_from_alias: Optional[str] = None
    if effective_provider is None and (model or env_model):
        # Only auto-detect when the model was explicitly requested via arg or
        # env var (not when it came from config — that's the "use my defaults"
        # path and the configured provider is already correct).
        explicit_model = (model or "").strip() or env_model
        if explicit_model:
-            cfg_provider = ""
-            if isinstance(model_cfg, dict):
-                cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-            current_provider = (
-                cfg_provider
-                or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
-                or "auto"
-            )
-            detected = detect_provider_for_model(explicit_model, current_provider)
-            if detected:
-                effective_provider, effective_model = detected
+            # First check DIRECT_ALIASES populated from config.yaml `model_aliases:`.
+            # These map a user-defined alias to (model, provider, base_url) for
+            # endpoints not in any catalog (local servers, custom proxies, etc.).
+            try:
+                from hermes_cli import model_switch as _ms
+                _ms._ensure_direct_aliases()
+                direct = _ms.DIRECT_ALIASES.get(explicit_model.strip().lower())
+            except Exception:
+                direct = None
+            if direct is not None:
+                effective_model = direct.model
+                effective_provider = direct.provider
+                if direct.base_url:
+                    explicit_base_url_from_alias = direct.base_url.rstrip("/")
+            else:
+                cfg_provider = ""
+                if isinstance(model_cfg, dict):
+                    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+                current_provider = (
+                    cfg_provider
+                    or os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
+                    or "auto"
+                )
+                detected = detect_provider_for_model(explicit_model, current_provider)
+                if detected:
+                    effective_provider, effective_model = detected

    runtime = resolve_runtime_provider(
        requested=effective_provider,
        target_model=effective_model or None,
+        explicit_base_url=explicit_base_url_from_alias,
    )

-    # Pull in whatever toolsets the user has enabled for "cli".
-    # sorted() gives stable ordering; set→list for AIAgent's signature.
-    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
+    # Pull in explicit toolsets when provided; otherwise use whatever the user
+    # has enabled for "cli". sorted() gives stable ordering for config-derived
+    # sets; explicit values preserve user order.
+    toolsets_list = _normalize_toolsets(toolsets)
+    if toolsets_list is None and use_config_toolsets:
+        toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    agent = AIAgent(
        api_key=runtime.get("api_key"),
@@ -44,6 +44,40 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([


 def platform_label(key: str, default: str = "") -> str:
-    """Return the display label for a platform key, or *default*."""
+    """Return the display label for a platform key, or *default*.
+
+    Checks the static PLATFORMS dict first, then the plugin platform
+    registry for dynamically registered platforms.
+    """
    info = PLATFORMS.get(key)
-    return info.label if info is not None else default
+    if info is not None:
+        return info.label
+    # Check plugin registry
+    try:
+        from gateway.platform_registry import platform_registry
+        entry = platform_registry.get(key)
+        if entry:
+            return f"{entry.emoji}  {entry.label}" if entry.emoji else entry.label
+    except Exception:
+        pass
+    return default
+
+
+def get_all_platforms() -> "OrderedDict[str, PlatformInfo]":
+    """Return PLATFORMS merged with any plugin-registered platforms.
+
+    Plugin platforms are appended after builtins.  This is the function
+    that tools_config and skills_config should use for platform menus.
+    """
+    merged = OrderedDict(PLATFORMS)
+    try:
+        from gateway.platform_registry import platform_registry
+        for entry in platform_registry.plugin_entries():
+            if entry.name not in merged:
+                merged[entry.name] = PlatformInfo(
+                    label=f"{entry.emoji}  {entry.label}" if entry.emoji else entry.label,
+                    default_toolset=f"hermes-{entry.name}",
+                )
+    except Exception:
+        pass
+    return merged
--- a/Show More
+++ b/Show More