feat: add /recap slash command — summarize recent session activity

Inspired by Claude Code's /recap (v2.1.114, April 2026). Produces a compact text summary of recent activity in the current session: turn counts, tools used, files touched, last user ask, and last assistant reply. Useful when juggling multiple sessions or returning to a session after being away. Implementation notes: - Pure local computation from the in-memory conversation history / gateway transcript. No LLM call, no auxiliary model, no prompt-cache invalidation — a recap should be instant and free. - Works unchanged on CLI and every gateway platform (Telegram, Discord, Slack, …) via a shared hermes_cli.session_recap.build_recap helper. Claude Code only ships this on the CLI. - Tailored to hermes-agent's tool vocabulary: file-editing tools (patch, write_file, read_file, skill_manage, skill_view) surface touched paths; tool-call counts highlight which classes of work drove the session. - Added to ACTIVE_SESSION_BYPASS_COMMANDS and the Level-2 early intercept in gateway/run.py so /recap works while an agent is running (read-only, safe). Source: https://code.claude.com/docs/en/whats-new/2026-w17
chore: add contributors to AUTHOR_MAP for Slack batch salvage
2026-05-01 17:10:46 -07:00 · 2026-05-01 14:01:26 -07:00 · 2026-05-01 14:01:26 -07:00 · 2026-05-01 14:01:26 -07:00 · 2026-05-01 14:01:26 -07:00 · 2026-05-01 14:01:26 -07:00
763 changed files with 101821 additions and 5406 deletions
@@ -9,6 +9,12 @@ node_modules
 .venv
 **/.venv

+# Built artifacts that are regenerated inside the image.  Excluded so local
+# rebuilds on the developer's machine don't invalidate the npm-install layer
+# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
+ui-tui/dist/
+ui-tui/packages/hermes-ink/dist/
+
 # CI/CD
 .github

@@ -398,3 +398,19 @@ IMAGE_TOOLS_DEBUG=false
 # Override STT provider endpoints (for proxies or self-hosted instances)
 # GROQ_BASE_URL=https://api.groq.com/openai/v1
 # STT_OPENAI_BASE_URL=https://api.openai.com/v1
+
+# =============================================================================
+# MICROSOFT TEAMS INTEGRATION
+# =============================================================================
+# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
+# Or use Azure Portal: Azure Active Directory → App registrations → New registration
+# Then add the bot to Teams via the Bot Framework or App Studio.
+#
+# TEAMS_CLIENT_ID=                     # Azure AD App (client) ID
+# TEAMS_CLIENT_SECRET=                 # Azure AD client secret value
+# TEAMS_TENANT_ID=                     # Azure AD tenant ID (or "common" for multi-tenant)
+# TEAMS_ALLOWED_USERS=                 # Comma-separated AAD object IDs or UPNs
+# TEAMS_ALLOW_ALL_USERS=false          # Set true to skip the allowlist
+# TEAMS_HOME_CHANNEL=                  # Default channel/chat ID for cron delivery
+# TEAMS_HOME_CHANNEL_NAME=             # Display name for the home channel
+# TEAMS_PORT=3978                      # Webhook listen port (Bot Framework default)
@@ -1,8 +1,18 @@
 name: 'Setup Nix'
-description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
+description: 'Install Nix and configure Cachix binary cache'
+
+inputs:
+  cachix-auth-token:
+    description: 'Cachix auth token (enables push). Omit for read-only.'
+    required: false
+    default: ''

 runs:
  using: composite
  steps:
    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
-    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
+    - uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
+      with:
+        name: hermes-agent
+        authToken: ${{ inputs.cachix-auth-token }}
+      continue-on-error: true
@@ -76,6 +76,16 @@ jobs:
        run: |
          mkdir -p _site/docs
          cp -r website/build/* _site/docs/
+          # llms.txt / llms-full.txt are also published at the site root
+          # (https://hermes-agent.nousresearch.com/llms.txt) because some
+          # agents and IDE plugins probe the classic root-level path rather
+          # than /docs/llms.txt. Same file, two URLs, one source of truth.
+          if [ -f website/build/llms.txt ]; then
+            cp website/build/llms.txt _site/llms.txt
+          fi
+          if [ -f website/build/llms-full.txt ]; then
+            cp website/build/llms-full.txt _site/llms-full.txt
+          fi

      - name: Upload artifact
        uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa  # v3
@@ -1,74 +0,0 @@
-name: Nix Lockfile Check
-
-on:
-  pull_request:
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-check-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  nix-lockfile-check:
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Resolve head SHA
-        id: sha
-        shell: bash
-        run: |
-          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
-          echo "full=$FULL" >> "$GITHUB_OUTPUT"
-          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
-
-      - name: Check lockfile hashes
-        id: check
-        continue-on-error: true
-        env:
-          LINK_SHA: ${{ steps.sha.outputs.full }}
-        run: nix run .#fix-lockfiles -- --check
-
-      - name: Fail if check crashed without reporting
-        if: steps.check.outputs.stale != 'true' && steps.check.outputs.stale != 'false'
-        run: |
-          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
-          exit 1
-
-      - name: Post sticky PR comment (stale)
-        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          message: |
-            ### ⚠️ npm lockfile hash out of date
-
-            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
-
-            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
-
-            ${{ steps.check.outputs.report }}
-
-            #### Apply the fix
-
-            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
-            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
-            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
-
-      - name: Clear sticky PR comment (resolved)
-        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          delete: true
-
-      - name: Fail if stale
-        if: steps.check.outputs.stale == 'true'
-        run: exit 1
@@ -28,7 +28,7 @@ concurrency:
 jobs:
  # ── Auto-fix on main ───────────────────────────────────────────────
  # Fires when a push to main touches package.json or package-lock.json
-  # in ui-tui/ or web/. Runs fix-lockfiles --apply and pushes the hash
+  # in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
  # update commit directly to main so Nix builds never stay broken.
  #
  # Safety invariants:
@@ -62,6 +62,8 @@ jobs:
          token: ${{ steps.app-token.outputs.token }}

      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}

      - name: Apply lockfile hashes
        id: apply
@@ -200,10 +202,12 @@ jobs:
          fetch-depth: 0

      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}

      - name: Apply lockfile hashes
        id: apply
-        run: nix run .#fix-lockfiles -- --apply
+        run: nix run .#fix-lockfiles

      - name: Commit & push
        if: steps.apply.outputs.changed == 'true'
@@ -7,6 +7,7 @@ on:

 permissions:
  contents: read
+  pull-requests: write

 concurrency:
  group: nix-${{ github.ref }}
@@ -22,12 +23,95 @@ jobs:
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - uses: ./.github/actions/nix-setup
+        with:
+          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
+
+      - name: Resolve head SHA
+        if: github.event_name == 'pull_request'
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
      - name: Check flake
+        id: flake
        if: runner.os == 'Linux'
+        continue-on-error: true
        run: nix flake check --print-build-logs
+
      - name: Build package
+        id: build
        if: runner.os == 'Linux'
+        continue-on-error: true
        run: nix build --print-build-logs
+
+      # When the real Nix build fails, run a targeted diagnostic to see if
+      # the failure is specifically a stale npm lockfile hash in one of the
+      # known npm subpackages (tui / web).  This avoids surfacing a generic
+      # "build failed" message when the fix is a single known command.
+      - name: Diagnose npm lockfile hashes
+        id: hash_check
+        if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      # If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
+      # etc.) it won't set stale=true/false.  Treat that as a distinct failure
+      # mode rather than silently ignoring it.
+      - name: Fail if hash check crashed without reporting
+        if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
+        run: |
+          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
+          exit 1
+
+      - name: Post sticky PR comment (stale hashes)
+        if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.hash_check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles` and commit the diff
+
+      # Clear the sticky comment when either the build passed outright (no
+      # hash check needed) or the hash check explicitly returned stale=false
+      # (build failed for a non-hash reason).
+      - name: Clear sticky PR comment (resolved)
+        if: |
+          github.event_name == 'pull_request' &&
+          runner.os == 'Linux' &&
+          (steps.hash_check.outputs.stale == 'false' ||
+           (steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Final fail if build or flake failed
+        if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
+        run: |
+          if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
+            echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
+          else
+            echo "::error::Nix build/flake check failed. See logs above."
+          fi
+          exit 1
+
      - name: Evaluate flake (macOS)
        if: runner.os == 'macOS'
        run: nix flake show --json > /dev/null
@@ -494,7 +494,7 @@ branding:
  agent_name: "My Agent"
  welcome: "Welcome message"
  response_label: " ⚔ Agent "
-  prompt_symbol: "⚔ ❯ "
+  prompt_symbol: "⚔"

 tool_prefix: "╎"             # Tool output line prefix
 ```
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-    build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
+    build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -28,10 +28,26 @@ WORKDIR /opt/hermes
 # ---------- Layer-cached dependency install ----------
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
+#
+# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
+# because it is referenced as a `file:` workspace dependency from
+# ui-tui/package.json.  Copying the tree up front lets npm resolve the
+# workspace to real content instead of stopping at a bare package.json.
 COPY package.json package-lock.json ./
 COPY web/package.json web/package-lock.json web/
 COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
-COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/
+COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
+
+# `npm_config_install_links=false` forces npm to install `file:` deps as
+# symlinks (the npm 10+ default) even on Debian's older bundled npm 9.x,
+# which defaults to `install-links=true` and installs file deps as *copies*.
+# The host-side package-lock.json is generated with a newer npm that uses
+# symlinks, so an install-as-copy produces a hidden node_modules/.package-lock.json
+# that permanently disagrees with the root lock on the @hermes/ink entry.
+# That disagreement trips the TUI launcher's `_tui_need_npm_install()`
+# check on every startup and triggers a runtime `npm install` that then
+# fails with EACCES (node_modules/ is root-owned from build time).
+ENV npm_config_install_links=false

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
@@ -45,13 +61,7 @@ COPY --chown=hermes:hermes . .

 # Build browser dashboard and terminal UI assets.
 RUN cd web && npm run build && \
-    cd ../ui-tui && npm run build && \
-    rm -rf node_modules/@hermes/ink && \
-    rm -rf packages/hermes-ink/node_modules && \
-    cp -R packages/hermes-ink node_modules/@hermes/ink && \
-    npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
-    rm -rf node_modules/@hermes/ink/node_modules/react && \
-    node --input-type=module -e "await import('@hermes/ink')"
+    cd ../ui-tui && npm run build

 # ---------- Permissions ----------
 # Make install dir world-readable so any HERMES_UID can read it at runtime.
@@ -0,0 +1,505 @@
+# Hermes Agent v0.12.0 (v2026.4.30)
+
+**Release Date:** April 30, 2026
+**Since v0.11.0:** 1,096 commits · 550 merged PRs · 1,270 files changed · 217,776 insertions · 213 community contributors (including co-authors)
+
+> The Curator release — Hermes Agent now maintains itself. An autonomous background Curator grades, prunes, and consolidates your skill library on its own schedule. The self-improvement loop that reviews what to save got a substantial upgrade. Four new inference providers, an 18th messaging platform, a 19th via Teams plugin, native Spotify + Google Meet integrations, ComfyUI and TouchDesigner-MCP moved from optional to bundled-by-default, and a ~57% cut to visible TUI cold start.
+
+---
+
+## ✨ Highlights
+
+- **Autonomous Curator** — `hermes curator` runs as a background agent on the gateway's cron ticker (7-day cycle default). It grades your skill library, consolidates related skills, prunes dead ones, and writes per-run reports to `logs/curator/run.json` + `REPORT.md`. Archived skills are classified consolidated-vs-pruned via model + heuristic. Defense-in-depth gates protect bundled/hub skills from mutation. Unified under `auxiliary.curator` — pick the curator's model in `hermes model`, manage it from the dashboard. `hermes curator status` ranks skills by usage (most-used / least-used). ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277), [#17307](https://github.com/NousResearch/hermes-agent/pull/17307), [#17941](https://github.com/NousResearch/hermes-agent/pull/17941), [#17868](https://github.com/NousResearch/hermes-agent/pull/17868), [#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
+
+- **Self-improvement loop — substantially upgraded** — The background review fork (the core of Hermes' self-improvement: after each turn it decides what memories/skills to save or update) is now class-first (rubric-based rather than free-form), active-update biased (prefers the skill the agent just loaded), handles `references/`/`templates/` sub-files, and properly inherits the parent's live runtime (provider, model, credentials actually propagate). Restricted to memory + skills toolsets so it can't sprawl. Memory providers shut down cleanly. Prior-turn tool messages excluded from the summary so the fork sees a clean context. ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026), [#17213](https://github.com/NousResearch/hermes-agent/pull/17213), [#16099](https://github.com/NousResearch/hermes-agent/pull/16099), [#16569](https://github.com/NousResearch/hermes-agent/pull/16569), [#16204](https://github.com/NousResearch/hermes-agent/pull/16204), [#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
+
+- **Skill integrations — major expansion** — **ComfyUI v5** with official CLI + REST + hardware-gated local install, moved from optional to **built-in by default** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734)). **TouchDesigner-MCP** bundled by default, expanded with GLSL, post-FX, audio, geometry, and 9 new reference docs ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753), [#16624](https://github.com/NousResearch/hermes-agent/pull/16624), [#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @kshitijk4poor + @SHL0MS). **Humanizer** skill ports a text-cleaner that strips AI-isms ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787)). **claude-design** HTML artifact skill + design-md (Google DESIGN.md spec) + airtable salvage + `skill_manage` edits in `external_dirs` + direct-URL skill install + `/reload-skills` slash command. ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358), [#14876](https://github.com/NousResearch/hermes-agent/pull/14876), [#16291](https://github.com/NousResearch/hermes-agent/pull/16291), [#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#16323](https://github.com/NousResearch/hermes-agent/pull/16323), [#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
+
+- **LM Studio — first-class provider** — upgraded from a custom-endpoint alias to a full-blown native provider: dedicated auth, `hermes doctor` checks, reasoning transport, live `/models` listing. (Salvage of @kshitijk4poor's #17061.) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
+
+- **Four more new inference providers** — **GMI Cloud** (first-class, salvage of #11955 — @isaachuangGMICLOUD), **Azure AI Foundry** with auto-detection, **MiniMax OAuth** with PKCE browser flow (salvage #15203), **Tencent Tokenhub** (salvage of #16860). ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663), [#15845](https://github.com/NousResearch/hermes-agent/pull/15845), [#17524](https://github.com/NousResearch/hermes-agent/pull/17524), [#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
+
+- **Pluggable gateway platforms + Microsoft Teams** — the gateway is now a plugin host. Drop-in messaging adapters live outside the core, and Microsoft Teams is the first plugin-shipped platform. (Salvage of #17664.) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751), [#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
+
+- **Tencent 元宝 (Yuanbao) — 18th messaging platform** — native gateway adapter with text + media delivery. ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424))
+
+- **Spotify — native tools + bundled skill + wizard** — 7 tools (play, search, queue, playlists, devices) behind PKCE OAuth, interactive setup wizard, bundled skill, surfacing in `hermes tools`, cron usage documented. ([#15121](https://github.com/NousResearch/hermes-agent/pull/15121), [#15130](https://github.com/NousResearch/hermes-agent/pull/15130), [#15154](https://github.com/NousResearch/hermes-agent/pull/15154), [#15180](https://github.com/NousResearch/hermes-agent/pull/15180))
+
+- **Google Meet plugin** — join calls, transcribe, speak, follow up. Realtime OpenAI transport + Node bot server, full pipeline bundled as a plugin. ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364))
+
+- **`hermes -z` one-shot mode + `hermes update --check`** — non-interactive `hermes -z <prompt>` with `--model`/`--provider`/`HERMES_INFERENCE_MODEL`. `hermes update --check` preflight. Opt-in pre-update HERMES_HOME backup. ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702), [#15704](https://github.com/NousResearch/hermes-agent/pull/15704), [#15841](https://github.com/NousResearch/hermes-agent/pull/15841), [#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
+
+- **Models dashboard tab + in-browser model config** — rich per-model analytics, switch main + auxiliary models from the dashboard. ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745), [#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
+
+- **Remote model catalog manifest** — OpenRouter + Nous Portal model catalogs are now pulled from a remote manifest so new models show up without a release. ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
+
+- **Native multimodal image routing** — images now route based on the model's actual vision capability rather than provider defaults. ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
+
+- **Gateway media parity** — native multi-image sending across Telegram, Discord, Slack, Mattermost, Email, and Signal; centralized audio routing with FLAC support + Telegram document fallback. ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909), [#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
+
+- **TUI catches up to (and past) the classic CLI** — LaTeX rendering (@austinpickett), `/reload` .env hot-reload, pluggable busy-indicator styles (@OutThisLife, #13610), opt-in auto-resume of last session, expanded light-terminal auto-detection, session delete from `/resume` picker with `d`, modified mouse-wheel line scroll, and a `/mouse` toggle that kills ConPTY's phantom mouse injection (@kevin-ho). ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175), [#17286](https://github.com/NousResearch/hermes-agent/pull/17286), [#17150](https://github.com/NousResearch/hermes-agent/pull/17150), [#17130](https://github.com/NousResearch/hermes-agent/pull/17130), [#17113](https://github.com/NousResearch/hermes-agent/pull/17113), [#17668](https://github.com/NousResearch/hermes-agent/pull/17668), [#17669](https://github.com/NousResearch/hermes-agent/pull/17669), [#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
+
+- **Observability + achievements plugins** — bundled Langfuse observability plugin (salvage #16845) + bundled hermes-achievements plugin that scans full session history. ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917), [#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
+
+- **TTS provider registry + Piper local TTS** — pluggable `tts.providers.<name>` registry; Piper ships as a native local TTS provider. (Closes #8508.) ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843), [#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
+
+- **Vercel Sandbox backend** — Vercel sandboxes as an execute_code/terminal backend (@kshitijk4poor). ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
+
+- **Secret redaction off by default** — default flipped to off. Prevents the long-standing patch-corruption incidents where fake secret-shaped substrings mangled tool outputs. Opt in via `redaction.enabled: true` when you need it. ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
+
+- **Cold-start performance** — visible TUI cold start cut **~57%** via lazy agent init (@OutThisLife), lazy imports of OpenAI / Anthropic / Firecrawl / account_usage, mtime-cached `load_config()`, memoized `get_tool_definitions()` with TTL-cached `check_fn` results, precompiled dangerous-command patterns. ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190), [#17046](https://github.com/NousResearch/hermes-agent/pull/17046), [#17041](https://github.com/NousResearch/hermes-agent/pull/17041), [#17098](https://github.com/NousResearch/hermes-agent/pull/17098), [#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
+
+- **Configurable prompt cache TTL** — `prompt_caching.cache_ttl` (5m default, 1h opt-in — cost savings for bursty sessions that keep cache warm). Salvage of #12659. ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
+
+---
+
+## 🧠 Autonomous Curator & Self-Improvement Loop
+
+### Curator — autonomous skill maintenance
+- **`hermes curator` as a background agent** — runs on the gateway's cron ticker, 7-day cycle by default, umbrella-first prompt, inherits parent config, unbounded iterations ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277) — issue #7816)
+- **Per-run reports** — `logs/curator/run.json` + `REPORT.md` per cycle ([#17307](https://github.com/NousResearch/hermes-agent/pull/17307))
+- **Consolidated vs pruned classification** — archived skills split with model + heuristic ([#17941](https://github.com/NousResearch/hermes-agent/pull/17941))
+- **`hermes curator status`** — ranks skills by usage, shows most-used and least-used ([#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
+- **Unified under `auxiliary.curator`** — pick the model in `hermes model`, configure from the dashboard ([#17868](https://github.com/NousResearch/hermes-agent/pull/17868))
+- **Documentation** — dedicated curator feature page on the docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
+- Fix: seed defaults on update, create `logs/curator/` directory, defer fire import ([#17927](https://github.com/NousResearch/hermes-agent/pull/17927))
+- Fix: scan nested archive subdirs in `restore_skill` (@0xDevNinja) ([#17951](https://github.com/NousResearch/hermes-agent/pull/17951))
+- Fix: use actual skill activity in curator status (@y0shua1ee) ([#17953](https://github.com/NousResearch/hermes-agent/pull/17953))
+- Fix: `skill_manage` refuses writes on pinned skills; pinning now blocks curator writes ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562), [#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
+- Fix: `bump_use()` wired into skill invocation + preload + skill_view (salvage #17782) ([#17932](https://github.com/NousResearch/hermes-agent/pull/17932))
+
+### Self-improvement loop (background review fork)
+- **Class-first skill-review prompt** — rubric-based grading rather than free-form "should this update" ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026))
+- **Active-update bias** — prefers updating skills the agent just loaded, handles `references/` + `templates/` sub-files ([#17213](https://github.com/NousResearch/hermes-agent/pull/17213))
+- **Fork inherits parent's live runtime** — provider, model, credentials actually propagate now ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
+- **Scoped toolsets** — review fork restricted to memory + skills (no shell, no web) ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
+- **Clean shutdown** — background review memory providers exit properly (salvage #15289) ([#16204](https://github.com/NousResearch/hermes-agent/pull/16204))
+- **Clean context** — prior-history tool messages excluded from review summary (salvage #14967) ([#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skill integrations — newly bundled or promoted
+- **ComfyUI v5** — official CLI + REST + hardware-gated local install; **moved from optional to built-in** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734), [#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
+- **TouchDesigner-MCP** — **bundled by default** ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753) — @kshitijk4poor), expanded with GLSL, post-FX, audio, geometry references ([#16624](https://github.com/NousResearch/hermes-agent/pull/16624)), 9 new reference docs ([#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @SHL0MS)
+- **Humanizer** — strips AI-isms from text ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787))
+- **claude-design** — HTML artifact skill with disambiguation from other design skills ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358))
+- **design-md** — Google's DESIGN.md spec skill ([#14876](https://github.com/NousResearch/hermes-agent/pull/14876))
+- **airtable** — salvaged skill + skill API keys wired into `.env` (#15838) ([#16291](https://github.com/NousResearch/hermes-agent/pull/16291))
+- **pretext** — creative browser demos with @chenglou/pretext ([#17259](https://github.com/NousResearch/hermes-agent/pull/17259))
+- **spike** + **sketch** — throwaway experiments + HTML mockups, adapted from gsd-build ([#17421](https://github.com/NousResearch/hermes-agent/pull/17421))
+
+### Skills UX
+- **Install skills from a direct HTTP(S) URL** — `hermes skills install <url>` ([#16323](https://github.com/NousResearch/hermes-agent/pull/16323))
+- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
+- **`hermes skills list`** shows enabled/disabled status ([#16129](https://github.com/NousResearch/hermes-agent/pull/16129))
+- **`skill_manage` refuses writes on pinned skills** ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562))
+- **`skill_manage` edits external_dirs skills in place** (salvage #9966) ([#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#17289](https://github.com/NousResearch/hermes-agent/pull/17289))
+- Fix: inline-shell rendering in `skill_view` ([#15376](https://github.com/NousResearch/hermes-agent/pull/15376))
+- Fix: exclude `.archive/` from skill index walk (salvage #17639) ([#17931](https://github.com/NousResearch/hermes-agent/pull/17931))
+- Fix: dedicated docs page per bundled + optional skill ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929))
+- Fix: `google-workspace` shared HERMES_HOME helper + ship deps as optional extra ([#15405](https://github.com/NousResearch/hermes-agent/pull/15405))
+- Fix: auto-wrap ASCII-art code blocks in generated skill pages ([#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
+- Point agent at `hermes-agent` skill + docs site for Hermes questions ([#16535](https://github.com/NousResearch/hermes-agent/pull/16535))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Provider & Model Support
+
+#### New providers
+- **GMI Cloud** — first-class API-key provider on par with Arcee/Kilocode/Xiaomi (salvage of #11955 — @isaachuangGMICLOUD) ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663))
+- **Azure AI Foundry** — auto-detection, full wiring ([#15845](https://github.com/NousResearch/hermes-agent/pull/15845))
+- **LM Studio** — upgraded from custom-endpoint alias to first-class provider: dedicated auth, doctor checks, reasoning transport, live `/models` (salvage of #17061 — @kshitijk4poor) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
+- **MiniMax OAuth** — PKCE browser flow with full OAuth integration (salvage #15203) ([#17524](https://github.com/NousResearch/hermes-agent/pull/17524))
+- **Tencent Tokenhub** — new provider (salvage of #16860) ([#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
+
+#### Model catalog
+- **Remote model catalog manifest** — OpenRouter + Nous Portal catalogs pulled from remote manifest so new models show up without a release ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
+- `openai/gpt-5.5` and `gpt-5.5-pro` added to OpenRouter + Nous Portal ([#15343](https://github.com/NousResearch/hermes-agent/pull/15343))
+- `deepseek-v4-pro` and `deepseek-v4-flash` added ([#14934](https://github.com/NousResearch/hermes-agent/pull/14934))
+- `qwen3.6-plus` added to Alibaba-supported models ([#16896](https://github.com/NousResearch/hermes-agent/pull/16896))
+- Gemini free-tier keys blocked at setup with 429 guidance surfacing ([#15100](https://github.com/NousResearch/hermes-agent/pull/15100))
+
+#### Model configuration
+- **Configurable `prompt_caching.cache_ttl`** — 5m default, 1h opt-in (salvage #12659) ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
+- `/fast` whitelist broadened to all OpenAI + Anthropic models ([#16883](https://github.com/NousResearch/hermes-agent/pull/16883))
+- `auxiliary.extra_body.reasoning` translates into Codex Responses API ([#17004](https://github.com/NousResearch/hermes-agent/pull/17004))
+- `hermes fallback` command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
+
+### Agent Loop & Conversation
+- **Native multimodal image routing** — based on model vision capability, not provider defaults ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
+- **Delegate `child_timeout_seconds` default bumped to 600s** ([#14809](https://github.com/NousResearch/hermes-agent/pull/14809))
+- **Diagnostic dump when subagent times out with 0 API calls** ([#15105](https://github.com/NousResearch/hermes-agent/pull/15105))
+- **Gateway busts cached agent on compression/context_length config edits** ([#17008](https://github.com/NousResearch/hermes-agent/pull/17008))
+- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
+- `/reload-mcp` awareness — rebuild cached agents + prompt-cache cost confirmation ([#17729](https://github.com/NousResearch/hermes-agent/pull/17729))
+- Fix: repair CamelCase + `_tool` suffix tool-call emissions ([#15124](https://github.com/NousResearch/hermes-agent/pull/15124))
+- Fix: retry on `json.JSONDecodeError` instead of treating as local validation error ([#15107](https://github.com/NousResearch/hermes-agent/pull/15107))
+- Fix: handle unescaped control chars in `tool_call.arguments` ([#15356](https://github.com/NousResearch/hermes-agent/pull/15356))
+- Fix: ordering fix in `_copy_reasoning_content_for_api` — cross-provider reasoning isolation (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749))
+- Fix: inject empty `reasoning_content` for DeepSeek/Kimi `tool_calls` unconditionally (@Zjianru) ([#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
+- Fix: persist streamed `reasoning_content` on assistant turns (#16844) ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
+- Fix: cancel coroutine on timeout so worker thread exits; full traceback on tool failure ([#17428](https://github.com/NousResearch/hermes-agent/pull/17428))
+- Fix: isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
+- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
+- Fix: rename `[SYSTEM:` → `[IMPORTANT:` in all user-injected markers (dodges Azure content filter) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
+
+### Compression
+- **Retry summary on main model for unknown errors before giving up** ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774))
+- **Notify users when configured aux model fails even if main-model fallback recovers** ([#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
+- `/compress` wrapped in `_busy_command` to block input during compression ([#15388](https://github.com/NousResearch/hermes-agent/pull/15388))
+- Fix: reserve system + tools headroom when aux binds threshold ([#15631](https://github.com/NousResearch/hermes-agent/pull/15631))
+- Fix: use text-char sum for multimodal token estimation in `_find_tail_cut_by_tokens` ([#16369](https://github.com/NousResearch/hermes-agent/pull/16369))
+
+### Session, Memory & State
+- **Trigram FTS5 index for CJK search, replace LIKE fallback** (@alt-glitch) ([#16651](https://github.com/NousResearch/hermes-agent/pull/16651))
+- **Index `tool_name` + `tool_calls` in FTS5, with repair + migration** (salvages #16866) ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
+- **Checkpoints: auto-prune orphan and stale shadow repos at startup** ([#16303](https://github.com/NousResearch/hermes-agent/pull/16303))
+- **Memory providers notified on mid-process session_id rotation** (#6672) ([#17409](https://github.com/NousResearch/hermes-agent/pull/17409))
+- Fix: quote underscored terms in FTS5 query sanitization ([#16915](https://github.com/NousResearch/hermes-agent/pull/16915))
+- Fix: resolve viking_read 500/412 on file URIs + pseudo-summary URIs (salvage #5886) ([#17869](https://github.com/NousResearch/hermes-agent/pull/17869))
+- Fix: skip external-provider sync on interrupted turns ([#15395](https://github.com/NousResearch/hermes-agent/pull/15395))
+- Fix: close embedded Hindsight async client cleanly (salvage #14605) ([#16209](https://github.com/NousResearch/hermes-agent/pull/16209))
+- Fix: pass session transcript to `shutdown_memory_provider` on gateway + CLI (#15165) ([#16571](https://github.com/NousResearch/hermes-agent/pull/16571))
+- Fix: write-origin metadata seam ([#15346](https://github.com/NousResearch/hermes-agent/pull/15346))
+- Fix: preserve symlinks during atomic file writes ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
+- Refactor: remove `flush_memories` entirely ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
+
+### Auxiliary models
+- Fix: surface auxiliary failures in UI (previously silent) ([#15324](https://github.com/NousResearch/hermes-agent/pull/15324))
+- Fix: surface title-gen auxiliary failures instead of silently dropping ([#16371](https://github.com/NousResearch/hermes-agent/pull/16371))
+- Fix: generalize unsupported-parameter detector and harden `max_tokens` retry ([#15633](https://github.com/NousResearch/hermes-agent/pull/15633))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### New Platforms
+- **Microsoft Teams (19th platform)** — as a plugin, + xdist collision guard ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
+- **Yuanbao (Tencent 元宝, 18th platform)** — native adapter with text + media delivery ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424), [#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
+
+### Pluggable Gateway Platforms
+- **Drop-in messaging adapters** — the gateway is now a plugin host for platforms (salvage of #17664) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
+
+### Telegram
+- **Chat allowlists for groups and forums** (@web3blind) ([#15027](https://github.com/NousResearch/hermes-agent/pull/15027))
+- **Send fresh finals for stale preview streams** (port openclaw#72038) ([#16261](https://github.com/NousResearch/hermes-agent/pull/16261))
+- **Render markdown tables as row-group bullets + prompt hint** ([#16997](https://github.com/NousResearch/hermes-agent/pull/16997))
+- Document fallback in centralized audio routing ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
+- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
+
+### Discord
+- **Opt-in toolsets + ID injection + tool split + Feishu wiring** (salvage #15457, #15458) ([#15610](https://github.com/NousResearch/hermes-agent/pull/15610), [#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
+- Fix: coerce `limit` parameter to int before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
+
+### Slack
+- **Register every gateway command as a native slash (Discord/Telegram parity)** ([#16164](https://github.com/NousResearch/hermes-agent/pull/16164))
+- **`strict_mention` config** — prevents thread auto-engagement ([#16193](https://github.com/NousResearch/hermes-agent/pull/16193))
+- **`channel_skill_bindings`** — bind specific skills to specific Slack channels ([#16283](https://github.com/NousResearch/hermes-agent/pull/16283))
+
+### Signal
+- **Native formatting** — markdown → bodyRanges, reply quotes, reactions ([#17417](https://github.com/NousResearch/hermes-agent/pull/17417))
+- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
+
+### Feishu / Mattermost / Email / Signal
+- All participate in **native multi-image sending** ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
+
+### Gateway Core
+- **Centralized audio routing + FLAC support + Telegram doc fallback** ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
+- **Native multi-image sending** across Telegram, Discord, Slack, Mattermost, Email, Signal ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
+- **Make hygiene hard message limit configurable** ([#17000](https://github.com/NousResearch/hermes-agent/pull/17000))
+- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
+- **`pre_gateway_dispatch` hook** — plugins can intercept before dispatch ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
+- **`pre_approval_request` / `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
+- Fix: timeouts — guard `load_config()` call against runtime exceptions ([#16318](https://github.com/NousResearch/hermes-agent/pull/16318))
+- Fix: support passing handler tools via registry ([#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
+
+---
+
+## 🔧 Tool System
+
+### Plugin-first architecture
+- **Pluggable gateway platforms** — platforms can ship as plugins ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
+- **Microsoft Teams as first plugin-shipped platform** ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
+- **`pre_gateway_dispatch` hook** ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
+- **`pre_approval_request` + `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
+- **`duration_ms` on `post_tool_call`** (inspired by Claude Code 2.1.119) ([#15429](https://github.com/NousResearch/hermes-agent/pull/15429))
+- **Bundled plugins**: Spotify ([#15174](https://github.com/NousResearch/hermes-agent/pull/15174)), Google Meet ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364)), Langfuse observability ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917)), hermes-achievements ([#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
+- **Page-scoped plugin slots for built-in dashboard pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
+- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
+
+### Browser
+- **CDP supervisor** — dialog detection + response + cross-origin iframe eval ([#14540](https://github.com/NousResearch/hermes-agent/pull/14540))
+- **Auto-spawn local Chromium for LAN/localhost URLs** when cloud provider is configured ([#16136](https://github.com/NousResearch/hermes-agent/pull/16136))
+
+### Execute code / Terminal
+- **Vercel Sandbox backend** for `execute_code` / terminal (@kshitijk4poor) ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
+- **Collapse subagent `task_id`s to shared container** ([#16177](https://github.com/NousResearch/hermes-agent/pull/16177))
+- **Docker: run container as host user** to avoid root-owned bind mounts (@benbarclay) ([#17305](https://github.com/NousResearch/hermes-agent/pull/17305))
+- Fix: safely quote `~/` subpaths in wrapped `cd` commands ([#15394](https://github.com/NousResearch/hermes-agent/pull/15394))
+- Fix: close file descriptor in `LocalEnvironment._update_cwd` ([#17300](https://github.com/NousResearch/hermes-agent/pull/17300))
+- Fix: SSH — prevent tar from overwriting remote home dir permissions ([#17898](https://github.com/NousResearch/hermes-agent/pull/17898), [#17867](https://github.com/NousResearch/hermes-agent/pull/17867))
+
+### Image generation
+- See Provider section for updates; no new image providers this window.
+
+### TTS / Voice
+- **Pluggable TTS provider registry** under `tts.providers.<name>` ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843))
+- **Piper** as native local TTS provider (closes #8508) ([#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
+- **Voice mode CLI parity in the TUI** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
+- Fix: vision — use HERMES_HOME-based cache dir instead of cwd ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
+
+### Cron
+- **Honor `hermes tools` config for the cron platform** ([#14798](https://github.com/NousResearch/hermes-agent/pull/14798))
+- **Per-job `workdir`** — project-aware cron runs ([#15110](https://github.com/NousResearch/hermes-agent/pull/15110))
+- **`context_from` field** — chain cron job outputs ([#15606](https://github.com/NousResearch/hermes-agent/pull/15606))
+- Fix: promote `croniter` to a core dependency ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
+
+### Web search
+- **Expose `limit` for `web_search`** ([#16934](https://github.com/NousResearch/hermes-agent/pull/16934))
+
+### Maps
+- Fix: include seconds in timezone UTC offset output ([#16300](https://github.com/NousResearch/hermes-agent/pull/16300))
+
+### Approvals
+- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
+- Perf: precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
+
+### ACP
+- **Advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
+
+### API Server
+- **POST `/v1/runs/{run_id}/stop`** (salvage of #15656) ([#15842](https://github.com/NousResearch/hermes-agent/pull/15842))
+- **Expose run status for external UIs** (#17085) ([#17458](https://github.com/NousResearch/hermes-agent/pull/17458))
+
+### Nix
+- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
+- Fix: use `--rebuild` in fix-lockfiles to bypass cached FOD store paths ([#15444](https://github.com/NousResearch/hermes-agent/pull/15444))
+- Fix: `extraPackages` now actually works via per-user profile ([#17047](https://github.com/NousResearch/hermes-agent/pull/17047))
+- Fix: refresh web/ npm-deps hash to unblock main builds ([#17174](https://github.com/NousResearch/hermes-agent/pull/17174))
+- Fix: replace magic-nix-cache with Cachix ([#17928](https://github.com/NousResearch/hermes-agent/pull/17928))
+
+---
+
+## 🖥️ TUI
+
+### New features
+- **LaTeX rendering** (@austinpickett) ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175))
+- **`/reload` .env hot-reload** — ported from the classic CLI ([#17286](https://github.com/NousResearch/hermes-agent/pull/17286))
+- **Pluggable busy-indicator styles** (@OutThisLife, #13610) ([#17150](https://github.com/NousResearch/hermes-agent/pull/17150))
+- **Opt-in auto-resume of the most recent session** (@OutThisLife) ([#17130](https://github.com/NousResearch/hermes-agent/pull/17130))
+- **Expanded light-terminal auto-detection** — `HERMES_TUI_THEME` + background hex (@OutThisLife) ([#17113](https://github.com/NousResearch/hermes-agent/pull/17113))
+- **Delete sessions from `/resume` picker with `d`** (@OutThisLife) ([#17668](https://github.com/NousResearch/hermes-agent/pull/17668))
+- **Line-by-line scroll on modified mouse wheel** (@OutThisLife) ([#17669](https://github.com/NousResearch/hermes-agent/pull/17669))
+- **Delete queued message while editing with ctrl-x / cancel with esc** (@OutThisLife) ([#16707](https://github.com/NousResearch/hermes-agent/pull/16707))
+- **Per-section visibility for the details accordion** (@OutThisLife) ([#14968](https://github.com/NousResearch/hermes-agent/pull/14968))
+- **Voice mode CLI parity** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
+- **Contextual first-touch hints ported to TUI** — `/busy`, `/verbose` ([#16054](https://github.com/NousResearch/hermes-agent/pull/16054))
+- **Mini help menu on `?` in the input field** (@ethernet8023) ([#18043](https://github.com/NousResearch/hermes-agent/pull/18043))
+
+### Fixes
+- Fix: proactive mouse disable on ConPTY + `/mouse` toggle command (@kevin-ho, WSL2 ghost-mouse fix) ([#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
+- Fix: restore skills search RPC ([#15870](https://github.com/NousResearch/hermes-agent/pull/15870))
+- Perf: cache text measurements across yoga flex re-passes ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
+- Perf: stabilize long-session scrolling ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
+- Perf: lazily seed virtual history heights ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
+- Perf: cut visible cold start ~57% with lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
+
+---
+
+## 🖱️ CLI & User Experience
+
+### New commands
+- **`hermes -z <prompt>`** — non-interactive one-shot mode ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702))
+- **`hermes -z` with `--model` / `--provider` / `HERMES_INFERENCE_MODEL`** ([#15704](https://github.com/NousResearch/hermes-agent/pull/15704))
+- **`hermes update --check`** preflight flag ([#15841](https://github.com/NousResearch/hermes-agent/pull/15841))
+- **`hermes fallback`** command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
+- **`/busy`** slash command for busy input mode ([#15382](https://github.com/NousResearch/hermes-agent/pull/15382))
+- **`/busy` input mode 'steer'** as a third option ([#16279](https://github.com/NousResearch/hermes-agent/pull/16279))
+- **`/btw` as alias for `/background`** ([#16053](https://github.com/NousResearch/hermes-agent/pull/16053))
+- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
+- **Surface `/queue`, `/bg`, `/steer` in agent-running placeholder** ([#16118](https://github.com/NousResearch/hermes-agent/pull/16118))
+
+### Setup / onboarding
+- **Auto-reconfigure on existing installs** ([#15879](https://github.com/NousResearch/hermes-agent/pull/15879))
+- **Contextual first-touch hints for `/busy` and `/verbose`** ([#16046](https://github.com/NousResearch/hermes-agent/pull/16046))
+- **Cost-saving tips from the April 30 tip-of-the-day** ([#17841](https://github.com/NousResearch/hermes-agent/pull/17841))
+- **Hyperlink startup banner title to the latest GitHub Release** ([#14945](https://github.com/NousResearch/hermes-agent/pull/14945))
+
+### Update / backup
+- **Snapshot pairing data before `git pull`** ([#16383](https://github.com/NousResearch/hermes-agent/pull/16383))
+- **Auto-backup HERMES_HOME before `hermes update`** (opt-in, off by default) ([#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
+- **Exclude `checkpoints/` from backups** ([#16572](https://github.com/NousResearch/hermes-agent/pull/16572))
+- **Exclude SQLite WAL/SHM/journal sidecars from backups** ([#16576](https://github.com/NousResearch/hermes-agent/pull/16576))
+- **Installer FHS layout for root installs on Linux** ([#15608](https://github.com/NousResearch/hermes-agent/pull/15608))
+- Fix: kill stale dashboards instead of warning ([#17832](https://github.com/NousResearch/hermes-agent/pull/17832))
+- Fix: show correct update status on nix-built hermes ([#17550](https://github.com/NousResearch/hermes-agent/pull/17550))
+
+### Slash-command housekeeping
+- Refactor: drop `/provider`, `/plan` handler, and clean up slash registry ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
+- Refactor: drop `persist_session` plumbing + fix broken `/btw` mid-turn bypass ([#16075](https://github.com/NousResearch/hermes-agent/pull/16075))
+
+### OpenClaw migration (for folks coming from OpenClaw)
+- **Hardened OpenClaw import** — plan-first apply, redaction, pre-migration backup ([#16911](https://github.com/NousResearch/hermes-agent/pull/16911))
+- Fix: case-preserving brand rewrite + one-time `~/.openclaw` residue banner ([#16327](https://github.com/NousResearch/hermes-agent/pull/16327))
+- Fix: resolve `openclaw` workspace files from `agents.defaults.workspace` ([#16879](https://github.com/NousResearch/hermes-agent/pull/16879))
+- Fix: resolve model aliases against real OpenClaw catalog schema (salvage #16778) ([#16977](https://github.com/NousResearch/hermes-agent/pull/16977))
+
+---
+
+## 📊 Web Dashboard
+
+- **Models tab** — rich per-model analytics ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745))
+- **Configure main + auxiliary models from the Models page** ([#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
+- **Dashboard Chat tab — xterm.js + JSON-RPC sidecar** (supersedes #12710 + #13379, @OutThisLife) ([#14890](https://github.com/NousResearch/hermes-agent/pull/14890))
+- **Dashboard layout refresh** (@austinpickett) ([#14899](https://github.com/NousResearch/hermes-agent/pull/14899))
+- **`--stop` and `--status` flags** on the dashboard CLI ([#17840](https://github.com/NousResearch/hermes-agent/pull/17840))
+- **Page-scoped plugin slots for built-in pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
+- Fix: replace all buttons with design-system buttons ([#17007](https://github.com/NousResearch/hermes-agent/pull/17007))
+
+---
+
+## ⚡ Performance
+
+- **TUI visible cold start cut ~57%** via lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
+- **Lazy-import OpenAI, Anthropic, Firecrawl, account_usage** ([#17046](https://github.com/NousResearch/hermes-agent/pull/17046))
+- **mtime-cache `load_config()` and `read_raw_config()`** ([#17041](https://github.com/NousResearch/hermes-agent/pull/17041))
+- **Memoize `get_tool_definitions()` + TTL-cache `check_fn` results** ([#17098](https://github.com/NousResearch/hermes-agent/pull/17098))
+- **Precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS** ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
+- **Cache Ink text measurements across yoga flex re-passes** ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
+- **Stabilize long-session scrolling** ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
+- **Lazily seed virtual history heights** ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
+
+---
+
+## 🔒 Security & Reliability
+
+- **Secret redaction off by default** — stops corrupting patches / API payloads with fake-key substitutions. Opt in via `redaction.enabled: true` ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
+- **`[SYSTEM:` → `[IMPORTANT:`** in all user-injected markers (Azure content filter dodge) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
+- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
+- **Canonical `mask_secret` helper; fix status.py DIM drift** ([#17207](https://github.com/NousResearch/hermes-agent/pull/17207))
+- **Sweep expired paste.rs uploads on a real timer** ([#16431](https://github.com/NousResearch/hermes-agent/pull/16431))
+- **Preserve symlinks during atomic file writes** ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
+- **Probe `/dev/tty` by opening it, not bare existence** ([#17024](https://github.com/NousResearch/hermes-agent/pull/17024))
+
+---
+
+## 🐛 Notable Bug Fixes
+
+This window includes 360 `fix:` PRs. Selected highlights from across the stack:
+
+- **Background review fork inherits parent's live runtime** — provider/model/creds now propagate correctly ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
+- **Hindsight: timeout now configurable via the `HINDSIGHT_TIMEOUT` env var** ([#15077](https://github.com/NousResearch/hermes-agent/pull/15077))
+- **Tools: normalize numeric entries + clear stale `no_mcp` in `_save_platform_tools`** ([#15607](https://github.com/NousResearch/hermes-agent/pull/15607))
+- **MCP: rewrite `definitions` refs to `$defs` in input schemas** — closes provider-side 400s
+- **Azure content filter compatibility** — renamed `[SYSTEM:` markers so Azure's content filter stops flagging them ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
+- **Vision cache uses HERMES_HOME instead of cwd** ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
+- **FTS5 search** — tool_name + tool_calls indexing with repair + migration ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
+- **Streaming reasoning persists on assistant turns** ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
+- **execute_code concurrent RPC serialization** (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
+- **Background reviewer scoped to memory + skills toolsets** — no more accidental web/shell escapes ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
+- **Compression recovery** — retry on main before giving up; notify user when aux fails ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774), [#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
+- **`croniter` promoted to a core dependency** ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
+- **Discord tool `limit` parameter coerced to int** before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
+- **Yuanbao messaging platform entrance fix** ([#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
+- **ACP advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
+- **DeepSeek / Kimi reasoning content isolation** across cross-provider histories (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749), [#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
+- **Preserve reasoning_content replay on DeepSeek v4 + Kimi/Moonshot thinking** ([#18045](https://github.com/NousResearch/hermes-agent/pull/18045))
+
+The vast majority of the 360 fixes landed in the streaming/compression/tool-calling paths across all providers — DeepSeek, Kimi, Moonshot, GLM, Qwen, MiniMax, Gemini, Anthropic, OpenAI — alongside TUI polish (resize, scroll, sticky-prompt) and gateway platform-specific edge cases.
+
+---
+
+## 🧪 Testing & CI
+
+- Hermetic test parity (`scripts/run_tests.sh`) held across this window
+- **Microsoft Teams xdist collision guard** — prevents worker collisions when Teams platform tests run in parallel ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
+- Chore: remove unused imports and dead locals (ruff F401, F841) ([#17010](https://github.com/NousResearch/hermes-agent/pull/17010))
+
+---
+
+## 📚 Documentation
+
+- **Curator feature page** added to docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
+- **Document that pinning also blocks `skill_manage` writes** ([#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
+- **Direct-URL skill install documented** across features, reference, guide, and `hermes-agent` skill ([#16355](https://github.com/NousResearch/hermes-agent/pull/16355))
+- **Hooks tutorial — build a BOOT.md startup checklist** (replaces the removed built-in hook) ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202))
+- **ComfyUI docs: ask local vs cloud FIRST before hardware check** ([#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
+- **Obliteratus skill: link YouTube video guide in SKILL.md** ([#15808](https://github.com/NousResearch/hermes-agent/pull/15808))
+- Per-skill docs pages generated for bundled + optional skills; ASCII art code blocks auto-wrapped ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929), [#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
+
+---
+
+## ⚖️ Removed / Reverted
+
+- **Kanban multi-profile collaboration board** — landed in #16081, reverted in [#16098](https://github.com/NousResearch/hermes-agent/pull/16098) while the design is reworked
+- **computer-use cua-driver** — 3 preparatory PRs landed, then were reverted in [#16927](https://github.com/NousResearch/hermes-agent/pull/16927)
+- **BOOT.md built-in hook** removed ([#17093](https://github.com/NousResearch/hermes-agent/pull/17093)); the hooks tutorial ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202)) shows how to build the same workflow yourself with a shell hook
+- **`/provider` + `/plan` slash commands dropped** ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
+- **`flush_memories` removed entirely** ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** (Teknium)
+
+### Top Community Contributors (by merged PR count since v0.11.0)
+
+- **@OutThisLife** (Brooklyn) — 52 PRs · TUI — light-terminal detection + pluggable busy styles + auto-resume + session-delete from /resume + mouse-wheel scrolling + xterm.js dashboard Chat tab + cold-start cut + accordion polish
+- **@kshitijk4poor** — 12 PRs · LM Studio first-class provider (salvage), Vercel Sandbox backend, GMI Cloud salvage, bundled-by-default touchdesigner-mcp, many tool-call / reasoning fixes
+- **@helix4u** — 10 PRs · MCP schema robustness, assorted stability fixes
+- **@alt-glitch** — 8 PRs · trigram FTS5 CJK search, declarative Nix plugin install, matrix/feishu hints and fixes
+- **@ethernet8023** — 4 PRs
+- **@austinpickett** — 4 PRs · LaTeX rendering in TUI, dashboard layout refresh
+- **@benbarclay** — 3 PRs · Docker run-as-host-user so bind mounts don't get root-owned
+- **@vominh1919** — 2 PRs
+- **@stephenschoettler** — 2 PRs
+- **@kevin-ho** — ConPTY mouse-injection fix (#15488)
+- **@Zjianru** — cross-provider reasoning_content isolation + DeepSeek/Kimi empty-reasoning injection (#15749, #15762)
+- **@web3blind** — Telegram chat allowlists for groups and forums (#15027)
+- **@SHL0MS** — 9 new TouchDesigner-MCP reference docs (#16768)
+- **@0xDevNinja** — curator `restore_skill` nested-archive fix (#17951)
+- **@y0shua1ee** — curator `use` activity fix (#17953)
+
+### Also contributing
+Salvaged or co-authored work from **@isaachuangGMICLOUD** (GMI Cloud), earlier upstream PRs from the original author of each salvage chain, and a long tail of one-shot fixes, documentation nudges, and skill contributions from the community.
+
+### All Contributors (alphabetical, excluding @teknium1)
+
+@0xbyt4, @0xharryriddle, @0xDevNinja, @0z1-ghb, @5park1e, @A-FdL-Prog, @aj-nt, @akhater, @alblez, @alexg0bot,
+@alexzhu0, @AllardQuek, @alt-glitch, @amanning3390, @amanuel2, @AndreKurait, @andrewhosf, @Andy283, @andyylin,
+@angel12, @AntAISecurityLab, @ash, @austinpickett, @badgerbees, @BadTechBandit, @Bartok9, @beenherebefore,
+@beesrsj2500, @BeliefanX, @benbarclay, @benjaminsehl, @BlackishGreen33, @bloodcarter, @BlueBirdBack,
+@briandevans, @brooklynnicholson, @bsgdigital, @buray, @bwjoke, @camaragon, @cdanis, @cgarwood82,
+@charles-brooks, @chen1749144759, @chengoak, @ching-kaching, @Contentment003111, @crayfish-ai, @CruxExperts,
+@cyclingwithelephants, @dandaka, @danklynn, @ddupont808, @dhabibi, @difujia, @dimitrovi, @dlkakbs,
+@dontcallmejames, @EKKOLearnAI, @emozilla, @ericnicolaides, @Erosika, @ethernet8023, @exiao, @Feranmi10,
+@flobo3, @foxion37, @georgeglessner, @georgex8001, @ghostmfr, @H-Ali13381, @HangGlidersRule, @harryplusplus,
+@haru398801, @heathley, @hejuntt1014, @hekaru-agent, @helix4u, @Heltman, @HenkDz, @heyitsaamir, @hharry11,
+@hhhonzik, @hhuang91, @HiddenPuppy, @htsh, @iamagenius00, @in-liberty420, @innocarpe, @irispillars, @iRonin,
+@isaachuangGMICLOUD, @Ito-69, @j3ffffff, @jackjin1997, @jakubkrcmar, @Jason2031, @JayGwod, @jerome-benoit,
+@johnncenae, @Kailigithub, @keiravoss94, @kevin-ho, @knockyai, @konsisumer, @kshitijk4poor, @kunlabs, @l0hde,
+@Leihb, @leoneparise, @LeonSGP43, @liizfq, @liuhao1024, @loongzhao, @lsdsjy, @luyao618, @ma-pony, @Magaav,
+@MagicRay1217, @math0r-be, @MattMaximo, @maxims-oss, @MaxyMoos, @maymuneth, @mcndjxlefnd, @memosr,
+@MestreY0d4-Uninter, @mewwts, @Mirac1eSky, @MorAlekss, @mrhwick, @mrunmayee17, @mssteuer, @Nanako0129,
+@nazirulhafiy, @Nerijusas, @Nicecsh, @nicoloboschi, @nightq, @ningfangbin, @octo-patch, @Octopus,
+@OutThisLife, @Paperclip, @pein892, @perlowja, @prasadus92, @qike-ms, @qiyin-code, @Readon, @ReginaldasR,
+@revaraver, @rfilgueiras, @rmoen, @romanornr, @rugvedS07, @rylena, @samrusani, @Sanjays2402, @sasha-id,
+@Satoshi-agi, @scheidti, @scotttrinh, @season179, @SeeYangZhi, @sgaofen, @shamork, @shannonsands, @SHL0MS,
+@simbam99, @Societus, @socrates1024, @Sonoyunchu, @sprmn24, @stephenschoettler, @tangyuanjc, @TechPrototyper,
+@tekgnosis-net, @ThomassJonax, @tmimmanuel, @tochukwuada, @Tosko4, @Tranquil-Flow, @twozle, @txbxxx,
+@UgwujaGeorge, @Versun, @vlwkaos, @voidborne-d, @vominh1919, @Wang-tianhao, @Wangshengyang2004, @web3blind,
+@westers, @Wysie, @xandersbell, @xiahu88988, @XieNBi, @xinbenlv, @xnbi, @y0shua1ee, @yatesjalex, @yes999zc,
+@yeyitech, @Yoimex, @YueLich, @Yukipukii1, @zhiyanliu, @zicochaos, @Zjianru, @zkl2333, @zons-zhaozhy,
+@ztexydt-cqh.
+
+Also: @Siddharth Balyan, @YuShu.
+
+---
+
+**Full Changelog**: [v2026.4.23...v2026.4.30](https://github.com/NousResearch/hermes-agent/compare/v2026.4.23...v2026.4.30)
@@ -13,6 +13,7 @@ from typing import Any, Deque, Optional
 import acp
 from acp.schema import (
    AgentCapabilities,
+    AgentMessageChunk,
    AuthenticateResponse,
    AvailableCommand,
    AvailableCommandsUpdate,
@@ -30,6 +31,7 @@ from acp.schema import (
    McpServerStdio,
    ModelInfo,
    NewSessionResponse,
+    PromptCapabilities,
    PromptResponse,
    ResumeSessionResponse,
    SetSessionConfigOptionResponse,
@@ -45,6 +47,7 @@ from acp.schema import (
    TextContentBlock,
    UnstructuredCommandInput,
    Usage,
+    UserMessageChunk,
 )

 # AuthMethodAgent was renamed from AuthMethod in agent-client-protocol 0.9.0
@@ -88,17 +91,69 @@ def _extract_text(
        | EmbeddedResourceContentBlock
    ],
 ) -> str:
-    """Extract plain text from ACP content blocks."""
+    """Extract plain text from ACP content blocks for display/commands."""
    parts: list[str] = []
    for block in prompt:
        if isinstance(block, TextContentBlock):
            parts.append(block.text)
        elif hasattr(block, "text"):
            parts.append(str(block.text))
-        # Non-text blocks are ignored for now.
    return "\n".join(parts)


+def _image_block_to_openai_part(block: ImageContentBlock) -> dict[str, Any] | None:
+    """Convert an ACP image content block to OpenAI-style multimodal content.
+
+    Reads ``data``, ``uri`` and ``mime_type`` from *block* via ``getattr`` so
+    that missing or ``None`` attributes are treated as empty values.
+
+    Returns:
+        ``{"type": "image_url", "image_url": {"url": ...}}``, or ``None`` when
+        the block carries neither inline data nor a URI.
+    """
+    data = str(getattr(block, "data", "") or "").strip()
+    uri = str(getattr(block, "uri", "") or "").strip()
+    # Fall back to image/png when the MIME type is absent or blank.
+    mime_type = str(getattr(block, "mime_type", "") or "image/png").strip() or "image/png"
+
+    if data:
+        # Inline data takes precedence over the URI. Values that already are
+        # data URLs pass through untouched; anything else is wrapped in one
+        # (the payload is assumed to be base64 already -- it is not re-encoded).
+        url = data if data.startswith("data:") else f"data:{mime_type};base64,{data}"
+    elif uri:
+        url = uri
+    else:
+        return None
+
+    return {"type": "image_url", "image_url": {"url": url}}
+
+
+def _content_blocks_to_openai_user_content(
+    prompt: list[
+        TextContentBlock
+        | ImageContentBlock
+        | AudioContentBlock
+        | ResourceContentBlock
+        | EmbeddedResourceContentBlock
+    ],
+) -> str | list[dict[str, Any]]:
+    """Convert ACP prompt blocks into a Hermes/OpenAI-compatible user content payload.
+
+    Returns:
+        A plain string when the prompt yields only text (or nothing usable),
+        otherwise a list of ``{"type": "text", ...}`` / ``{"type": "image_url",
+        ...}`` parts in prompt order.
+
+    Only ``TextContentBlock`` and ``ImageContentBlock`` instances contribute to
+    the structured list; other block types are skipped by the loop below.
+    """
+    parts: list[dict[str, Any]] = []
+    text_parts: list[str] = []
+
+    for block in prompt:
+        if isinstance(block, TextContentBlock):
+            # Empty text blocks are dropped entirely.
+            if block.text:
+                parts.append({"type": "text", "text": block.text})
+                text_parts.append(block.text)
+            continue
+        if isinstance(block, ImageContentBlock):
+            image_part = _image_block_to_openai_part(block)
+            # None means the image block had neither data nor a URI.
+            if image_part is not None:
+                parts.append(image_part)
+            continue
+
+    if not parts:
+        # Nothing convertible was found; defer to the text extractor, which
+        # also picks up non-text blocks that expose a ``text`` attribute.
+        return _extract_text(prompt)
+
+    # Keep pure text prompts as strings so slash-command handling and text-only
+    # providers keep the exact legacy path. Switch to structured content only
+    # when an actual non-text block is present.
+    if all(part.get("type") == "text" for part in parts):
+        return "\n".join(text_parts)
+
+    return parts
+
+
 class HermesACPAgent(acp.Agent):
    """ACP Agent implementation wrapping Hermes AIAgent."""

@@ -109,6 +164,8 @@ class HermesACPAgent(acp.Agent):
        "context": "Show conversation context info",
        "reset": "Clear conversation history",
        "compact": "Compress conversation context",
+        "steer": "Inject guidance into the currently running agent turn",
+        "queue": "Queue a prompt to run after the current turn finishes",
        "version": "Show Hermes version",
    }

@@ -138,6 +195,16 @@ class HermesACPAgent(acp.Agent):
            "name": "compact",
            "description": "Compress conversation context",
        },
+        {
+            "name": "steer",
+            "description": "Inject guidance into the currently running agent turn",
+            "input_hint": "guidance for the active turn",
+        },
+        {
+            "name": "queue",
+            "description": "Queue a prompt to run after the current turn finishes",
+            "input_hint": "prompt to run next",
+        },
        {
            "name": "version",
            "description": "Show Hermes version",
@@ -352,6 +419,7 @@ class HermesACPAgent(acp.Agent):
            agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
            agent_capabilities=AgentCapabilities(
                load_session=True,
+                prompt_capabilities=PromptCapabilities(image=True),
                session_capabilities=SessionCapabilities(
                    fork=SessionForkCapabilities(),
                    list=SessionListCapabilities(),
@@ -377,6 +445,78 @@ class HermesACPAgent(acp.Agent):

    # ---- Session management -------------------------------------------------

+    @staticmethod
+    def _history_message_text(message: dict[str, Any]) -> str:
+        """Extract displayable text from a persisted OpenAI-style message.
+
+        Handles ``content`` given either as a plain string or as a list of
+        parts. Any other content type (including a missing key) yields ``""``.
+
+        Returns:
+            The stripped text; list parts are stripped individually, blank
+            parts are dropped, and the rest are joined with newlines.
+        """
+        content = message.get("content")
+        if isinstance(content, str):
+            return content.strip()
+        if isinstance(content, list):
+            parts: list[str] = []
+            for item in content:
+                if isinstance(item, dict):
+                    # Prefer a string ``text`` field; otherwise accept the
+                    # alternate shape where a "text"-typed part stores its
+                    # string under ``content``.
+                    text = item.get("text")
+                    if isinstance(text, str):
+                        parts.append(text)
+                    elif item.get("type") == "text" and isinstance(item.get("content"), str):
+                        parts.append(item["content"])
+                elif isinstance(item, str):
+                    # Bare strings inside the list are taken verbatim.
+                    parts.append(item)
+            return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
+        return ""
+
+    @staticmethod
+    def _history_message_update(
+        *,
+        role: str,
+        text: str,
+    ) -> UserMessageChunk | AgentMessageChunk | None:
+        """Build an ACP history replay update for a user/assistant message.
+
+        Args:
+            role: Message role; only ``"user"`` and ``"assistant"`` are mapped.
+            text: Message text, wrapped in a single ``TextContentBlock``.
+
+        Returns:
+            A ``UserMessageChunk`` for ``"user"``, an ``AgentMessageChunk`` for
+            ``"assistant"``, or ``None`` for any other role.
+        """
+        block = TextContentBlock(type="text", text=text)
+        if role == "user":
+            return UserMessageChunk(
+                session_update="user_message_chunk",
+                content=block,
+            )
+        if role == "assistant":
+            return AgentMessageChunk(
+                session_update="agent_message_chunk",
+                content=block,
+            )
+        return None
+
+    async def _replay_session_history(self, state: SessionState) -> None:
+        """Send persisted user/assistant history to clients during session/load.
+
+        Zed's ACP history UI calls ``session/load`` after the user picks an item
+        from the Agents sidebar. The agent must then replay the full conversation
+        as ``user_message_chunk`` / ``agent_message_chunk`` notifications; merely
+        restoring server-side state makes Hermes remember context, but leaves the
+        editor looking like a clean thread.
+
+        Does nothing when there is no connection or no history. Messages with
+        other roles, or with no extractable text, are skipped. Replay is
+        best-effort: the first failed send is logged and ends the replay
+        without raising.
+        """
+        if not self._conn or not state.history:
+            return
+
+        for message in state.history:
+            role = str(message.get("role") or "")
+            # Skip every role other than user/assistant.
+            if role not in {"user", "assistant"}:
+                continue
+            text = self._history_message_text(message)
+            if not text:
+                continue
+            update = self._history_message_update(role=role, text=text)
+            if update is None:
+                continue
+            try:
+                await self._conn.session_update(session_id=state.session_id, update=update)
+            except Exception:
+                logger.warning(
+                    "Failed to replay ACP history for session %s",
+                    state.session_id,
+                    exc_info=True,
+                )
+                # Stop after the first failure instead of attempting the
+                # remaining messages.
+                return
+
    async def new_session(
        self,
        cwd: str,
@@ -405,6 +545,7 @@ class HermesACPAgent(acp.Agent):
            return None
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Loaded session %s", session_id)
+        await self._replay_session_history(state)
        self._schedule_available_commands_update(session_id)
        return LoadSessionResponse(models=self._build_model_state(state))

@@ -421,12 +562,16 @@ class HermesACPAgent(acp.Agent):
            state = self.session_manager.create_session(cwd=cwd)
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Resumed session %s", state.session_id)
+        await self._replay_session_history(state)
        self._schedule_available_commands_update(state.session_id)
        return ResumeSessionResponse(models=self._build_model_state(state))

    async def cancel(self, session_id: str, **kwargs: Any) -> None:
        state = self.session_manager.get_session(session_id)
        if state and state.cancel_event:
+            with state.runtime_lock:
+                if state.is_running and state.current_prompt_text:
+                    state.interrupted_prompt_text = state.current_prompt_text
            state.cancel_event.set()
            try:
                if getattr(state, "agent", None) and hasattr(state.agent, "interrupt"):
@@ -517,11 +662,51 @@ class HermesACPAgent(acp.Agent):
            return PromptResponse(stop_reason="refusal")

        user_text = _extract_text(prompt).strip()
-        if not user_text:
+        user_content = _content_blocks_to_openai_user_content(prompt)
+        has_content = bool(user_text) or (
+            isinstance(user_content, list) and bool(user_content)
+        )
+        if not has_content:
            return PromptResponse(stop_reason="end_turn")

-        # Intercept slash commands — handle locally without calling the LLM
-        if user_text.startswith("/"):
+        # /steer on an idle session has no in-flight tool call to inject into.
+        # Rewrite it so the payload runs as a normal user prompt, matching the
+        # gateway's behavior (gateway/run.py ~L4898). Two sub-cases:
+        #   1. Zed-interrupt salvage — a prior prompt was cancelled by the
+        #      client right before /steer arrived; replay it with the steer
+        #      text attached as explicit correction/guidance so the user's
+        #      in-flight work isn't lost.
+        #   2. Plain idle — no prior work to salvage; just run the steer
+        #      payload as a regular prompt. Without this, _cmd_steer would
+        #      silently append to state.queued_prompts and respond with
+        #      "No active turn — queued for the next turn", which looks like
+        #      /queue even though the user never typed /queue.
+        if isinstance(user_content, str) and user_text.startswith("/steer"):
+            steer_text = user_text.split(maxsplit=1)[1].strip() if len(user_text.split(maxsplit=1)) > 1 else ""
+            interrupted_prompt = ""
+            rewrite_idle = False
+            with state.runtime_lock:
+                if not state.is_running and steer_text:
+                    if state.interrupted_prompt_text:
+                        interrupted_prompt = state.interrupted_prompt_text
+                        state.interrupted_prompt_text = ""
+                    else:
+                        rewrite_idle = True
+            if interrupted_prompt:
+                user_text = (
+                    f"{interrupted_prompt}\n\n"
+                    f"User correction/guidance after interrupt: {steer_text}"
+                )
+                user_content = user_text
+            elif rewrite_idle:
+                user_text = steer_text
+                user_content = steer_text
+
+        # Intercept slash commands — handle locally without calling the LLM.
+        # Slash commands are text-only; if the client included images/resources,
+        # send the whole multimodal prompt to the agent instead of treating it as
+        # an ACP command.
+        if isinstance(user_content, str) and user_text.startswith("/"):
            response_text = self._handle_slash_command(user_text, state)
            if response_text is not None:
                if self._conn:
@@ -529,6 +714,24 @@ class HermesACPAgent(acp.Agent):
                    await self._conn.session_update(session_id, update)
                return PromptResponse(stop_reason="end_turn")

+        # If Zed sends another regular prompt while the same ACP session is
+        # still running, queue it instead of racing two AIAgent loops against
+        # the same state.history. /steer and /queue are handled above and can
+        # land immediately.
+        with state.runtime_lock:
+            if state.is_running:
+                queued_text = user_text or "[Image attachment]"
+                state.queued_prompts.append(queued_text)
+                depth = len(state.queued_prompts)
+                if self._conn:
+                    update = acp.update_agent_message_text(
+                        f"Queued for the next turn. ({depth} queued)"
+                    )
+                    await self._conn.session_update(session_id, update)
+                return PromptResponse(stop_reason="end_turn")
+            state.is_running = True
+            state.current_prompt_text = user_text or "[Image attachment]"
+
        logger.info("Prompt on session %s: %s", session_id, user_text[:100])

        conn = self._conn
@@ -604,9 +807,10 @@ class HermesACPAgent(acp.Agent):
            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
-                    user_message=user_text,
+                    user_message=user_content,
                    conversation_history=state.history,
                    task_id=session_id,
+                    persist_user_message=user_text or "[Image attachment]",
                )
                return result
            except Exception as e:
@@ -639,6 +843,9 @@ class HermesACPAgent(acp.Agent):
            result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
        except Exception:
            logger.exception("Executor error for session %s", session_id)
+            with state.runtime_lock:
+                state.is_running = False
+                state.current_prompt_text = ""
            return PromptResponse(stop_reason="end_turn")

        if result.get("messages"):
@@ -664,6 +871,28 @@ class HermesACPAgent(acp.Agent):
            update = acp.update_agent_message_text(final_response)
            await conn.session_update(session_id, update)

+        # Mark this turn idle before draining queued work so recursive prompt()
+        # calls can acquire the session. Queued turns are intentionally run as
+        # normal follow-up user prompts, preserving role alternation and history.
+        with state.runtime_lock:
+            state.is_running = False
+            state.current_prompt_text = ""
+
+        while True:
+            with state.runtime_lock:
+                if not state.queued_prompts:
+                    break
+                next_prompt = state.queued_prompts.pop(0)
+            if conn:
+                await conn.session_update(
+                    session_id,
+                    acp.update_user_message_text(next_prompt),
+                )
+            await self.prompt(
+                prompt=[TextContentBlock(type="text", text=next_prompt)],
+                session_id=session_id,
+            )
+
        usage = None
        if any(result.get(key) is not None for key in ("prompt_tokens", "completion_tokens", "total_tokens")):
            usage = Usage(
@@ -741,6 +970,8 @@ class HermesACPAgent(acp.Agent):
            "context": self._cmd_context,
            "reset": self._cmd_reset,
            "compact": self._cmd_compact,
+            "steer": self._cmd_steer,
+            "queue": self._cmd_queue,
            "version": self._cmd_version,
        }.get(cmd)

@@ -837,10 +1068,16 @@ class HermesACPAgent(acp.Agent):
            if not hasattr(agent, "_compress_context"):
                return "Context compression not available for this agent."

-            from agent.model_metadata import estimate_messages_tokens_rough
+            from agent.model_metadata import estimate_request_tokens_rough

            original_count = len(state.history)
-            approx_tokens = estimate_messages_tokens_rough(state.history)
+            # Include system prompt + tool schemas so the figure reflects real
+            # request pressure, not a transcript-only underestimate (#6217).
+            _sys_prompt = getattr(agent, "_cached_system_prompt", "") or ""
+            _tools = getattr(agent, "tools", None) or None
+            approx_tokens = estimate_request_tokens_rough(
+                state.history, system_prompt=_sys_prompt, tools=_tools
+            )
            original_session_db = getattr(agent, "_session_db", None)

            try:
@@ -860,7 +1097,13 @@ class HermesACPAgent(acp.Agent):
            self.session_manager.save_session(state.session_id)

            new_count = len(state.history)
-            new_tokens = estimate_messages_tokens_rough(state.history)
+            _sys_prompt_after = getattr(agent, "_cached_system_prompt", "") or _sys_prompt
+            _tools_after = getattr(agent, "tools", None) or _tools
+            new_tokens = estimate_request_tokens_rough(
+                state.history,
+                system_prompt=_sys_prompt_after,
+                tools=_tools_after,
+            )
            return (
                f"Context compressed: {original_count} -> {new_count} messages\n"
                f"~{approx_tokens:,} -> ~{new_tokens:,} tokens"
@@ -868,6 +1111,34 @@ class HermesACPAgent(acp.Agent):
        except Exception as e:
            return f"Compression failed: {e}"

+    def _cmd_steer(self, args: str, state: SessionState) -> str:
+        """Steer the running turn with *args*, or queue the text for the next turn."""
+        steer_text = args.strip()
+        if not steer_text:
+            return "Usage: /steer <guidance>"
+        if state.is_running and hasattr(state.agent, "steer"):
+            try:
+                if state.agent.steer(steer_text):
+                    preview = steer_text[:80] + ("..." if len(steer_text) > 80 else "")
+                    return f"⏩ Steer queued for the active turn: {preview}"
+            except Exception as exc:
+                logger.warning("ACP steer failed for session %s: %s", state.session_id, exc)
+                return f"⚠️ Steer failed: {exc}"
+        with state.runtime_lock:
+            state.queued_prompts.append(steer_text)
+            depth, running = len(state.queued_prompts), state.is_running  # picks wording
+        lead = "Could not steer the active turn" if running else "No active turn"
+        return f"{lead} — queued for the next turn. ({depth} queued)"
+
+    def _cmd_queue(self, args: str, state: SessionState) -> str:
+        """Append *args* to the session's prompt queue and report the queue depth."""
+        prompt_text = args.strip()
+        if not prompt_text:
+            return "Usage: /queue <prompt>"
+        with state.runtime_lock:
+            state.queued_prompts.append(prompt_text)
+            return f"Queued for the next turn. ({len(state.queued_prompts)} queued)"
+
    def _cmd_version(self, args: str, state: SessionState) -> str:
        return f"Hermes Agent v{HERMES_VERSION}"

@@ -26,6 +26,33 @@ from typing import Any, Dict, List, Optional
 logger = logging.getLogger(__name__)


+def _win_path_to_wsl(path: str) -> str | None:
+    """Map ``X:\\dir`` / ``X:/dir`` to ``/mnt/x/dir``; return None for non-drive paths."""
+    drive_match = re.match(r"^([A-Za-z]):[\\/](.*)$", path)
+    if drive_match is None:
+        return None
+    letter, rest = drive_match.groups()
+    # Lower-case the drive letter and normalise separators to forward slashes.
+    return "/mnt/" + letter.lower() + "/" + rest.replace("\\", "/")
+
+
+def _translate_acp_cwd(cwd: str) -> str:
+    """Return the WSL mount form of a Windows-style *cwd* when running under WSL.
+
+    A Windows ACP client may start ``hermes acp`` inside WSL yet still send the
+    editor workspace as a drive path such as ``E:\\Projects``; that path is
+    mapped to ``/mnt/<drive>/...`` so the WSL process can use it.  Outside WSL,
+    or when *cwd* is not a drive path, the value is returned unchanged.
+    """
+    from hermes_constants import is_wsl
+
+    if is_wsl():
+        wsl_path = _win_path_to_wsl(str(cwd))
+        if wsl_path is not None:
+            return wsl_path
+    return cwd
+
+
 def _normalize_cwd_for_compare(cwd: str | None) -> str:
    raw = str(cwd or ".").strip()
    if not raw:
@@ -34,11 +61,9 @@ def _normalize_cwd_for_compare(cwd: str | None) -> str:

    # Normalize Windows drive paths into the equivalent WSL mount form so
    # ACP history filters match the same workspace across Windows and WSL.
-    match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
-    if match:
-        drive = match.group(1).lower()
-        tail = match.group(2).replace("\\", "/")
-        expanded = f"/mnt/{drive}/{tail}"
+    translated = _win_path_to_wsl(expanded)
+    if translated is not None:
+        expanded = translated
    elif re.match(r"^/mnt/[A-Za-z]/", expanded):
        expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"

@@ -96,12 +121,18 @@ def _acp_stderr_print(*args, **kwargs) -> None:


 def _register_task_cwd(task_id: str, cwd: str) -> None:
-    """Bind a task/session id to the editor's working directory for tools."""
+    """Bind a task/session id to the editor's working directory for tools.
+
+    Zed can launch Hermes from a Windows workspace while the ACP process runs
+    inside WSL. In that case ACP sends cwd as e.g. ``E:\\Projects\\POTI``;
+    local tools need the WSL mount equivalent or subprocess creation fails
+    before the command can run.
+    """
    if not task_id:
        return
    try:
        from tools.terminal_tool import register_task_env_overrides
-        register_task_env_overrides(task_id, {"cwd": cwd})
+        register_task_env_overrides(task_id, {"cwd": _translate_acp_cwd(cwd)})
    except Exception:
        logger.debug("Failed to register ACP task cwd override", exc_info=True)

@@ -145,6 +176,11 @@ class SessionState:
    model: str = ""
    history: List[Dict[str, Any]] = field(default_factory=list)
    cancel_event: Any = None  # threading.Event
+    is_running: bool = False
+    queued_prompts: List[str] = field(default_factory=list)
+    runtime_lock: Any = field(default_factory=Lock)
+    current_prompt_text: str = ""
+    interrupted_prompt_text: str = ""


 class SessionManager:
@@ -175,6 +211,7 @@ class SessionManager:
        """Create a new session with a unique ID and a fresh AIAgent."""
        import threading

+        cwd = _translate_acp_cwd(cwd)
        session_id = str(uuid.uuid4())
        agent = self._make_agent(session_id=session_id, cwd=cwd)
        state = SessionState(
@@ -217,6 +254,7 @@ class SessionManager:
        """Deep-copy a session's history into a new session."""
        import threading

+        cwd = _translate_acp_cwd(cwd)
        original = self.get_session(session_id)  # checks DB too
        if original is None:
            return None
@@ -318,6 +356,7 @@ class SessionManager:

    def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
        """Update the working directory for a session and its tool overrides."""
+        cwd = _translate_acp_cwd(cwd)
        state = self.get_session(session_id)  # checks DB too
        if state is None:
            return None
@@ -20,7 +20,7 @@ from pathlib import Path

 from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple
-from utils import normalize_proxy_env_vars
+from utils import base_url_host_matches, normalize_proxy_env_vars

 # NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
 # ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
@@ -257,11 +257,10 @@ _OAUTH_ONLY_BETAS = [
    "oauth-2025-04-20",
 ]

-# Claude Code version — sent on OAuth token-exchange / refresh requests
-# (platform.claude.com/v1/oauth/token) as the client's user-agent. Anthropic's
-# OAuth flow validates the UA and may reject requests with a version that's
-# too old, so detecting dynamically keeps users on a current Claude Code
-# install from hitting stale-version errors during login/refresh.
+# Claude Code identity — required for OAuth requests to be routed correctly.
+# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
+# The version must stay reasonably current — Anthropic rejects OAuth requests
+# when the spoofed user-agent version is too far behind the actual release.
 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
 _claude_code_version_cache: Optional[str] = None

@@ -269,9 +268,9 @@ _claude_code_version_cache: Optional[str] = None
 def _detect_claude_code_version() -> str:
    """Detect the installed Claude Code version, fall back to a static constant.

-    Used only by the OAuth token-exchange / refresh flow
-    (``platform.claude.com/v1/oauth/token``). The Messages API client no
-    longer sends a claude-cli user-agent.
+    Anthropic's OAuth infrastructure validates the user-agent version and may
+    reject requests with a version that's too old.  Detecting dynamically means
+    users who keep Claude Code updated never hit stale-version 400s.
    """
    import subprocess as _sp

@@ -291,13 +290,12 @@ def _detect_claude_code_version() -> str:
    return _CLAUDE_CODE_VERSION_FALLBACK


-def _get_claude_code_version() -> str:
-    """Lazily detect the installed Claude Code version for OAuth flow headers.
+_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
+_MCP_TOOL_PREFIX = "mcp_"

-    Used only on the OAuth token-exchange and refresh endpoints
-    (``platform.claude.com/v1/oauth/token``). The Messages API client does
-    not send a claude-cli user-agent.
-    """
+
+def _get_claude_code_version() -> str:
+    """Lazily detect the installed Claude Code version when OAuth headers need it."""
    global _claude_code_version_cache
    if _claude_code_version_cache is None:
        _claude_code_version_cache = _detect_claude_code_version()
@@ -367,6 +365,88 @@ def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")


+# Model-name prefixes that identify the Kimi / Moonshot family.  Covers
+# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
+# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
+# Matched case-insensitively by ``_model_name_is_kimi_family`` after it drops
+# any ``provider/vendor/`` prefix, so a caller's ``provider/vendor/model`` slug
+# is handled the same as a bare name.
+_KIMI_FAMILY_MODEL_PREFIXES = (
+    "kimi-", "kimi_",
+    "moonshot-", "moonshot_",
+    "k1.", "k1-",
+    "k2.", "k2-",
+    "k25", "k2.5",
+)
+
+
+def _model_name_is_kimi_family(model: str | None) -> bool:
+    """Return True when *model* names a Kimi / Moonshot family model.
+
+    Non-string and blank values yield False; any vendor prefix is ignored.
+    """
+    if not isinstance(model, str):
+        return False
+    # Test only the part after the last "/" (``moonshotai/kimi-k2.5`` → ``kimi-k2.5``).
+    bare_name = model.strip().lower().rpartition("/")[2]
+    return bare_name.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
+
+
+def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
+    """Report whether requests target a Kimi / Moonshot Anthropic-Messages upstream.
+
+    A superset of ``_is_kimi_coding_endpoint``.  The checks run in this order
+    and the first hit wins:
+
+    1. *base_url* is Kimi's official ``/coding`` URL (the legacy check).
+    2. *base_url*'s host matches ``api.kimi.com``, ``moonshot.ai`` or
+       ``moonshot.cn``.
+    3. *model* belongs to the Kimi / Moonshot family (``kimi-*``,
+       ``moonshot-*``, ``k1.*``, ``k2.*``, …).  This covers custom or proxied
+       gateways configured with ``api_mode: anthropic_messages``: whatever the
+       gateway's hostname, the upstream still applies Kimi's thinking rules
+       (reasoning_content required on every replayed tool-call message).
+
+    Callers use the result to decide whether to drop Anthropic's ``thinking``
+    kwarg and whether to keep unsigned, reasoning_content-derived thinking
+    blocks on replay.  See hermes-agent#13848, #17057.
+    """
+    if _is_kimi_coding_endpoint(base_url):
+        return True
+    host_hit = any(
+        base_url_host_matches(base_url or "", domain)
+        for domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn")
+    )
+    return host_hit or _model_name_is_kimi_family(model)
+
+
+def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
+    """Report whether *base_url* is DeepSeek's Anthropic-compatible endpoint.
+
+    The ``/anthropic`` route on ``api.deepseek.com`` speaks the Anthropic
+    Messages protocol.  With thinking mode enabled it expects the
+    ``thinking`` blocks of earlier assistant turns to be sent back on later
+    requests; the generic third-party path strips them, which yields
+    HTTP 400::
+
+        The content[].thinking in the thinking mode must be passed back
+        to the API.
+
+    DeepSeek's published compatibility matrix describes these blocks as
+    unsigned (no Anthropic signature, no ``redacted_thinking``), so the
+    endpoint gets the strip-signed / keep-unsigned policy also used for Kimi.
+    Both the host and an ``/anthropic`` substring in the URL must match, so
+    the OpenAI-compatible ``api.deepseek.com`` base URL is not misclassified.
+    See hermes-agent#16748.
+    """
+    on_deepseek_host = base_url_host_matches(base_url or "", "api.deepseek.com")
+    if on_deepseek_host:
+        url_text = _normalize_base_url_text(base_url)
+        if url_text:
+            return "/anthropic" in url_text.rstrip("/").lower()
+    return False
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -381,7 +461,11 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
    return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))


-def _common_betas_for_base_url(base_url: str | None) -> list[str]:
+def _common_betas_for_base_url(
+    base_url: str | None,
+    *,
+    drop_context_1m_beta: bool = False,
+) -> list[str]:
    """Return the beta headers that are safe for the configured endpoint.

    MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
@@ -392,14 +476,30 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
    endpoints — MiniMax hosts its own models, not Claude, so the header is
    irrelevant at best and risks request rejection at worst.
+
+    ``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
+    otherwise-unrelated endpoints. The OAuth retry path flips this flag after
+    a subscription rejects the beta with
+    "The long context beta is not yet available for this subscription" so
+    subsequent requests in the same session don't repeat the probe. See the
+    reactive recovery loop in ``run_agent.py`` and issue-comment history on
+    PR #17680 for the full rationale.
    """
    if _requires_bearer_auth(base_url):
        _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
        return [b for b in _COMMON_BETAS if b not in _stripped]
+    if drop_context_1m_beta:
+        return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
    return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
+def build_anthropic_client(
+    api_key: str,
+    base_url: str = None,
+    timeout: float = None,
+    *,
+    drop_context_1m_beta: bool = False,
+):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
@@ -408,6 +508,12 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
    Anthropic-compatible providers respect the same knob as OpenAI-wire
    providers.

+    ``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
+    client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
+    path in ``run_agent.py`` when a subscription rejects the beta; leave at
+    its default on fresh clients so 1M-capable subscriptions keep the
+    capability.
+
    Returns an anthropic.Anthropic instance.
    """
    _anthropic_sdk = _get_anthropic_sdk()
@@ -437,7 +543,10 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
            kwargs["default_query"] = {"api-version": "2025-04-15"}
        else:
            kwargs["base_url"] = normalized_base_url
-    common_betas = _common_betas_for_base_url(normalized_base_url)
+    common_betas = _common_betas_for_base_url(
+        normalized_base_url,
+        drop_context_1m_beta=drop_context_1m_beta,
+    )

    if _is_kimi_coding_endpoint(base_url):
        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
@@ -467,21 +576,15 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_oauth_token(api_key):
-        # OAuth access token / setup-token → Bearer auth + OAuth-only betas.
-        # The OAuth-specific beta headers are still required by Anthropic's
-        # OAuth-gated Messages API path; the Claude Code user-agent / x-app
-        # spoofing is deliberately NOT sent — Hermes identifies as itself.
-        #
-        # ``context-1m-2025-08-07`` is stripped here: Anthropic rejects
-        # OAuth requests that carry it with
-        #   "This authentication style is incompatible with the long
-        #    context beta header."
-        # Subscription-gated OAuth traffic gets the 200K default window.
-        oauth_safe_common = [b for b in common_betas if b != _CONTEXT_1M_BETA]
-        all_betas = oauth_safe_common + _OAUTH_ONLY_BETAS
+        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
+        # Anthropic routes OAuth requests based on user-agent and headers;
+        # without Claude Code's fingerprint, requests get intermittent 500s.
+        all_betas = common_betas + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
+            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
+            "x-app": "cli",
        }
    else:
        # Regular API key → x-api-key header + common betas
@@ -825,45 +928,17 @@ def resolve_anthropic_token() -> Optional[str]:
    """Resolve an Anthropic token from all available sources.

    Priority:
-      1. Hermes credential pool (``~/.hermes/auth.json`` →
-         ``credential_pool.anthropic``) — OAuth tokens minted by Hermes'
-         own PKCE login flow. Entries are auto-refreshed when near
-         expiry. Env-sourced pool entries (``source="env:..."``) are
-         skipped here so the env-var priority logic below still runs.
-      2. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
-      3. CLAUDE_CODE_OAUTH_TOKEN env var
-      4. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
+      1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
+      2. CLAUDE_CODE_OAUTH_TOKEN env var
+      3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
         — with automatic refresh if expired and a refresh token is available
-      5. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
+      4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)

    Returns the token string or None.
    """
-    # 1. Hermes credential pool — the live source of truth for tokens
-    #    minted via ``hermes login anthropic`` / the dashboard PKCE flow.
-    #    ``select()`` picks the best available entry and refreshes it if
-    #    it's near expiry, so callers always get a fresh token.
-    #
-    #    Skip env-sourced pool entries (``env:ANTHROPIC_TOKEN``, etc.) —
-    #    those are passthroughs of the env var, and the env-var branches
-    #    below have richer priority logic (``_prefer_refreshable_claude_code_token``)
-    #    that can upgrade a static env OAuth token to a refreshed
-    #    Claude Code token. Letting the pool win here would short-circuit
-    #    that upgrade.
-    try:
-        from agent.credential_pool import load_pool
-        pool = load_pool("anthropic")
-        entry = pool.select()
-        if entry and entry.access_token and not entry.source.startswith("env:"):
-            return entry.access_token
-    except Exception as exc:
-        # Pool lookup is best-effort — fall through to env/file sources
-        # if anything goes wrong (e.g. auth.json corruption during a
-        # concurrent write).
-        logger.debug("Credential-pool lookup failed for anthropic: %s", exc)
-
    creds = read_claude_code_credentials()

-    # 2. Hermes-managed OAuth/setup token env var
+    # 1. Hermes-managed OAuth/setup token env var
    token = os.getenv("ANTHROPIC_TOKEN", "").strip()
    if token:
        preferred = _prefer_refreshable_claude_code_token(token, creds)
@@ -871,7 +946,7 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return token

-    # 3. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
+    # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
    cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
    if cc_token:
        preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
@@ -879,12 +954,12 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return cc_token

-    # 4. Claude Code credential file
+    # 3. Claude Code credential file
    resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
    if resolved_claude_token:
        return resolved_claude_token

-    # 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
+    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    # This remains as a compatibility fallback for pre-migration Hermes configs.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if api_key:
@@ -1112,9 +1187,12 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
        # These must not be converted to hyphens.  See issue #12295.
        if _is_bedrock_model_id(model):
            return model
-        # OpenRouter uses dots for version separators (claude-opus-4.6),
-        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
-        model = model.replace(".", "-")
+        # Only convert dots to hyphens for Anthropic/Claude models.
+        # Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
+        # as part of their canonical names.  See issue #17171.
+        _lower = model.lower()
+        if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
+            model = model.replace(".", "-")
    return model


@@ -1301,6 +1379,7 @@ def _convert_content_to_anthropic(content: Any) -> Any:
 def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
+    model: str | None = None,
 ) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

@@ -1312,6 +1391,12 @@ def convert_messages_to_anthropic(
    endpoint, all thinking block signatures are stripped.  Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".
+
+    When *model* is provided and matches the Kimi / Moonshot family (or
+    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
+    synthesised from ``reasoning_content`` are preserved on replayed
+    assistant tool-call messages — Kimi requires the field to exist, even
+    if empty.
    """
    system = None
    result = []
@@ -1540,7 +1625,16 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    _is_kimi = _is_kimi_coding_endpoint(base_url)
+    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
+    # Anthropic Messages protocol upstream but require that thinking blocks
+    # synthesised from reasoning_content round-trip on subsequent turns when
+    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
+    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
+    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
+    _preserve_unsigned_thinking = (
+        _is_kimi_family_endpoint(base_url, model)
+        or _is_deepseek_anthropic_endpoint(base_url)
+    )

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1552,22 +1646,22 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_kimi:
-            # Kimi's /coding endpoint enables thinking server-side and
-            # requires unsigned thinking blocks on replayed assistant
-            # tool-call messages.  Strip signed Anthropic blocks (Kimi
-            # can't validate signatures) but preserve the unsigned ones
-            # we synthesised from reasoning_content above.
+        if _preserve_unsigned_thinking:
+            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
+            # thinking server-side and require unsigned thinking blocks on
+            # replayed assistant tool-call messages.  Strip signed Anthropic
+            # blocks (neither upstream can validate Anthropic signatures) but
+            # preserve the unsigned ones we synthesised from reasoning_content.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — Kimi can't validate, strip
+                    # Anthropic-signed block — upstream can't validate, strip
                    continue
                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: Kimi needs it for message-history validation.
+                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
@@ -1624,6 +1718,7 @@ def build_anthropic_kwargs(
    context_length: Optional[int] = None,
    base_url: str | None = None,
    fast_mode: bool = False,
+    drop_context_1m_beta: bool = False,
 ) -> Dict[str, Any]:
    """Build kwargs for anthropic.messages.create().

@@ -1649,10 +1744,8 @@ def build_anthropic_kwargs(
    "max_tokens too large given prompt" errors and retry with a smaller cap
    (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).

-    When *is_oauth* is True, enables the OAuth-only beta headers required by
-    Anthropic's subscription-gated Messages endpoint (fast-mode branch only;
-    the default headers are set by build_anthropic_client). No system-prompt
-    or tool-name rewriting is performed — Hermes identifies as itself.
+    When *is_oauth* is True, applies Claude Code compatibility transforms:
+    system prompt prefix, tool name prefixing, and prompt sanitization.

    When *preserve_dots* is True, model name dots are not converted to hyphens
    (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
@@ -1665,7 +1758,9 @@ def build_anthropic_kwargs(
    Currently only supported on native Anthropic endpoints (not third-party
    compatible ones).
    """
-    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
+    system, anthropic_messages = convert_messages_to_anthropic(
+        messages, base_url=base_url, model=model
+    )
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
@@ -1685,11 +1780,45 @@ def build_anthropic_kwargs(
    if context_length and effective_max_tokens > context_length:
        effective_max_tokens = max(context_length - 1, 1)

-    # OAuth requests go through Anthropic's subscription-gated Messages
-    # endpoint but otherwise send the real Hermes system prompt and real
-    # Hermes tool names — the only OAuth-specific wire differences are
-    # Bearer auth and the _OAUTH_ONLY_BETAS header (applied in
-    # build_anthropic_client and the fast-mode branch below).
+    # ── OAuth: Claude Code identity ──────────────────────────────────
+    if is_oauth:
+        # 1. Prepend Claude Code system prompt identity
+        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
+        if isinstance(system, list):
+            system = [cc_block] + system
+        elif isinstance(system, str) and system:
+            system = [cc_block, {"type": "text", "text": system}]
+        else:
+            system = [cc_block]
+
+        # 2. Sanitize system prompt — replace product name references
+        #    to avoid Anthropic's server-side content filters.
+        for block in system:
+            if isinstance(block, dict) and block.get("type") == "text":
+                text = block.get("text", "")
+                text = text.replace("Hermes Agent", "Claude Code")
+                text = text.replace("Hermes agent", "Claude Code")
+                text = text.replace("hermes-agent", "claude-code")
+                text = text.replace("Nous Research", "Anthropic")
+                block["text"] = text
+
+        # 3. Prefix tool names with mcp_ (Claude Code convention)
+        if anthropic_tools:
+            for tool in anthropic_tools:
+                if "name" in tool:
+                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
+
+        # 4. Prefix tool names in message history (tool_use blocks; tool_result refers by ID)
+        for msg in anthropic_messages:
+            content = msg.get("content")
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "tool_use" and "name" in block:
+                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
+                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
+                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
+                            pass  # tool_result uses ID, not name

    kwargs: Dict[str, Any] = {
        "model": model,
@@ -1737,7 +1866,7 @@ def build_anthropic_kwargs(
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    _is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
@@ -1778,11 +1907,11 @@ def build_anthropic_kwargs(
        kwargs.setdefault("extra_body", {})["speed"] = "fast"
        # Build extra_headers with ALL applicable betas (the per-request
        # extra_headers override the client-level anthropic-beta header).
-        betas = list(_common_betas_for_base_url(base_url))
+        betas = list(_common_betas_for_base_url(
+            base_url,
+            drop_context_1m_beta=drop_context_1m_beta,
+        ))
        if is_oauth:
-            # Strip context-1m — incompatible with OAuth auth. See matching
-            # comment in build_anthropic_client().
-            betas = [b for b in betas if b != _CONTEXT_1M_BETA]
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
@@ -5,11 +5,11 @@ session search, web extraction, vision analysis, browser vision) picks up
 the best available backend without duplicating fallback logic.

 Resolution order for text tasks (auto mode):
-  1. OpenRouter  (OPENROUTER_API_KEY)
-  2. Nous Portal (~/.hermes/auth.json active provider)
-  3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
-  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
-     wrapped to look like a chat.completions client)
+  1. User's main provider + main model (used regardless of provider type —
+     aggregators, direct API-key providers, native Anthropic, Codex, etc.)
+  2. OpenRouter  (OPENROUTER_API_KEY)
+  3. Nous Portal (~/.hermes/auth.json active provider)
+  4. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
  5. Native Anthropic
  6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
  7. None
@@ -18,10 +18,16 @@ Resolution order for vision/multimodal tasks (auto mode):
  1. Selected main provider, if it is one of the supported vision backends below
  2. OpenRouter
  3. Nous Portal
-  4. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
-  5. Native Anthropic
-  6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
-  7. None
+  4. Native Anthropic
+  5. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
+  6. None
+
+Codex OAuth (ChatGPT-account auth) is intentionally NOT in either
+fallback chain: OpenAI gates this endpoint behind an undocumented,
+shifting model allow-list, so "just try Codex with a hardcoded model"
+rots on its own.  Codex is used only when the user's main provider *is*
+openai-codex (Step 1 above) or when a caller explicitly requests it with
+a model (auxiliary.<task>.provider + auxiliary.<task>.model).

 Per-task overrides are configured in config.yaml under the ``auxiliary:`` section
 (e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
@@ -101,6 +107,14 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
 logger = logging.getLogger(__name__)


+def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
+    """Return False instead of raising when a patched symbol is not a type."""
+    try:
+        return isinstance(obj, maybe_type)
+    except TypeError:
+        return False
+
+
 def _extract_url_query_params(url: str):
    """Extract query params from URL, return (clean_url, default_query dict or None)."""
    parsed = urlparse(url)
@@ -210,6 +224,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "gmi": "google/gemini-3.1-flash-lite-preview",
    "minimax": "MiniMax-M2.7",
+    "minimax-oauth": "MiniMax-M2.7-highspeed",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
    "ai-gateway": "google/gemini-3-flash",
@@ -229,6 +244,21 @@ _PROVIDER_VISION_MODELS: Dict[str, str] = {
    "zai": "glm-5v-turbo",
 }

+# Providers whose endpoint does not accept image input, even though the
+# provider's broader ecosystem has vision models available elsewhere.  When
+# `auxiliary.vision.provider: auto` sees one of these as the main provider,
+# it must skip straight to the aggregator chain instead of returning a client
+# that will 404 on every vision request.
+#
+# kimi-coding / kimi-coding-cn: the Kimi Coding Plan routes through
+# api.kimi.com/coding (Anthropic Messages wire) which Kimi's own docs
+# describe as having no image_in capability. Vision lives on the separate
+# Kimi Platform (api.moonshot.ai, OpenAI-wire, pay-as-you-go).  See #17076.
+_PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
+    "kimi-coding",
+    "kimi-coding-cn",
+})
+
 # OpenRouter app attribution headers
 _OR_HEADERS = {
    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
@@ -261,12 +291,14 @@ _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"

-# Codex fallback: uses the Responses API (the only endpoint the Codex
-# OAuth token can access) with a fast model for auxiliary tasks.
-# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these
-# auxiliary flows, while gpt-5.2-codex remains broadly available and supports
-# vision via Responses.
-_CODEX_AUX_MODEL = "gpt-5.2-codex"
+# Codex OAuth endpoint used when a caller explicitly requests
+# provider="openai-codex".  There is deliberately no hardcoded default
+# model: the set of models OpenAI accepts on this endpoint for
+# ChatGPT-account auth is an undocumented, shifting allow-list, and
+# pinning one here has drifted silently twice (gpt-5.3-codex → gpt-5.2-codex
+# → gpt-5.4 over 6 weeks in early 2026).  Callers must pass the model
+# they want explicitly (from config.yaml model.model, auxiliary.<task>.model,
+# or the user's active Codex model selection).
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


@@ -323,6 +355,13 @@ def _to_openai_base_url(base_url: str) -> str:
        rewritten = url[: -len("/anthropic")] + "/v1"
        logger.debug("Auxiliary client: rewrote base URL %s → %s", url, rewritten)
        return rewritten
+    if "api.kimi.com" in url and url.endswith("/coding"):
+        # Kimi Code uses /coding/v1/messages for the Anthropic SDK (appends /v1/messages)
+        # but /coding/v1/chat/completions for the OpenAI SDK (appends /chat/completions).
+        # Without /v1 here, the OpenAI SDK hits /coding/chat/completions — a 404.
+        rewritten = url + "/v1"
+        logger.debug("Auxiliary client: rewrote Kimi base URL %s → %s", url, rewritten)
+        return rewritten
    return url


@@ -713,7 +752,9 @@ class _AnthropicCompletionsAdapter:

        response = self._client.messages.create(**anthropic_kwargs)
        _transport = get_transport("anthropic_messages")
-        _nr = _transport.normalize_response(response)
+        _nr = _transport.normalize_response(
+            response, strip_tool_prefix=self._is_oauth
+        )

        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
        # .function.arguments) via properties, so no wrapping needed.
@@ -843,20 +884,20 @@ def _maybe_wrap_anthropic(
    - The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
    """
    # Already wrapped — don't double-wrap.
-    if isinstance(client_obj, AnthropicAuxiliaryClient):
+    if _safe_isinstance(client_obj, AnthropicAuxiliaryClient):
        return client_obj
    # Other specialized adapters we should never re-dispatch.
-    if isinstance(client_obj, CodexAuxiliaryClient):
+    if _safe_isinstance(client_obj, CodexAuxiliaryClient):
        return client_obj
    try:
        from agent.gemini_native_adapter import GeminiNativeClient
-        if isinstance(client_obj, GeminiNativeClient):
+        if _safe_isinstance(client_obj, GeminiNativeClient):
            return client_obj
    except ImportError:
        pass
    try:
        from agent.copilot_acp_client import CopilotACPClient
-        if isinstance(client_obj, CopilotACPClient):
+        if _safe_isinstance(client_obj, CopilotACPClient):
            return client_obj
    except ImportError:
        pass
@@ -1052,9 +1093,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            if not api_key:
                continue

-            base_url = _to_openai_base_url(
-                _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
-            )
+            raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
+            base_url = _to_openai_base_url(raw_base_url)
            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
            if model is None:
                continue  # skip provider if we don't know a valid aux model
@@ -1072,7 +1112,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

                extra["default_headers"] = copilot_default_headers()
            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
-            _client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
+            _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
            return _client, model

        creds = resolve_api_key_provider_credentials(provider_id)
@@ -1080,9 +1120,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        if not api_key:
            continue

-        base_url = _to_openai_base_url(
-            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
-        )
+        raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
+        base_url = _to_openai_base_url(raw_base_url)
        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
        if model is None:
            continue  # skip provider if we don't know a valid aux model
@@ -1100,7 +1139,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

            extra["default_headers"] = copilot_default_headers()
        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
-        _client = _maybe_wrap_anthropic(_client, model, api_key, base_url)
+        _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
        return _client, model

    return None, None
@@ -1394,7 +1433,23 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
    return _fallback_client, model


-def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
+def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
+    """Build a CodexAuxiliaryClient for an explicitly-requested model.
+
+    There is no auto-selection of the Codex model: the ChatGPT-account
+    Codex endpoint's accepted model list is an undocumented, drifting
+    allow-list, so any hardcoded default we pick goes stale.  The caller
+    is responsible for passing the model (e.g. from the user's own
+    ``model.model`` or ``auxiliary.<task>.model`` config).
+
+    Returns (None, None) when ``model`` is empty or no Codex OAuth token is available.
+    """
+    if not model:
+        logger.warning(
+            "Auxiliary client: openai-codex requested without a model; "
+            "pass model explicitly (auxiliary.<task>.model in config.yaml)."
+        )
+        return None, None
    pool_present, entry = _select_pool_entry("openai-codex")
    if pool_present:
        codex_token = _pool_runtime_api_key(entry)
@@ -1410,13 +1465,13 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
        if not codex_token:
            return None, None
        base_url = _CODEX_AUX_BASE_URL
-    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
+    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
    real_client = OpenAI(
        api_key=codex_token,
        base_url=base_url,
        default_headers=_codex_cloudflare_headers(codex_token),
    )
-    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
+    return CodexAuxiliaryClient(real_client, model), model


 def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
@@ -1471,7 +1526,6 @@ _AUTO_PROVIDER_LABELS = {
    "_try_openrouter": "openrouter",
    "_try_nous": "nous",
    "_try_custom_endpoint": "local/custom",
-    "_try_codex": "openai-codex",
    "_resolve_api_key_provider": "api-key",
 }

@@ -1498,12 +1552,18 @@ def _get_provider_chain() -> List[tuple]:

    Built at call time (not module level) so that test patches
    on the ``_try_*`` functions are picked up correctly.
+
+    NOTE: ``openai-codex`` is deliberately NOT in this chain.  The
+    ChatGPT-account Codex endpoint only accepts a shifting, undocumented
+    allow-list of model IDs, so falling back to it with a guessed model
+    fails more often than not.  Codex is used only when the user's main
+    provider *is* openai-codex (see Step 1 of ``_resolve_auto``) or when
+    a caller explicitly requests it with a model.
    """
    return [
        ("openrouter", _try_openrouter),
        ("nous", _try_nous),
        ("local/custom", _try_custom_endpoint),
-        ("openai-codex", _try_codex),
        ("api-key", _resolve_api_key_provider),
    ]

@@ -1917,6 +1977,12 @@ def resolve_provider_client(
        (client, resolved_model) or (None, None) if auth is unavailable.
    """
    _validate_proxy_env_urls()
+    # Preserve the original provider name before alias normalization so a
+    # user-declared ``custom_providers`` entry whose name coincidentally
+    # matches a built-in alias (e.g. user names their custom provider "kimi"
+    # which aliases to "kimi-coding") is still reachable via the named-custom
+    # branch below.
+    original_provider = (provider or "").strip().lower()
    # Normalise aliases
    provider = _normalize_aux_provider(provider)

@@ -2019,6 +2085,13 @@ def resolve_provider_client(

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
    if provider == "openai-codex":
+        if not model:
+            logger.warning(
+                "resolve_provider_client: openai-codex requested without a "
+                "model; pass model explicitly (e.g. model.model in config.yaml "
+                "or auxiliary.<task>.model for per-task aux routing)."
+            )
+            return None, None
        if raw_codex:
            # Return the raw OpenAI client for callers that need direct
            # access to responses.stream() (e.g., the main agent loop).
@@ -2027,7 +2100,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: openai-codex requested "
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
-            final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
+            final_model = _normalize_resolved_model(model, provider)
            raw_client = OpenAI(
                api_key=codex_token,
                base_url=_CODEX_AUX_BASE_URL,
@@ -2035,7 +2108,7 @@ def resolve_provider_client(
            )
            return (raw_client, final_model)
        # Standard path: wrap in CodexAuxiliaryClient adapter
-        client, default = _try_codex()
+        client, default = _build_codex_client(model)
        if client is None:
            logger.warning("resolve_provider_client: openai-codex requested "
                           "but no Codex OAuth token found (run: hermes model)")
@@ -2078,9 +2151,9 @@ def resolve_provider_client(
            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
-        # Try custom first, then codex, then API-key providers
-        for try_fn in (_try_custom_endpoint, _try_codex,
-                       _resolve_api_key_provider):
+        # Try custom first, then API-key providers (Codex excluded here:
+        # falling through to Codex with no model is a stale-constant trap).
+        for try_fn in (_try_custom_endpoint, _resolve_api_key_provider):
            client, default = try_fn()
            if client is not None:
                final_model = _normalize_resolved_model(model or default, provider)
@@ -2096,7 +2169,18 @@ def resolve_provider_client(
    # ── Named custom providers (config.yaml providers dict / custom_providers list) ───
    try:
        from hermes_cli.runtime_provider import _get_named_custom_provider
-        custom_entry = _get_named_custom_provider(provider)
+        # When the raw requested name is an alias (``kimi`` → ``kimi-coding``)
+        # and the user defined a ``custom_providers`` entry under that alias
+        # name, the custom entry is the intended target — the built-in alias
+        # rewriting would otherwise hijack the request.  Only preferred when
+        # the raw name is an alias (not a canonical provider name) so custom
+        # entries that coincidentally match a canonical provider (e.g. ``nous``)
+        # still defer to the built-in per `_get_named_custom_provider`'s guard.
+        custom_entry = None
+        if original_provider and original_provider != provider:
+            custom_entry = _get_named_custom_provider(original_provider)
+        if custom_entry is None:
+            custom_entry = _get_named_custom_provider(provider)
        if custom_entry:
            custom_base = custom_entry.get("base_url", "").strip()
            custom_key = custom_entry.get("api_key", "").strip()
@@ -2122,8 +2206,10 @@ def resolve_provider_client(
                # Anthropic fallback SDK still sees the original URL.
                if entry_api_mode == "anthropic_messages":
                    openai_base = custom_base
+                    raw_base_for_wrap = custom_base
                else:
                    openai_base = _to_openai_base_url(custom_base)
+                    raw_base_for_wrap = custom_base
                _clean_base2, _dq2 = _extract_url_query_params(openai_base)
                _extra2 = {"default_query": _dq2} if _dq2 else {}
                logger.debug(
@@ -2167,7 +2253,7 @@ def resolve_provider_client(
                ):
                    client = CodexAuxiliaryClient(client, final_model)
                else:
-                    client = _wrap_if_needed(client, final_model, openai_base, custom_key)
+                    client = _wrap_if_needed(client, final_model, raw_base_for_wrap, custom_key)
                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
            logger.warning(
@@ -2204,6 +2290,12 @@ def resolve_provider_client(

        creds = resolve_api_key_provider_credentials(provider)
        api_key = str(creds.get("api_key", "")).strip()
+        # Honour an explicit api_key override (e.g. from a fallback_model entry
+        # or a custom_providers entry) so callers that pass an explicit
+        # credential can authenticate against endpoints where no built-in
+        # credential is registered for this provider alias.
+        if explicit_api_key:
+            api_key = explicit_api_key.strip() or api_key
        if not api_key:
            tried_sources = list(pconfig.api_key_env_vars)
            if provider == "copilot":
@@ -2213,9 +2305,13 @@ def resolve_provider_client(
                         provider, ", ".join(tried_sources))
            return None, None

-        base_url = _to_openai_base_url(
-            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
-        )
+        raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
+        base_url = _to_openai_base_url(raw_base_url)
+        # Honour an explicit base_url override from the caller — used when a
+        # fallback_model entry (or custom_providers lookup) routes through a
+        # built-in provider name but targets a user-specified endpoint.
+        if explicit_base_url:
+            base_url = _to_openai_base_url(explicit_base_url.strip().rstrip("/"))

        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
        final_model = _normalize_resolved_model(model or default_model, provider)
@@ -2264,7 +2360,7 @@ def resolve_provider_client(
        # Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
        # /anthropic-suffixed gateways) so named providers like kimi-coding
        # land on the right transport without needing per-provider branches.
-        client = _wrap_if_needed(client, final_model, base_url, api_key)
+        client = _wrap_if_needed(client, final_model, raw_base_url, api_key)

        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
@@ -2427,7 +2523,10 @@ def _resolve_strict_vision_backend(
    if provider == "nous":
        return _try_nous(vision=True)
    if provider == "openai-codex":
-        return _try_codex()
+        # Route through resolve_provider_client so the caller's explicit
+        # model is used.  There is no safe default Codex model (shifting
+        # allow-list); callers must specify via auxiliary.<task>.model.
+        return resolve_provider_client("openai-codex", model, is_vision=True)
    if provider == "anthropic":
        return _try_anthropic()
    if provider == "custom":
@@ -2532,6 +2631,19 @@ def resolve_vision_provider_client(
                        main_provider, default_model or resolved_model or main_model,
                    )
                    return _finalize(main_provider, sync_client, default_model)
+            elif main_provider in _PROVIDERS_WITHOUT_VISION:
+                # Kimi Coding Plan's /coding endpoint (Anthropic Messages wire)
+                # does not accept image input — Kimi's own docs say "Current
+                # model does not support image input, switch to a model with
+                # image_in capability" and vision lives on the separate Kimi
+                # Platform (api.moonshot.ai). Skip the main provider and fall
+                # through to the aggregator chain instead of returning a
+                # client that will 404 on every vision request (#17076).
+                logger.debug(
+                    "Vision auto-detect: skipping main provider %s (no "
+                    "vision support) — falling through to aggregator chain",
+                    main_provider,
+                )
            else:
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
@@ -3013,7 +3125,7 @@ def _get_task_extra_body(task: str) -> Dict[str, Any]:

 # Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
 # Their image content blocks must use Anthropic format, not OpenAI format.
-_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
+_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-oauth", "minimax-cn"})


 def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
@@ -538,7 +538,7 @@ class ContextCompressor(ContextEngine):
            # Token-budget approach: walk backward accumulating tokens
            accumulated = 0
            boundary = len(result)
-            min_protect = min(protect_tail_count, len(result) - 1)
+            min_protect = min(protect_tail_count, len(result))
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
                raw_content = msg.get("content") or ""
@@ -992,8 +992,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
    def _get_tool_call_id(tc) -> str:
        """Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
        if isinstance(tc, dict):
-            return tc.get("id", "")
-        return getattr(tc, "id", "") or ""
+            return tc.get("call_id", "") or tc.get("id", "") or ""
+        return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""

    def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Fix orphaned tool_call / tool_result pairs after compression.
@@ -608,7 +608,7 @@ class CopilotACPClient:
                    end = start + limit if isinstance(limit, int) and limit > 0 else None
                    content = "".join(lines[start:end])
                if content:
-                    content = redact_sensitive_text(content)
+                    content = redact_sensitive_text(content, force=True)
                response = {
                    "jsonrpc": "2.0",
                    "id": message_id,
@@ -1299,6 +1299,48 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

+    elif provider == "minimax-oauth":
+        # MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
+        # Seed the pool so `/auth list` reflects the logged-in state and the
+        # standard `hermes auth remove minimax-oauth <N>` flow works.
+        # Equivalent of refresh_if_expiring=False: read the raw auth state here
+        # instead of calling resolve_minimax_oauth_runtime_credentials (which always
+        # refreshes on expiry) to avoid surprise network calls during provider discovery.
+        try:
+            from hermes_cli.auth import get_provider_auth_state
+            state = get_provider_auth_state("minimax-oauth")
+            if state and state.get("access_token"):
+                source_name = "oauth"
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    expires_at_ms = None
+                    try:
+                        from datetime import datetime as _dt
+                        raw = state.get("expires_at", "")
+                        if raw:
+                            expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
+                    except Exception:
+                        expires_at_ms = None
+                    base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_OAUTH,
+                            "access_token": state["access_token"],
+                            "refresh_token": state.get("refresh_token"),
+                            "expires_at_ms": expires_at_ms,
+                            "base_url": base_url,
+                            "label": state.get("label", "") or label_from_token(
+                                state.get("access_token", ""), source_name
+                            ),
+                        },
+                    )
+        except Exception as exc:
+            logger.debug("MiniMax OAuth token seed failed: %s", exc)
+
    elif provider == "openai-codex":
        # Respect user suppression — `hermes auth remove openai-codex` marks
        # the device_code source as suppressed so it won't be re-seeded from
@@ -252,6 +252,19 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
    return result


+def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
+    """MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
+
+    Same pattern as Nous: single-source OAuth state with refresh tokens.
+    This step only clears the auth store; suppression of the `oauth` source is what
+    keeps the pool reseed path (_seed_from_singletons) from undoing the removal.
+    """
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    return result
+
+
 def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.

@@ -389,6 +402,11 @@ def _register_all_sources() -> None:
        remove_fn=_remove_qwen_cli,
        description="~/.qwen/oauth_creds.json",
    ))
+    register(RemovalStep(
+        provider="minimax-oauth", source_id="oauth",
+        remove_fn=_remove_minimax_oauth,
+        description="auth.json providers.minimax-oauth",
+    ))
    register(RemovalStep(
        provider="*", source_id="config:",
        match_fn=lambda src: src.startswith("config:") or src == "model_config",
@@ -0,0 +1,440 @@
+"""Curator snapshot + rollback.
+
+A pre-run snapshot of ``~/.hermes/skills/`` (excluding ``.curator_backups/``
+itself) is taken before any mutating curator pass. Snapshots are tar.gz
+files under ``~/.hermes/skills/.curator_backups/<utc-iso>/`` with a
+companion ``manifest.json`` describing the snapshot (reason, time, size,
+counted skill files). Rollback picks a snapshot, first captures the current
+``skills/`` tree in a fresh snapshot so even the rollback itself is
+undoable, then extracts the chosen snapshot into place.
+
+The snapshot does NOT include:
+  - ``.curator_backups/`` (would recurse)
+  - ``.hub/`` (hub-installed skills — managed by the hub, not us)
+
+It DOES include:
+  - all SKILL.md files + their directories (``scripts/``, ``references/``,
+    ``templates/``, ``assets/``)
+  - ``.usage.json`` (usage telemetry — needed to rehydrate state cleanly)
+  - ``.archive/`` (so rollback restores previously-archived skills too)
+  - ``.curator_state`` (so rolling back also restores the last-run-at
+    pointer — otherwise the curator would immediately re-fire on the next
+    tick)
+  - ``.bundled_manifest`` (so protection markers stay consistent)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+import shutil
+import tarfile
+import tempfile
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger(__name__)
+
+
+# Fallback number of snapshots to retain; ``get_keep()`` uses it when the
+# config has no usable ``keep`` value.
+DEFAULT_KEEP = 5
+
+# Entries under skills/ that should NEVER be rolled up into a snapshot.
+# .hub/ is managed by the skills hub; rolling it back would break lockfile
+# invariants. .curator_backups is the backup dir itself — recursion bomb.
+_EXCLUDE_TOP_LEVEL = {".curator_backups", ".hub"}
+
+# Snapshot id regex: UTC ISO with colons replaced by dashes so the filename
+# is portable (Windows-safe). An optional ``-NN`` suffix handles two
+# snapshots landing in the same wallclock second.
+_ID_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z(-\d{2})?$")
+
+
+def _backups_dir() -> Path:
+    """Return the snapshot root: ``<hermes home>/skills/.curator_backups``."""
+    hermes_home = get_hermes_home()
+    return hermes_home / "skills" / ".curator_backups"
+
+
+def _skills_dir() -> Path:
+    """Return the skills tree root: ``<hermes home>/skills``."""
+    hermes_home = get_hermes_home()
+    return hermes_home / "skills"
+
+
+def _utc_id(now: Optional[datetime] = None) -> str:
+    """Return a filesystem-safe UTC timestamp id such as ``2026-05-01T13-05-42Z``.
+
+    Args:
+        now: Moment to encode. Defaults to the current UTC time. Aware
+            datetimes in any zone are converted to UTC first; naive
+            datetimes are taken at face value (treated as already UTC).
+
+    Returns:
+        The timestamp truncated to whole seconds, with ``:`` replaced by
+        ``-`` and a trailing ``Z``.
+    """
+    if now is None:
+        now = datetime.now(timezone.utc)
+    elif now.tzinfo is not None:
+        # Normalize non-UTC offsets. Otherwise "+02:00" would leak into the
+        # id ("...+02-00Z"), labelling local time as UTC and producing a
+        # name that no longer matches the snapshot-id pattern.
+        now = now.astimezone(timezone.utc)
+    # Drop sub-seconds and tzinfo so isoformat() yields "YYYY-MM-DDTHH:MM:SS".
+    stamp = now.replace(microsecond=0, tzinfo=None).isoformat()
+    return stamp.replace(":", "-") + "Z"
+
+
+def _load_config() -> Dict[str, Any]:
+    """Return the ``curator.backup`` config mapping, or ``{}`` when unavailable.
+
+    Any failure to import or load the config is logged at debug level and
+    treated as "no settings". A missing, falsy, or non-dict value at any
+    level (root, ``curator``, ``backup``) also yields ``{}``.
+    """
+    try:
+        from hermes_cli.config import load_config
+        node = load_config()
+    except Exception as e:
+        logger.debug("Failed to load config for curator backup: %s", e)
+        return {}
+    # Walk root -> curator -> backup, bailing out as soon as a level is not
+    # a mapping.
+    for key in ("curator", "backup"):
+        if not isinstance(node, dict):
+            return {}
+        node = node.get(key) or {}
+    if isinstance(node, dict):
+        return node
+    return {}
+
+
+def is_enabled() -> bool:
+    """Report whether curator backups are on; defaults to ON for safety."""
+    settings = _load_config()
+    flag = settings.get("enabled", True)
+    return bool(flag)
+
+
+def get_keep() -> int:
+    """Return how many snapshots to retain (never less than 1).
+
+    Reads ``keep`` from the backup config; a missing or non-integer value
+    falls back to ``DEFAULT_KEEP``.
+    """
+    settings = _load_config()
+    try:
+        wanted = int(settings.get("keep", DEFAULT_KEEP))
+    except (TypeError, ValueError):
+        wanted = DEFAULT_KEEP
+    return wanted if wanted > 1 else 1
+
+
+# ---------------------------------------------------------------------------
+# Snapshot
+# ---------------------------------------------------------------------------
+
+def _count_skill_files(base: Path) -> int:
+    """Count ``SKILL.md`` files anywhere under *base*; 0 if the walk fails."""
+    try:
+        found = list(base.rglob("SKILL.md"))
+    except OSError:
+        return 0
+    return len(found)
+
+
+def _write_manifest(dest: Path, reason: str, archive_path: Path,
+                    skills_counted: int) -> None:
+    """Write ``manifest.json`` describing a snapshot into *dest*.
+
+    The manifest records the snapshot id (the directory name), the reason,
+    the creation time (UTC, ISO format), the archive file name and size in
+    bytes, and the number of skill files counted. Keys are written sorted
+    with 2-space indentation, UTF-8 encoded.
+    """
+    payload: Dict[str, Any] = {}
+    payload["id"] = dest.name
+    payload["reason"] = reason
+    payload["created_at"] = datetime.now(timezone.utc).isoformat()
+    payload["archive"] = archive_path.name
+    payload["archive_bytes"] = archive_path.stat().st_size
+    payload["skill_files"] = skills_counted
+    text = json.dumps(payload, indent=2, sort_keys=True)
+    (dest / "manifest.json").write_text(text, encoding="utf-8")
+
+
+def snapshot_skills(reason: str = "manual") -> Optional[Path]:
+    """Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.
+
+    Args:
+        reason: Free-text label stored in the snapshot's ``manifest.json``.
+
+    Returns:
+        The snapshot directory path, or ``None`` if the snapshot was
+        skipped (backup disabled, skills dir missing, or an IO error
+        occurred — in which case we log at debug and return None so the
+        curator never aborts a pass because of a backup failure).
+
+    The manifest's ``skill_files`` count covers only what actually went
+    into the archive; the excluded top-level entries (``.hub``,
+    ``.curator_backups``) are not counted.
+    """
+    if not is_enabled():
+        logger.debug("Curator backup disabled by config; skipping snapshot")
+        return None
+
+    skills = _skills_dir()
+    if not skills.exists():
+        logger.debug("No ~/.hermes/skills/ directory — nothing to back up")
+        return None
+
+    backups = _backups_dir()
+    try:
+        backups.mkdir(parents=True, exist_ok=True)
+    except OSError as e:
+        logger.debug("Failed to create backups dir %s: %s", backups, e)
+        return None
+
+    # Uniquify: if a snapshot with the same second already exists (can
+    # happen if two curator runs fire in the same second), append a short
+    # counter. The directory is claimed with mkdir itself rather than an
+    # exists() pre-check, so two concurrent runs cannot both pick the same
+    # id and have one of them silently lose its snapshot.
+    base_id = _utc_id()
+    snap_id = base_id
+    counter = 1
+    while True:
+        dest = backups / snap_id
+        try:
+            dest.mkdir(parents=True, exist_ok=False)
+            break
+        except FileExistsError:
+            snap_id = f"{base_id}-{counter:02d}"
+            counter += 1
+        except OSError as e:
+            logger.debug("Failed to create snapshot dir %s: %s", dest, e)
+            return None
+
+    archive = dest / "skills.tar.gz"
+    skill_files = 0
+    try:
+        # Stream into the tarball — no tempdir copy needed.
+        with tarfile.open(archive, "w:gz", compresslevel=6) as tf:
+            for entry in sorted(skills.iterdir()):
+                if entry.name in _EXCLUDE_TOP_LEVEL:
+                    continue
+                # arcname: store paths relative to skills/ so extraction
+                # drops cleanly back into the skills dir.
+                tf.add(str(entry), arcname=entry.name, recursive=True)
+                # Count only archived content. A symlinked directory is
+                # stored as a link, so its target's files are not counted.
+                if entry.is_dir() and not entry.is_symlink():
+                    skill_files += _count_skill_files(entry)
+                elif entry.name == "SKILL.md":
+                    skill_files += 1
+        _write_manifest(dest, reason, archive, skill_files)
+    except (OSError, tarfile.TarError) as e:
+        logger.debug("Curator snapshot failed: %s", e, exc_info=True)
+        # Clean up the partial snapshot (ignore_errors makes this best-effort).
+        shutil.rmtree(dest, ignore_errors=True)
+        return None
+
+    _prune_old(keep=get_keep())
+    logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
+    return dest
+
+
+def _prune_old(keep: int) -> List[str]:
+    """Remove regular snapshots older than the newest *keep*.
+
+    Returns the ids that were deleted. Leftover ``.rollback-staging-*``
+    directories (for example from a crashed rollback) are an implementation
+    detail and are cleaned up on every call, independently of *keep*.
+    Deletion failures are logged at debug level and otherwise ignored.
+    """
+    root = _backups_dir()
+    if not root.exists():
+        return []
+
+    children = [c for c in root.iterdir() if c.is_dir()]
+    stale_staging = [
+        c for c in children if c.name.startswith(".rollback-staging-")
+    ]
+    snapshots = [c for c in children if _ID_RE.match(c.name)]
+    # Newest first (lexicographic works because the id is UTC ISO).
+    snapshots.sort(key=lambda p: p.name, reverse=True)
+
+    removed: List[str] = []
+    for doomed in snapshots[keep:]:
+        try:
+            shutil.rmtree(doomed)
+        except OSError as e:
+            logger.debug("Failed to prune %s: %s", doomed, e)
+        else:
+            removed.append(doomed.name)
+
+    for leftover in stale_staging:
+        try:
+            shutil.rmtree(leftover)
+        except OSError as e:
+            logger.debug("Failed to clean stale staging dir %s: %s", leftover, e)
+    return removed
+
+
+# ---------------------------------------------------------------------------
+# List + rollback
+# ---------------------------------------------------------------------------
+
+def _read_manifest(snap_dir: Path) -> Dict[str, Any]:
+    """Load ``manifest.json`` from *snap_dir* as a dict.
+
+    Returns ``{}`` when the manifest is missing, unreadable, not valid
+    UTF-8, not valid JSON, or valid JSON that is not an object. Callers
+    treat the result as a mutable mapping, so a list or scalar manifest
+    must never be handed back.
+    """
+    manifest_path = snap_dir / "manifest.json"
+    try:
+        data = json.loads(manifest_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return {}
+    return data if isinstance(data, dict) else {}
+
+
+def list_backups() -> List[Dict[str, Any]]:
+    """List every restorable snapshot, newest first.
+
+    A directory qualifies only if its name matches the snapshot-id pattern
+    and it holds a ``skills.tar.gz`` tarball, so transient
+    ``.rollback-staging-*`` directories never show up. Each row is the
+    snapshot's manifest with ``id``, ``path`` and ``archive_bytes`` filled
+    in when the manifest lacks them.
+    """
+    root = _backups_dir()
+    if not root.exists():
+        return []
+
+    rows: List[Dict[str, Any]] = []
+    for snap in sorted(root.iterdir(), reverse=True):
+        tarball = snap / "skills.tar.gz"
+        if not (snap.is_dir() and _ID_RE.match(snap.name) and tarball.exists()):
+            continue
+        row = _read_manifest(snap)
+        row.setdefault("id", snap.name)
+        row.setdefault("path", str(snap))
+        if "archive_bytes" not in row:
+            # Manifest missing or incomplete: measure the tarball directly.
+            try:
+                size = tarball.stat().st_size
+            except OSError:
+                size = 0
+            row["archive_bytes"] = size
+        rows.append(row)
+    return rows
+
+
+def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
+    """Locate a snapshot directory by id, or the newest one when no id is given.
+
+    Returns ``None`` when the backups dir is absent or nothing qualifies. A
+    snapshot qualifies only if it is a directory, its id matches the
+    snapshot-id pattern, and it contains ``skills.tar.gz``.
+    """
+    root = _backups_dir()
+    if not root.exists():
+        return None
+
+    if backup_id:
+        wanted = root / backup_id
+        if not wanted.is_dir():
+            return None
+        # The pattern is checked against the id exactly as supplied.
+        if not _ID_RE.match(backup_id):
+            return None
+        if not (wanted / "skills.tar.gz").exists():
+            return None
+        return wanted
+
+    # Ids sort chronologically, so the first qualifying entry in reverse
+    # order is the newest snapshot.
+    return next(
+        (
+            snap
+            for snap in sorted(root.iterdir(), reverse=True)
+            if snap.is_dir()
+            and _ID_RE.match(snap.name)
+            and (snap / "skills.tar.gz").exists()
+        ),
+        None,
+    )
+
+
+def _clear_skills_tree(skills: Path) -> None:
+    """Best-effort removal of every non-excluded top-level entry in *skills*."""
+    try:
+        entries = list(skills.iterdir())
+    except OSError as e:
+        logger.debug("Failed to list %s for rollback cleanup: %s", skills, e)
+        return
+    for entry in entries:
+        if entry.name in _EXCLUDE_TOP_LEVEL:
+            continue
+        try:
+            if entry.is_dir() and not entry.is_symlink():
+                shutil.rmtree(entry)
+            else:
+                entry.unlink()
+        except OSError as e:
+            logger.debug("Failed to remove %s during rollback cleanup: %s", entry, e)
+
+
+def _unstage(moved: List[Tuple[Path, Path]], staged: Path) -> bool:
+    """Move staged entries back to where they came from.
+
+    Returns True when every entry was restored. The staging dir is deleted
+    only in that case, so nothing that failed to move back is destroyed.
+    """
+    restored_all = True
+    for orig, dest in moved:
+        if orig.exists() or orig.is_symlink():
+            # shutil.move would nest *dest* inside an existing directory
+            # instead of replacing it; treat the slot as not restorable.
+            restored_all = False
+            logger.debug("Cannot restore %s: destination still occupied", orig)
+            continue
+        try:
+            shutil.move(str(dest), str(orig))
+        except OSError as e:
+            restored_all = False
+            logger.debug("Failed to restore %s from staging: %s", orig, e)
+    if restored_all:
+        shutil.rmtree(staged, ignore_errors=True)
+    return restored_all
+
+
+def _extract_snapshot(archive: Path, skills: Path) -> None:
+    """Extract *archive* into *skills*, refusing absolute or ``..`` paths."""
+    with tarfile.open(archive, "r:gz") as tf:
+        # Python 3.12+ supports filter='data' for safer extraction.
+        # Fall back to the unfiltered call for older interpreters but
+        # still reject absolute paths and .. components defensively.
+        for member in tf.getmembers():
+            name = member.name
+            if name.startswith("/") or ".." in Path(name).parts:
+                raise tarfile.TarError(
+                    f"refusing to extract unsafe path: {name!r}"
+                )
+        try:
+            tf.extractall(str(skills), filter="data")  # type: ignore[call-arg]
+        except TypeError:
+            # Python < 3.12 — no filter kwarg
+            tf.extractall(str(skills))
+
+
+def _rollback_from(
+    target: Path, source: Path, skills: Path, backups: Path
+) -> Tuple[bool, str, Optional[Path]]:
+    """Run the snapshot/stage/extract sequence using the private copy *source*."""
+    # Safety snapshot of current state FIRST. If this fails we bail out
+    # before touching anything — otherwise a failed extract could leave
+    # the user with no skills. snapshot_skills() reports IO failures by
+    # returning None rather than raising, so the return value is checked
+    # too. None with backups disabled is the user's explicit choice.
+    try:
+        safety = snapshot_skills(reason=f"pre-rollback to {target.name}")
+    except Exception as e:
+        return (False, f"pre-rollback safety snapshot failed: {e}", None)
+    if safety is None and is_enabled():
+        return (
+            False,
+            "pre-rollback safety snapshot failed: snapshot could not be "
+            "written (see debug log)",
+            None,
+        )
+
+    # Move current entries into an internal staging dir so the extract
+    # happens into an empty skills tree (predictable result). This dir is
+    # implementation detail — not listed as a restorable backup.
+    staged = backups / f".rollback-staging-{_utc_id()}"
+    try:
+        staged.mkdir(parents=True, exist_ok=False)
+    except OSError as e:
+        return (False, f"failed to create staging dir: {e}", None)
+
+    moved: List[Tuple[Path, Path]] = []
+    try:
+        for entry in list(skills.iterdir()):
+            if entry.name in _EXCLUDE_TOP_LEVEL:
+                continue
+            dest = staged / entry.name
+            shutil.move(str(entry), str(dest))
+            moved.append((entry, dest))
+    except OSError as e:
+        message = f"failed to stage current skills: {e}"
+        if not _unstage(moved, staged):
+            message += f" (some entries remain in {staged})"
+        return (False, message, None)
+
+    try:
+        _extract_snapshot(source, skills)
+    except (OSError, EOFError, tarfile.TarError) as e:
+        # EOFError: a truncated gzip stream can surface as EOFError rather
+        # than a tarfile/OS error. Remove whatever was partially extracted
+        # before moving the staged entries back, so they land in their
+        # original slots instead of nested inside half-extracted dirs.
+        _clear_skills_tree(skills)
+        if _unstage(moved, staged):
+            return (False, f"snapshot extract failed (state restored): {e}", None)
+        return (
+            False,
+            "snapshot extract failed and the previous skills could not be "
+            f"fully restored (remaining entries are in {staged}): {e}",
+            None,
+        )
+
+    # Extract succeeded — the staging dir has served its purpose. The
+    # user's undo handle is the safety snapshot tarball we took earlier.
+    shutil.rmtree(staged, ignore_errors=True)
+
+    logger.info("Curator rollback: restored from %s", target.name)
+    return (True, f"restored from snapshot {target.name}", target)
+
+
+def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
+    """Restore ``~/.hermes/skills/`` from a snapshot.
+
+    Strategy:
+      1. Resolve the target snapshot (explicit id or newest regular).
+      2. Copy its tarball to a private scratch dir. The safety snapshot in
+         step 3 prunes old snapshots and may delete the target itself
+         (always when ``keep`` is 1, or when restoring the oldest snapshot
+         at the retention limit).
+      3. Take a safety snapshot of the CURRENT skills tree. It is a regular
+         snapshot whose manifest reason is ``pre-rollback to <id>``, so the
+         rollback itself is undoable. If it cannot be written while backups
+         are enabled, nothing is touched and failure is returned.
+      4. Move all current top-level entries (except ``.curator_backups``
+         and ``.hub``) into ``.curator_backups/.rollback-staging-<ts>/``.
+      5. Extract the chosen snapshot into ``~/.hermes/skills/``.
+      6. On failure during 5, clear the partial extraction and move the
+         staged entries back (best-effort). The staging dir is kept when
+         that restore is incomplete.
+
+    Args:
+        backup_id: Snapshot id to restore, or ``None`` for the newest one.
+
+    Returns:
+        ``(ok, message, snapshot_path)``. ``snapshot_path`` is the snapshot
+        directory that was restored (it may since have been pruned by the
+        retention policy), or ``None`` on failure.
+    """
+    target = _resolve_backup(backup_id)
+    if target is None:
+        return (
+            False,
+            "no matching backup found"
+            + (f" for id '{backup_id}'" if backup_id else "")
+            + " (use `hermes curator rollback --list` to see available snapshots)",
+            None,
+        )
+    archive = target / "skills.tar.gz"
+    if not archive.exists():
+        return (False, f"snapshot {target.name} has no skills.tar.gz — corrupted?", None)
+
+    skills = _skills_dir()
+    skills.mkdir(parents=True, exist_ok=True)
+    backups = _backups_dir()
+    backups.mkdir(parents=True, exist_ok=True)
+
+    try:
+        scratch = tempfile.mkdtemp(prefix="hermes-curator-rollback-")
+    except OSError as e:
+        return (False, f"failed to create scratch dir: {e}", None)
+    try:
+        source = Path(scratch) / "skills.tar.gz"
+        try:
+            shutil.copy2(str(archive), str(source))
+        except OSError as e:
+            return (False, f"failed to read snapshot {target.name}: {e}", None)
+        return _rollback_from(target, source, skills, backups)
+    finally:
+        shutil.rmtree(scratch, ignore_errors=True)
+
+
+# ---------------------------------------------------------------------------
+# Human-readable summary for CLI
+# ---------------------------------------------------------------------------
+
+def format_size(n: int) -> str:
+    """Render a byte count as ``B``, ``KB``, ``MB`` or ``GB`` (1024-based).
+
+    Values below 1024 are shown as-is with a ``B`` suffix; larger values
+    get one decimal place. ``GB`` is the largest unit, so very large
+    values simply grow in magnitude.
+    """
+    if n < 1024:
+        return f"{n} B"
+    size = n
+    for unit in ("KB", "MB"):
+        size /= 1024
+        if size < 1024:
+            return f"{size:.1f} {unit}"
+    return f"{size / 1024:.1f} GB"
+
+
+def summarize_backups() -> str:
+    """Build the plain-text table of snapshots shown by the CLI.
+
+    Returns a one-line notice when there are no snapshots; otherwise a
+    header, a rule, and one row per snapshot (id, reason truncated to 40
+    characters, skill-file count, archive size).
+    """
+    rows = list_backups()
+    if not rows:
+        return "No curator snapshots yet."
+    header = f"{'id':<24}  {'reason':<40}  {'skills':>6}  {'size':>8}"
+    table = [header, "─" * len(header)]
+    for row in rows:
+        snap_id = row.get('id', '?')
+        reason = (row.get('reason', '?') or '?')[:40]
+        skills = row.get('skill_files', 0)
+        size = format_size(int(row.get('archive_bytes', 0)))
+        table.append(f"{snap_id:<24}  {reason:<40}  {skills:>6}  {size:>8}")
+    return "\n".join(table)
@@ -54,6 +54,7 @@ class FailoverReason(enum.Enum):
    # Provider-specific
    thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
    long_context_tier = "long_context_tier"    # Anthropic "extra usage" tier gate
+    oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden"  # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry

    # Catch-all
    unknown = "unknown"                  # Unclassifiable — retry with backoff
@@ -450,6 +451,25 @@ def classify_api_error(
            should_compress=True,
        )

+    # Anthropic OAuth subscription rejects the 1M-context beta header.
+    # Observed error body: "The long context beta is not yet available for
+    # this subscription." Returned as HTTP 400 from native Anthropic when
+    # the subscription doesn't include 1M context, even though the request
+    # carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
+    # in run_agent.py rebuilds the Anthropic client with the beta stripped
+    # and retries once. Pattern is narrow enough that it won't collide with
+    # the 429 tier-gate pattern above (different status, different phrase).
+    if (
+        status_code == 400
+        and "long context beta" in error_msg
+        and "not yet available" in error_msg
+    ):
+        return _result(
+            FailoverReason.oauth_long_context_beta_forbidden,
+            retryable=True,
+            should_compress=False,
+        )
+
    # ── 2. HTTP status code classification ──────────────────────────

    if status_code is not None:
@@ -20,25 +20,25 @@ def summarize_manual_compression(
        headline = f"No changes from compression: {before_count} messages"
        if after_tokens == before_tokens:
            token_line = (
-                f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
+                f"Approx request size: ~{before_tokens:,} tokens (unchanged)"
            )
        else:
            token_line = (
-                f"Rough transcript estimate: ~{before_tokens:,} → "
+                f"Approx request size: ~{before_tokens:,} → "
                f"~{after_tokens:,} tokens"
            )
    else:
        headline = f"Compressed: {before_count} → {after_count} messages"
        token_line = (
-            f"Rough transcript estimate: ~{before_tokens:,} → "
+            f"Approx request size: ~{before_tokens:,} → "
            f"~{after_tokens:,} tokens"
        )

    note = None
    if not noop and after_count < before_count and after_tokens > before_tokens:
        note = (
-            "Note: fewer messages can still raise this rough transcript estimate "
-            "when compression rewrites the transcript into denser summaries."
+            "Note: fewer messages can still raise this estimate when "
+            "compression rewrites the transcript into denser summaries."
        )

    return {
@@ -402,6 +402,41 @@ class MemoryManager:
                    provider.name, e,
                )

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Tell every registered provider that the agent's session_id rotated.
+
+        Intended for any path that reassigns ``AIAgent.session_id`` without
+        tearing the providers down (``/resume``, ``/branch``, ``/reset``,
+        ``/new``, context compression). Providers stay running and only
+        refresh their cached per-session state; the full contract is on
+        ``MemoryProvider.on_session_switch``.
+
+        An empty *new_session_id* is ignored. A provider that raises is
+        logged at debug level and does not stop the remaining providers
+        from being notified.
+        """
+        if not new_session_id:
+            return
+        forwarded = {
+            "parent_session_id": parent_session_id,
+            "reset": reset,
+            **kwargs,
+        }
+        for member in self._providers:
+            try:
+                member.on_session_switch(new_session_id, **forwarded)
+            except Exception as exc:
+                logger.debug(
+                    "Memory provider '%s' on_session_switch failed: %s",
+                    member.name, exc,
+                )
+
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Notify all providers before context compression.

@@ -25,6 +25,7 @@ Lifecycle (called by MemoryManager, wired in run_agent.py):
 Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
+  on_session_switch(new_session_id, **kwargs) — mid-process session_id rotation
  on_pre_compress(messages) -> str       — extract before context compression
  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
@@ -160,6 +161,45 @@ class MemoryProvider(ABC):
        (CLI exit, /reset, gateway session expiry).
        """

+    def on_session_switch(
+        self,
+        new_session_id: str,
+        *,
+        parent_session_id: str = "",
+        reset: bool = False,
+        **kwargs,
+    ) -> None:
+        """Called when the agent switches session_id mid-process.
+
+        Fires on ``/resume``, ``/branch``, ``/reset``, ``/new`` (CLI), the
+        gateway equivalents, and context compression — any path that
+        reassigns ``AIAgent.session_id`` without tearing the provider down.
+
+        Providers that cache per-session state in ``initialize()``
+        (``_session_id``, ``_document_id``, accumulated turn buffers,
+        counters) should update or reset that state here so subsequent
+        writes land in the correct session's record.
+
+        Parameters
+        ----------
+        new_session_id:
+            The session_id the agent just switched to.
+        parent_session_id:
+            The previous session_id, if meaningful — set for ``/branch``
+            (fork lineage), context compression (continuation lineage),
+            and ``/resume`` (the session we're leaving). Empty string
+            when no lineage applies.
+        reset:
+            ``True`` when this is a genuinely new conversation, not a
+            resumption of an existing one. Fired by ``/reset`` / ``/new``.
+            Providers should flush accumulated per-session buffers
+            (``_session_turns``, ``_turn_counter``, etc.) when this is
+            set. ``False`` for ``/resume`` / ``/branch`` / compression
+            where the logical conversation continues under the new id.
+        **kwargs:
+            Any additional keyword arguments supplied by the caller. None
+            are defined by this base class; overrides should accept and
+            ignore keys they do not recognise.
+
+        Default is no-op for backward compatibility.
+        """
+
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Called before context compression discards old messages.

@@ -46,7 +46,7 @@ def _resolve_requests_verify() -> bool | str:
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
@@ -1247,7 +1247,7 @@ def get_model_context_length(
    6. Nous suffix-match via OpenRouter cache
    7. models.dev registry lookup (provider-aware)
    8. Thin hardcoded defaults (broad family patterns)
-    9. Default fallback (128K)
+    9. Default fallback (256K)
    """
    # 0. Explicit config override — user knows best
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
@@ -1427,7 +1427,7 @@ def get_model_context_length(
                save_context_length(model, base_url, local_ctx)
            return local_ctx

-    # 10. Default fallback — 128K
+    # 10. Default fallback — 256K
    return DEFAULT_FALLBACK_CONTEXT


@@ -149,6 +149,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
+    "minimax-oauth": "minimax",
    "minimax-cn": "minimax-cn",
    "deepseek": "deepseek",
    "alibaba": "alibaba",
@@ -81,15 +81,56 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
        return repaired

    # Rule 2: when anyOf is present, type belongs only on the children.
+    # Additionally, Moonshot rejects null-type branches inside anyOf
+    # (enum value (<nil>) does not match any type in [string]).
+    # Collapse the anyOf to the first non-null branch and infer its type.
    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
        repaired.pop("type", None)
-        return repaired
+        non_null = [b for b in repaired["anyOf"]
+                    if isinstance(b, dict) and b.get("type") != "null"]
+        if non_null and len(non_null) < len(repaired["anyOf"]):
+            # Drop the anyOf wrapper — keep only the non-null branch.
+            # If there's a single non-null branch, promote it and fall
+            # through to Rules 1/3 so nullable/enum cleanup still applies
+            # to the merged node.
+            if len(non_null) == 1:
+                merge = {k: v for k, v in repaired.items() if k != "anyOf"}
+                merge.update(non_null[0])
+                repaired = merge
+            else:
+                repaired["anyOf"] = non_null
+                return repaired
+        else:
+            # Nothing to collapse — parent type stripped, children already
+            # repaired by the recursive walk above.
+            return repaired
+
+    # Moonshot also rejects non-standard keywords like ``nullable`` on
+    # parameter schemas — strip it.
+    repaired.pop("nullable", None)

    # Rule 1: property schemas without type need one.  $ref nodes are exempt
    # — their type comes from the referenced definition.
-    if "$ref" in repaired:
-        return repaired
-    return _fill_missing_type(repaired)
+    # Fill missing type BEFORE Rule 3 so enum cleanup can check the type.
+    if "$ref" not in repaired:
+        repaired = _fill_missing_type(repaired)
+
+    # Rule 3: Moonshot rejects null/empty-string values inside enum arrays
+    # when the parent type is a scalar (string, integer, etc.).  The error:
+    #   "enum value (<nil>) does not match any type in [string]"
+    # Strip null and empty-string from enum values, and if the enum becomes
+    # empty, drop it entirely.
+    if "enum" in repaired and isinstance(repaired["enum"], list):
+        node_type = repaired.get("type")
+        if node_type in ("string", "integer", "number", "boolean"):
+            cleaned = [v for v in repaired["enum"]
+                       if v is not None and v != ""]
+            if cleaned:
+                repaired["enum"] = cleaned
+            else:
+                repaired.pop("enum")
+
+    return repaired


 def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
@@ -98,17 +98,19 @@ def tool_progress_hint_cli() -> str:
 def openclaw_residue_hint_cli() -> str:
    """Banner shown the first time Hermes starts and finds ``~/.openclaw/``.

-    OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
-    otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
-    get carried forward and the agent dutifully reads them). ``hermes claw
-    cleanup`` renames the directory so the agent stops finding it.
+    Points users at ``hermes claw migrate`` (non-destructive port of config,
+    memory, and skills) first. ``hermes claw cleanup`` is mentioned as the
+    follow-up step for users who have already migrated and want to archive
+    the old directory — with a warning that archiving breaks OpenClaw.
    """
    return (
-        "Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
-        "After migrating, the agent can still get confused and read that "
-        "directory's config/memory instead of Hermes's.\n"
-        "Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
-        "This tip only shows once; rerun it any time with `hermes claw cleanup`."
+        "A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
+        "To port your config, memory, and skills over to Hermes, run "
+        "`hermes claw migrate`.\n"
+        "If you've already migrated and want to archive the old directory, "
+        "run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
+        "OpenClaw will stop working after this).\n"
+        "This tip only shows once."
    )


@@ -182,6 +182,64 @@ SKILLS_GUIDANCE = (
    "Skills that aren't maintained become liabilities."
 )

+# Prompt text addressed to an agent running as a Kanban worker: its lifecycle
+# (orient, work, heartbeat, block, complete, hand off), the orchestrator
+# variant, and an explicit list of don'ts.
+# NOTE(review): presumably injected into the system prompt like the sibling
+# *_GUIDANCE constants — confirm at the call site.
+KANBAN_GUIDANCE = (
+    "# You are a Kanban worker\n"
+    "You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
+    "the shared board at `~/.hermes/kanban.db`. Your task id is in "
+    "`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
+    "The `kanban_*` tools in your schema are your primary coordination surface — "
+    "they write directly to the shared SQLite DB and work regardless of terminal "
+    "backend (local/docker/modal/ssh).\n"
+    "\n"
+    "## Lifecycle\n"
+    "\n"
+    "1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
+    "task). The response includes title, body, parent-task handoffs (summary + "
+    "metadata), any prior attempts on this task if you're a retry, the full "
+    "comment thread, and a pre-formatted `worker_context` you can treat as "
+    "ground truth.\n"
+    "2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
+    "any file operations. The workspace is yours for this run. Don't modify "
+    "files outside it unless the task explicitly asks.\n"
+    "3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
+    "every few minutes during long subprocesses (training, encoding, crawling). "
+    "Skip heartbeats for short tasks.\n"
+    "4. **Block on genuine ambiguity.** If you need a human decision you cannot "
+    "infer (missing credentials, UX choice, paywalled source, peer output you "
+    "need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
+    "The user will unblock with context and the dispatcher will respawn you.\n"
+    "5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
+    "metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
+    "artifacts. `metadata` is machine-readable facts "
+    "(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
+    "workers read both via their own `kanban_show`. Never put secrets / "
+    "tokens / raw PII in either field — run rows are durable forever.\n"
+    "6. **If follow-up work appears, create it; don't do it.** Use "
+    "`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
+    "to spawn a child task for the appropriate specialist profile instead of "
+    "scope-creeping into the next thing.\n"
+    "\n"
+    "## Orchestrator mode\n"
+    "\n"
+    "If your task is itself a decomposition task (e.g. a planner profile given "
+    "a high-level goal), use `kanban_create` to fan out into child tasks — one "
+    "per specialist, each with an explicit `assignee` and `parents=[...]` to "
+    "express dependencies. Then `kanban_complete` your own task with a summary "
+    "of the decomposition. Do NOT execute the work yourself; your job is "
+    "routing, not implementation.\n"
+    "\n"
+    "## Do NOT\n"
+    "\n"
+    "- Do not shell out to `hermes kanban <verb>` for board operations. Use "
+    "the `kanban_*` tools — they work across all terminal backends.\n"
+    "- Do not complete a task you didn't actually finish. Block it.\n"
+    "- Do not assign follow-up work to yourself. Assign it to the right "
+    "specialist profile.\n"
+    "- Do not call `delegate_task` as a board substitute. `delegate_task` is "
+    "for short reasoning subtasks inside your own run; board tasks are for "
+    "cross-agent handoffs that outlive one API loop."
+)
+
 TOOL_USE_ENFORCEMENT_GUIDANCE = (
    "# Tool-use enforcement\n"
    "You MUST use your tools to take action — do not describe what you would do "
@@ -184,11 +184,59 @@ _PREFIX_RE = re.compile(
 )


+def mask_secret(
+    value: str,
+    *,
+    head: int = 4,
+    tail: int = 4,
+    floor: int = 12,
+    placeholder: str = "***",
+    empty: str = "",
+) -> str:
+    """Mask a secret for display, preserving ``head`` and ``tail`` characters.
+
+    Canonical helper for display-time redaction across Hermes — used by
+    ``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
+    a secret needs to be shown truncated for debuggability while still
+    keeping the bulk hidden.
+
+    Args:
+        value:       The secret to mask. ``None``/empty returns ``empty``.
+        head:        Leading characters to preserve. Default 4.
+        tail:        Trailing characters to preserve; must be >= 1. Default 4.
+        floor:       Values shorter than ``floor`` characters are fully
+                     masked (returns ``placeholder``). Default 12 —
+                     matches the existing config/status/dump convention.
+        placeholder: Value returned for too-short inputs. Default ``"***"``.
+        empty:       Value returned when ``value`` is falsy (None, ""). The
+                     caller can override this to e.g. ``color("(not set)",
+                     Colors.DIM)`` for user-facing display.
+
+    Examples:
+        >>> mask_secret("sk-proj-abcdef1234567890")
+        'sk-p...7890'
+        >>> mask_secret("short")                         # fully masked
+        '***'
+        >>> mask_secret("")                              # empty default
+        ''
+        >>> mask_secret("", empty="(not set)")           # empty override
+        '(not set)'
+        >>> mask_secret("long-token", head=6, tail=4, floor=18)
+        '***'
+    """
+    if not value:
+        return empty
+    if len(value) < floor:
+        return placeholder
+    return f"{value[:head]}...{value[-tail:]}"
+
+
 def _mask_token(token: str) -> str:
-    """Mask a token, preserving prefix for long tokens."""
-    if len(token) < 18:
+    """Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
+    # Empty input: historically this returned "***" rather than "". Preserve.
+    if not token:
        return "***"
-    return f"{token[:6]}...{token[-4:]}"
+    return mask_secret(token, head=6, tail=4, floor=18)


 def _redact_query_string(query: str) -> str:
@@ -257,11 +305,13 @@ def _redact_form_body(text: str) -> str:
    return _redact_query_string(text.strip())


-def redact_sensitive_text(text: str) -> str:
+def redact_sensitive_text(text: str, *, force: bool = False) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
    Disabled by default — enable via security.redact_secrets: true in config.yaml.
+    Set force=True for safety boundaries that must never return raw secrets
+    regardless of the user's global logging redaction preference.
    """
    if text is None:
        return None
@@ -269,7 +319,7 @@ def redact_sensitive_text(text: str) -> str:
        text = str(text)
    if not text:
        return text
-    if not _REDACT_ENABLED:
+    if not (force or _REDACT_ENABLED):
        return text

    # Known prefixes (sk-, ghp_, etc.)
@@ -234,7 +234,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:

        for scan_dir in dirs_to_scan:
            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
-                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
+                if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
                    continue
                try:
                    content = skill_md.read_text(encoding='utf-8')
@@ -284,6 +284,71 @@ def get_skill_commands() -> Dict[str, Dict[str, Any]]:
    return _skill_commands


+def reload_skills() -> Dict[str, Any]:
+    """Re-scan the skills directory and return a diff of what changed.
+
+    Rescans ``~/.hermes/skills/`` and any ``skills.external_dirs`` so the
+    slash-command map (``agent.skill_commands._skill_commands``) reflects
+    skills added or removed on disk.
+
+    This does NOT invalidate the skills system-prompt cache. Skills are
+    called by name via ``/skill-name``, ``skills_list``, or ``skill_view``
+    — they don't need to be in the system prompt for the model to use them.
+    Keeping the prompt cache intact preserves prefix caching across the
+    reload, so a user invoking ``/reload-skills`` pays no cache-reset cost.
+
+    Returns:
+        Dict with keys::
+
+            {
+              "added":      [{"name": str, "description": str}, ...],
+              "removed":    [{"name": str, "description": str}, ...],
+              "unchanged":  [skill names present before and after],
+              "total":      total skill count after rescan,
+              "commands":   total /slash-skill count after rescan,
+            }
+
+        ``description`` is the skill's full SKILL.md frontmatter
+        ``description:`` field — the same string the system prompt renders
+        as ``    - name: description`` for pre-existing skills.
+    """
+    # Snapshot pre-reload state (name -> description) from the current
+    # slash-command cache. Using dicts lets the post-rescan diff carry
+    # descriptions for newly-visible or just-removed skills without a
+    # second disk walk.
+    def _snapshot(cmds: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
+        out: Dict[str, str] = {}
+        for slash_key, info in cmds.items():
+            bare = slash_key.lstrip("/")
+            out[bare] = (info or {}).get("description") or ""
+        return out
+
+    before = _snapshot(_skill_commands)
+
+    # Rescan the skills dir. ``scan_skill_commands`` resets
+    # ``_skill_commands = {}`` internally and repopulates it.
+    new_commands = scan_skill_commands()
+
+    after = _snapshot(new_commands)
+
+    added_names = sorted(set(after) - set(before))
+    removed_names = sorted(set(before) - set(after))
+    unchanged = sorted(set(after) & set(before))
+
+    added = [{"name": n, "description": after[n]} for n in added_names]
+    # For removed skills, use the description we had cached pre-rescan
+    # (the skill file is gone so we can't re-read it).
+    removed = [{"name": n, "description": before[n]} for n in removed_names]
+
+    return {
+        "added": added,
+        "removed": removed,
+        "unchanged": unchanged,
+        "total": len(after),
+        "commands": len(new_commands),
+    }
+
+
 def resolve_skill_command_key(command: str) -> Optional[str]:
    """Resolve a user-typed /command to its canonical skill_cmds key.

@@ -328,6 +393,14 @@ def build_skill_invocation_message(
        return f"[Failed to load skill: {skill_info['name']}]"

    loaded_skill, skill_dir, skill_name = loaded
+
+    # Track active usage for Curator lifecycle management (#17782)
+    try:
+        from tools.skill_usage import bump_use
+        bump_use(skill_name)
+    except Exception:
+        pass  # Non-critical — skill invocation proceeds regardless
+
    activation_note = (
        f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
        "you to follow its instructions. The full skill content is loaded below.]"
@@ -367,6 +440,14 @@ def build_preloaded_skills_prompt(
            continue

        loaded_skill, skill_dir, skill_name = loaded
+
+        # Track active usage for Curator lifecycle management (#17782)
+        try:
+            from tools.skill_usage import bump_use
+            bump_use(skill_name)
+        except Exception:
+            pass  # Non-critical
+
        activation_note = (
            f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
            "preloaded. Treat its instructions as active guidance for the duration of this "
@@ -24,7 +24,7 @@ PLATFORM_MAP = {
    "windows": "win32",
 }

-EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))
+EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))

 # ── Lazy YAML loader ─────────────────────────────────────────────────────

@@ -200,6 +200,9 @@ def get_external_skills_dirs() -> List[Path]:
    if not isinstance(raw_dirs, list):
        return []

+    from hermes_constants import get_hermes_home
+
+    hermes_home = get_hermes_home()
    local_skills = get_skills_dir().resolve()
    seen: Set[Path] = set()
    result: List[Path] = []
@@ -210,7 +213,12 @@ def get_external_skills_dirs() -> List[Path]:
            continue
        # Expand ~ and environment variables
        expanded = os.path.expanduser(os.path.expandvars(entry))
-        p = Path(expanded).resolve()
+        p = Path(expanded)
+        # Resolve relative paths against HERMES_HOME, not cwd
+        if not p.is_absolute():
+            p = (hermes_home / p).resolve()
+        else:
+            p = p.resolve()
        if p == local_skills:
            continue
        if p in seen:
@@ -432,7 +440,7 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
 def iter_skill_index_files(skills_dir: Path, filename: str):
    """Walk skills_dir yielding sorted paths matching *filename*.

-    Excludes ``.git``, ``.github``, ``.hub`` directories.
+    Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
    """
    matches = []
    for root, dirs, files in os.walk(skills_dir, followlinks=True):
@@ -0,0 +1,455 @@
+"""Pure tool-call loop guardrail primitives.
+
+The controller in this module is intentionally side-effect free: it tracks
+per-turn tool-call observations and returns decisions. Runtime code owns whether
+those decisions become warning guidance, synthetic tool results, or controlled
+turn halts.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+from dataclasses import dataclass, field
+from typing import Any, Mapping
+
+from utils import safe_json_loads
+
+
+IDEMPOTENT_TOOL_NAMES = frozenset(
+    {
+        "read_file",
+        "search_files",
+        "web_search",
+        "web_extract",
+        "session_search",
+        "browser_snapshot",
+        "browser_console",
+        "browser_get_images",
+        "mcp_filesystem_read_file",
+        "mcp_filesystem_read_text_file",
+        "mcp_filesystem_read_multiple_files",
+        "mcp_filesystem_list_directory",
+        "mcp_filesystem_list_directory_with_sizes",
+        "mcp_filesystem_directory_tree",
+        "mcp_filesystem_get_file_info",
+        "mcp_filesystem_search_files",
+    }
+)
+
+MUTATING_TOOL_NAMES = frozenset(
+    {
+        "terminal",
+        "execute_code",
+        "write_file",
+        "patch",
+        "todo",
+        "memory",
+        "skill_manage",
+        "browser_click",
+        "browser_type",
+        "browser_press",
+        "browser_scroll",
+        "browser_navigate",
+        "send_message",
+        "cronjob",
+        "delegate_task",
+        "process",
+    }
+)
+
+
+@dataclass(frozen=True)
+class ToolCallGuardrailConfig:
+    """Thresholds for per-turn tool-call loop detection.
+
+    Warnings are enabled by default and never prevent tool execution. Hard stops
+    are explicit opt-in so interactive CLI/TUI sessions get a gentle nudge unless
+    the user enables circuit-breaker behavior in config.yaml.
+    """
+
+    warnings_enabled: bool = True
+    hard_stop_enabled: bool = False
+    exact_failure_warn_after: int = 2
+    exact_failure_block_after: int = 5
+    same_tool_failure_warn_after: int = 3
+    same_tool_failure_halt_after: int = 8
+    no_progress_warn_after: int = 2
+    no_progress_block_after: int = 5
+    idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
+    mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)
+
+    @classmethod
+    def from_mapping(cls, data: Mapping[str, Any] | None) -> "ToolCallGuardrailConfig":
+        """Build config from the `tool_loop_guardrails` config.yaml section."""
+        if not isinstance(data, Mapping):
+            return cls()
+
+        warn_after = data.get("warn_after")
+        if not isinstance(warn_after, Mapping):
+            warn_after = {}
+        hard_stop_after = data.get("hard_stop_after")
+        if not isinstance(hard_stop_after, Mapping):
+            hard_stop_after = {}
+
+        defaults = cls()
+        return cls(
+            warnings_enabled=_as_bool(data.get("warnings_enabled"), defaults.warnings_enabled),
+            hard_stop_enabled=_as_bool(data.get("hard_stop_enabled"), defaults.hard_stop_enabled),
+            exact_failure_warn_after=_positive_int(
+                warn_after.get("exact_failure", data.get("exact_failure_warn_after")),
+                defaults.exact_failure_warn_after,
+            ),
+            same_tool_failure_warn_after=_positive_int(
+                warn_after.get("same_tool_failure", data.get("same_tool_failure_warn_after")),
+                defaults.same_tool_failure_warn_after,
+            ),
+            no_progress_warn_after=_positive_int(
+                warn_after.get("idempotent_no_progress", data.get("no_progress_warn_after")),
+                defaults.no_progress_warn_after,
+            ),
+            exact_failure_block_after=_positive_int(
+                hard_stop_after.get("exact_failure", data.get("exact_failure_block_after")),
+                defaults.exact_failure_block_after,
+            ),
+            same_tool_failure_halt_after=_positive_int(
+                hard_stop_after.get("same_tool_failure", data.get("same_tool_failure_halt_after")),
+                defaults.same_tool_failure_halt_after,
+            ),
+            no_progress_block_after=_positive_int(
+                hard_stop_after.get("idempotent_no_progress", data.get("no_progress_block_after")),
+                defaults.no_progress_block_after,
+            ),
+        )
+
+
+@dataclass(frozen=True)
+class ToolCallSignature:
+    """Stable, non-reversible identity for a tool name plus canonical args."""
+
+    tool_name: str
+    args_hash: str
+
+    @classmethod
+    def from_call(cls, tool_name: str, args: Mapping[str, Any] | None) -> "ToolCallSignature":
+        canonical = canonical_tool_args(args or {})
+        return cls(tool_name=tool_name, args_hash=_sha256(canonical))
+
+    def to_metadata(self) -> dict[str, str]:
+        """Return public metadata without raw argument values."""
+        return {"tool_name": self.tool_name, "args_hash": self.args_hash}
+
+
+@dataclass(frozen=True)
+class ToolGuardrailDecision:
+    """Decision returned by the tool-call guardrail controller."""
+
+    action: str = "allow"  # allow | warn | block | halt
+    code: str = "allow"
+    message: str = ""
+    tool_name: str = ""
+    count: int = 0
+    signature: ToolCallSignature | None = None
+
+    @property
+    def allows_execution(self) -> bool:
+        return self.action in {"allow", "warn"}
+
+    @property
+    def should_halt(self) -> bool:
+        return self.action in {"block", "halt"}
+
+    def to_metadata(self) -> dict[str, Any]:
+        data: dict[str, Any] = {
+            "action": self.action,
+            "code": self.code,
+            "message": self.message,
+            "tool_name": self.tool_name,
+            "count": self.count,
+        }
+        if self.signature is not None:
+            data["signature"] = self.signature.to_metadata()
+        return data
+
+
+def canonical_tool_args(args: Mapping[str, Any]) -> str:
+    """Return sorted compact JSON for parsed tool arguments."""
+    if not isinstance(args, Mapping):
+        raise TypeError(f"tool args must be a mapping, got {type(args).__name__}")
+    return json.dumps(
+        args,
+        ensure_ascii=False,
+        sort_keys=True,
+        separators=(",", ":"),
+        default=str,
+    )
+
+
+def classify_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
+    """Safety-fallback classifier used only when callers don't pass ``failed``.
+
+    Mirrors ``agent.display._detect_tool_failure`` exactly so the guardrail
+    never disagrees with the CLI's user-visible ``[error]`` tag. Production
+    callers in ``run_agent.py`` always pass an explicit ``failed=`` derived
+    from ``_detect_tool_failure``; this function exists so standalone callers
+    (tests, tooling) still get consistent behavior.
+    """
+    if result is None:
+        return False, ""
+
+    if tool_name == "terminal":
+        data = safe_json_loads(result)
+        if isinstance(data, dict):
+            exit_code = data.get("exit_code")
+            if exit_code is not None and exit_code != 0:
+                return True, f" [exit {exit_code}]"
+        return False, ""
+
+    if tool_name == "memory":
+        data = safe_json_loads(result)
+        if isinstance(data, dict):
+            if data.get("success") is False and "exceed the limit" in data.get("error", ""):
+                return True, " [full]"
+
+    lower = result[:500].lower()
+    if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
+        return True, " [error]"
+
+    return False, ""
+
+
+class ToolCallGuardrailController:
+    """Per-turn controller for repeated failed/non-progressing tool calls."""
+
+    def __init__(self, config: ToolCallGuardrailConfig | None = None):
+        self.config = config or ToolCallGuardrailConfig()
+        self.reset_for_turn()
+
+    def reset_for_turn(self) -> None:
+        self._exact_failure_counts: dict[ToolCallSignature, int] = {}
+        self._same_tool_failure_counts: dict[str, int] = {}
+        self._no_progress: dict[ToolCallSignature, tuple[str, int]] = {}
+        self._halt_decision: ToolGuardrailDecision | None = None
+
+    @property
+    def halt_decision(self) -> ToolGuardrailDecision | None:
+        return self._halt_decision
+
+    def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision:
+        signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))
+        if not self.config.hard_stop_enabled:
+            return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
+
+        exact_count = self._exact_failure_counts.get(signature, 0)
+        if exact_count >= self.config.exact_failure_block_after:
+            decision = ToolGuardrailDecision(
+                action="block",
+                code="repeated_exact_failure_block",
+                message=(
+                    f"Blocked {tool_name}: the same tool call failed {exact_count} "
+                    "times with identical arguments. Stop retrying it unchanged; "
+                    "change strategy or explain the blocker."
+                ),
+                tool_name=tool_name,
+                count=exact_count,
+                signature=signature,
+            )
+            self._halt_decision = decision
+            return decision
+
+        if self._is_idempotent(tool_name):
+            record = self._no_progress.get(signature)
+            if record is not None:
+                _result_hash, repeat_count = record
+                if repeat_count >= self.config.no_progress_block_after:
+                    decision = ToolGuardrailDecision(
+                        action="block",
+                        code="idempotent_no_progress_block",
+                        message=(
+                            f"Blocked {tool_name}: this read-only call returned the same "
+                            f"result {repeat_count} times. Stop repeating it unchanged; "
+                            "use the result already provided or try a different query."
+                        ),
+                        tool_name=tool_name,
+                        count=repeat_count,
+                        signature=signature,
+                    )
+                    self._halt_decision = decision
+                    return decision
+
+        return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
+
+    def after_call(
+        self,
+        tool_name: str,
+        args: Mapping[str, Any] | None,
+        result: str | None,
+        *,
+        failed: bool | None = None,
+    ) -> ToolGuardrailDecision:
+        args = _coerce_args(args)
+        signature = ToolCallSignature.from_call(tool_name, args)
+        if failed is None:
+            failed, _ = classify_tool_failure(tool_name, result)
+
+        if failed:
+            exact_count = self._exact_failure_counts.get(signature, 0) + 1
+            self._exact_failure_counts[signature] = exact_count
+            self._no_progress.pop(signature, None)
+
+            same_count = self._same_tool_failure_counts.get(tool_name, 0) + 1
+            self._same_tool_failure_counts[tool_name] = same_count
+
+            if self.config.hard_stop_enabled and same_count >= self.config.same_tool_failure_halt_after:
+                decision = ToolGuardrailDecision(
+                    action="halt",
+                    code="same_tool_failure_halt",
+                    message=(
+                        f"Stopped {tool_name}: it failed {same_count} times this turn. "
+                        "Stop retrying the same failing tool path and choose a different approach."
+                    ),
+                    tool_name=tool_name,
+                    count=same_count,
+                    signature=signature,
+                )
+                self._halt_decision = decision
+                return decision
+
+            if self.config.warnings_enabled and exact_count >= self.config.exact_failure_warn_after:
+                return ToolGuardrailDecision(
+                    action="warn",
+                    code="repeated_exact_failure_warning",
+                    message=(
+                        f"{tool_name} has failed {exact_count} times with identical arguments. "
+                        "This looks like a loop; inspect the error and change strategy "
+                        "instead of retrying it unchanged."
+                    ),
+                    tool_name=tool_name,
+                    count=exact_count,
+                    signature=signature,
+                )
+
+            if self.config.warnings_enabled and same_count >= self.config.same_tool_failure_warn_after:
+                return ToolGuardrailDecision(
+                    action="warn",
+                    code="same_tool_failure_warning",
+                    message=(
+                        f"{tool_name} has failed {same_count} times this turn. "
+                        "This looks like a loop; change approach before retrying."
+                    ),
+                    tool_name=tool_name,
+                    count=same_count,
+                    signature=signature,
+                )
+
+            return ToolGuardrailDecision(tool_name=tool_name, count=exact_count, signature=signature)
+
+        self._exact_failure_counts.pop(signature, None)
+        self._same_tool_failure_counts.pop(tool_name, None)
+
+        if not self._is_idempotent(tool_name):
+            self._no_progress.pop(signature, None)
+            return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
+
+        result_hash = _result_hash(result)
+        previous = self._no_progress.get(signature)
+        repeat_count = 1
+        if previous is not None and previous[0] == result_hash:
+            repeat_count = previous[1] + 1
+        self._no_progress[signature] = (result_hash, repeat_count)
+
+        if self.config.warnings_enabled and repeat_count >= self.config.no_progress_warn_after:
+            return ToolGuardrailDecision(
+                action="warn",
+                code="idempotent_no_progress_warning",
+                message=(
+                    f"{tool_name} returned the same result {repeat_count} times. "
+                    "Use the result already provided or change the query instead of "
+                    "repeating it unchanged."
+                ),
+                tool_name=tool_name,
+                count=repeat_count,
+                signature=signature,
+            )
+
+        return ToolGuardrailDecision(tool_name=tool_name, count=repeat_count, signature=signature)
+
+    def _is_idempotent(self, tool_name: str) -> bool:
+        if tool_name in self.config.mutating_tools:
+            return False
+        return tool_name in self.config.idempotent_tools
+
+
+def toolguard_synthetic_result(decision: ToolGuardrailDecision) -> str:
+    """Build a synthetic role=tool content string for a blocked tool call."""
+    return json.dumps(
+        {
+            "error": decision.message,
+            "guardrail": decision.to_metadata(),
+        },
+        ensure_ascii=False,
+    )
+
+
+def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> str:
+    """Append runtime guidance to the current tool result content."""
+    if decision.action not in {"warn", "halt"} or not decision.message:
+        return result
+    label = "Tool loop hard stop" if decision.action == "halt" else "Tool loop warning"
+    suffix = (
+        f"\n\n[{label}: "
+        f"{decision.code}; count={decision.count}; {decision.message}]"
+    )
+    return (result or "") + suffix
+
+
+def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
+    return args if isinstance(args, Mapping) else {}
+
+
+def _result_hash(result: str | None) -> str:
+    parsed = safe_json_loads(result or "")
+    if parsed is not None:
+        try:
+            canonical = json.dumps(
+                parsed,
+                ensure_ascii=False,
+                sort_keys=True,
+                separators=(",", ":"),
+                default=str,
+            )
+        except TypeError:
+            canonical = str(parsed)
+    else:
+        canonical = result or ""
+    return _sha256(canonical)
+
+
+def _as_bool(value: Any, default: bool) -> bool:
+    if value is None:
+        return default
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, (int, float)):
+        return bool(value)
+    if isinstance(value, str):
+        lowered = value.strip().lower()
+        if lowered in {"1", "true", "yes", "on", "enabled"}:
+            return True
+        if lowered in {"0", "false", "no", "off", "disabled"}:
+            return False
+    return default
+
+
+def _positive_int(value: Any, default: int) -> int:
+    if value is None:
+        return default
+    try:
+        parsed = int(value)
+    except (TypeError, ValueError):
+        return default
+    return parsed if parsed >= 1 else default
+
+
+def _sha256(value: str) -> str:
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()
@@ -58,6 +58,7 @@ class AnthropicTransport(ProviderTransport):
            context_length: int | None
            base_url: str | None
            fast_mode: bool
+            drop_context_1m_beta: bool
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

@@ -73,6 +74,7 @@ class AnthropicTransport(ProviderTransport):
            context_length=params.get("context_length"),
            base_url=params.get("base_url"),
            fast_mode=params.get("fast_mode", False),
+            drop_context_1m_beta=params.get("drop_context_1m_beta", False),
        )

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
@@ -85,6 +87,9 @@ class AnthropicTransport(ProviderTransport):
        from agent.anthropic_adapter import _to_plain_data
        from agent.transports.types import ToolCall

+        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
+        _MCP_PREFIX = "mcp_"
+
        text_parts = []
        reasoning_parts = []
        reasoning_details = []
@@ -99,10 +104,13 @@ class AnthropicTransport(ProviderTransport):
                if isinstance(block_dict, dict):
                    reasoning_details.append(block_dict)
            elif block.type == "tool_use":
+                name = block.name
+                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
+                    name = name[len(_MCP_PREFIX):]
                tool_calls.append(
                    ToolCall(
                        id=block.id,
-                        name=block.name,
+                        name=name,
                        arguments=json.dumps(block.input),
                    )
                )
@@ -20,15 +20,22 @@ from agent.transports.types import NormalizedResponse, ToolCall, Usage


 def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
-    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
-
-    Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
-    They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
-    then request thought parts with ``includeThoughts`` enabled.
-    """
+    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
    if reasoning_config is None or not isinstance(reasoning_config, dict):
        return None

+    normalized_model = (model or "").strip().lower()
+    if normalized_model.startswith("google/"):
+        normalized_model = normalized_model.split("/", 1)[1]
+
+    # ``thinking_config`` is a Gemini-only request parameter. The same
+    # ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
+    # those reject the field with HTTP 400 "Unknown name 'thinking_config':
+    # Cannot find field" — including the polite ``{"includeThoughts": False}``
+    # form. Omit the field entirely on non-Gemini models. (#17426)
+    if not normalized_model.startswith("gemini"):
+        return None
+
    if reasoning_config.get("enabled") is False:
        # Gemini can hide thought parts even when internal thinking still
        # happens; omit thinkingLevel to avoid model-specific validation quirks.
@@ -39,9 +46,6 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
        return {"includeThoughts": False}

    thinking_config: Dict[str, Any] = {"includeThoughts": True}
-    normalized_model = (model or "").strip().lower()
-    if normalized_model.startswith("google/"):
-        normalized_model = normalized_model.split("/", 1)[1]

    # Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
    # coarse effort levels. ``includeThoughts`` alone is enough to surface
@@ -71,6 +75,30 @@ def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) ->
    return thinking_config


+def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
+    """Convert Gemini thinking config keys to the OpenAI-compat field names."""
+    if not isinstance(config, dict) or not config:
+        return None
+
+    translated: Dict[str, Any] = {}
+    if isinstance(config.get("includeThoughts"), bool):
+        translated["include_thoughts"] = config["includeThoughts"]
+    if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
+        translated["thinking_level"] = config["thinkingLevel"].strip().lower()
+    if isinstance(config.get("thinkingBudget"), (int, float)):
+        translated["thinking_budget"] = int(config["thinkingBudget"])
+    return translated or None
+
+
+def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
+    normalized = str(base_url or "").strip().rstrip("/").lower()
+    if not normalized:
+        return False
+    if "generativelanguage.googleapis.com" not in normalized:
+        return False
+    return normalized.endswith("/openai")
+
+
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@@ -309,6 +337,7 @@ class ChatCompletionsTransport(ProviderTransport):
        is_nous = params.get("is_nous", False)
        is_github_models = params.get("is_github_models", False)
        provider_name = str(params.get("provider_name") or "").strip().lower()
+        base_url = params.get("base_url")

        provider_prefs = params.get("provider_preferences")
        if provider_prefs and is_openrouter:
@@ -362,7 +391,19 @@ class ChatCompletionsTransport(ProviderTransport):
        if is_qwen:
            extra_body["vl_high_resolution_images"] = True

-        if provider_name in {"gemini", "google-gemini-cli"}:
+        if provider_name == "gemini":
+            raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if _is_gemini_openai_compat_base_url(base_url):
+                thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
+                if thinking_config:
+                    openai_compat_extra = extra_body.get("extra_body", {})
+                    google_extra = openai_compat_extra.get("google", {})
+                    google_extra["thinking_config"] = thinking_config
+                    openai_compat_extra["google"] = google_extra
+                    extra_body["extra_body"] = openai_compat_extra
+            elif raw_thinking_config:
+                extra_body["thinking_config"] = raw_thinking_config
+        elif provider_name == "google-gemini-cli":
            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
            if thinking_config:
                extra_body["thinking_config"] = thinking_config
@@ -436,9 +477,13 @@ class ChatCompletionsTransport(ProviderTransport):
        # so keep them apart in provider_data rather than merging.
        reasoning = getattr(msg, "reasoning", None)
        reasoning_content = getattr(msg, "reasoning_content", None)
+        if reasoning_content is None and hasattr(msg, "model_extra"):
+            model_extra = getattr(msg, "model_extra", None) or {}
+            if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
+                reasoning_content = model_extra["reasoning_content"]

        provider_data: Dict[str, Any] = {}
-        if reasoning_content:
+        if reasoning_content is not None:
            provider_data["reasoning_content"] = reasoning_content
        rd = getattr(msg, "reasoning_details", None)
        if rd:
@@ -359,6 +359,25 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://aws.amazon.com/bedrock/pricing/",
        pricing_version="bedrock-pricing-2026-04",
    ),
+    # MiniMax
+    (
+        "minimax",
+        "minimax-m2.7",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.30"),
+        output_cost_per_million=Decimal("1.20"),
+        source="official_docs_snapshot",
+        pricing_version="minimax-pricing-2026-04",
+    ),
+    (
+        "minimax-cn",
+        "minimax-m2.7",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.30"),
+        output_cost_per_million=Decimal("1.20"),
+        source="official_docs_snapshot",
+        pricing_version="minimax-pricing-2026-04",
+    ),
 }


@@ -400,6 +419,8 @@ def resolve_billing_route(
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name == "openai":
        return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
+    if provider_name in {"minimax", "minimax-cn"}:
+        return BillingRoute(provider=provider_name, model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name in {"custom", "local"} or (base and "localhost" in base):
        return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
    return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
@@ -180,6 +180,11 @@ terminal:
 #   lifetime_seconds: 300
 #   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 #   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
+#   # Optional: run the container as your host user's uid:gid so files written
+#   # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
+#   # caps too since no gosu privilege drop is needed. Leave off if your
+#   # chosen docker_image expects to start as root.
+#   docker_run_as_host_user: true
 #   # Optional: explicitly forward selected env vars into Docker.
 #   # These values come from your current shell first, then ~/.hermes/.env.
 #   # Warning: anything forwarded here is visible to commands run in the container.
@@ -284,6 +289,25 @@ browser:
  # after this period of no activity between agent loops (default: 120 = 2 minutes)
  inactivity_timeout: 120

+# =============================================================================
+# Tool Loop Guardrails
+# =============================================================================
+# Soft warnings are enabled by default. They append guidance to repeated failed
+# or non-progressing tool results but still let the tool execute. Hard stops are
+# opt-in circuit breakers for autonomous/cron sessions where stopping a loop is
+# preferable to spending the full iteration budget.
+tool_loop_guardrails:
+  warnings_enabled: true
+  hard_stop_enabled: false
+  warn_after:
+    exact_failure: 2
+    same_tool_failure: 3
+    idempotent_no_progress: 2
+  hard_stop_after:
+    exact_failure: 5
+    same_tool_failure: 8
+    idempotent_no_progress: 5
+
 # =============================================================================
 # Context Compression (Auto-shrinks long conversations)
 # =============================================================================
@@ -565,7 +589,7 @@ agent:
 #   - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
 #   - A list of individual toolsets to compose your own (see list below)
 #
-# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot
+# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot, teams
 #
 # Examples:
 #
@@ -595,6 +619,7 @@ agent:
 #   signal:        hermes-signal         (same as telegram)
 #   homeassistant: hermes-homeassistant  (same as telegram)
 #   qqbot:            hermes-qqbot            (same as telegram)
+#   teams:            hermes-teams            (same as telegram)
 #
 platform_toolsets:
  cli: [hermes-cli]
@@ -606,6 +631,7 @@ platform_toolsets:
  homeassistant: [hermes-homeassistant]
  qqbot: [hermes-qqbot]
  yuanbao: [hermes-yuanbao]
+  teams: [hermes-teams]

 # =============================================================================
 # Gateway Platform Settings
@@ -927,7 +953,7 @@ display:
  #     agent_name: "My Agent"               # Banner title and branding
  #     welcome: "Welcome message"           # Shown at CLI startup
  #     response_label: " ⚔ Agent "         # Response box header label
-  #     prompt_symbol: "⚔ ❯ "              # Prompt symbol
+  #     prompt_symbol: "⚔"                  # Prompt symbol (bare token; renderers add trailing space)
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default
@@ -313,13 +313,21 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
    elif schedule["kind"] == "cron":
        if not HAS_CRONITER:
            logger.warning(
-                "Cannot compute next run for cron schedule %r: 'croniter' "
-                "is not installed. Install the 'cron' extra (pip install "
-                "'hermes-agent[cron]') to re-enable recurring cron jobs.",
+                "Cannot compute next run for cron schedule %r: 'croniter' is "
+                "not installed. croniter is a core dependency as of v0.9.x; "
+                "reinstall hermes-agent or run 'pip install croniter' in your "
+                "runtime env.",
                schedule.get("expr"),
            )
            return None
-        cron = croniter(schedule["expr"], now)
+        # Use last_run_at as the croniter base when available, consistent
+        # with interval jobs.  This ensures that after a crash/restart,
+        # the next run is anchored to the actual last execution time
+        # rather than to an arbitrary restart time.
+        base_time = now
+        if last_run_at:
+            base_time = _ensure_aware(datetime.fromisoformat(last_run_at))
+        cron = croniter(schedule["expr"], base_time)
        next_run = cron.get_next(datetime)
        return next_run.isoformat()

@@ -874,3 +882,121 @@ def save_job_output(job_id: str, output: str):
        raise
    
    return output_file
+
+
+# =============================================================================
+# Skill reference rewriting (curator integration)
+# =============================================================================
+
+def rewrite_skill_refs(
+    consolidated: Optional[Dict[str, str]] = None,
+    pruned: Optional[List[str]] = None,
+) -> Dict[str, Any]:
+    """Rewrite cron job skill references after a curator consolidation pass.
+
+    When the curator consolidates a skill X into umbrella Y (or archives X
+    as pruned), any cron job that lists ``X`` in its ``skills`` field will
+    fail to load ``X`` at run time — the scheduler logs a warning and
+    skips the skill, so the job runs without the instructions it was
+    scheduled to follow. See cron/scheduler.py where ``skill_view`` is
+    called per skill name.
+
+    This function repairs cron jobs in-place:
+
+    - A skill listed in ``consolidated`` is replaced with its umbrella
+      target (the ``into`` value). If the umbrella is already in the
+      job's skill list, the stale name is dropped without duplication.
+    - A skill listed in ``pruned`` is dropped outright — there is no
+      forwarding target.
+    - Ordering and other skills in the list are preserved.
+    - The legacy ``skill`` field is set to the first remaining skill, or ``None``.
+
+    Args:
+        consolidated: mapping of ``old_skill_name -> umbrella_skill_name``.
+        pruned: list of skill names that were archived with no forwarding
+            target.
+
+    Returns a report dict::
+
+        {
+            "rewrites": [
+                {
+                    "job_id": ...,
+                    "job_name": ...,
+                    "before": [...],
+                    "after": [...],
+                    "mapped": {"old": "new", ...},
+                    "dropped": ["old", ...],
+                },
+                ...
+            ],
+            "jobs_updated": N,
+            "jobs_scanned": M,
+        }
+
+    Best-effort: exceptions from loading/saving propagate to the caller so
+    tests can assert behaviour; the curator invocation site wraps this
+    call in a try/except so a failure here never breaks the curator.
+    """
+    consolidated = dict(consolidated or {})
+    pruned_set = set(pruned or [])
+    # A skill listed in both wins as "consolidated" — it has a target,
+    # which is the more useful of the two outcomes.
+    pruned_set -= set(consolidated.keys())
+
+    if not consolidated and not pruned_set:
+        return {"rewrites": [], "jobs_updated": 0, "jobs_scanned": 0}
+
+    with _jobs_file_lock:
+        jobs = load_jobs()
+        rewrites: List[Dict[str, Any]] = []
+        changed = False
+
+        for job in jobs:
+            skills_before = _normalize_skill_list(job.get("skill"), job.get("skills"))
+            if not skills_before:
+                continue
+
+            mapped: Dict[str, str] = {}
+            dropped: List[str] = []
+            new_skills: List[str] = []
+
+            for name in skills_before:
+                if name in consolidated:
+                    target = consolidated[name]
+                    mapped[name] = target
+                    if target and target not in new_skills:
+                        new_skills.append(target)
+                elif name in pruned_set:
+                    dropped.append(name)
+                else:
+                    if name not in new_skills:
+                        new_skills.append(name)
+
+            if not mapped and not dropped:
+                continue
+
+            job["skills"] = new_skills
+            job["skill"] = new_skills[0] if new_skills else None
+            changed = True
+
+            rewrites.append({
+                "job_id": job.get("id"),
+                "job_name": job.get("name") or job.get("id"),
+                "before": list(skills_before),
+                "after": list(new_skills),
+                "mapped": mapped,
+                "dropped": dropped,
+            })
+
+        if changed:
+            save_jobs(jobs)
+            logger.info(
+                "Curator rewrote skill references in %d cron job(s)", len(rewrites)
+            )
+
+        return {
+            "rewrites": rewrites,
+            "jobs_updated": len(rewrites),
+            "jobs_scanned": len(jobs),
+        }
@@ -233,12 +233,32 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
    }


+def _normalize_deliver_value(deliver) -> str:
+    """Normalize a stored/submitted ``deliver`` value to its canonical string form.
+
+    The contract is that ``deliver`` is a string (``"local"``, ``"origin"``,
+    ``"telegram"``, ``"telegram:-1001:17"``, or comma-separated combinations).
+    Historically some callers — MCP clients passing an array, direct edits of
+    ``jobs.json``, or stale code paths — have stored a list/tuple like
+    ``["telegram"]``.  ``str(["telegram"])`` would serialize to the literal
+    string ``"['telegram']"``, which is not a known platform and fails
+    resolution silently.  Flatten lists/tuples into a comma-separated string
+    (skipping ``None``/blank entries).  Returns ``"local"`` for anything falsy.
+    """
+    if not deliver:
+        return "local"
+    if isinstance(deliver, (list, tuple)):
+        parts = [str(p).strip() for p in deliver if p is not None and str(p).strip()]
+        return ",".join(parts) if parts else "local"
+    return str(deliver)
+
+
 def _resolve_delivery_targets(job: dict) -> List[dict]:
    """Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
-    deliver = job.get("deliver", "local")
+    deliver = _normalize_deliver_value(job.get("deliver", "local"))
    if deliver == "local":
        return []
-    parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
+    parts = [p.strip() for p in deliver.split(",") if p.strip()]
    seen = set()
    targets = []
    for part in parts:
@@ -257,13 +277,21 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
    return targets[0] if targets else None


-# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
-_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
+# Media extension sets — audio routing is centralized in gateway.platforms.base
+# via should_send_media_as_audio() so Telegram-specific rules stay in one place.
 _VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
 _IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'})


-def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None:
+def _send_media_via_adapter(
+    adapter,
+    chat_id: str,
+    media_files: list,
+    metadata: dict | None,
+    loop,
+    job: dict,
+    platform=None,
+) -> None:
    """Send extracted MEDIA files as native platform attachments via a live adapter.

    Routes each file to the appropriate adapter method (send_voice, send_image_file,
@@ -272,10 +300,13 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
    """
    from pathlib import Path

+    from gateway.platforms.base import should_send_media_as_audio
+
    for media_path, _is_voice in media_files:
        try:
            ext = Path(media_path).suffix.lower()
-            if ext in _AUDIO_EXTS:
+            route_platform = platform if platform is not None else getattr(adapter, "platform", None)
+            if should_send_media_as_audio(route_platform, ext, is_voice=_is_voice):
                coro = adapter.send_voice(chat_id=chat_id, audio_path=media_path, metadata=metadata)
            elif ext in _VIDEO_EXTS:
                coro = adapter.send_video(chat_id=chat_id, video_path=media_path, metadata=metadata)
@@ -321,27 +352,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
    from tools.send_message_tool import _send_to_platform
    from gateway.config import load_gateway_config, Platform

-    platform_map = {
-        "telegram": Platform.TELEGRAM,
-        "discord": Platform.DISCORD,
-        "slack": Platform.SLACK,
-        "whatsapp": Platform.WHATSAPP,
-        "signal": Platform.SIGNAL,
-        "matrix": Platform.MATRIX,
-        "mattermost": Platform.MATTERMOST,
-        "homeassistant": Platform.HOMEASSISTANT,
-        "dingtalk": Platform.DINGTALK,
-        "feishu": Platform.FEISHU,
-        "wecom": Platform.WECOM,
-        "wecom_callback": Platform.WECOM_CALLBACK,
-        "weixin": Platform.WEIXIN,
-        "email": Platform.EMAIL,
-        "sms": Platform.SMS,
-        "bluebubbles": Platform.BLUEBUBBLES,
-        "qqbot": Platform.QQBOT,
-        "yuanbao": Platform.YUANBAO,
-    }
-
    # Optionally wrap the content with a header/footer so the user knows this
    # is a cron delivery.  Wrapping is on by default; set cron.wrap_response: false
    # in config.yaml for clean output.
@@ -398,13 +408,23 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                job["id"], platform_name, chat_id, thread_id,
            )

-        platform = platform_map.get(platform_name.lower())
-        if not platform:
+        # Built-in names resolve to their enum member; plugin platform names
+        # create dynamic members via Platform._missing_().
+        try:
+            platform = Platform(platform_name.lower())
+        except (ValueError, KeyError):
            msg = f"unknown platform '{platform_name}'"
            logger.warning("Job '%s': %s", job["id"], msg)
            delivery_errors.append(msg)
            continue

+        pconfig = config.platforms.get(platform)
+        if not pconfig or not pconfig.enabled:
+            msg = f"platform '{platform_name}' not configured/enabled"
+            logger.warning("Job '%s': %s", job["id"], msg)
+            delivery_errors.append(msg)
+            continue
+
        # Prefer the live adapter when the gateway is running — this supports E2EE
        # rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
        runtime_adapter = (adapters or {}).get(platform)
@@ -435,7 +455,15 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option

                # Send extracted media files as native attachments via the live adapter
                if adapter_ok and media_files:
-                    _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)
+                    _send_media_via_adapter(
+                        runtime_adapter,
+                        chat_id,
+                        media_files,
+                        send_metadata,
+                        loop,
+                        job,
+                        platform=platform,
+                    )

                if adapter_ok:
                    logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
@@ -447,13 +475,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                )

        if not delivered:
-            pconfig = config.platforms.get(platform)
-            if not pconfig or not pconfig.enabled:
-                msg = f"platform '{platform_name}' not configured/enabled"
-                logger.warning("Job '%s': %s", job["id"], msg)
-                delivery_errors.append(msg)
-                continue
-
            # Standalone path: run the async send in a fresh event loop (safe from any thread)
            coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
            try:
@@ -840,6 +861,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        chat_id=str(origin["chat_id"]) if origin else "",
        chat_name=origin.get("chat_name", "") if origin else "",
    )
+    _cron_delivery_vars = (
+        "HERMES_CRON_AUTO_DELIVER_PLATFORM",
+        "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
+        "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
+    )
+    for _var_name in _cron_delivery_vars:
+        _VAR_MAP[_var_name].set("")

    # Per-job working directory.  When set (and validated at create/update
    # time), we point TERMINAL_CWD at it so:
@@ -878,8 +906,11 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        if delivery_target:
            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
-            if delivery_target.get("thread_id") is not None:
-                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
+            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(
+                ""
+                if delivery_target.get("thread_id") is None
+                else str(delivery_target["thread_id"])
+            )

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -1013,10 +1044,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            # When a workdir is configured, inject AGENTS.md / CLAUDE.md /
-            # .cursorrules from that directory; otherwise preserve the old
-            # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
+            # Cron jobs should always inherit the user's SOUL.md identity from
+            # HERMES_HOME. When a workdir is configured, also inject project
+            # context files (AGENTS.md / CLAUDE.md / .cursorrules) from there.
+            # Without a workdir, keep cwd context discovery disabled.
            skip_context_files=not bool(_job_workdir),
+            load_soul_identity=True,
            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
@@ -1031,7 +1064,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        #
        # Uses the agent's built-in activity tracker (updated by
        # _touch_activity() on every tool call, API call, and stream delta).
-        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
+        _raw_cron_timeout = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
+        if _raw_cron_timeout:
+            try:
+                _cron_timeout = float(_raw_cron_timeout)
+            except (ValueError, TypeError):
+                logger.warning(
+                    "Invalid HERMES_CRON_TIMEOUT=%r; using default 600s",
+                    _raw_cron_timeout,
+                )
+                _cron_timeout = 600.0
+        else:
+            _cron_timeout = 600.0
        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
        _POLL_INTERVAL = 5.0
        _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
@@ -1106,6 +1150,21 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
            )

+        # If the agent itself reported failure (e.g. all retries exhausted on
+        # API errors, model abort, mid-run interrupt), do not silently mark the
+        # job as successful. run_agent populates `failed=True`/`completed=False`
+        # on these paths and may put the error into `final_response`, which
+        # would otherwise be delivered as if it were the agent's reply and the
+        # job's `last_status` set to "ok". Raise so the except handler below
+        # builds the proper failure tuple. (issue #17855)
+        if result.get("failed") is True or result.get("completed") is False:
+            _err_text = (
+                result.get("error")
+                or (result.get("final_response") or "").strip()
+                or "agent reported failure"
+            )
+            raise RuntimeError(_err_text)
+
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -1165,6 +1224,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
        # Clean up ContextVar session/delivery state for this job.
        clear_session_vars(_ctx_tokens)
+        for _var_name in _cron_delivery_vars:
+            _VAR_MAP[_var_name].set("")
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -34,6 +34,13 @@ services:
      # uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
      # - API_SERVER_HOST=0.0.0.0
      # - API_SERVER_KEY=${API_SERVER_KEY}
+      # Microsoft Teams — uncomment and fill in to enable Teams gateway.
+      # Register your bot at https://dev.botframework.com/ to get these values.
+      # - TEAMS_CLIENT_ID=${TEAMS_CLIENT_ID}
+      # - TEAMS_CLIENT_SECRET=${TEAMS_CLIENT_SECRET}
+      # - TEAMS_TENANT_ID=${TEAMS_TENANT_ID}
+      # - TEAMS_ALLOWED_USERS=${TEAMS_ALLOWED_USERS}
+      # - TEAMS_PORT=${TEAMS_PORT:-3978}
    command: ["gateway", "run"]

  dashboard:
@@ -86,6 +86,16 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
            continue
        platforms[plat_name] = _build_from_sessions(plat_name)

+    # Include plugin-registered platforms (dynamic enum members aren't in
+    # Platform.__members__, so the loop above misses them).
+    try:
+        from gateway.platform_registry import platform_registry
+        for entry in platform_registry.plugin_entries():
+            if entry.name not in _SKIP_SESSION_DISCOVERY and entry.name not in platforms:
+                platforms[entry.name] = _build_from_sessions(entry.name)
+    except Exception:
+        pass
+
    directory = {
        "updated_at": datetime.now().isoformat(),
        "platforms": platforms,
@@ -13,7 +13,7 @@ import os
 import json
 from pathlib import Path
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Any
+from typing import Dict, List, Optional, Any, Callable
 from enum import Enum

 from hermes_cli.config import get_hermes_home
@@ -36,6 +36,26 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
    return is_truthy_value(value, default=default)


+def _coerce_float(value: Any, default: float) -> float:
+    """Return ``float(value)``, or ``default`` for None, malformed or out-of-range input."""
+    if value is None:
+        return default
+    try:
+        return float(value)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: int too big for a float
+        return default
+
+
+def _coerce_int(value: Any, default: int) -> int:
+    """Return ``int(value)``, or ``default`` for None, malformed or infinite input."""
+    if value is None:
+        return default
+    try:
+        return int(value)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: int(float("inf"))
+        return default
+
+
 def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
    """Normalize unauthorized DM behavior to a supported value."""
    if isinstance(value, str):
@@ -45,8 +65,28 @@ def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> st
    return default


+def _normalize_notice_delivery(value: Any, default: str = "public") -> str:
+    """Return ``value`` as "public"/"private" (trimmed, lower-cased), else ``default``."""
+    if not isinstance(value, str):
+        return default
+    mode = value.strip().lower()
+    # Anything other than the two supported modes falls back to the default.
+    return mode if mode in ("public", "private") else default
+
+
+# Module-level cache for bundled platform plugin names (lives outside the
+# enum so it doesn't become an accidental enum member).
+_Platform__bundled_plugin_names: Optional[set] = None
+
+
 class Platform(Enum):
-    """Supported messaging platforms."""
+    """Supported messaging platforms.
+
+    Built-in platforms have explicit members.  Plugin platforms use dynamic
+    members created on-demand by ``_missing_()`` so that
+    ``Platform("irc")`` works without modifying this enum.  Dynamic members
+    are cached in ``_value2member_map_`` for identity-stable comparisons.
+    """
    LOCAL = "local"
    TELEGRAM = "telegram"
    DISCORD = "discord"
@@ -68,6 +108,76 @@ class Platform(Enum):
    BLUEBUBBLES = "bluebubbles"
    QQBOT = "qqbot"
    YUANBAO = "yuanbao"
+    @classmethod
+    def _missing_(cls, value):
+        """Accept unknown platform names only for known plugin adapters.
+
+        Creates a pseudo-member cached in ``_value2member_map_`` and
+        ``_member_map_`` so ``Platform("irc") is Platform("irc")`` holds.
+        Any other value returns ``None`` (``Enum`` then raises ``ValueError``).
+        """
+        if not isinstance(value, str) or not value.strip():
+            return None
+        # Normalise to lowercase to avoid case mismatches in config
+        value = value.strip().lower()
+        # Check cache first (another call may have created it already)
+        if value in cls._value2member_map_:
+            return cls._value2member_map_[value]
+
+        # Only create pseudo-members for bundled plugin platforms (discovered
+        # via filesystem scan) or runtime-registered plugin platforms.
+        global _Platform__bundled_plugin_names
+        if _Platform__bundled_plugin_names is None:
+            _Platform__bundled_plugin_names = cls._scan_bundled_plugin_platforms()
+        if value in _Platform__bundled_plugin_names:
+            pseudo = object.__new__(cls)
+            pseudo._value_ = value
+            pseudo._name_ = value.upper().replace("-", "_").replace(" ", "_")
+            cls._value2member_map_[value] = pseudo
+            cls._member_map_[pseudo._name_] = pseudo
+            return pseudo
+
+        # Runtime-registered plugins (e.g. user-installed, discovered after
+        # the enum was defined).
+        try:
+            from gateway.platform_registry import platform_registry
+            if platform_registry.is_registered(value):
+                pseudo = object.__new__(cls)
+                pseudo._value_ = value
+                pseudo._name_ = value.upper().replace("-", "_").replace(" ", "_")
+                cls._value2member_map_[value] = pseudo
+                cls._member_map_[pseudo._name_] = pseudo
+                return pseudo
+        except Exception:
+            pass
+
+        return None
+
+    @classmethod
+    def _scan_bundled_plugin_platforms(cls) -> set:
+        """Collect lower-cased directory names of bundled platform plugins.
+
+        A child of ``plugins/platforms/`` counts when it is a directory holding
+        ``__init__.py`` plus ``plugin.yaml`` or ``plugin.yml``.  Scan errors are
+        swallowed; whatever was found before the error is still returned.
+        """
+        found: set = set()
+        try:
+            root = Path(__file__).parent.parent / "plugins" / "platforms"
+            for entry in (root.iterdir() if root.is_dir() else ()):
+                if not (entry.is_dir() and (entry / "__init__.py").exists()):
+                    continue
+                if (entry / "plugin.yaml").exists() or (entry / "plugin.yml").exists():
+                    found.add(entry.name.lower())
+        except Exception:
+            # Best-effort discovery: an unreadable tree must not break enum lookup.
+            pass
+        return found
+
+
+# Snapshot of built-in platform values before any dynamic _missing_ lookups.
+# Used to distinguish real platforms from arbitrary strings.
+_BUILTIN_PLATFORM_VALUES = frozenset(m.value for m in Platform.__members__.values())


@dataclass
@@ -220,17 +330,55 @@ class StreamingConfig:
        if not data:
            return cls()
        return cls(
-            enabled=data.get("enabled", False),
+            enabled=_coerce_bool(data.get("enabled"), False),
            transport=data.get("transport", "edit"),
-            edit_interval=float(data.get("edit_interval", 1.0)),
-            buffer_threshold=int(data.get("buffer_threshold", 40)),
+            edit_interval=_coerce_float(data.get("edit_interval"), 1.0),
+            buffer_threshold=_coerce_int(data.get("buffer_threshold"), 40),
            cursor=data.get("cursor", " ▉"),
-            fresh_final_after_seconds=float(
-                data.get("fresh_final_after_seconds", 60.0)
+            fresh_final_after_seconds=_coerce_float(
+                data.get("fresh_final_after_seconds"), 60.0
            ),
        )


+# -----------------------------------------------------------------------------
+# Built-in platform connection checkers
+# -----------------------------------------------------------------------------
+# Each callable receives a ``PlatformConfig`` and returns ``True`` when the
+# platform is sufficiently configured to be considered "connected".  Platforms
+# that rely on the generic ``token or api_key`` check (Telegram, Discord,
+# Slack, Matrix, Mattermost, HomeAssistant) do not need an entry here.
+_PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] = {
+    Platform.WEIXIN: lambda cfg: bool(
+        cfg.extra.get("account_id") and (cfg.token or cfg.extra.get("token"))
+    ),
+    Platform.WHATSAPP: lambda cfg: True,  # bridge handles auth
+    Platform.SIGNAL: lambda cfg: bool(cfg.extra.get("http_url")),
+    Platform.EMAIL: lambda cfg: bool(cfg.extra.get("address")),
+    Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
+    Platform.API_SERVER: lambda cfg: True,
+    Platform.WEBHOOK: lambda cfg: True,
+    Platform.FEISHU: lambda cfg: bool(cfg.extra.get("app_id")),
+    Platform.WECOM: lambda cfg: bool(cfg.extra.get("bot_id")),
+    Platform.WECOM_CALLBACK: lambda cfg: bool(
+        cfg.extra.get("corp_id") or cfg.extra.get("apps")
+    ),
+    Platform.BLUEBUBBLES: lambda cfg: bool(
+        cfg.extra.get("server_url") and cfg.extra.get("password")
+    ),
+    Platform.QQBOT: lambda cfg: bool(
+        cfg.extra.get("app_id") and cfg.extra.get("client_secret")
+    ),
+    Platform.YUANBAO: lambda cfg: bool(
+        cfg.extra.get("app_id") and cfg.extra.get("app_secret")
+    ),
+    Platform.DINGTALK: lambda cfg: bool(
+        (cfg.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID"))
+        and (cfg.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET"))
+    ),
+}
+
+
@dataclass
 class GatewayConfig:
    """
@@ -284,61 +432,43 @@ class GatewayConfig:
        for platform, config in self.platforms.items():
            if not config.enabled:
                continue
-            # Weixin requires both a token and an account_id
-            if platform == Platform.WEIXIN:
-                if config.extra.get("account_id") and (config.token or config.extra.get("token")):
-                    connected.append(platform)
-                continue
-            # Platforms that use token/api_key auth
-            if config.token or config.api_key:
+            if self._is_platform_connected(platform, config):
                connected.append(platform)
-            # WhatsApp uses enabled flag only (bridge handles auth)
-            elif platform == Platform.WHATSAPP:
-                connected.append(platform)
-            # Signal uses extra dict for config (http_url + account)
-            elif platform == Platform.SIGNAL and config.extra.get("http_url"):
-                connected.append(platform)
-            # Email uses extra dict for config (address + imap_host + smtp_host)
-            elif platform == Platform.EMAIL and config.extra.get("address"):
-                connected.append(platform)
-            # SMS uses api_key (Twilio auth token) — SID checked via env
-            elif platform == Platform.SMS and os.getenv("TWILIO_ACCOUNT_SID"):
-                connected.append(platform)
-            # API Server uses enabled flag only (no token needed)
-            elif platform == Platform.API_SERVER:
-                connected.append(platform)
-            # Webhook uses enabled flag only (secrets are per-route)
-            elif platform == Platform.WEBHOOK:
-                connected.append(platform)
-            # Feishu uses extra dict for app credentials
-            elif platform == Platform.FEISHU and config.extra.get("app_id"):
-                connected.append(platform)
-            # WeCom bot mode uses extra dict for bot credentials
-            elif platform == Platform.WECOM and config.extra.get("bot_id"):
-                connected.append(platform)
-            # WeCom callback mode uses corp_id or apps list
-            elif platform == Platform.WECOM_CALLBACK and (
-                config.extra.get("corp_id") or config.extra.get("apps")
-            ):
-                connected.append(platform)
-            # BlueBubbles uses extra dict for local server config
-            elif platform == Platform.BLUEBUBBLES and config.extra.get("server_url") and config.extra.get("password"):
-                connected.append(platform)
-            # QQBot uses extra dict for app credentials
-            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
-                connected.append(platform)
-            # Yuanbao uses extra dict for app credentials
-            elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
-                connected.append(platform)
-            # DingTalk uses client_id/client_secret from config.extra or env vars
-            elif platform == Platform.DINGTALK and (
-                config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
-            ) and (
-                config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
-            ):
-                connected.append(platform)
-        
        return connected
+
+    def _is_platform_connected(self, platform: Platform, config: PlatformConfig) -> bool:
+        """Check whether a single platform is sufficiently configured."""
+        # Weixin requires both a token and an account_id (checked first so
+        # the generic token branch doesn't let it through without account_id).
+        if platform == Platform.WEIXIN:
+            return bool(
+                config.extra.get("account_id")
+                and (config.token or config.extra.get("token"))
+            )
+
+        # Generic token/api_key auth covers Telegram, Discord, Slack, etc.
+        if config.token or config.api_key:
+            return True
+
+        # Platform-specific check
+        checker = _PLATFORM_CONNECTED_CHECKERS.get(platform)
+        if checker is not None:
+            return checker(config)
+
+        # Plugin-registered platforms
+        try:
+            from gateway.platform_registry import platform_registry
+            entry = platform_registry.get(platform.value)
+            if entry:
+                if entry.is_connected is not None:
+                    return entry.is_connected(config)
+                if entry.validate_config is not None:
+                    return entry.validate_config(config)
+                return True
+        except Exception:
+            pass  # Registry not yet initialised during early import
+
+        return False
    
    def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
        """Get the home channel for a platform."""
@@ -471,6 +601,17 @@ class GatewayConfig:
                )
        return self.unauthorized_dm_behavior

+    def get_notice_delivery(self, platform: Optional[Platform] = None) -> str:
+        """Return the effective notice-delivery mode for a platform."""
+        if platform:
+            platform_cfg = self.platforms.get(platform)
+            if platform_cfg and "notice_delivery" in platform_cfg.extra:
+                return _normalize_notice_delivery(
+                    platform_cfg.extra.get("notice_delivery"),
+                    "public",
+                )
+        return "public"
+

 def load_gateway_config() -> GatewayConfig:
    """
@@ -586,6 +727,11 @@ def load_gateway_config() -> GatewayConfig:
                        platform_cfg.get("unauthorized_dm_behavior"),
                        gw_data.get("unauthorized_dm_behavior", "pair"),
                    )
+                if "notice_delivery" in platform_cfg:
+                    bridged["notice_delivery"] = _normalize_notice_delivery(
+                        platform_cfg.get("notice_delivery"),
+                        "public",
+                    )
                if "reply_prefix" in platform_cfg:
                    bridged["reply_prefix"] = platform_cfg["reply_prefix"]
                if "reply_in_thread" in platform_cfg:
@@ -714,11 +860,21 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
-                if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
-                    gac = telegram_cfg["group_allowed_chats"]
-                    if isinstance(gac, list):
-                        gac = ",".join(str(v) for v in gac)
-                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
+                allowed_users = telegram_cfg.get("allow_from")
+                if allowed_users is not None and not os.getenv("TELEGRAM_ALLOWED_USERS"):
+                    if isinstance(allowed_users, list):
+                        allowed_users = ",".join(str(v) for v in allowed_users)
+                    os.environ["TELEGRAM_ALLOWED_USERS"] = str(allowed_users)
+                group_allowed_users = telegram_cfg.get("group_allow_from")
+                if group_allowed_users is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
+                    if isinstance(group_allowed_users, list):
+                        group_allowed_users = ",".join(str(v) for v in group_allowed_users)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(group_allowed_users)
+                group_allowed_chats = telegram_cfg.get("group_allowed_chats")
+                if group_allowed_chats is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_CHATS"):
+                    if isinstance(group_allowed_chats, list):
+                        group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
                if "disable_link_previews" in telegram_cfg:
                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                    if not isinstance(plat_data, dict):
@@ -789,6 +945,12 @@ def load_gateway_config() -> GatewayConfig:
                if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"):
                    os.environ["MATRIX_DM_MENTION_THREADS"] = str(matrix_cfg["dm_mention_threads"]).lower()

+            # Feishu settings → env vars (env vars take precedence)
+            feishu_cfg = yaml_cfg.get("feishu", {})
+            if isinstance(feishu_cfg, dict):
+                if "allow_bots" in feishu_cfg and not os.getenv("FEISHU_ALLOW_BOTS"):
+                    os.environ["FEISHU_ALLOW_BOTS"] = str(feishu_cfg["allow_bots"]).lower()
+
    except Exception as e:
        logger.warning(
            "Failed to process config.yaml — falling back to .env / gateway.json values. "
@@ -940,7 +1102,14 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
        if Platform.WHATSAPP not in config.platforms:
            config.platforms[Platform.WHATSAPP] = PlatformConfig()
        config.platforms[Platform.WHATSAPP].enabled = True
-    
+    whatsapp_home = os.getenv("WHATSAPP_HOME_CHANNEL")
+    if whatsapp_home and Platform.WHATSAPP in config.platforms:
+        config.platforms[Platform.WHATSAPP].home_channel = HomeChannel(
+            platform=Platform.WHATSAPP,
+            chat_id=whatsapp_home,
+            name=os.getenv("WHATSAPP_HOME_CHANNEL_NAME", "Home"),
+        )
+
    # Slack
    slack_token = os.getenv("SLACK_BOT_TOKEN")
    if slack_token:
@@ -1371,3 +1540,25 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            config.default_reset_policy.at_hour = int(reset_hour)
        except ValueError:
            pass
+
+    # Registry-driven enable for plugin platforms.  Built-ins have explicit
+    # blocks above; plugins expose check_fn() which is the single source of
+    # truth for "are my env vars set?".  When it returns True, ensure the
+    # platform is enabled so start() will create its adapter.
+    try:
+        from hermes_cli.plugins import discover_plugins
+        discover_plugins()  # idempotent
+        from gateway.platform_registry import platform_registry
+        for entry in platform_registry.plugin_entries():
+            try:
+                if not entry.check_fn():
+                    continue
+            except Exception as e:
+                logger.debug("check_fn for %s raised: %s", entry.name, e)
+                continue
+            platform = Platform(entry.name)
+            if platform not in config.platforms:
+                config.platforms[platform] = PlatformConfig()
+            config.platforms[platform].enabled = True
+    except Exception as e:
+        logger.debug("Plugin platform enable pass failed: %s", e)
@@ -53,9 +53,10 @@ class DeliveryTarget:
        - "telegram" → Telegram home channel
        - "telegram:123456" → specific Telegram chat
        """
-        target = target.strip().lower()
+        target_stripped = target.strip()
+        target_lower = target_stripped.lower()
        
-        if target == "origin":
+        if target_lower == "origin":
            if origin:
                return cls(
                    platform=origin.platform,
@@ -67,13 +68,14 @@ class DeliveryTarget:
                # Fallback to local if no origin
                return cls(platform=Platform.LOCAL, is_origin=True)
        
-        if target == "local":
+        if target_lower == "local":
            return cls(platform=Platform.LOCAL)
        
        # Check for platform:chat_id or platform:chat_id:thread_id format
-        if ":" in target:
-            parts = target.split(":", 2)
-            platform_str = parts[0]
+        # Use the original case for chat_id/thread_id to preserve case-sensitive IDs
+        if ":" in target_stripped:
+            parts = target_stripped.split(":", 2)
+            platform_str = parts[0].lower()  # Platform names are case-insensitive
            chat_id = parts[1] if len(parts) > 1 else None
            thread_id = parts[2] if len(parts) > 2 else None
            try:
@@ -85,7 +87,7 @@ class DeliveryTarget:
        
        # Just a platform name (use home channel)
        try:
-            platform = Platform(target)
+            platform = Platform(target_lower)
            return cls(platform=platform)
        except ValueError:
            # Unknown platform, treat as local
@@ -21,6 +21,7 @@ Errors in hooks are caught and logged but never block the main pipeline.

 import asyncio
 import importlib.util
+import sys
 from typing import Any, Callable, Dict, List, Optional

 import yaml
@@ -97,16 +98,28 @@ class HookRegistry:
                    print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
                    continue

-                # Dynamically load the handler module
+                # Dynamically load the handler module.
+                # Register in sys.modules BEFORE exec_module so Pydantic /
+                # dataclasses / typing introspection can resolve forward
+                # references (triggered by `from __future__ import annotations`
+                # in the handler). Without this, a handler that declares a
+                # Pydantic BaseModel for webhook/event payloads fails at first
+                # dispatch with "TypeAdapter ... is not fully defined".
+                module_name = f"hermes_hook_{hook_name}"
                spec = importlib.util.spec_from_file_location(
-                    f"hermes_hook_{hook_name}", handler_path
+                    module_name, handler_path
                )
                if spec is None or spec.loader is None:
                    print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
                    continue

                module = importlib.util.module_from_spec(spec)
-                spec.loader.exec_module(module)
+                sys.modules[module_name] = module
+                try:
+                    spec.loader.exec_module(module)
+                except Exception:
+                    sys.modules.pop(module_name, None)
+                    raise

                handle_fn = getattr(module, "handle", None)
                if handle_fn is None:
@@ -0,0 +1,212 @@
+"""
+Platform Adapter Registry
+
+Allows platform adapters (built-in and plugin) to self-register so the gateway
+can discover and instantiate them without hardcoded if/elif chains.
+
+Built-in adapters continue to use the existing if/elif in _create_adapter()
+for now.  Plugin adapters register here via PluginContext.register_platform()
+and are looked up first -- if nothing is found the gateway falls through to
+the legacy code path.
+
+Usage (plugin side):
+
+    from gateway.platform_registry import platform_registry, PlatformEntry
+
+    platform_registry.register(PlatformEntry(
+        name="irc",
+        label="IRC",
+        adapter_factory=lambda cfg: IRCAdapter(cfg),
+        check_fn=check_requirements,
+        validate_config=lambda cfg: bool(cfg.extra.get("server")),
+        required_env=["IRC_SERVER"],
+        install_hint="pip install irc",
+    ))
+
+Usage (gateway side):
+
+    adapter = platform_registry.create_adapter("irc", platform_config)
+"""
+
+import logging
+from dataclasses import dataclass, field
+from typing import Any, Callable, Optional
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class PlatformEntry:
+    """Metadata and factory for a single platform adapter."""
+
+    # Identifier used in config.yaml (e.g. "irc", "viber").
+    name: str
+
+    # Human-readable label (e.g. "IRC", "Viber").
+    label: str
+
+    # Factory callable: receives a PlatformConfig, returns an adapter instance.
+    # Using a factory instead of a bare class lets plugins do custom init
+    # (e.g. passing extra kwargs, wrapping in try/except).
+    adapter_factory: Callable[[Any], Any]
+
+    # Returns True when the platform's requirements (deps, env vars) are met.
+    check_fn: Callable[[], bool]
+
+    # Optional: given a PlatformConfig, is it properly configured?
+    # If None, the registry skips config validation and lets the adapter
+    # fail at connect() time with a descriptive error.
+    validate_config: Optional[Callable[[Any], bool]] = None
+
+    # Optional: given a PlatformConfig, is the platform connected/enabled?
+    # Used by ``GatewayConfig.get_connected_platforms()`` and setup UI status.
+    # If None, falls back to ``validate_config``, else treated as connected.
+    is_connected: Optional[Callable[[Any], bool]] = None
+
+    # Env vars this platform needs (for ``hermes setup`` display).
+    required_env: list = field(default_factory=list)
+
+    # Hint shown when check_fn returns False.
+    install_hint: str = ""
+
+    # Optional setup function for interactive configuration.
+    # Signature: () -> None (prompts user, saves env vars).
+    # If None, falls back to _setup_standard_platform (needs token_var + vars)
+    # or a generic "set these env vars" display.
+    setup_fn: Optional[Callable[[], None]] = None
+
+    # "builtin" or "plugin"
+    source: str = "plugin"
+
+    # Name of the plugin manifest that registered this entry (empty for
+    # built-ins).  Used by ``hermes gateway setup`` to auto-enable the
+    # owning plugin when the user configures its platform.
+    plugin_name: str = ""
+
+    # ── Auth env var names (for _is_user_authorized integration) ──
+    # E.g. "IRC_ALLOWED_USERS" — checked for comma-separated user IDs.
+    allowed_users_env: str = ""
+    # E.g. "IRC_ALLOW_ALL_USERS" — if truthy, all users authorized.
+    allow_all_env: str = ""
+
+    # ── Message limits ──
+    # Max message length for smart-chunking.  0 = no limit.
+    max_message_length: int = 0
+
+    # ── Privacy ──
+    # If True, session descriptions redact PII (phone numbers, etc.)
+    pii_safe: bool = False
+
+    # ── Display ──
+    # Emoji for CLI/gateway display (e.g. "💬")
+    emoji: str = "🔌"
+
+    # Whether this platform should appear in _UPDATE_ALLOWED_PLATFORMS
+    # (allows /update command from this platform).
+    allow_update_command: bool = True
+
+    # ── LLM guidance ──
+    # Platform hint injected into the system prompt (e.g. "You are on IRC.
+    # Do not use markdown.").  Empty string = no hint.
+    platform_hint: str = ""
+
+
+class PlatformRegistry:
+    """Central registry of platform adapters.
+
+    Thread-safe for reads (dict lookups are atomic under GIL).
+    Writes happen at startup during sequential discovery.
+    """
+
+    def __init__(self) -> None:
+        self._entries: dict[str, PlatformEntry] = {}
+
+    def register(self, entry: PlatformEntry) -> None:
+        """Register a platform adapter entry.
+
+        If an entry with the same name exists, it is replaced (last writer
+        wins -- this lets plugins override built-in adapters if desired).
+        """
+        if entry.name in self._entries:
+            prev = self._entries[entry.name]
+            logger.info(
+                "Platform '%s' re-registered (was %s, now %s)",
+                entry.name,
+                prev.source,
+                entry.source,
+            )
+        self._entries[entry.name] = entry
+        logger.debug("Registered platform adapter: %s (%s)", entry.name, entry.source)
+
+    def unregister(self, name: str) -> bool:
+        """Remove a platform entry.  Returns True if it existed."""
+        return self._entries.pop(name, None) is not None
+
+    def get(self, name: str) -> Optional[PlatformEntry]:
+        """Look up a platform entry by name."""
+        return self._entries.get(name)
+
+    def all_entries(self) -> list[PlatformEntry]:
+        """Return all registered platform entries."""
+        return list(self._entries.values())
+
+    def plugin_entries(self) -> list[PlatformEntry]:
+        """Return only plugin-registered platform entries."""
+        return [e for e in self._entries.values() if e.source == "plugin"]
+
+    def is_registered(self, name: str) -> bool:
+        return name in self._entries
+
+    def create_adapter(self, name: str, config: Any) -> Optional[Any]:
+        """Create an adapter instance for the given platform name.
+
+        Returns None if:
+        - No entry registered for *name*
+        - check_fn() returns False (missing deps)
+        - validate_config() returns False (misconfigured)
+        - The factory raises an exception
+        """
+        entry = self._entries.get(name)
+        if entry is None:
+            return None
+
+        if not entry.check_fn():
+            hint = f" ({entry.install_hint})" if entry.install_hint else ""
+            logger.warning(
+                "Platform '%s' requirements not met%s",
+                entry.label,
+                hint,
+            )
+            return None
+
+        if entry.validate_config is not None:
+            try:
+                if not entry.validate_config(config):
+                    logger.warning(
+                        "Platform '%s' config validation failed",
+                        entry.label,
+                    )
+                    return None
+            except Exception as e:
+                logger.warning(
+                    "Platform '%s' config validation error: %s",
+                    entry.label,
+                    e,
+                )
+                return None
+
+        try:
+            adapter = entry.adapter_factory(config)
+            return adapter
+        except Exception as e:
+            logger.error(
+                "Failed to create adapter for platform '%s': %s",
+                entry.label,
+                e,
+                exc_info=True,
+            )
+            return None
+
+
+# Module-level singleton
+platform_registry = PlatformRegistry()
@@ -1,9 +1,30 @@
 # Adding a New Messaging Platform

-Checklist for integrating a new messaging platform into the Hermes gateway.
-Use this as a reference when building a new adapter — every item here is a
-real integration point that exists in the codebase. Missing any of them will
-cause broken functionality, missing features, or inconsistent behavior.
+There are two ways to add a platform to the Hermes gateway:
+
+## Plugin Path (Recommended for Community/Third-Party)
+
+Create a plugin directory in `~/.hermes/plugins/` with a `plugin.yaml` and
+`adapter.py`.  The adapter inherits from `BasePlatformAdapter` and registers
+via `ctx.register_platform()` in the `register(ctx)` entry point.  This
+requires **zero changes to core Hermes code**.
+
+The plugin system automatically handles: adapter creation, config parsing,
+user authorization, cron delivery, send_message routing, system prompt hints,
+status display, gateway setup, and more.
+
+See `plugins/platforms/irc/` for a complete reference implementation, and
+`website/docs/developer-guide/adding-platform-adapters.md` for the full
+plugin guide with code examples.
+
+---
+
+## Built-in Path (Core Contributors Only)
+
+Checklist for integrating a platform directly into the Hermes core.
+Use this as a reference when building a built-in adapter — every item here
+is a real integration point. Missing any of them will cause broken
+functionality, missing features, or inconsistent behavior.

 ---

@@ -7,7 +7,9 @@ Exposes an HTTP server with endpoints:
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
 - GET  /v1/models                  — lists hermes-agent as an available model
+- GET  /v1/capabilities            — machine-readable API capabilities for external UIs
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
+- GET  /v1/runs/{run_id}           — retrieve current run status
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
 - POST /v1/runs/{run_id}/stop    — interrupt a running agent
 - GET  /health                     — health check
@@ -590,6 +592,8 @@ class APIServerAdapter(BasePlatformAdapter):
        # Active run agent/task references for stop support
        self._active_run_agents: Dict[str, Any] = {}
        self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
+        # Pollable run status for dashboards and external control-plane UIs.
+        self._run_statuses: Dict[str, Dict[str, Any]] = {}
        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
@@ -808,6 +812,51 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

+    async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
+        """GET /v1/capabilities — advertise the stable API surface.
+
+        External UIs and orchestrators use this endpoint to discover the API
+        server's plugin-safe contract without scraping docs or assuming that
+        every Hermes version exposes the same endpoints.
+        """
+        auth_err = self._check_auth(request)
+        if auth_err:
+            return auth_err
+
+        return web.json_response({
+            "object": "hermes.api_server.capabilities",
+            "platform": "hermes-agent",
+            "model": self._model_name,
+            "auth": {
+                "type": "bearer",
+                "required": bool(self._api_key),
+            },
+            "features": {
+                "chat_completions": True,
+                "chat_completions_streaming": True,
+                "responses_api": True,
+                "responses_streaming": True,
+                "run_submission": True,
+                "run_status": True,
+                "run_events_sse": True,
+                "run_stop": True,
+                "tool_progress_events": True,
+                "session_continuity_header": "X-Hermes-Session-Id",
+                "cors": bool(self._cors_origins),
+            },
+            "endpoints": {
+                "health": {"method": "GET", "path": "/health"},
+                "health_detailed": {"method": "GET", "path": "/health/detailed"},
+                "models": {"method": "GET", "path": "/v1/models"},
+                "chat_completions": {"method": "POST", "path": "/v1/chat/completions"},
+                "responses": {"method": "POST", "path": "/v1/responses"},
+                "runs": {"method": "POST", "path": "/v1/runs"},
+                "run_status": {"method": "GET", "path": "/v1/runs/{run_id}"},
+                "run_events": {"method": "GET", "path": "/v1/runs/{run_id}/events"},
+                "run_stop": {"method": "POST", "path": "/v1/runs/{run_id}/stop"},
+            },
+        })
+
    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
@@ -932,39 +981,62 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is not None:
                    _stream_q.put(delta)

-            def _on_tool_progress(event_type, name, preview, args, **kwargs):
-                """Send tool progress as a separate SSE event.
+            # Track which tool_call_ids we've emitted a "running" lifecycle
+            # event for, so a "completed" event without a matching "running"
+            # (e.g. internal/filtered tools) is silently dropped instead of
+            # producing an orphaned event clients can't correlate.
+            _started_tool_call_ids: set[str] = set()

-                Previously, progress markers like ``⏰ list`` were injected
-                directly into ``delta.content``.  OpenAI-compatible frontends
-                (Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
-                the assistant message and send it back on subsequent requests.
-                After enough turns the model learns to *emit* the markers as
-                plain text instead of issuing real tool calls — silently
-                hallucinating tool results.  See #6972.
+            def _on_tool_start(tool_call_id, function_name, function_args):
+                """Emit ``hermes.tool.progress`` with ``status: running``.

-                The fix: push a tagged tuple ``("__tool_progress__", payload)``
-                onto the stream queue.  The SSE writer emits it as a custom
-                ``event: hermes.tool.progress`` line that compliant frontends
-                can render for UX but will *not* persist into conversation
-                history.  Clients that don't understand the custom event type
-                silently ignore it per the SSE specification.
+                Replaces the old ``tool_progress_callback("tool.started",
+                ...)`` emit so SSE consumers receive a single event per
+                tool start, carrying both the legacy ``tool``/``emoji``/
+                ``label`` payload (for #6972 frontends) and the new
+                ``toolCallId``/``status`` correlation fields (#16588).
+
+                Skips tools whose names start with ``_`` so internal
+                events (``_thinking``, …) stay off the wire — matching
+                the prior ``_on_tool_progress`` filter exactly.
                """
-                if event_type != "tool.started":
+                if not tool_call_id or function_name.startswith("_"):
                    return
-                if name.startswith("_"):
-                    return
-                from agent.display import get_tool_emoji
-                emoji = get_tool_emoji(name)
-                label = preview or name
+                _started_tool_call_ids.add(tool_call_id)
+                from agent.display import build_tool_preview, get_tool_emoji
+                label = build_tool_preview(function_name, function_args) or function_name
                _stream_q.put(("__tool_progress__", {
-                    "tool": name,
-                    "emoji": emoji,
+                    "tool": function_name,
+                    "emoji": get_tool_emoji(function_name),
                    "label": label,
+                    "toolCallId": tool_call_id,
+                    "status": "running",
+                }))
+
+            def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
+                """Emit the matching ``status: completed`` event.
+
+                Dropped if the start was filtered (internal tool, missing
+                id, or never seen) so clients never get an orphaned
+                ``completed`` they can't correlate to a prior ``running``.
+                """
+                if not tool_call_id or tool_call_id not in _started_tool_call_ids:
+                    return
+                _started_tool_call_ids.discard(tool_call_id)
+                _stream_q.put(("__tool_progress__", {
+                    "tool": function_name,
+                    "toolCallId": tool_call_id,
+                    "status": "completed",
                }))

            # Start agent in background.  agent_ref is a mutable container
            # so the SSE writer can interrupt the agent on client disconnect.
+            #
+            # ``tool_progress_callback`` is intentionally not wired here:
+            # it would duplicate every emit because ``run_agent`` fires it
+            # side-by-side with ``tool_start_callback``/``tool_complete_callback``.
+            # The structured callbacks are strictly richer (they carry the
+            # tool_call id), so they own the chat-completions SSE channel.
            agent_ref = [None]
            agent_task = asyncio.ensure_future(self._run_agent(
                user_message=user_message,
@@ -972,7 +1044,8 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
                stream_delta_callback=_on_delta,
-                tool_progress_callback=_on_tool_progress,
+                tool_start_callback=_on_tool_start,
+                tool_complete_callback=_on_tool_complete,
                agent_ref=agent_ref,
            ))

@@ -1087,7 +1160,8 @@ class APIServerAdapter(BasePlatformAdapter):
                Tagged tuples ``("__tool_progress__", payload)`` are sent
                as a custom ``event: hermes.tool.progress`` SSE event so
                frontends can display them without storing the markers in
-                conversation history.  See #6972.
+                conversation history.  See #6972 for the original event,
+                #16588 for the ``toolCallId``/``status`` lifecycle fields.
                """
                if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
                    event_data = json.dumps(item[1])
@@ -2277,10 +2351,11 @@ class APIServerAdapter(BasePlatformAdapter):
            )
            if agent_ref is not None:
                agent_ref[0] = agent
+            effective_task_id = session_id or str(uuid.uuid4())
            result = agent.run_conversation(
                user_message=user_message,
                conversation_history=conversation_history,
-                task_id="default",
+                task_id=effective_task_id,
            )
            usage = {
                "input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
@@ -2297,10 +2372,31 @@ class APIServerAdapter(BasePlatformAdapter):

    _MAX_CONCURRENT_RUNS = 10  # Prevent unbounded resource allocation
    _RUN_STREAM_TTL = 300  # seconds before orphaned runs are swept
+    _RUN_STATUS_TTL = 3600  # seconds to retain terminal run status for polling
+
+    def _set_run_status(self, run_id: str, status: str, **fields: Any) -> Dict[str, Any]:
+        """Update pollable run status without exposing private agent objects."""
+        now = time.time()
+        current = self._run_statuses.get(run_id, {})
+        current.update({
+            "object": "hermes.run",
+            "run_id": run_id,
+            "status": status,
+            "updated_at": now,
+        })
+        current.setdefault("created_at", fields.pop("created_at", now))
+        current.update(fields)
+        self._run_statuses[run_id] = current
+        return current

    def _make_run_event_callback(self, run_id: str, loop: "asyncio.AbstractEventLoop"):
        """Return a tool_progress_callback that pushes structured events to the run's SSE queue."""
        def _push(event: Dict[str, Any]) -> None:
+            self._set_run_status(
+                run_id,
+                self._run_statuses.get(run_id, {}).get("status", "running"),
+                last_event=event.get("event"),
+            )
            q = self._run_streams.get(run_id)
            if q is None:
                return
@@ -2365,28 +2461,6 @@ class APIServerAdapter(BasePlatformAdapter):
        if not user_message:
            return web.json_response(_openai_error("No user message found in input"), status=400)

-        run_id = f"run_{uuid.uuid4().hex}"
-        loop = asyncio.get_running_loop()
-        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
-        self._run_streams[run_id] = q
-        self._run_streams_created[run_id] = time.time()
-
-        event_cb = self._make_run_event_callback(run_id, loop)
-
-        # Also wire stream_delta_callback so message.delta events flow through
-        def _text_cb(delta: Optional[str]) -> None:
-            if delta is None:
-                return
-            try:
-                loop.call_soon_threadsafe(q.put_nowait, {
-                    "event": "message.delta",
-                    "run_id": run_id,
-                    "timestamp": time.time(),
-                    "delta": delta,
-                })
-            except Exception:
-                pass
-
        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")

@@ -2434,11 +2508,42 @@ class APIServerAdapter(BasePlatformAdapter):
                        )
                    conversation_history.append({"role": msg["role"], "content": str(content)})

+        run_id = f"run_{uuid.uuid4().hex}"
        session_id = body.get("session_id") or stored_session_id or run_id
        ephemeral_system_prompt = instructions
+        loop = asyncio.get_running_loop()
+        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
+        created_at = time.time()
+        self._run_streams[run_id] = q
+        self._run_streams_created[run_id] = created_at
+
+        event_cb = self._make_run_event_callback(run_id, loop)
+
+        # Also wire stream_delta_callback so message.delta events flow through.
+        def _text_cb(delta: Optional[str]) -> None:
+            if delta is None:
+                return
+            try:
+                loop.call_soon_threadsafe(q.put_nowait, {
+                    "event": "message.delta",
+                    "run_id": run_id,
+                    "timestamp": time.time(),
+                    "delta": delta,
+                })
+            except Exception:
+                pass
+
+        self._set_run_status(
+            run_id,
+            "queued",
+            created_at=created_at,
+            session_id=session_id,
+            model=body.get("model", self._model_name),
+        )

        async def _run_and_close():
            try:
+                self._set_run_status(run_id, "running")
                agent = self._create_agent(
                    ephemeral_system_prompt=ephemeral_system_prompt,
                    session_id=session_id,
@@ -2447,10 +2552,11 @@ class APIServerAdapter(BasePlatformAdapter):
                )
                self._active_run_agents[run_id] = agent
                def _run_sync():
+                    effective_task_id = session_id or run_id
                    r = agent.run_conversation(
                        user_message=user_message,
                        conversation_history=conversation_history,
-                        task_id="default",
+                        task_id=effective_task_id,
                    )
                    u = {
                        "input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
@@ -2468,8 +2574,36 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output": final_response,
                    "usage": usage,
                })
+                self._set_run_status(
+                    run_id,
+                    "completed",
+                    output=final_response,
+                    usage=usage,
+                    last_event="run.completed",
+                )
+            except asyncio.CancelledError:
+                self._set_run_status(
+                    run_id,
+                    "cancelled",
+                    last_event="run.cancelled",
+                )
+                try:
+                    q.put_nowait({
+                        "event": "run.cancelled",
+                        "run_id": run_id,
+                        "timestamp": time.time(),
+                    })
+                except Exception:
+                    pass
+                raise
            except Exception as exc:
                logger.exception("[api_server] run %s failed", run_id)
+                self._set_run_status(
+                    run_id,
+                    "failed",
+                    error=str(exc),
+                    last_event="run.failed",
+                )
                try:
                    q.put_nowait({
                        "event": "run.failed",
@@ -2499,6 +2633,21 @@ class APIServerAdapter(BasePlatformAdapter):

        return web.json_response({"run_id": run_id, "status": "started"}, status=202)

+    async def _handle_get_run(self, request: "web.Request") -> "web.Response":
+        """GET /v1/runs/{run_id} — return pollable run status for external UIs."""
+        auth_err = self._check_auth(request)
+        if auth_err:
+            return auth_err
+
+        run_id = request.match_info["run_id"]
+        status = self._run_statuses.get(run_id)
+        if status is None:
+            return web.json_response(
+                _openai_error(f"Run not found: {run_id}", code="run_not_found"),
+                status=404,
+            )
+        return web.json_response(status)
+
    async def _handle_run_events(self, request: "web.Request") -> "web.StreamResponse":
        """GET /v1/runs/{run_id}/events — SSE stream of structured agent lifecycle events."""
        auth_err = self._check_auth(request)
@@ -2561,6 +2710,8 @@ class APIServerAdapter(BasePlatformAdapter):
        if agent is None and task is None:
            return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)

+        self._set_run_status(run_id, "stopping", last_event="run.stopping")
+
        if agent is not None:
            try:
                agent.interrupt("Stop requested via API")
@@ -2603,6 +2754,15 @@ class APIServerAdapter(BasePlatformAdapter):
                self._active_run_agents.pop(run_id, None)
                self._active_run_tasks.pop(run_id, None)

+            stale_statuses = [
+                run_id
+                for run_id, status in list(self._run_statuses.items())
+                if status.get("status") in {"completed", "failed", "cancelled"}
+                and now - float(status.get("updated_at", 0) or 0) > self._RUN_STATUS_TTL
+            ]
+            for run_id in stale_statuses:
+                self._run_statuses.pop(run_id, None)
+
    # ------------------------------------------------------------------
    # BasePlatformAdapter interface
    # ------------------------------------------------------------------
@@ -2621,6 +2781,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_get("/health/detailed", self._handle_health_detailed)
            self._app.router.add_get("/v1/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
+            self._app.router.add_get("/v1/capabilities", self._handle_capabilities)
            self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
            self._app.router.add_post("/v1/responses", self._handle_responses)
            self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response)
@@ -2636,6 +2797,7 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_post("/api/jobs/{job_id}/run", self._handle_run_job)
            # Structured event streaming
            self._app.router.add_post("/v1/runs", self._handle_runs)
+            self._app.router.add_get("/v1/runs/{run_id}", self._handle_get_run)
            self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
            self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
            # Start background sweep to clean up orphaned (unconsumed) run streams
@@ -23,6 +23,45 @@ from utils import normalize_proxy_url

 logger = logging.getLogger(__name__)

+# Audio file extensions Hermes recognizes for native audio delivery.
+# Kept in sync with tools/send_message_tool.py and cron/scheduler.py via
+# should_send_media_as_audio() below.
+_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a', '.flac'})
+# Telegram's Bot API sendAudio only accepts MP3 / M4A. Other audio
+# formats either need to go through sendVoice (Opus/OGG) or must be
+# delivered as a regular document.
+_TELEGRAM_AUDIO_ATTACHMENT_EXTS = frozenset({'.mp3', '.m4a'})
+_TELEGRAM_VOICE_EXTS = frozenset({'.ogg', '.opus'})
+
+
+def _platform_name(platform) -> str:
+    """Normalize a Platform enum / raw string into a lowercase name."""
+    value = getattr(platform, "value", platform)
+    return str(value or "").lower()
+
+
+def should_send_media_as_audio(platform, ext: str, is_voice: bool = False) -> bool:
+    """Return True when a media file should use the platform's audio sender.
+
+    Non-Telegram platforms: every recognized audio extension routes
+    through the audio sender.
+
+    Telegram: the Bot API only accepts MP3/M4A for sendAudio and
+    Opus/OGG for sendVoice. Opus/OGG is only routed as audio when the
+    caller flagged ``is_voice=True`` (so we don't turn a regular audio
+    attachment into a voice bubble just because the file happens to be
+    Opus). Everything else falls through to document delivery by
+    returning ``False``.
+    """
+    normalized_ext = (ext or "").lower()
+    if normalized_ext not in _AUDIO_EXTS:
+        return False
+    if _platform_name(platform) == "telegram":
+        if normalized_ext in _TELEGRAM_VOICE_EXTS:
+            return is_voice
+        return normalized_ext in _TELEGRAM_AUDIO_ATTACHMENT_EXTS
+    return True
+

 def utf16_len(s: str) -> int:
    """Count UTF-16 code units in *s*.
@@ -377,7 +416,7 @@ def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = Non
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
+from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple, Union
 from enum import Enum

 from pathlib import Path as _Path
@@ -942,7 +981,7 @@ def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
        return


-@dataclass 
+@dataclass
 class SendResult:
    """Result of sending a message."""
    success: bool
@@ -952,6 +991,45 @@ class SendResult:
    retryable: bool = False  # True for transient connection errors — base will retry automatically


+class EphemeralReply(str):
+    """System-notice reply that auto-deletes after a TTL.
+
+    Slash-command handlers in ``gateway/run.py`` can return this wrapper
+    instead of a plain string to request that the reply message be deleted
+    after ``ttl_seconds`` on platforms that support ``delete_message``.
+
+    Subclassing ``str`` keeps the wrapper transparent to anything that
+    treats handler return values as text (existing tests use ``in`` /
+    ``startswith`` / equality; the ``_process_message_background`` pipeline
+    extracts attachments from the string content).  ``isinstance(r,
+    EphemeralReply)`` still distinguishes ephemeral replies from plain
+    strings so the send path can schedule deletion.
+
+    Platforms that don't override :meth:`BasePlatformAdapter.delete_message`
+    silently ignore the TTL — the message is sent normally and left in
+    place.  When ``ttl_seconds`` is ``None``, the pipeline uses the
+    configured ``display.ephemeral_system_ttl`` default.  A default of ``0``
+    disables auto-deletion globally, preserving prior behavior.
+    """
+
+    ttl_seconds: Optional[int]
+
+    def __new__(cls, text: str, ttl_seconds: Optional[int] = None):
+        instance = super().__new__(cls, text)
+        instance.ttl_seconds = ttl_seconds
+        return instance
+
+    @property
+    def text(self) -> str:
+        """Return the underlying text.
+
+        Provided for call sites that want an explicit string conversion,
+        though ``str(reply)`` and using ``reply`` directly where a string
+        is expected both work identically.
+        """
+        return str.__str__(self)
+
+
 def merge_pending_message_event(
    pending_messages: Dict[str, MessageEvent],
    session_key: str,
@@ -995,6 +1073,11 @@ def merge_pending_message_event(
                    existing.text = event.text
            if existing_is_photo or incoming_is_photo:
                existing.message_type = MessageType.PHOTO
+            elif (
+                getattr(existing, "message_type", None) == MessageType.TEXT
+                and event.message_type != MessageType.TEXT
+            ):
+                existing.message_type = event.message_type
            return

        if (
@@ -1029,8 +1112,10 @@ _RETRYABLE_ERROR_PATTERNS = (
 )


-# Type for message handlers
-MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
+# Type for message handlers.  Handlers may return a plain string (normal
+# reply), an ``EphemeralReply`` to opt the reply into auto-deletion, or
+# ``None`` when the response was already delivered (e.g. via streaming).
+MessageHandler = Callable[[MessageEvent], Awaitable[Optional[Union[str, "EphemeralReply"]]]]


 def resolve_channel_prompt(
@@ -1415,6 +1500,119 @@ class BasePlatformAdapter(ABC):
        """
        return False

+    def _get_ephemeral_system_ttl_default(self) -> int:
+        """Read ``display.ephemeral_system_ttl`` from config.
+
+        Returns the TTL in seconds to use when an :class:`EphemeralReply`
+        does not specify one explicitly.  ``0`` (the default) disables
+        auto-deletion.  Non-fatal if config is unreadable.
+        """
+        try:
+            from hermes_cli.config import load_config as _load_config
+        except Exception:
+            return 0
+        try:
+            cfg = _load_config()
+        except Exception:
+            return 0
+        display = cfg.get("display", {}) if isinstance(cfg, dict) else {}
+        if not isinstance(display, dict):
+            return 0
+        raw = display.get("ephemeral_system_ttl", 0)
+        try:
+            return int(raw)
+        except (TypeError, ValueError):
+            return 0
+
+    def _schedule_ephemeral_delete(
+        self,
+        chat_id: str,
+        message_id: str,
+        ttl_seconds: int,
+    ) -> None:
+        """Spawn a detached task that deletes ``message_id`` after ``ttl_seconds``.
+
+        Best-effort — failures (gateway restart, permission denied, message
+        too old for Telegram's 48h window) are logged at debug level and
+        otherwise ignored.  Does not block the caller.
+        """
+
+        async def _run_delete() -> None:
+            try:
+                await asyncio.sleep(max(1, int(ttl_seconds)))
+                await self.delete_message(chat_id=chat_id, message_id=message_id)
+            except asyncio.CancelledError:
+                raise
+            except Exception as e:
+                logger.debug(
+                    "[%s] Ephemeral delete failed for %s/%s: %s",
+                    self.name, chat_id, message_id, e,
+                )
+
+        coro = _run_delete()
+        try:
+            asyncio.create_task(coro)
+        except RuntimeError:
+            # No running loop (e.g. unit tests that never reach the async
+            # path).  Close the coroutine cleanly so Python doesn't warn
+            # about it never being awaited, then drop silently.
+            coro.close()
+
+    async def send_slash_confirm(
+        self,
+        chat_id: str,
+        title: str,
+        message: str,
+        session_key: str,
+        confirm_id: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a three-option slash-command confirmation prompt.
+
+        Used by the gateway's generic slash-confirm primitive (see
+        ``GatewayRunner._request_slash_confirm``) for commands that have a
+        non-destructive but expensive side effect the user should explicitly
+        acknowledge — the current caller is ``/reload-mcp``, which
+        invalidates the provider prompt cache.
+
+        Platforms with inline-button support (Telegram, Discord, Slack,
+        Matrix, Feishu) should override this to render three buttons:
+        Approve Once / Always Approve / Cancel.  Button callbacks MUST be
+        routed back through the gateway by calling
+        ``GatewayRunner._resolve_slash_confirm(confirm_id, choice)`` where
+        ``choice`` is ``"once"`` / ``"always"`` / ``"cancel"``.
+
+        Platforms without button UIs leave this as the default and fall
+        through to the gateway's text fallback (which sends ``message`` as
+        plain text and intercepts the next ``/approve`` / ``/always`` /
+        ``/cancel`` reply).
+
+        ``confirm_id`` is a short string generated by the gateway; the
+        adapter stores it alongside any platform-specific state needed to
+        route the callback (e.g. Telegram's ``_approval_state`` dict).
+        """
+        return SendResult(success=False, error="Not supported")
+
+    async def send_private_notice(
+        self,
+        chat_id: str,
+        user_id: Optional[str],
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a notice privately when the platform supports it.
+
+        The default implementation falls back to a normal send so callers can
+        use one code path across platforms.
+        """
+        return await self.send(
+            chat_id=chat_id,
+            content=content,
+            reply_to=reply_to,
+            metadata=metadata,
+        )
+
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
@@ -1431,7 +1629,64 @@ class BasePlatformAdapter(ABC):
        Default is a no-op for platforms with one-shot typing indicators.
        """
        pass
-    
+
+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images.
+
+        Each item is a ``(url, alt_text)`` tuple; the URL may be an
+        ``http(s)://`` or ``file://`` URI.
+
+        Default implementation sends each item individually,
+        routing animated GIFs through ``send_animation`` and local
+        files through ``send_image_file``.
+
+        Override in subclasses to bundle into a single native API call
+        (e.g. Signal's multi-attachment RPC).
+        """
+        from urllib.parse import unquote as _unquote
+
+        for image_url, alt_text in images:
+            if human_delay > 0:
+                await asyncio.sleep(human_delay)
+            try:
+                logger.info(
+                    "[%s] Sending image: %s (alt=%s)",
+                    self.name,
+                    safe_url_for_log(image_url),
+                    alt_text[:30] if alt_text else "",
+                )
+                if image_url.startswith("file://"):
+                    img_result = await self.send_image_file(
+                        chat_id=chat_id,
+                        image_path=_unquote(image_url[7:]),
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                elif self._is_animation_url(image_url):
+                    img_result = await self.send_animation(
+                        chat_id=chat_id,
+                        animation_url=image_url,
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                else:
+                    img_result = await self.send_image(
+                        chat_id=chat_id,
+                        image_url=image_url,
+                        caption=alt_text if alt_text else None,
+                        metadata=metadata,
+                    )
+                if not img_result.success:
+                    logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
+            except Exception as img_err:
+                logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
+
    async def send_image(
        self,
        chat_id: str,
@@ -1640,7 +1895,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1780,11 +2035,19 @@ class BasePlatformAdapter(ABC):
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
-                try:
-                    await asyncio.wait_for(stop_event.wait(), timeout=interval)
-                except asyncio.TimeoutError:
-                    continue
-                return
+                loop = asyncio.get_running_loop()
+                deadline = loop.time() + interval
+                while not stop_event.is_set():
+                    remaining = deadline - loop.time()
+                    if remaining <= 0:
+                        break
+                    # Poll instead of wait_for(stop_event.wait()).  Cancelling
+                    # wait_for while it owns the inner Event.wait task can leave
+                    # shutdown paths stuck awaiting the typing task on Python
+                    # 3.11/pytest-asyncio; sleep cancellation is immediate.
+                    await asyncio.sleep(min(0.25, remaining))
+                if stop_event.is_set():
+                    return
        except asyncio.CancelledError:
            pass  # Normal cancellation when handler completes
        finally:
@@ -1904,6 +2167,28 @@ class BasePlatformAdapter(ABC):
        lowered = error.lower()
        return "timed out" in lowered or "readtimeout" in lowered or "writetimeout" in lowered

+    def _unwrap_ephemeral(self, response: Any) -> Tuple[Optional[str], int]:
+        """Unwrap a handler response into (text, ttl_seconds).
+
+        Accepts a plain string, ``None``, or an :class:`EphemeralReply`.
+        Returns ``(text, ttl)`` where ``ttl > 0`` means the caller should
+        schedule a deletion via :meth:`_schedule_ephemeral_delete` after
+        the send succeeds.  ``ttl`` is forced to 0 when the adapter
+        doesn't override :meth:`delete_message` so non-supporting
+        platforms silently degrade to normal sends.
+        """
+        if isinstance(response, EphemeralReply):
+            ttl = response.ttl_seconds
+            if ttl is None:
+                try:
+                    ttl = int(self._get_ephemeral_system_ttl_default())
+                except Exception:
+                    ttl = 0
+            if ttl and ttl > 0 and type(self).delete_message is BasePlatformAdapter.delete_message:
+                ttl = 0
+            return response.text, int(ttl or 0)
+        return response, 0
+
    async def _send_with_retry(
        self,
        chat_id: str,
@@ -2117,6 +2402,12 @@ class BasePlatformAdapter(ABC):
        ``release_guard=False`` keeps the adapter-level session guard in place
        so reset-like commands can finish atomically before follow-up messages
        are allowed to start a fresh background task.
+
+        Bounded by a 5s timeout so a wedged finally block in the cancelled
+        task (typing-task cleanup, on_processing_complete hook, etc.) can't
+        stall the calling dispatch coroutine — particularly under pytest-
+        asyncio where the event loop's cancellation-propagation semantics
+        differ subtly from a bare ``asyncio.run`` harness.
        """
        task = self._session_tasks.pop(session_key, None)
        if task is not None and not task.done():
@@ -2128,9 +2419,15 @@ class BasePlatformAdapter(ABC):
            self._expected_cancelled_tasks.add(task)
            task.cancel()
            try:
-                await task
+                await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
            except asyncio.CancelledError:
                pass
+            except asyncio.TimeoutError:
+                logger.warning(
+                    "[%s] Cancelled task for %s did not exit within 5s; "
+                    "unblocking dispatch and letting the task unwind in the background",
+                    self.name, session_key,
+                )
            except Exception:
                logger.debug(
                    "[%s] Session cancellation raised while unwinding %s",
@@ -2199,13 +2496,20 @@ class BasePlatformAdapter(ABC):
                release_guard=False,
                discard_pending=False,
            )
-            if response:
-                await self._send_with_retry(
+            _text, _eph_ttl = self._unwrap_ephemeral(response)
+            if _text:
+                _r = await self._send_with_retry(
                    chat_id=event.source.chat_id,
-                    content=response,
+                    content=_text,
                    reply_to=event.message_id,
                    metadata=thread_meta,
                )
+                if _eph_ttl > 0 and _r.success and _r.message_id:
+                    self._schedule_ephemeral_delete(
+                        chat_id=event.source.chat_id,
+                        message_id=_r.message_id,
+                        ttl_seconds=_eph_ttl,
+                    )
        except Exception:
            # On failure, restore the original guard if one still exists so
            # we don't leave the session in a half-reset state.
@@ -2285,13 +2589,20 @@ class BasePlatformAdapter(ABC):
                try:
                    _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
                    response = await self._message_handler(event)
-                    if response:
-                        await self._send_with_retry(
+                    _text, _eph_ttl = self._unwrap_ephemeral(response)
+                    if _text:
+                        _r = await self._send_with_retry(
                            chat_id=event.source.chat_id,
-                            content=response,
+                            content=_text,
                            reply_to=event.message_id,
                            metadata=_thread_meta,
                        )
+                        if _eph_ttl > 0 and _r.success and _r.message_id:
+                            self._schedule_ephemeral_delete(
+                                chat_id=event.source.chat_id,
+                                message_id=_r.message_id,
+                                ttl_seconds=_eph_ttl,
+                            )
                except Exception as e:
                    logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
                return
@@ -2365,7 +2676,6 @@ class BasePlatformAdapter(ABC):
        # Fall back to a new Event only if the entry was removed externally.
        interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
        self._active_sessions[session_key] = interrupt_event
-        callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
        
        # Start continuous typing indicator (refreshes every 2 seconds)
        _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
@@ -2382,13 +2692,32 @@ class BasePlatformAdapter(ABC):
                **_keep_typing_kwargs,
            )
        )
+
+        async def _stop_typing_task() -> None:
+            typing_task.cancel()
+            try:
+                await asyncio.wait_for(asyncio.shield(typing_task), timeout=0.5)
+            except (asyncio.CancelledError, asyncio.TimeoutError):
+                # Cancellation cleanup must not block adapter shutdown.  The
+                # typing task is already cancelled; if the parent task is also
+                # cancelling, let this message-processing task unwind now.
+                pass
        
        try:
            await self._run_processing_hook("on_processing_start", event)

            # Call the handler (this can take a while with tool calls)
            response = await self._message_handler(event)
-            
+
+            # Slash-command handlers may return an EphemeralReply sentinel to
+            # request that their reply message auto-delete after a TTL (used
+            # for system notices like "✨ New session started!" that the user
+            # doesn't need to keep in the thread).  Unwrap here so all the
+            # downstream extract_media / text-processing logic sees a plain
+            # string, and remember the TTL + platform capability so the
+            # post-send block can schedule the deletion.
+            response, _ephemeral_ttl = self._unwrap_ephemeral(response)
+
            # Send response if any.  A None/empty response is normal when
            # streaming already delivered the text (already_sent=True) or
            # when the message was queued behind an active agent.  Log at
@@ -2477,53 +2806,78 @@ class BasePlatformAdapter(ABC):
                    )
                    _record_delivery(result)

+                    # Schedule auto-deletion of system-notice replies.
+                    # Detached so the handler returns immediately; errors
+                    # (permission denied, message too old) are swallowed.
+                    if (
+                        _ephemeral_ttl
+                        and _ephemeral_ttl > 0
+                        and result.success
+                        and result.message_id
+                    ):
+                        self._schedule_ephemeral_delete(
+                            chat_id=event.source.chat_id,
+                            message_id=result.message_id,
+                            ttl_seconds=_ephemeral_ttl,
+                        )
+
                # Human-like pacing delay between text and media
                human_delay = self._get_human_delay()

                # Send extracted images as native attachments
                if images:
                    logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
-                for image_url, alt_text in images:
-                    if human_delay > 0:
-                        await asyncio.sleep(human_delay)
                    try:
-                        logger.info(
-                            "[%s] Sending image: %s (alt=%s)",
-                            self.name,
-                            safe_url_for_log(image_url),
-                            alt_text[:30] if alt_text else "",
+                        await self.send_multiple_images(
+                            chat_id=event.source.chat_id,
+                            images=images,
+                            metadata=_thread_metadata,
+                            human_delay=human_delay,
                        )
-                        # Route animated GIFs through send_animation for proper playback
-                        if self._is_animation_url(image_url):
-                            img_result = await self.send_animation(
-                                chat_id=event.source.chat_id,
-                                animation_url=image_url,
-                                caption=alt_text if alt_text else None,
-                                metadata=_thread_metadata,
-                            )
-                        else:
-                            img_result = await self.send_image(
-                                chat_id=event.source.chat_id,
-                                image_url=image_url,
-                                caption=alt_text if alt_text else None,
-                                metadata=_thread_metadata,
-                            )
-                        if not img_result.success:
-                            logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
-                    except Exception as img_err:
-                        logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
+                    except Exception as batch_err:
+                        logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
+

                # Send extracted media files — route by file type
-                _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
                _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
                _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

+                # Partition images out of media_files + local_files so they can
+                # go out as one send_multiple_images batch (e.g. one Signal RPC)
+                from urllib.parse import quote as _quote
+                _image_paths: list = []
+                _non_image_media: list = []
                for media_path, is_voice in media_files:
+                    _ext = Path(media_path).suffix.lower()
+                    if _ext in _IMAGE_EXTS and not is_voice:
+                        _image_paths.append(media_path)
+                    else:
+                        _non_image_media.append((media_path, is_voice))
+                _non_image_local: list = []
+                for file_path in local_files:
+                    if Path(file_path).suffix.lower() in _IMAGE_EXTS:
+                        _image_paths.append(file_path)
+                    else:
+                        _non_image_local.append(file_path)
+
+                if _image_paths:
+                    try:
+                        _batch = [(f"file://{_quote(p)}", "") for p in _image_paths]
+                        await self.send_multiple_images(
+                            chat_id=event.source.chat_id,
+                            images=_batch,
+                            metadata=_thread_metadata,
+                            human_delay=human_delay,
+                        )
+                    except Exception as batch_err:
+                        logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
+
+                for media_path, is_voice in _non_image_media:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
                        ext = Path(media_path).suffix.lower()
-                        if ext in _AUDIO_EXTS:
+                        if should_send_media_as_audio(self.platform, ext, is_voice=is_voice):
                            media_result = await self.send_voice(
                                chat_id=event.source.chat_id,
                                audio_path=media_path,
@@ -2535,12 +2889,6 @@ class BasePlatformAdapter(ABC):
                                video_path=media_path,
                                metadata=_thread_metadata,
                            )
-                        elif ext in _IMAGE_EXTS:
-                            media_result = await self.send_image_file(
-                                chat_id=event.source.chat_id,
-                                image_path=media_path,
-                                metadata=_thread_metadata,
-                            )
                        else:
                            media_result = await self.send_document(
                                chat_id=event.source.chat_id,
@@ -2553,19 +2901,13 @@ class BasePlatformAdapter(ABC):
                    except Exception as media_err:
                        logger.warning("[%s] Error sending media: %s", self.name, media_err)

-                # Send auto-detected local files as native attachments
-                for file_path in local_files:
+                # Send auto-detected local non-image files as native attachments
+                for file_path in _non_image_local:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
                        ext = Path(file_path).suffix.lower()
-                        if ext in _IMAGE_EXTS:
-                            await self.send_image_file(
-                                chat_id=event.source.chat_id,
-                                image_path=file_path,
-                                metadata=_thread_metadata,
-                            )
-                        elif ext in _VIDEO_EXTS:
+                        if ext in _VIDEO_EXTS:
                            await self.send_video(
                                chat_id=event.source.chat_id,
                                video_path=file_path,
@@ -2604,14 +2946,28 @@ class BasePlatformAdapter(ABC):
                _active = self._active_sessions.get(session_key)
                if _active is not None:
                    _active.clear()
-                typing_task.cancel()
+                await _stop_typing_task()
+                # Spawn a fresh task for the pending message instead of
+                # recursing.  Issue #17758: `await
+                # self._process_message_background(...)` here grew the
+                # call stack one frame per chained follow-up, and under
+                # sustained pending-queue activity the C stack would
+                # exhaust at ~2000 frames and SIGSEGV the process.
+                # Mirror the late-arrival drain pattern below: hand off
+                # to a new task and return so this frame can unwind.
+                drain_task = asyncio.create_task(
+                    self._process_message_background(pending_event, session_key)
+                )
+                # Hand ownership of the session to the drain task so
+                # stale-lock detection keeps working while it runs.
+                self._session_tasks[session_key] = drain_task
                try:
-                    await typing_task
-                except asyncio.CancelledError:
+                    self._background_tasks.add(drain_task)
+                    drain_task.add_done_callback(self._background_tasks.discard)
+                except TypeError:
+                    # Tests stub create_task() with non-hashable sentinels; tolerate.
                    pass
-                # Process pending message in new background task
-                await self._process_message_background(pending_event, session_key)
-                return  # Already cleaned up
+                return  # Drain task owns the session now.
                
        except asyncio.CancelledError:
            current_task = asyncio.current_task()
@@ -2642,7 +2998,20 @@ class BasePlatformAdapter(ABC):
        finally:
            # Fire any one-shot post-delivery callback registered for this
            # session (e.g. deferred background-review notifications).
-            _callback_generation = callback_generation
+            #
+            # Snapshot the callback generation HERE (after the agent has run),
+            # not at the top of this task.  _hermes_run_generation is set on
+            # the interrupt event by GatewayRunner._bind_adapter_run_generation
+            # during _handle_message_with_agent — which happens DURING the
+            # self._message_handler(event) await above.  Snapshotting earlier
+            # always captured None, which bypassed the generation-ownership
+            # check in pop_post_delivery_callback and let stale runs fire a
+            # fresher run's callbacks.
+            _callback_generation = getattr(
+                interrupt_event,
+                "_hermes_run_generation",
+                None,
+            )
            if hasattr(self, "pop_post_delivery_callback"):
                _post_cb = self.pop_post_delivery_callback(
                    session_key,
@@ -2656,11 +3025,7 @@ class BasePlatformAdapter(ABC):
                except Exception:
                    pass
            # Stop typing indicator
-            typing_task.cancel()
-            try:
-                await typing_task
-            except asyncio.CancelledError:
-                pass
+            await _stop_typing_task()
            # Also cancel any platform-level persistent typing tasks (e.g. Discord)
            # that may have been recreated by _keep_typing after the last stop_typing()
            try:
@@ -2677,25 +3042,41 @@ class BasePlatformAdapter(ABC):
            # dropped (user never gets a reply).
            late_pending = self._pending_messages.pop(session_key, None)
            if late_pending is not None:
-                logger.debug(
-                    "[%s] Late-arrival pending message during cleanup — spawning drain task",
-                    self.name,
-                )
-                _active = self._active_sessions.get(session_key)
-                if _active is not None:
-                    _active.clear()
-                drain_task = asyncio.create_task(
-                    self._process_message_background(late_pending, session_key)
-                )
-                # Hand ownership of the session to the drain task so stale-lock
-                # detection keeps working while it runs.
-                self._session_tasks[session_key] = drain_task
-                try:
-                    self._background_tasks.add(drain_task)
-                    drain_task.add_done_callback(self._background_tasks.discard)
-                except TypeError:
-                    # Tests stub create_task() with non-hashable sentinels; tolerate.
-                    pass
+                current_task = asyncio.current_task()
+                existing_task = self._session_tasks.get(session_key)
+                if (
+                    existing_task is not None
+                    and existing_task is not current_task
+                ):
+                    # The in-band drain (or an earlier late-arrival drain)
+                    # already spawned a follow-up task that owns this
+                    # session.  Re-queue the late-arrival event so that
+                    # task picks it up — avoids spawning two concurrent
+                    # _process_message_background tasks for the same key
+                    # (#17758 follow-up: prevents the create_task path
+                    # from racing with itself across the in-band/finally
+                    # boundary).
+                    self._pending_messages[session_key] = late_pending
+                else:
+                    logger.debug(
+                        "[%s] Late-arrival pending message during cleanup — spawning drain task",
+                        self.name,
+                    )
+                    _active = self._active_sessions.get(session_key)
+                    if _active is not None:
+                        _active.clear()
+                    drain_task = asyncio.create_task(
+                        self._process_message_background(late_pending, session_key)
+                    )
+                    # Hand ownership of the session to the drain task so stale-lock
+                    # detection keeps working while it runs.
+                    self._session_tasks[session_key] = drain_task
+                    try:
+                        self._background_tasks.add(drain_task)
+                        drain_task.add_done_callback(self._background_tasks.discard)
+                    except TypeError:
+                        # Tests stub create_task() with non-hashable sentinels; tolerate.
+                        pass
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
@@ -2703,16 +3084,34 @@ class BasePlatformAdapter(ABC):
                # reset-like command that already swapped in its own
                # command_guard (and cancelled us) can't be accidentally
                # cleared by our unwind.  The command owns the session now.
+                #
+                # The owner-check also covers the in-band drain handoff
+                # above: when we spawned a drain_task and transferred
+                # ownership via ``_session_tasks[session_key] = drain_task``,
+                # ``_session_tasks.get(session_key) is current_task`` is
+                # False, so we leave _active_sessions populated.  Without
+                # this guard, the drain task picks up the same
+                # interrupt_event in its own _process_message_background
+                # entry, _release_session_guard's guard-match succeeds,
+                # and we'd delete the entry while the drain task is still
+                # running — letting a concurrent inbound message pass
+                # the Level-1 guard and spawn a second handler for the
+                # same session.
                current_task = asyncio.current_task()
                if current_task is not None and self._session_tasks.get(session_key) is current_task:
                    del self._session_tasks[session_key]
-                self._release_session_guard(session_key, guard=interrupt_event)
+                    self._release_session_guard(session_key, guard=interrupt_event)
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.

        Used during gateway shutdown/replacement so active sessions from the old
        process do not keep running after adapters are being torn down.
+
+        Each cancelled task is awaited with a 5s bound so a wedged finally
+        (typing-task cleanup, on_processing_complete hook) can't stall the
+        whole shutdown path.  Stragglers are released from our tracking and
+        allowed to finish unwinding on their own.
        """
        # Loop until no new tasks appear.  Without this, a message
        # arriving during the `await asyncio.gather` below would spawn
@@ -2731,7 +3130,21 @@ class BasePlatformAdapter(ABC):
            for task in tasks:
                self._expected_cancelled_tasks.add(task)
                task.cancel()
-            await asyncio.gather(*tasks, return_exceptions=True)
+            try:
+                await asyncio.wait_for(
+                    asyncio.gather(
+                        *(asyncio.shield(t) for t in tasks),
+                        return_exceptions=True,
+                    ),
+                    timeout=5.0,
+                )
+            except asyncio.TimeoutError:
+                logger.warning(
+                    "[%s] %d background task(s) did not exit within 5s; "
+                    "releasing tracking and letting them unwind in the background",
+                    self.name, len([t for t in tasks if not t.done()]),
+                )
+                break
            # Loop: late-arrival tasks spawned during the gather above
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
@@ -18,7 +18,7 @@ import tempfile
 import threading
 import time
 from collections import defaultdict
-from typing import Callable, Dict, Optional, Any
+from typing import Callable, Dict, List, Optional, Any, Tuple

 logger = logging.getLogger(__name__)

@@ -1343,6 +1343,134 @@ class DiscordAdapter(BasePlatformAdapter):
            msg = await channel.send(content=caption if caption else None, file=file)
        return SendResult(success=True, message_id=str(msg.id))

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],  # (image_url, alt_text) pairs; file:// URLs are local paths
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images as a single Discord message with multiple attachments.
+
+        Discord permits up to 10 file attachments per message. Batches are
+        chunked accordingly. URL images are downloaded into memory and
+        uploaded as inline attachments (same pattern as ``send_image`` so
+        they render inline, not as bare links). Local files are opened
+        directly. If sending a chunk fails, that entire chunk is re-sent
+        through the base-class ``send_multiple_images`` fallback.
+        """
+        if not self._client:
+            return
+        if not images:
+            return
+
+        try:
+            import discord as _discord_mod
+            import io as _io
+            from urllib.parse import unquote as _unquote
+        except Exception:  # pragma: no cover
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+            return
+
+        try:
+            channel = self._client.get_channel(int(chat_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(chat_id))
+            if not channel:
+                logger.warning("[%s] Channel %s not found for multi-image send", self.name, chat_id)
+                return
+        except Exception as e:
+            logger.warning("[%s] Failed to resolve channel for multi-image send: %s", self.name, e)
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+            return
+
+        CHUNK = 10  # attachments per message (the Discord limit noted in the docstring)
+        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
+
+        for chunk_idx, chunk in enumerate(chunks):
+            if human_delay > 0 and chunk_idx > 0:  # pace between chunks, not before the first
+                await asyncio.sleep(human_delay)
+
+            files: List[Any] = []
+            captions: List[str] = []
+            aiohttp_session = None  # created lazily on the first remote URL; closed in finally
+            try:
+                for image_url, alt_text in chunk:
+                    if alt_text:
+                        captions.append(alt_text)  # recorded even if the image is skipped below
+                    if image_url.startswith("file://"):
+                        local_path = _unquote(image_url[7:])  # drop "file://" and undo percent-encoding
+                        if not os.path.exists(local_path):
+                            logger.warning("[%s] Skipping missing image: %s", self.name, local_path)
+                            continue
+                        files.append(_discord_mod.File(local_path, filename=os.path.basename(local_path)))
+                    else:
+                        if not is_safe_url(image_url):
+                            logger.warning("[%s] Blocked unsafe image URL in batch", self.name)
+                            continue
+                        # Download to BytesIO so it renders inline
+                        try:
+                            import aiohttp as _aiohttp
+                            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
+                            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
+                            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
+                            if aiohttp_session is None:
+                                aiohttp_session = _aiohttp.ClientSession(**_sess_kw)
+                            async with aiohttp_session.get(
+                                image_url, timeout=_aiohttp.ClientTimeout(total=30), **_req_kw,
+                            ) as resp:
+                                if resp.status != 200:
+                                    logger.warning(
+                                        "[%s] Failed to download image (HTTP %d) in batch: %s",
+                                        self.name, resp.status, image_url[:80],
+                                    )
+                                    continue
+                                data = await resp.read()
+                                ct = resp.headers.get("content-type", "image/png")
+                                ext = "png"  # default extension when the content-type matches none below
+                                if "jpeg" in ct or "jpg" in ct:
+                                    ext = "jpg"
+                                elif "gif" in ct:
+                                    ext = "gif"
+                                elif "webp" in ct:
+                                    ext = "webp"
+                                files.append(_discord_mod.File(_io.BytesIO(data), filename=f"image_{len(files)}.{ext}"))
+                        except Exception as dl_err:
+                            logger.warning("[%s] Download failed for %s: %s", self.name, image_url[:80], dl_err)
+                            continue
+
+                if not files:  # every image in this chunk was skipped; finally still runs
+                    continue
+
+                # Use the first caption if any (Discord only has one message body for the group)
+                content = captions[0] if captions else None
+                logger.info(
+                    "[%s] Sending %d image(s) as single Discord message (chunk %d/%d)",
+                    self.name, len(files), chunk_idx + 1, len(chunks),
+                )
+
+                if self._is_forum_parent(channel):
+                    await self._forum_post_file(
+                        channel,
+                        content=(content or "").strip(),
+                        files=files,
+                    )
+                else:
+                    await channel.send(content=content, files=files)
+            except Exception as e:
+                logger.warning(
+                    "[%s] Multi-image Discord send failed (chunk %d/%d), falling back to per-image: %s",
+                    self.name, chunk_idx + 1, len(chunks), e,
+                    exc_info=True,
+                )
+                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
+            finally:
+                if aiohttp_session is not None:  # one session per chunk; close errors are ignored
+                    try:
+                        await aiohttp_session.close()
+                    except Exception:
+                        pass
+
    async def play_tts(
        self,
        chat_id: str,
@@ -2270,6 +2398,10 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_reload_mcp(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/reload-mcp")

+        @tree.command(name="reload-skills", description="Re-scan ~/.hermes/skills/ for new or removed skills")
+        async def slash_reload_skills(interaction: discord.Interaction):
+            await self._run_simple_slash(interaction, "/reload-skills")
+
        @tree.command(name="voice", description="Toggle voice reply mode")
        @discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
        @discord.app_commands.choices(mode=[
@@ -2719,8 +2851,15 @@ class DiscordAdapter(BasePlatformAdapter):
            raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
        if isinstance(raw, list):
            return {str(part).strip() for part in raw if str(part).strip()}
-        if isinstance(raw, str) and raw.strip():
-            return {part.strip() for part in raw.split(",") if part.strip()}
+        # Coerce non-list scalars (str/int/float) to str before splitting.
+        # YAML parses a bare numeric value such as
+        # `free_response_channels: 1491973769726791812` as int, which was
+        # previously falling through the isinstance(str) branch and silently
+        # returning an empty set.  str() here accepts whatever scalar the YAML
+        # loader hands us without changing existing string/CSV semantics.
+        s = str(raw).strip() if raw is not None else ""
+        if s:
+            return {part.strip() for part in s.split(",") if part.strip()}
        return set()

    def _thread_parent_channel(self, channel: Any) -> Any:
@@ -2906,9 +3045,47 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[dict] = None,
+    ) -> SendResult:
+        """Send a three-button slash-command confirmation prompt.
+
+        The prompt is posted as an orange embed with a ``SlashConfirmView``
+        attached.  When ``metadata`` carries a truthy ``thread_id`` the prompt
+        goes to that thread instead of ``chat_id``.
+
+        Args:
+            chat_id: Channel id used when no thread id is supplied.
+            title: Embed title; falls back to ``"Confirm"`` when empty.
+            message: Embed description; truncated with ``...`` when too long.
+            session_key: Passed through to the view for later resolution.
+            confirm_id: Passed through to the view for later resolution.
+            metadata: Optional dict; only ``thread_id`` is read here.
+
+        Returns:
+            ``SendResult`` with the posted message id on success, or
+            ``success=False`` with the error text when not connected or when
+            any step raises.
+        """
+        if not self._client or not DISCORD_AVAILABLE:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            target_id = chat_id
+            if metadata and metadata.get("thread_id"):
+                target_id = metadata["thread_id"]
+
+            # Cache first, API fetch only on a miss.
+            channel = self._client.get_channel(int(target_id))
+            if not channel:
+                channel = await self._client.fetch_channel(int(target_id))
+
+            # Discord rejects embeds whose title exceeds 256 characters, which
+            # would fail the whole prompt; trim instead of erroring out.
+            max_title = 256
+            heading = title or "Confirm"
+            if len(heading) > max_title:
+                heading = heading[: max_title - 3] + "..."
+
+            # Embed description limit is 4096; message usually fits easily.
+            max_desc = 4088
+            body = message if len(message) <= max_desc else message[: max_desc - 3] + "..."
+            embed = discord.Embed(
+                title=heading,
+                description=body,
+                color=discord.Color.orange(),
+            )
+
+            view = SlashConfirmView(
+                session_key=session_key,
+                confirm_id=confirm_id,
+                allowed_user_ids=self._allowed_user_ids,
+            )
+
+            msg = await channel.send(embed=embed, view=view)
+            return SendResult(success=True, message_id=str(msg.id))
+        except Exception as e:
+            return SendResult(success=False, error=str(e))
+
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
+        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an interactive button-based update prompt (Yes / No).

@@ -2918,9 +3095,10 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client or not DISCORD_AVAILABLE:
            return SendResult(success=False, error="Not connected")
        try:
-            channel = self._client.get_channel(int(chat_id))
+            target_id = metadata.get("thread_id") if metadata and metadata.get("thread_id") else chat_id
+            channel = self._client.get_channel(int(target_id))
            if not channel:
-                channel = await self._client.fetch_channel(int(chat_id))
+                channel = await self._client.fetch_channel(int(target_id))

            default_hint = f" (default: {default})" if default else ""
            embed = discord.Embed(
@@ -3639,6 +3817,103 @@ if DISCORD_AVAILABLE:
            for child in self.children:
                child.disabled = True

+    class SlashConfirmView(discord.ui.View):
+        """Three-button view for generic slash-command confirmations.
+
+        Used by ``/reload-mcp`` and any future slash command routed through
+        ``GatewayRunner._request_slash_confirm``.  Buttons map to the
+        gateway's three choices:
+
+          * "Approve Once"   → ``choice="once"``
+          * "Always Approve" → ``choice="always"``
+          * "Cancel"         → ``choice="cancel"``
+
+        Clicking calls the module-level
+        ``tools.slash_confirm.resolve(session_key, confirm_id, choice)``
+        which runs the handler the runner stored for this ``session_key``.
+        Only users in the adapter's allowlist can click.  Times out after
+        5 minutes (matches the gateway primitive's timeout).
+        """
+
+        def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
+            """Store the identifiers needed to resolve the prompt later.
+
+            ``allowed_user_ids`` is compared against ``str(user.id)`` in
+            ``_check_auth``; an empty set means no restriction.
+            """
+            super().__init__(timeout=300)
+            self.session_key = session_key
+            self.confirm_id = confirm_id
+            self.allowed_user_ids = allowed_user_ids
+            # Set on the first accepted click or on timeout; later clicks are refused.
+            self.resolved = False
+
+        def _check_auth(self, interaction: discord.Interaction) -> bool:
+            """Return True when the clicking user may answer this prompt."""
+            # An empty allowlist is treated as "anyone may click".
+            if not self.allowed_user_ids:
+                return True
+            return str(interaction.user.id) in self.allowed_user_ids
+
+        async def _resolve(
+            self, interaction: discord.Interaction, choice: str,
+            color: discord.Color, label: str,
+        ):
+            """Shared handler behind all three buttons.
+
+            Refuses (ephemerally) when the prompt is already resolved or the
+            user is not authorized.  Otherwise marks the view resolved,
+            recolors the first embed and stamps a footer, disables every
+            button, edits the original message, and finally forwards
+            ``choice`` to ``tools.slash_confirm.resolve``.  Errors from the
+            resolve/follow-up step are logged, not raised.
+            """
+            if self.resolved:
+                await interaction.response.send_message(
+                    "This prompt has already been resolved~", ephemeral=True,
+                )
+                return
+            if not self._check_auth(interaction):
+                await interaction.response.send_message(
+                    "You're not authorized to answer this prompt~", ephemeral=True,
+                )
+                return
+
+            # Flip the flag before any further awaits so a second click that
+            # arrives meanwhile takes the "already resolved" branch above.
+            self.resolved = True
+
+            embed = interaction.message.embeds[0] if interaction.message.embeds else None
+            if embed:
+                embed.color = color
+                embed.set_footer(text=f"{label} by {interaction.user.display_name}")
+
+            for child in self.children:
+                child.disabled = True
+
+            # This edit is the interaction's response; the handler result
+            # (if any) is sent afterwards as a follow-up.
+            await interaction.response.edit_message(embed=embed, view=self)
+
+            # Resolve via the module-level primitive.  If the handler
+            # returns a follow-up message, post it in the same channel.
+            try:
+                from tools import slash_confirm as _slash_confirm_mod
+                result_text = await _slash_confirm_mod.resolve(
+                    self.session_key, self.confirm_id, choice,
+                )
+                if result_text:
+                    await interaction.followup.send(result_text)
+                logger.info(
+                    "Discord button resolved slash-confirm for session %s "
+                    "(choice=%s, user=%s)",
+                    self.session_key, choice, interaction.user.display_name,
+                )
+            except Exception as exc:
+                logger.error("Discord slash-confirm resolve failed: %s", exc, exc_info=True)
+
+        @discord.ui.button(label="Approve Once", style=discord.ButtonStyle.green)
+        async def approve_once(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            """Button callback: resolve with ``choice="once"``."""
+            await self._resolve(interaction, "once", discord.Color.green(), "Approved once")
+
+        @discord.ui.button(label="Always Approve", style=discord.ButtonStyle.blurple)
+        async def approve_always(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            """Button callback: resolve with ``choice="always"``."""
+            await self._resolve(interaction, "always", discord.Color.purple(), "Always approved")
+
+        @discord.ui.button(label="Cancel", style=discord.ButtonStyle.red)
+        async def cancel(
+            self, interaction: discord.Interaction, button: discord.ui.Button,
+        ):
+            """Button callback: resolve with ``choice="cancel"``."""
+            await self._resolve(interaction, "cancel", discord.Color.greyple(), "Cancelled")
+
+        async def on_timeout(self):
+            """Mark the prompt resolved and disable its buttons on timeout.
+
+            Only the in-memory view state is changed here; the posted
+            message is not edited by this method.
+            """
+            self.resolved = True
+            for child in self.children:
+                child.disabled = True
+
    class UpdatePromptView(discord.ui.View):
        """Interactive Yes/No buttons for ``hermes update`` prompts.

@@ -31,7 +31,7 @@ from email.mime.base import MIMEBase
 from email.utils import formatdate
 from email import encoders
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple

 from gateway.platforms.base import (
    BasePlatformAdapter,
@@ -540,6 +540,113 @@ class EmailAdapter(BasePlatformAdapter):
        text += f"\n\nImage: {image_url}"
        return await self.send(chat_id, text.strip(), reply_to)

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Deliver a batch of images in one email.
+
+        ``file://`` images that exist on disk become MIME attachments;
+        missing ones are skipped with a warning.  Any other URL is not
+        downloaded — it is listed in the body as ``Image: <url>``.  Alt
+        texts are added to the body as well.  Nothing is sent when there is
+        neither an attachment nor body text.  If the SMTP send raises, the
+        error is logged and the base-class implementation is used instead.
+        """
+        if not images:
+            return
+
+        from urllib.parse import unquote as _unquote
+
+        text_chunks: List[str] = []
+        attachments: List[str] = []
+        for url, caption in images:
+            if caption:
+                text_chunks.append(caption)
+            if not url.startswith("file://"):
+                # Remote URLs just get linked in the body (parity with send_image)
+                text_chunks.append(f"Image: {url}")
+                continue
+            fs_path = _unquote(url[len("file://"):])
+            if not Path(fs_path).exists():
+                logger.warning("[Email] Skipping missing image: %s", fs_path)
+                continue
+            attachments.append(fs_path)
+
+        if not (attachments or text_chunks):
+            return
+
+        body = "\n\n".join(text_chunks)
+
+        try:
+            # smtplib is blocking, so the send runs on the default executor.
+            await asyncio.get_running_loop().run_in_executor(
+                None,
+                self._send_email_with_attachments,
+                chat_id,
+                body,
+                attachments,
+            )
+        except Exception as e:
+            logger.error("[Email] Multi-image send failed, falling back: %s", e, exc_info=True)
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+
+    def _send_email_with_attachments(
+        self,
+        to_addr: str,
+        body: str,
+        file_paths: List[str],
+    ) -> str:
+        """Send an email with multiple file attachments via SMTP.
+
+        The message is threaded as a reply using whatever subject and
+        message id are stored in ``self._thread_context`` for ``to_addr``.
+        Files that cannot be read are skipped with a warning rather than
+        aborting the send.
+
+        Args:
+            to_addr: Recipient address; also the thread-context lookup key.
+            body: Plain-text body; omitted from the message when empty.
+            file_paths: Local paths to attach as ``application/octet-stream``.
+
+        Returns:
+            The generated ``Message-ID`` of the sent email.
+
+        Raises:
+            Exception: Any SMTP/TLS/login error propagates to the caller.
+        """
+        msg = MIMEMultipart()
+        msg["From"] = self._address
+        msg["To"] = to_addr
+
+        ctx = self._thread_context.get(to_addr, {})
+        subject = ctx.get("subject", "Hermes Agent")
+        if not subject.startswith("Re:"):
+            subject = f"Re: {subject}"
+        msg["Subject"] = subject
+
+        original_msg_id = ctx.get("message_id")
+        if original_msg_id:
+            msg["In-Reply-To"] = original_msg_id
+            msg["References"] = original_msg_id
+
+        msg["Date"] = formatdate(localtime=True)
+        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
+        msg["Message-ID"] = msg_id
+
+        if body:
+            msg.attach(MIMEText(body, "plain", "utf-8"))
+
+        attached = 0
+        for file_path in file_paths:
+            p = Path(file_path)
+            try:
+                with open(p, "rb") as f:
+                    part = MIMEBase("application", "octet-stream")
+                    part.set_payload(f.read())
+                    encoders.encode_base64(part)
+                    # Pass the filename as a header parameter so the email
+                    # package quotes/encodes it; a raw f-string breaks on
+                    # names with spaces, semicolons, quotes or non-ASCII.
+                    part.add_header("Content-Disposition", "attachment", filename=p.name)
+                    msg.attach(part)
+                    attached += 1
+            except Exception as e:
+                logger.warning("[Email] Failed to attach %s: %s", file_path, e)
+
+        smtp = smtplib.SMTP(self._smtp_host, self._smtp_port, timeout=30)
+        try:
+            smtp.starttls(context=ssl.create_default_context())
+            smtp.login(self._address, self._password)
+            smtp.send_message(msg)
+        finally:
+            # Prefer a clean QUIT; fall back to closing the socket if that fails.
+            try:
+                smtp.quit()
+            except Exception:
+                smtp.close()
+
+        # Report what was actually attached, not what was requested.
+        logger.info("[Email] Sent multi-attachment email to %s (%d files)", to_addr, attached)
+        return msg_id
+
    async def send_document(
        self,
        chat_id: str,
@@ -64,7 +64,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Any, Dict, List, Literal, Optional, Sequence
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -141,6 +141,7 @@ from gateway.platforms.base import (
 )
 from gateway.status import acquire_scoped_lock, release_scoped_lock
 from hermes_constants import get_hermes_home
+from utils import atomic_json_write

 logger = logging.getLogger(__name__)

@@ -387,6 +388,8 @@ class FeishuAdapterSettings:
    admins: frozenset[str] = frozenset()
    default_group_policy: str = ""
    group_rules: Dict[str, FeishuGroupRule] = field(default_factory=dict)
+    allow_bots: str = "none"  # "none" | "mentions" | "all"
+    require_mention: bool = True


@dataclass
@@ -396,6 +399,7 @@ class FeishuGroupRule:
    policy: str  # "open" | "allowlist" | "blacklist" | "admin_only" | "disabled"
    allowlist: set[str] = field(default_factory=set)
    blacklist: set[str] = field(default_factory=set)
+    require_mention: Optional[bool] = None  # None = inherit global


@dataclass
@@ -405,6 +409,40 @@ class FeishuBatchState:
    counts: Dict[str, int] = field(default_factory=dict)


+# ---------------------------------------------------------------------------
+# Admission: policy types
+# ---------------------------------------------------------------------------
+
+
+# Closed set of reasons FeishuAdapter._admit returns when it drops an
+# inbound event (it returns None when the event is admitted).
+RejectReason = Literal[
+    "self_echo",              # sender ids overlap this bot's own ids
+    "self_ids_unknown",       # bot sender, but own or sender ids are empty
+    "bots_disabled",          # bot sender while allow_bots is not mentions/all
+    "bot_not_mentioned",      # allow_bots="mentions" and the bot was not mentioned
+    "group_policy_rejected",  # group policy gate or required mention failed
+]
+
+
+def _is_bot_sender(sender: Any) -> bool:
+    """Return True when ``sender.sender_type`` is ``"bot"`` or ``"app"``."""
+    # receive_v1 docs say {user, bot}; accept "app" defensively.
+    sender_type = getattr(sender, "sender_type", "")
+    return sender_type == "bot" or sender_type == "app"
+
+
+def _sender_identity(sender: Any) -> frozenset:
+    """Collect every non-empty id found on ``sender.sender_id``.
+
+    Reads ``open_id``, ``user_id`` and ``union_id``; returns an empty
+    frozenset when ``sender_id`` is missing or none of them is set.
+    """
+    # Take any non-empty id variant — tenant sender_id_type decides which are populated.
+    id_obj = getattr(sender, "sender_id", None)
+    if id_obj is None:
+        return frozenset()
+    candidates = (
+        getattr(id_obj, attr, None)
+        for attr in ("open_id", "user_id", "union_id")
+    )
+    return frozenset(filter(None, candidates))
+
+
 # ---------------------------------------------------------------------------
 # Markdown rendering helpers
 # ---------------------------------------------------------------------------
@@ -1377,10 +1415,16 @@ class FeishuAdapter(BasePlatformAdapter):
            for chat_id, rule_cfg in raw_group_rules.items():
                if not isinstance(rule_cfg, dict):
                    continue
+                # Only override when the key is explicitly set — missing vs false
+                # must not collapse.
+                per_chat_require_mention: Optional[bool] = None
+                if "require_mention" in rule_cfg:
+                    per_chat_require_mention = _to_boolean(rule_cfg.get("require_mention"))
                group_rules[str(chat_id)] = FeishuGroupRule(
                    policy=str(rule_cfg.get("policy", "open")).strip().lower(),
                    allowlist=set(str(u).strip() for u in rule_cfg.get("allowlist", []) if str(u).strip()),
                    blacklist=set(str(u).strip() for u in rule_cfg.get("blacklist", []) if str(u).strip()),
+                    require_mention=per_chat_require_mention,
                )

        # Bot-level admins
@@ -1390,6 +1434,16 @@ class FeishuAdapter(BasePlatformAdapter):
        # Default group policy (for groups not in group_rules)
        default_group_policy = str(extra.get("default_group_policy", "")).strip().lower()

+        # Env-only so adapter and gateway auth bypass share one source; yaml
+        # feishu.allow_bots is bridged to this env var at config load.
+        allow_bots = os.getenv("FEISHU_ALLOW_BOTS", "none").strip().lower()
+        if allow_bots not in ("none", "mentions", "all"):
+            logger.warning(
+                "[Feishu] Unknown allow_bots=%r, falling back to 'none'. Valid: none, mentions, all.",
+                allow_bots,
+            )
+            allow_bots = "none"
+
        return FeishuAdapterSettings(
            app_id=str(extra.get("app_id") or os.getenv("FEISHU_APP_ID", "")).strip(),
            app_secret=str(extra.get("app_secret") or os.getenv("FEISHU_APP_SECRET", "")).strip(),
@@ -1446,6 +1500,10 @@ class FeishuAdapter(BasePlatformAdapter):
            admins=admins,
            default_group_policy=default_group_policy,
            group_rules=group_rules,
+            allow_bots=allow_bots,
+            require_mention=_to_boolean(
+                extra.get("require_mention", os.getenv("FEISHU_REQUIRE_MENTION", "true"))
+            ),
        )

    def _apply_settings(self, settings: FeishuAdapterSettings) -> None:
@@ -1476,6 +1534,8 @@ class FeishuAdapter(BasePlatformAdapter):
        self._ws_reconnect_interval = settings.ws_reconnect_interval
        self._ws_ping_interval = settings.ws_ping_interval
        self._ws_ping_timeout = settings.ws_ping_timeout
+        self._allow_bots = settings.allow_bots
+        self._require_mention = settings.require_mention

    def _build_event_handler(self) -> Any:
        if EventDispatcherHandler is None:
@@ -2189,30 +2249,28 @@ class FeishuAdapter(BasePlatformAdapter):
        event = getattr(data, "event", None)
        message = getattr(event, "message", None)
        sender = getattr(event, "sender", None)
-        sender_id = getattr(sender, "sender_id", None)
-        if not message or not sender_id:
-            logger.debug("[Feishu] Dropping malformed inbound event: missing message or sender_id")
+        if not message or not sender or not getattr(sender, "sender_id", None):
+            logger.debug("[Feishu] Dropping malformed inbound event: missing message/sender")
            return

        message_id = getattr(message, "message_id", None)
        if not message_id or self._is_duplicate(message_id):
            logger.debug("[Feishu] Dropping duplicate/missing message_id: %s", message_id)
            return
-        if self._is_self_sent_bot_message(event):
-            logger.debug("[Feishu] Dropping self-sent bot event: %s", message_id)
+
+        reason = self._admit(sender, message)
+        if reason is not None:
+            logger.debug("[Feishu] dropping inbound event: %s", reason)
            return

        chat_type = getattr(message, "chat_type", "p2p")
-        chat_id = getattr(message, "chat_id", "") or ""
-        if chat_type != "p2p" and not self._should_accept_group_message(message, sender_id, chat_id):
-            logger.debug("[Feishu] Dropping group message that failed mention/policy gate: %s", message_id)
-            return
        await self._process_inbound_message(
            data=data,
            message=message,
-            sender_id=sender_id,
+            sender_id=getattr(sender, "sender_id", None),
            chat_type=chat_type,
            message_id=message_id,
+            is_bot=_is_bot_sender(sender),
        )

    def _on_message_read_event(self, data: P2ImMessageMessageReadV1) -> None:
@@ -2389,10 +2447,11 @@ class FeishuAdapter(BasePlatformAdapter):
            msg = items[0] if items else None
            if not msg:
                return
+            # GET im/v1/messages returns sender.id=app_id for bot messages —
+            # peer bots and us share sender_type="app" but differ on app_id.
            sender = getattr(msg, "sender", None)
-            sender_type = str(getattr(sender, "sender_type", "") or "").lower()
-            if sender_type != "app":
-                return  # only route reactions on our own bot messages
+            if str(getattr(sender, "id", "") or "") != self._app_id:
+                return  # only route reactions on this bot's own messages
            chat_id = str(getattr(msg, "chat_id", "") or "")
            chat_type_raw = str(getattr(msg, "chat_type", "p2p") or "p2p")
            if not chat_id:
@@ -2679,6 +2738,7 @@ class FeishuAdapter(BasePlatformAdapter):
        sender_id: Any,
        chat_type: str,
        message_id: str,
+        is_bot: bool = False,
    ) -> None:
        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)

@@ -2704,19 +2764,27 @@ class FeishuAdapter(BasePlatformAdapter):
        )
        reply_to_text = await self._fetch_message_text(reply_to_message_id) if reply_to_message_id else None

+        sender_primary = (
+            getattr(sender_id, "open_id", None)
+            or getattr(sender_id, "user_id", None)
+            or getattr(sender_id, "union_id", None)
+            or "<unknown>"
+        )
        logger.info(
-            "[Feishu] Inbound %s message received: id=%s type=%s chat_id=%s text=%r media=%d",
+            "[Feishu] Inbound %s message received: id=%s type=%s chat_id=%s sender=%s:%s text=%r media=%d",
            "dm" if chat_type == "p2p" else "group",
            message_id,
            inbound_type.value,
            getattr(message, "chat_id", "") or "",
+            "bot" if is_bot else "user",
+            sender_primary,
            text[:120],
            len(media_urls),
        )

        chat_id = getattr(message, "chat_id", "") or ""
        chat_info = await self.get_chat_info(chat_id)
-        sender_profile = await self._resolve_sender_profile(sender_id)
+        sender_profile = await self._resolve_sender_profile(sender_id, is_bot=is_bot)
        source = self.build_source(
            chat_id=chat_id,
            chat_name=chat_info.get("name") or chat_id or "Feishu Chat",
@@ -2725,6 +2793,7 @@ class FeishuAdapter(BasePlatformAdapter):
            user_name=sender_profile["user_name"],
            thread_id=getattr(message, "thread_id", None) or None,
            user_id_alt=sender_profile["user_id_alt"],
+            is_bot=is_bot,
        )
        normalized = MessageEvent(
            text=text,
@@ -3447,7 +3516,12 @@ class FeishuAdapter(BasePlatformAdapter):
            return "dm"
        return "group"

-    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
+    async def _resolve_sender_profile(
+        self,
+        sender_id: Any,
+        *,
+        is_bot: bool = False,
+    ) -> Dict[str, Optional[str]]:
        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.

        Preference order for the primary ``user_id`` field:
@@ -3464,7 +3538,11 @@ class FeishuAdapter(BasePlatformAdapter):
        union_id = getattr(sender_id, "union_id", None) or None
        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
        primary_id = user_id or open_id
-        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
+        # bot/v3/bots/basic_batch only accepts open_id.
+        name_lookup_id = open_id if is_bot else (primary_id or union_id)
+        display_name = await self._resolve_sender_name_from_api(
+            name_lookup_id, is_bot=is_bot,
+        )
        return {
            "user_id": primary_id,
            "user_name": display_name,
@@ -3484,11 +3562,14 @@ class FeishuAdapter(BasePlatformAdapter):
        self._sender_name_cache.pop(sender_id, None)
        return None

-    async def _resolve_sender_name_from_api(self, sender_id: Optional[str]) -> Optional[str]:
-        """Fetch the sender's display name from the Feishu contact API with a 10-minute cache.
-
-        ID-type detection mirrors openclaw: ou_ → open_id, on_ → union_id, else user_id.
-        Failures are silently suppressed; the message pipeline must not block on name resolution.
+    async def _resolve_sender_name_from_api(
+        self,
+        sender_id: Optional[str],
+        *,
+        is_bot: bool = False,
+    ) -> Optional[str]:
+        """Bots divert to bot/basic_batch — contact API doesn't return bot names.
+        Failures are silent so the pipeline never blocks on name resolution.
        """
        if not sender_id or not self._client:
            return None
@@ -3498,7 +3579,16 @@ class FeishuAdapter(BasePlatformAdapter):
        now = time.time()
        cached_name = self._get_cached_sender_name(trimmed)
        if cached_name is not None:
-            return cached_name
+            return cached_name or None  # "" cached means "known nameless"
+        if is_bot:
+            names = await self._fetch_bot_names([trimmed])
+            if names is None:
+                return None
+            expire_at = now + _FEISHU_SENDER_NAME_TTL_SECONDS
+            for oid, name in names.items():
+                self._sender_name_cache[oid] = (name, expire_at)
+            hit = self._sender_name_cache.get(trimmed)
+            return (hit[0] or None) if hit else None
        try:
            from lark_oapi.api.contact.v3 import GetUserRequest  # lazy import
            if trimmed.startswith("ou_"):
@@ -3527,6 +3617,35 @@ class FeishuAdapter(BasePlatformAdapter):
            logger.debug("[Feishu] Failed to resolve sender name for %s", sender_id, exc_info=True)
        return None

+    async def _fetch_bot_names(self, bot_ids: List[str]) -> Optional[Dict[str, str]]:
+        """Look up bot display names via ``/open-apis/bot/v3/bots/basic_batch``.
+
+        Each id is sent as a repeated ``bot_ids`` query parameter using a
+        tenant access token.  Returns a mapping of response key to stripped
+        name (empty string when the entry has no name), or ``None`` when
+        there is no client, no ids, an empty body, a non-zero ``code``, or
+        any exception (logged at debug level).
+        """
+        if not (self._client and bot_ids):
+            return None
+        try:
+            query_pairs = [("bot_ids", oid) for oid in bot_ids]
+            builder = BaseRequest.builder()
+            builder = builder.http_method(HttpMethod.GET)
+            builder = builder.uri("/open-apis/bot/v3/bots/basic_batch")
+            builder = builder.queries(query_pairs)
+            builder = builder.token_types({AccessTokenType.TENANT})
+            request = builder.build()
+
+            # The SDK call is synchronous; keep it off the event loop.
+            response = await asyncio.to_thread(self._client.request, request)
+            raw_body = getattr(getattr(response, "raw", None), "content", None)
+            if not raw_body:
+                return None
+            decoded = json.loads(raw_body)
+            if decoded.get("code") != 0:
+                return None
+            data = decoded.get("data") or {}
+            bots = data.get("bots") or {}
+            names: Dict[str, str] = {}
+            for oid, info in bots.items():
+                if oid:
+                    names[oid] = str(info.get("name") or "").strip()
+            return names
+        except Exception:
+            logger.debug("[Feishu] Failed to fetch bot names for %s", bot_ids, exc_info=True)
+            return None
+
    async def _fetch_message_text(self, message_id: str) -> Optional[str]:
        if not self._client or not message_id:
            return None
@@ -3590,10 +3709,60 @@ class FeishuAdapter(BasePlatformAdapter):
            logger.exception("[Feishu] Background inbound processing failed")

    # =========================================================================
-    # Group policy and mention gating
+    # Inbound admission
    # =========================================================================

-    def _allow_group_message(self, sender_id: Any, chat_id: str = "") -> bool:
+    def _admit(self, sender: Any, message: Any) -> Optional[RejectReason]:
+        """Decide whether an inbound event may enter the pipeline.
+
+        Returns ``None`` to admit, otherwise the reason for dropping.  Checks
+        run in order: self-echo, bot admission (``self._allow_bots``), then —
+        for non-p2p chats only — the group policy and the mention gate.
+        """
+        from_ids = _sender_identity(sender)
+        own_ids = frozenset(filter(None, (self._bot_open_id, self._bot_user_id)))
+        from_bot = _is_bot_sender(sender)
+        in_group = getattr(message, "chat_type", "p2p") != "p2p"
+        chat_id = getattr(message, "chat_id", "") or ""
+        needs_mention = in_group and self._require_mention_for(chat_id)
+
+        # Defensive only — Feishu doesn't echo our outbound back as inbound,
+        # and open_id is always populated on both sides.
+        if not own_ids.isdisjoint(from_ids):
+            return "self_echo"
+
+        if from_bot:
+            bot_mode = self._allow_bots
+            if bot_mode not in ("mentions", "all"):
+                return "bots_disabled"
+            # Defensive: pre-hydration or malformed payloads.
+            if not (own_ids and from_ids):
+                return "self_ids_unknown"
+            # When a group requires a mention, the group mention gate below
+            # enforces it; check here only on paths that gate won't reach.
+            if bot_mode == "mentions" and not (needs_mention or self._mentions_self(message)):
+                return "bot_not_mentioned"
+
+        if in_group:
+            policy_ok = self._allow_group_message(
+                getattr(sender, "sender_id", None), chat_id, is_bot=from_bot,
+            )
+            if not policy_ok:
+                return "group_policy_rejected"
+            if needs_mention and not self._mentions_self(message):
+                return "group_policy_rejected"
+        return None
+
+    def _require_mention_for(self, chat_id: str) -> bool:
+        """Return the mention requirement for ``chat_id``.
+
+        A per-chat rule wins only when its ``require_mention`` is explicitly
+        set (not ``None``); otherwise the adapter-wide setting applies.
+        """
+        if chat_id:
+            override = self._group_rules.get(chat_id)
+            if override and override.require_mention is not None:
+                return override.require_mention
+        return self._require_mention
+
+    # --- Group policy ---------------------------------------------------------
+
+    def _allow_group_message(
+        self,
+        sender_id: Any,
+        chat_id: str = "",
+        *,
+        is_bot: bool = False,
+    ) -> bool:
        """Per-group policy gate for non-DM traffic."""
        sender_open_id = getattr(sender_id, "open_id", None)
        sender_user_id = getattr(sender_id, "user_id", None)
@@ -3612,12 +3781,17 @@ class FeishuAdapter(BasePlatformAdapter):
            allowlist = self._allowed_group_users
            blacklist = set()

+        # Channel locks apply to everyone; allowlist/blacklist only gate humans
+        # (bots were already cleared upstream by FEISHU_ALLOW_BOTS).
        if policy == "disabled":
            return False
        if policy == "open":
            return True
        if policy == "admin_only":
            return False
+        if is_bot:
+            return True
+
        if policy == "allowlist":
            return bool(sender_ids and (sender_ids & allowlist))
        if policy == "blacklist":
@@ -3625,17 +3799,16 @@ class FeishuAdapter(BasePlatformAdapter):

        return bool(sender_ids and (sender_ids & self._allowed_group_users))

-    def _should_accept_group_message(self, message: Any, sender_id: Any, chat_id: str = "") -> bool:
-        """Require an explicit @mention before group messages enter the agent."""
-        if not self._allow_group_message(sender_id, chat_id):
-            return False
-        # @_all is Feishu's @everyone placeholder — always route to the bot.
+    # --- Mention detection ----------------------------------------------------
+
+    def _mentions_self(self, message: Any) -> bool:
+        # @_all is Feishu's @everyone placeholder.
        raw_content = getattr(message, "content", "") or ""
        if "@_all" in raw_content:
            return True
        mentions = getattr(message, "mentions", None) or []
-        if mentions:
-            return self._message_mentions_bot(mentions)
+        if mentions and self._message_mentions_bot(mentions):
+            return True
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
@@ -3644,23 +3817,6 @@ class FeishuAdapter(BasePlatformAdapter):
        )
        return self._post_mentions_bot(normalized.mentions)

-    def _is_self_sent_bot_message(self, event: Any) -> bool:
-        """Return True only for Feishu events emitted by this Hermes bot."""
-        sender = getattr(event, "sender", None)
-        sender_type = str(getattr(sender, "sender_type", "") or "").strip().lower()
-        if sender_type not in {"bot", "app"}:
-            return False
-
-        sender_id = getattr(sender, "sender_id", None)
-        sender_open_id = str(getattr(sender_id, "open_id", "") or "").strip()
-        sender_user_id = str(getattr(sender_id, "user_id", "") or "").strip()
-
-        if self._bot_open_id and sender_open_id == self._bot_open_id:
-            return True
-        if self._bot_user_id and sender_user_id == self._bot_user_id:
-            return True
-        return False
-
    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
        # IDs trump names: when both sides have open_id (or both user_id),
        # match requires equal IDs. Name fallback only when either side
@@ -3804,7 +3960,7 @@ class FeishuAdapter(BasePlatformAdapter):
            recent = self._seen_message_order[-self._dedup_cache_size:]
            # Save as {msg_id: timestamp} so TTL filtering works across restarts.
            payload = {"message_ids": {k: self._seen_message_ids[k] for k in recent if k in self._seen_message_ids}}
-            self._dedup_state_path.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
+            atomic_json_write(self._dedup_state_path, payload, indent=None)
        except OSError:
            logger.warning("[Feishu] Failed to persist dedup state to %s", self._dedup_state_path, exc_info=True)

@@ -13,6 +13,8 @@ import time
 from pathlib import Path
 from typing import TYPE_CHECKING, Dict

+from utils import atomic_json_write
+
 if TYPE_CHECKING:
    from gateway.platforms.base import MessageEvent

@@ -237,12 +239,11 @@ class ThreadParticipationTracker:

    def _save(self) -> None:
        path = self._state_path()
-        path.parent.mkdir(parents=True, exist_ok=True)
        thread_list = list(self._threads)
        if len(thread_list) > self._max_tracked:
            thread_list = thread_list[-self._max_tracked:]
            self._threads = set(thread_list)
-        path.write_text(json.dumps(thread_list), encoding="utf-8")
+        atomic_json_write(path, thread_list, indent=None)

    def mark(self, thread_id: str) -> None:
        """Mark *thread_id* as participated and persist."""
@@ -19,7 +19,7 @@ import logging
 import os
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.helpers import MessageDeduplicator
@@ -496,6 +496,100 @@ class MattermostAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Failed to post with file")
        return SendResult(success=True, message_id=data["id"])

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images as a single Mattermost post with multiple attachments.
+
+        Mattermost supports up to 5 ``file_ids`` per post. Each image is
+        uploaded individually (Mattermost's file API is one-at-a-time),
+        then a single post is created referencing all uploaded file_ids
+        at once. Batches larger than 5 are chunked. If a chunk's upload/post
+        raises or the post is rejected, that chunk falls back to the base loop.
+        """
+        if not images:
+            return
+
+        import mimetypes
+        import aiohttp
+        from urllib.parse import unquote as _unquote
+
+        CHUNK = 5  # Mattermost post file_ids cap
+        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
+
+        for chunk_idx, chunk in enumerate(chunks):
+            if human_delay > 0 and chunk_idx > 0:
+                await asyncio.sleep(human_delay)
+
+            file_ids: List[str] = []
+            caption_parts: List[str] = []
+            try:
+                for image_url, alt_text in chunk:
+                    if alt_text:
+                        caption_parts.append(alt_text)
+
+                    if image_url.startswith("file://"):
+                        local_path = _unquote(image_url[7:])
+                        p = Path(local_path)
+                        if not p.exists():
+                            logger.warning("Mattermost: skipping missing image %s", local_path)
+                            continue
+                        fname = p.name
+                        ct = mimetypes.guess_type(fname)[0] or "image/png"
+                        file_data = p.read_bytes()
+                    else:
+                        from tools.url_safety import is_safe_url
+                        if not is_safe_url(image_url):
+                            logger.warning("Mattermost: blocked unsafe image URL in batch")
+                            continue
+                        try:
+                            async with self._session.get(
+                                image_url, timeout=aiohttp.ClientTimeout(total=30)
+                            ) as resp:
+                                if resp.status >= 400:
+                                    logger.warning(
+                                        "Mattermost: failed to download image (HTTP %d): %s",
+                                        resp.status, image_url[:80],
+                                    )
+                                    continue
+                                file_data = await resp.read()
+                                ct = resp.content_type or "image/png"
+                        except Exception as dl_err:
+                            logger.warning("Mattermost: download failed for %s: %s", image_url[:80], dl_err)
+                            continue
+                        fname = image_url.rsplit("/", 1)[-1].split("?")[0] or f"image_{len(file_ids)}.png"
+
+                    fid = await self._upload_file(chat_id, file_data, fname, ct)
+                    if fid:
+                        file_ids.append(fid)
+
+                if not file_ids:
+                    continue
+
+                payload: Dict[str, Any] = {
+                    "channel_id": chat_id,
+                    "message": "\n".join(caption_parts),
+                    "file_ids": file_ids,
+                }
+                logger.info(
+                    "Mattermost: sending %d image(s) as single post (chunk %d/%d)",
+                    len(file_ids), chunk_idx + 1, len(chunks),
+                )
+                data = await self._api_post("posts", payload)
+                if not data or "id" not in data:
+                    logger.warning("Mattermost: multi-image post failed, falling back")
+                    await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
+            except Exception as e:
+                logger.warning(
+                    "Mattermost: multi-image send failed (chunk %d/%d), falling back: %s",
+                    chunk_idx + 1, len(chunks), e, exc_info=True,
+                )
+                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
+
    # ------------------------------------------------------------------
    # WebSocket
    # ------------------------------------------------------------------
@@ -976,6 +976,18 @@ class QQAdapter(BasePlatformAdapter):
        if not channel_id:
            return

+        # Apply group_policy ACL — guild channels are group-like contexts.
+        # Without this check any member of any guild the bot is in could
+        # bypass the configured allowlist.
+        guild_id = str(d.get("guild_id", ""))
+        author_id = str(author.get("id", ""))
+        if not self._is_group_allowed(guild_id or channel_id, author_id):
+            logger.debug(
+                "[%s] Guild message blocked by ACL: channel=%s user=%s",
+                self._log_tag, channel_id, author_id,
+            )
+            return
+
        member = d.get("member") if isinstance(d.get("member"), dict) else {}
        nick = str(member.get("nick", "")) or str(author.get("username", ""))

@@ -1032,6 +1044,17 @@ class QQAdapter(BasePlatformAdapter):
        if not guild_id:
            return

+        # Apply dm_policy ACL — guild DMs were previously unauthenticated.
+        # Without this check any member of any guild the bot is in could
+        # bypass the configured allowlist via direct messages.
+        author_id = str(author.get("id", ""))
+        if not self._is_dm_allowed(author_id):
+            logger.debug(
+                "[%s] Guild DM blocked by ACL: guild=%s user=%s",
+                self._log_tag, guild_id, author_id,
+            )
+            return
+
        text = content
        att_result = await self._process_attachments(d.get("attachments"))
        image_urls = att_result["image_urls"]
@@ -21,7 +21,7 @@ import time
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Dict, List, Optional, Any
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import quote, unquote

 import httpx
@@ -31,6 +31,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
@@ -38,6 +39,17 @@ from gateway.platforms.base import (
    cache_image_from_url,
 )
 from gateway.platforms.helpers import redact_phone
+from gateway.platforms.signal_rate_limit import (
+    SIGNAL_BATCH_PACING_NOTICE_THRESHOLD,
+    SIGNAL_MAX_ATTACHMENTS_PER_MSG,
+    SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+    SignalRateLimitError,
+    _extract_retry_after_seconds,
+    _format_wait,
+    _is_signal_rate_limit_error,
+    _signal_send_timeout,
+    get_scheduler,
+)

 logger = logging.getLogger(__name__)

@@ -52,6 +64,7 @@ SSE_RETRY_DELAY_MAX = 60.0
 HEALTH_CHECK_INTERVAL = 30.0  # seconds between health checks
 HEALTH_CHECK_STALE_THRESHOLD = 120.0  # seconds without SSE activity before concern

+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -162,6 +175,10 @@ class SignalAdapter(BasePlatformAdapter):
    """Signal messenger adapter using signal-cli HTTP daemon."""

    platform = Platform.SIGNAL
+    # Signal has no real edit API for already-sent messages. Mark it explicitly
+    # so streaming suppresses the visible cursor instead of leaving a stale tofu
+    # square behind in chat clients when edit attempts fail.
+    SUPPORTS_MESSAGE_EDITING = False

    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.SIGNAL)
@@ -488,6 +505,11 @@ class SignalAdapter(BasePlatformAdapter):
        if text and mentions:
            text = _render_mentions(text, mentions)

+        # Extract quote (reply-to) context from Signal dataMessage
+        quote_data = data_message.get("quote") or {}
+        reply_to_id = str(quote_data.get("id")) if quote_data.get("id") else None
+        reply_to_text = quote_data.get("text")
+
        # Process attachments
        attachments_data = data_message.get("attachments", [])
        media_urls = []
@@ -512,6 +534,18 @@ class SignalAdapter(BasePlatformAdapter):
                except Exception:
                    logger.exception("Signal: failed to fetch attachment %s", att_id)

+        # Skip envelopes with no meaningful content (no text, no attachments).
+        # Catches profile key updates, empty messages, and other metadata-only
+        # envelopes that still carry a dataMessage wrapper but have nothing
+        # worth processing. Observed symptom: signal-cli logs "Profile key
+        # update" while Hermes receives msg='' and runs a full agent turn for nothing.
+        if (not text or not text.strip()) and not media_urls:
+            logger.debug(
+                "Signal: skipping contentless envelope from %s (%d attachments)",
+                redact_phone(sender), len(media_urls) if media_urls else 0,
+            )
+            return
+
        # Build session source
        source = self.build_source(
            chat_id=chat_id,
@@ -541,7 +575,9 @@ class SignalAdapter(BasePlatformAdapter):
        else:
            timestamp = datetime.now(tz=timezone.utc)

-        # Build and dispatch event
+        # Build and dispatch event.
+        # Store raw envelope data in raw_message so on_processing_start/complete
+        # can extract targetAuthor + targetTimestamp for sendReaction.
        event = MessageEvent(
            source=source,
            text=text or "",
@@ -549,6 +585,9 @@ class SignalAdapter(BasePlatformAdapter):
            media_urls=media_urls,
            media_types=media_types,
            timestamp=timestamp,
+            raw_message={"sender": sender, "timestamp_ms": ts_ms},
+            reply_to_message_id=reply_to_id,
+            reply_to_text=reply_to_text,
        )

        logger.debug("Signal: message from %s in %s: %s",
@@ -659,6 +698,8 @@ class SignalAdapter(BasePlatformAdapter):
        rpc_id: str = None,
        *,
        log_failures: bool = True,
+        raise_on_rate_limit: bool = False,
+        timeout: float = 30.0,
    ) -> Any:
        """Send a JSON-RPC 2.0 request to signal-cli daemon.

@@ -667,6 +708,11 @@ class SignalAdapter(BasePlatformAdapter):
        repeated NETWORK_FAILURE spam for unreachable recipients while
        still preserving visibility for the first occurrence and for
        unrelated RPCs.
+
+        When ``raise_on_rate_limit=True``, a Signal ``[429]`` /
+        ``RateLimitException`` response raises ``SignalRateLimitError``
+        instead of being swallowed — lets callers (multi-attachment send)
+        opt into backoff-retry without changing default behaviour.
        """
        if not self.client:
            logger.warning("Signal: RPC called but client not connected")
@@ -686,20 +732,28 @@ class SignalAdapter(BasePlatformAdapter):
            resp = await self.client.post(
                f"{self.http_url}/api/v1/rpc",
                json=payload,
-                timeout=30.0,
+                timeout=timeout,
            )
            resp.raise_for_status()
            data = resp.json()

            if "error" in data:
+                err = data["error"]
+                if raise_on_rate_limit:
+                    if _is_signal_rate_limit_error(err):
+                        err_msg = str(err.get("message", "")) if isinstance(err, dict) else str(err)
+                        retry_after = _extract_retry_after_seconds(err)
+                        raise SignalRateLimitError(err_msg, retry_after=retry_after)
                if log_failures:
-                    logger.warning("Signal RPC error (%s): %s", method, data["error"])
+                    logger.warning("Signal RPC error (%s): %s", method, err)
                else:
-                    logger.debug("Signal RPC error (%s): %s", method, data["error"])
+                    logger.debug("Signal RPC error (%s): %s", method, err)
                return None

            return data.get("result")

+        except SignalRateLimitError:
+            raise
        except Exception as e:
            if log_failures:
                logger.warning("Signal RPC %s failed: %s", method, e)
@@ -707,6 +761,159 @@ class SignalAdapter(BasePlatformAdapter):
                logger.debug("Signal RPC %s failed: %s", method, e)
            return None

+    # ------------------------------------------------------------------
+    # Formatting — markdown → Signal body ranges
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _markdown_to_signal(text: str) -> tuple:
+        """Convert markdown to plain text + Signal textStyles list.
+
+        Signal doesn't render markdown.  Instead it uses ``bodyRanges``
+        (exposed by signal-cli as ``textStyle`` / ``textStyles`` params)
+        with the format ``start:length:STYLE``.
+
+        Positions are measured in **UTF-16 code units** (not Python code
+        points) because that's what the Signal protocol uses.
+
+        Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.
+        (Signal's SPOILER style is not currently mapped — no standard
+        markdown syntax for it; would need ``||spoiler||`` parsing.)
+
+        Returns ``(plain_text, styles_list)`` where *styles_list* may be
+        empty if there's nothing to format.
+        """
+        import re
+
+        def _utf16_len(s: str) -> int:
+            """Length of *s* in UTF-16 code units."""
+            return len(s.encode("utf-16-le")) // 2
+
+        # Pre-process: normalize whitespace before any position tracking
+        # so later operations don't invalidate recorded offsets.
+        text = re.sub(r"\n{3,}", "\n\n", text)
+        text = text.strip()
+
+        styles: list = []
+
+        # --- Phase 1: fenced code blocks  ```...``` → MONOSPACE ---
+        _CB = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL)
+        while m := _CB.search(text):
+            inner = m.group(1).rstrip("\n")
+            start = m.start()
+            text = text[: m.start()] + inner + text[m.end() :]
+            styles.append((start, len(inner), "MONOSPACE"))
+
+        # --- Phase 2: heading markers  # Foo → Foo (BOLD) ---
+        _HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
+        new_text = ""
+        last_end = 0
+        for m in _HEADING.finditer(text):
+            new_text += text[last_end : m.start()]
+            last_end = m.end()
+            eol = text.find("\n", m.end())
+            if eol == -1:
+                eol = len(text)
+            heading_text = text[m.end() : eol]
+            start = len(new_text)
+            new_text += heading_text
+            styles.append((start, len(heading_text), "BOLD"))
+            last_end = eol
+        new_text += text[last_end:]
+        text = new_text
+
+        # --- Phase 3: inline patterns (single-pass to avoid offset drift) ---
+        # The old code processed each pattern sequentially, stripping markers
+        # and recording positions per-pass.  Later passes shifted text without
+        # adjusting earlier positions → bold/italic landed mid-word.
+        #
+        # Fix: collect ALL non-overlapping matches first, then strip every
+        # marker in one pass so positions are computed against the final text.
+        _PATTERNS = [
+            (re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
+            (re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
+            (re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
+            (re.compile(r"`(.+?)`"), "MONOSPACE"),
+            (re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
+            (re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
+        ]
+
+        # Collect all non-overlapping matches (earlier patterns win ties).
+        all_matches: list = []  # (start, end, g1_start, g1_end, style)
+        occupied: list = []     # (start, end) intervals already claimed
+        for pat, style in _PATTERNS:
+            for m in pat.finditer(text):
+                ms, me = m.start(), m.end()
+                if not any(ms < oe and me > os for os, oe in occupied):
+                    all_matches.append((ms, me, m.start(1), m.end(1), style))
+                    occupied.append((ms, me))
+        all_matches.sort()
+
+        # Build removal list so we can adjust Phase 1/2 styles.
+        # Each match removes its prefix markers (start..g1_start) and
+        # suffix markers (g1_end..end).
+        removals: list = []  # (position, length) sorted
+        for ms, me, g1s, g1e, _ in all_matches:
+            if g1s > ms:
+                removals.append((ms, g1s - ms))
+            if me > g1e:
+                removals.append((g1e, me - g1e))
+        removals.sort()
+
+        # Adjust Phase 1/2 styles for characters about to be removed.
+        def _adj(pos: int) -> int:
+            shift = 0
+            for rp, rl in removals:
+                if rp < pos:
+                    shift += min(rl, pos - rp)
+                else:
+                    break
+            return pos - shift
+
+        adjusted_prior: list = []
+        for s, l, st in styles:
+            ns = _adj(s)
+            ne = _adj(s + l)
+            if ne > ns:
+                adjusted_prior.append((ns, ne - ns, st))
+
+        # Strip all inline markers in one pass → positions are correct.
+        result = ""
+        last_end = 0
+        inline_styles: list = []
+        for ms, me, g1s, g1e, sty in all_matches:
+            result += text[last_end:ms]
+            pos = len(result)
+            inner = text[g1s:g1e]
+            result += inner
+            inline_styles.append((pos, len(inner), sty))
+            last_end = me
+        result += text[last_end:]
+        text = result
+
+        styles = adjusted_prior + inline_styles
+
+        # Convert code-point offsets → UTF-16 code-unit offsets
+        style_strings = []
+        for cp_start, cp_len, stype in sorted(styles):
+            # Safety: skip any out-of-bounds styles
+            if cp_start < 0 or cp_start + cp_len > len(text):
+                continue
+            u16_start = _utf16_len(text[:cp_start])
+            u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
+            style_strings.append(f"{u16_start}:{u16_len}:{stype}")
+
+        return text, style_strings
+
+    def format_message(self, content: str) -> str:
+        """Return *content* unchanged; plain-text fallback hook used by the base class.
+
+        The actual rich formatting happens in send() via _markdown_to_signal().
+        """
+        # This is only called if someone uses the base-class send path.
+        # Our send() override bypasses this entirely.
+        return content
+
    # ------------------------------------------------------------------
    # Sending
    # ------------------------------------------------------------------
@@ -718,14 +925,22 @@ class SignalAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        """Send a text message."""
+        """Send a text message with native Signal formatting."""
        await self._stop_typing_indicator(chat_id)

+        plain_text, text_styles = self._markdown_to_signal(content)
+
        params: Dict[str, Any] = {
            "account": self.account,
-            "message": content,
+            "message": plain_text,
        }

+        if text_styles:
+            if len(text_styles) == 1:
+                params["textStyle"] = text_styles[0]
+            else:
+                params["textStyles"] = text_styles
+
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
@@ -735,11 +950,10 @@ class SignalAdapter(BasePlatformAdapter):

        if result is not None:
            self._track_sent_timestamp(result)
-            # Use the timestamp from the RPC result as a pseudo message_id.
-            # Signal doesn't have real message IDs, but the stream consumer
-            # needs a truthy value to follow its edit→fallback path correctly.
-            _msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
-            return SendResult(success=True, message_id=_msg_id or None)
+            # Signal has no editable message identifier. Returning None keeps the
+            # stream consumer on the non-edit fallback path instead of pretending
+            # future edits can remove an in-progress cursor from the chat thread.
+            return SendResult(success=True, message_id=None)
        return SendResult(success=False, error="RPC send failed")

    def _track_sent_timestamp(self, rpc_result) -> None:
@@ -803,6 +1017,178 @@ class SignalAdapter(BasePlatformAdapter):
            self._typing_failures.pop(chat_id, None)
            self._typing_skip_until.pop(chat_id, None)

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images via chunked Signal RPC calls.
+
+        Per-image alt texts are dropped — Signal's send RPC only carries
+        one shared message body. Bad images (download failure, missing
+        file, oversize) are skipped with a warning so one bad URL
+        doesn't lose the rest of the batch. ``human_delay`` is ignored:
+        the rate-limit scheduler handles inter-batch pacing.
+        """
+        if not images:
+            return
+
+        scheduler = get_scheduler()
+        logger.info(
+            "Signal send_multiple_images: received %d image(s) for %s — "
+            "scheduler state: %s",
+            len(images), chat_id[:30], scheduler.state(),
+        )
+
+        await self._stop_typing_indicator(chat_id)
+
+        attachments: List[str] = []
+        skipped_download = 0
+        skipped_missing = 0
+        skipped_oversize = 0
+        for image_url, _alt_text in images:
+            if image_url.startswith("file://"):
+                file_path = unquote(image_url[7:])
+            else:
+                try:
+                    file_path = await cache_image_from_url(image_url)
+                except Exception as e:
+                    logger.warning("Signal: failed to download image %s: %s", image_url, e)
+                    skipped_download += 1
+                    continue
+
+            if not file_path or not Path(file_path).exists():
+                logger.warning("Signal: image file not found for %s", image_url)
+                skipped_missing += 1
+                continue
+
+            file_size = Path(file_path).stat().st_size
+            if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
+                logger.warning(
+                    "Signal: image too large (%d bytes), skipping %s", file_size, image_url
+                )
+                skipped_oversize += 1
+                continue
+
+            attachments.append(file_path)
+
+        if not attachments:
+            logger.error(
+                "Signal: no valid images in batch of %d "
+                "(download=%d missing=%d oversize=%d)",
+                len(images), skipped_download, skipped_missing, skipped_oversize,
+            )
+            return
+
+        logger.info(
+            "Signal send_multiple_images: %d/%d images valid, sending in chunks",
+            len(attachments), len(images),
+        )
+
+        base_params: Dict[str, Any] = {
+            "account": self.account,
+            "message": "",
+        }
+        if chat_id.startswith("group:"):
+            base_params["groupId"] = chat_id[6:]
+        else:
+            base_params["recipient"] = [await self._resolve_recipient(chat_id)]
+
+        att_batches = [
+            attachments[i:i + SIGNAL_MAX_ATTACHMENTS_PER_MSG]
+            for i in range(0, len(attachments), SIGNAL_MAX_ATTACHMENTS_PER_MSG)
+        ]
+
+        for idx, att_batch in enumerate(att_batches):
+            n = len(att_batch)
+            estimated = scheduler.estimate_wait(n)
+            logger.debug(
+                "Signal batch %d/%d: %d attachments, estimated wait=%.1fs",
+                idx + 1, len(att_batches), n, estimated,
+            )
+            if estimated >= SIGNAL_BATCH_PACING_NOTICE_THRESHOLD:
+                await self._notify_batch_pacing(
+                    chat_id, idx + 1, len(att_batches), estimated
+                )
+
+            params = dict(base_params, attachments=att_batch)
+            send_timeout = _signal_send_timeout(n)
+
+            for attempt in range(1, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS + 1):
+                await scheduler.acquire(n)
+                try:
+                    _rpc_t0 = time.monotonic()
+                    result = await self._rpc(
+                        "send", params, raise_on_rate_limit=True, timeout=send_timeout,
+                    )
+                    _rpc_duration = time.monotonic() - _rpc_t0
+                    if result is not None:
+                        self._track_sent_timestamp(result)
+                        await scheduler.report_rpc_duration(_rpc_duration, n)
+                        logger.info(
+                            "Signal batch %d/%d: %d attachments sent in %.1fs "
+                            "(attempt %d/%d)",
+                            idx + 1, len(att_batches), n, _rpc_duration,
+                            attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                        )
+                    else:
+                        # Assume the server didn't accept the batch, don't deduct tokens
+                        logger.error(
+                            "Signal: RPC send failed for batch %d/%d (%d attachments, "
+                            "attempt %d/%d, rpc_duration=%.1fs)",
+                            idx + 1, len(att_batches), n,
+                            attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                            _rpc_duration,
+                        )
+                        # Retry transient (non-rate-limit) failures with backoff while attempts remain
+                        if attempt < SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
+                            backoff = 2.0 ** attempt
+                            logger.info(
+                                "Signal: retrying batch %d/%d after %.1fs backoff",
+                                idx + 1, len(att_batches), backoff,
+                            )
+                            await asyncio.sleep(backoff)
+                            continue
+                    break
+                except SignalRateLimitError as e:
+                    scheduler.feedback(e.retry_after, n)
+                    if attempt >= SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
+                        logger.error(
+                            "Signal: rate-limit retries exhausted on batch %d/%d "
+                            "(%d attachments lost, server retry_after=%s)",
+                            idx + 1, len(att_batches), n,
+                            f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
+                        )
+                        break
+                    logger.warning(
+                        "Signal: rate-limited on batch %d/%d "
+                        "(attempt %d/%d, server retry_after=%s); "
+                        "scheduler will pace the retry",
+                        idx + 1, len(att_batches),
+                        attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
+                        f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
+                    )
+
+    async def _notify_batch_pacing(
+        self,
+        chat_id: str,
+        next_batch_idx: int,
+        total_batches: int,
+        wait_s: float,
+    ) -> None:
+        """Inform the user when an inter-batch pacing wait crosses the
+        notice threshold. Best-effort; logs and continues on failure."""
+        try:
+            await self.send(
+                chat_id,
+                f"(More images coming — pausing ~{_format_wait(wait_s)} "
+                f"for Signal rate limit, batch {next_batch_idx}/{total_batches}.)",
+            )
+        except Exception as e:
+            logger.warning("Signal: failed to send pacing notice: %s", e)
+
    async def send_image(
        self,
        chat_id: str,
@@ -963,6 +1349,110 @@ class SignalAdapter(BasePlatformAdapter):
        _keep_typing finally block to clean up platform-level typing tasks."""
        await self._stop_typing_indicator(chat_id)

+    # ------------------------------------------------------------------
+    # Reactions
+    # ------------------------------------------------------------------
+
+    async def send_reaction(
+        self,
+        chat_id: str,
+        emoji: str,
+        target_author: str,
+        target_timestamp: int,
+    ) -> bool:
+        """React to a single message by issuing a ``sendReaction`` RPC.
+
+        Args:
+            chat_id: The chat (phone number or "group:<id>")
+            emoji: Reaction emoji string (e.g. "👀", "✅")
+            target_author: Phone number / UUID of the message author
+            target_timestamp: Signal timestamp (ms) of the message to react to
+        Returns:
+            True when the RPC returned a non-None result, otherwise False.
+        """
+        if chat_id.startswith("group:"):
+            destination: Dict[str, Any] = {"groupId": chat_id[6:]}
+        else:
+            destination = {"recipient": [chat_id]}
+        params: Dict[str, Any] = {
+            "account": self.account,
+            "emoji": emoji,
+            "targetAuthor": target_author,
+            "targetTimestamp": target_timestamp,
+            **destination,
+        }
+        delivered = await self._rpc("sendReaction", params) is not None
+        if not delivered:
+            logger.debug("Signal: sendReaction failed (chat=%s, emoji=%s)", chat_id[:20], emoji)
+        return delivered
+
+    async def remove_reaction(
+        self,
+        chat_id: str,
+        target_author: str,
+        target_timestamp: int,
+    ) -> bool:
+        """Withdraw this account's reaction from a message.
+
+        Issues ``sendReaction`` with ``remove=True`` and an empty emoji;
+        returns True when the RPC produced a non-None result.
+        """
+        if chat_id.startswith("group:"):
+            destination: Dict[str, Any] = {"groupId": chat_id[6:]}
+        else:
+            destination = {"recipient": [chat_id]}
+        params: Dict[str, Any] = {
+            "account": self.account, "emoji": "",
+            "targetAuthor": target_author, "targetTimestamp": target_timestamp,
+            "remove": True, **destination,
+        }
+
+        return await self._rpc("sendReaction", params) is not None
+
+    # ------------------------------------------------------------------
+    # Processing Lifecycle Hooks (reactions as progress indicators)
+    # ------------------------------------------------------------------
+
+    def _extract_reaction_target(self, event: MessageEvent) -> Optional[tuple]:
+        """Build the ``(author, timestamp)`` pair that sendReaction needs.
+
+        Both values are read from ``event.raw_message`` (keys ``sender`` and
+        ``timestamp_ms``). Yields None when that payload is not a dict or
+        either value is missing/falsy.
+        """
+        envelope = event.raw_message
+        if isinstance(envelope, dict):
+            sender = envelope.get("sender")
+            sent_at = envelope.get("timestamp_ms")
+            if sender and sent_at:
+                return (sender, sent_at)
+        return None
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """Mark the incoming message with 👀; does nothing without a reaction target."""
+        reaction_target = self._extract_reaction_target(event)
+        if reaction_target:
+            await self.send_reaction(event.source.chat_id, "👀", *reaction_target)
+
+    async def on_processing_complete(self, event: MessageEvent, outcome: "ProcessingOutcome") -> None:
+        """Replace the in-progress 👀 with ✅ on success or ❌ on failure.
+
+        A CANCELLED outcome is ignored so the 👀 stays visible (matches
+        Telegram); any other outcome clears the 👀 before the final emoji.
+        """
+        if outcome == ProcessingOutcome.CANCELLED:
+            return
+        reaction_target = self._extract_reaction_target(event)
+        if not reaction_target:
+            return
+        chat_id = event.source.chat_id
+        # The 👀 is cleared first; the terminal emoji (if any) is added after.
+        await self.remove_reaction(chat_id, *reaction_target)
+
+        succeeded = outcome == ProcessingOutcome.SUCCESS
+        if succeeded or outcome == ProcessingOutcome.FAILURE:
+            await self.send_reaction(chat_id, "✅" if succeeded else "❌", *reaction_target)
+
    # ------------------------------------------------------------------
    # Chat Info
    # ------------------------------------------------------------------
@@ -0,0 +1,369 @@
+"""
+Signal attachment rate-limit scheduler.
+
+Process-wide token-bucket simulator that mirrors the per-account
+attachment rate limit signal-cli/Signal-Server enforce. Producers
+(``SignalAdapter.send_multiple_images`` and the ``send_message`` tool's
+Signal path) call ``acquire(n)`` before an attachment send; on a 429
+they call ``feedback(retry_after, n)`` so the model recalibrates from
+the server's authoritative hint.
+
+The scheduler serializes concurrent calls through an ``asyncio.Lock``,
+giving FIFO fairness across agent sessions sharing one signal-cli
+daemon.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+import time
+from typing import Any, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+SIGNAL_MAX_ATTACHMENTS_PER_MSG = 32  # per-message attachment cap (source: Signal-{Android,Desktop} source code)
+SIGNAL_RATE_LIMIT_BUCKET_CAPACITY = 50  # tokens; server-side token-bucket capacity for attachment rate limiting
+SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER = 4  # seconds per token; fallback refill interval for signal-cli < v0.14.3
+SIGNAL_RATE_LIMIT_MAX_ATTEMPTS = 2  # initial attempt + 1 retry
+SIGNAL_BATCH_PACING_NOTICE_THRESHOLD = 10.0  # seconds; if the estimated wait exceeds this, notify the user about the delay
+SIGNAL_RPC_ERROR_RATELIMIT = -5  # signal-cli (v0.14.3+) JSON-RPC error code for RateLimitException
+
+
+# ---------------------------------------------------------------------------
+# Errors
+# ---------------------------------------------------------------------------
+
+class SignalRateLimitError(Exception):
+    """
+    Raised by ``SignalAdapter._rpc`` for rate-limit responses when the
+    caller has opted in via ``raise_on_rate_limit=True``.
+
+    Carries the server-supplied per-token Retry-After (in seconds) as
+    ``retry_after`` on signal-cli ≥ v0.14.3; ``retry_after`` is None
+    when the signal-cli version doesn't expose it.
+    """
+
+    def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
+        super().__init__(message)
+        self.retry_after = retry_after
+
+
+class SignalSchedulerError(Exception):
+    pass  # raised by SignalAttachmentScheduler.acquire() when n exceeds the bucket capacity
+
+# ---------------------------------------------------------------------------
+# Detection helpers — used to fish a 429 out of signal-cli's various error
+# shapes (typed code, [429] substring, libsignal-net RetryLaterException
+# leaked through AttachmentInvalidException).
+# ---------------------------------------------------------------------------
+
+# "Retry after 4 seconds" / "retry after 4 second" — libsignal-net's
+# RetryLaterException string form, surfaced when 429s hit during
+# attachment upload (signal-cli wraps these as AttachmentInvalidException
+# rather than RateLimitException, so the typed path doesn't fire).
+_RETRY_AFTER_RE = re.compile(r"Retry after (\d+(?:\.\d+)?)\s*second", re.IGNORECASE)
+
+
+def _extract_retry_after_seconds(err: Any) -> Optional[float]:
+    """Pull the per-token Retry-After window from a signal-cli rate-limit error.
+
+    Tries two sources, in order:
+    1. ``error.data.response.results[*].retryAfterSeconds`` — the
+       structured field signal-cli ≥ v0.14.3 surfaces for plain
+       RateLimitException (the largest value wins if several are set).
+    2. ``"Retry after N seconds"`` parsed out of the message — covers
+       libsignal-net's RetryLaterException that gets wrapped as
+       AttachmentInvalidException during attachment upload, where the
+       structured field stays null.
+
+    Returns None when neither yields a value (malformed levels are skipped).
+    """
+    if isinstance(err, dict):
+        # JSON-RPC allows ``data`` to be any JSON value — type-check each level.
+        data = err.get("data")
+        response = data.get("response") if isinstance(data, dict) else None
+        results = response.get("results") if isinstance(response, dict) else None
+        candidates = [
+            r["retryAfterSeconds"] for r in (results if isinstance(results, list) else [])
+            if isinstance(r, dict) and r.get("retryAfterSeconds")
+        ]
+        if candidates:
+            return float(max(candidates))
+        msg = str(err.get("message", ""))
+    else:
+        msg = str(err)
+    match = _RETRY_AFTER_RE.search(msg)
+    return float(match.group(1)) if match else None
+
+
+def _is_signal_rate_limit_error(err: Any) -> bool:
+    """Decide whether a signal-cli RPC error represents a rate limit.
+
+    A dict error whose ``code`` equals ``SIGNAL_RPC_ERROR_RATELIMIT``
+    matches outright (typed RateLimitException, signal-cli ≥ v0.14.3).
+    Otherwise the message text — ``err["message"]`` for dicts,
+    ``str(err)`` for anything else — is searched for:
+    - the legacy ``[429]`` tag or ``ratelimit`` (any letter case)
+    - libsignal-net's ``RetryLaterException`` / ``Retry after``
+      wording, which surfaces inside ``AttachmentInvalidException``
+      when the limit is hit during attachment upload — signal-cli
+      never re-tags these as RateLimitException.
+    """
+    is_dict = isinstance(err, dict)
+    if is_dict and err.get("code") == SIGNAL_RPC_ERROR_RATELIMIT:
+        return True
+
+    text = str(err.get("message", "")) if is_dict else str(err)
+    if "[429]" in text:
+        return True
+    lowered = text.lower()
+    markers = (
+        "ratelimit",
+        "retrylaterexception",
+        "retry after",
+    )
+    return any(marker in lowered for marker in markers)
+
+
+# ---------------------------------------------------------------------------
+# Misc helpers
+# ---------------------------------------------------------------------------
+
+def _format_wait(seconds: float) -> str:
+    """Render a wait as whole seconds ("12s") below 90s, else whole minutes ("3 min")."""
+    clamped = max(0.0, seconds)
+    if clamped >= 90:
+        return f"{max(1, int(round(clamped / 60)))} min"
+    return f"{int(round(clamped))}s"
+
+
+def _signal_send_timeout(num_attachments: int) -> float:
+    """Pick the HTTP timeout (seconds) for a Signal ``send`` RPC.
+
+    Text-only sends (``num_attachments <= 0``) keep the 30s default.
+    Attachment sends get 5s per attachment, never less than 60s:
+    signal-cli uploads attachments serially within the call, and a
+    30s cutoff would report a failure for a batch that signal-cli
+    still finishes sending shortly afterwards.
+    """
+    has_attachments = num_attachments > 0
+    per_attachment_budget = 5.0 * num_attachments
+    return max(60.0, per_attachment_budget) if has_attachments else 30.0
+
+
+# ---------------------------------------------------------------------------
+# Scheduler
+# ---------------------------------------------------------------------------
+
+class SignalAttachmentScheduler:
+    """Process-wide token-bucket simulator for Signal attachment sends.
+
+    The bucket holds up to ``capacity`` tokens (default 50, matching
+    Signal's server-side rate-limit bucket size). Each attachment consumes one
+    token. Tokens refill at ``refill_rate`` tokens/second, calibrated
+    from the per-token Retry-After hint we get from the server when a
+    429 fires. Until we've observed one, we use the documented default
+    (1 token / 4 seconds).
+
+    Concurrent ``acquire(n)`` calls serialize through an
+    ``asyncio.Lock`` — natural FIFO across agent sessions hitting the
+    same daemon.
+    """
+
+    def __init__(
+        self,
+        capacity: float = float(SIGNAL_RATE_LIMIT_BUCKET_CAPACITY),
+        default_retry_after: float = float(SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER),
+    ) -> None:
+        self.capacity = float(capacity)
+        self.tokens = float(capacity)  # the bucket starts full
+        self.refill_rate = 1.0 / float(default_retry_after)  # tokens per second
+        self.last_refill = time.monotonic()
+        self._lock = asyncio.Lock()
+
+    # ------------------------------------------------------------------
+    # Internals
+    # ------------------------------------------------------------------
+
+    def _refill(self) -> None:
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        if elapsed > 0 and self.tokens < self.capacity:
+            self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_rate)
+        self.last_refill = now  # the refill clock restarts on every call, even when full
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def estimate_wait(self, n: int) -> float:
+        """Best-effort estimate of the seconds until ``n`` tokens would
+        be available. Used to decide whether to emit a user-facing
+        pacing notice *before* committing to an ``acquire`` that may
+        block silently. Lock-free; small races vs. concurrent acquires
+        are benign for an informational notice.
+        """
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        projected = self.tokens
+        if elapsed > 0 and projected < self.capacity:
+            projected = min(self.capacity, projected + elapsed * self.refill_rate)
+        deficit = n - projected
+        if deficit <= 0:
+            return 0.0
+        return deficit / self.refill_rate
+
+    async def acquire(self, n: int) -> float:
+        """Block until at least ``n`` tokens are available and return the
+        total seconds slept (``0.0`` immediately when ``n <= 0``).
+
+        Does **not** deduct tokens — the bucket is a read-only model of
+        server-side capacity.  Call ``report_rpc_duration()`` after the
+        RPC to synchronise the model with the server timeline.
+
+        Not perfect when many coroutines acquire for big uploads (it can
+        take long before ``report_rpc_duration`` runs) but this is just a
+        simulation: the Signal server is ground truth and will raise
+        rate-limit exceptions triggering requeues.
+
+        The lock is released during ``asyncio.sleep`` so other callers can
+        interleave; the loop re-checks the bucket after each sleep.
+        Raises ``SignalSchedulerError`` when ``n`` exceeds ``capacity``.
+        """
+        if n <= 0:
+            return 0.0
+        if n > self.capacity:
+            raise SignalSchedulerError(
+                f"Signal scheduler was called requesting {n} tokens "
+                f"(max is {self.capacity})",
+            )
+
+        total_slept = 0.0
+        first_pass = True
+        while True:
+            async with self._lock:
+                self._refill()
+                if self.tokens >= n:
+                    if not first_pass or total_slept > 0:
+                        logger.debug(
+                            "Signal scheduler: tokens sufficient for %d "
+                            "(remaining=%.1f, total_slept=%.1fs)",
+                            n, self.tokens, total_slept,
+                        )
+                    return total_slept
+                deficit = n - self.tokens
+            wait = deficit / self.refill_rate
+            if first_pass:
+                logger.info(
+                    "Signal scheduler: pausing %.1fs for %d tokens "
+                    "(available=%.1f, deficit=%.1f, refill=%.4f/s ≈ %.1fs/token)",
+                    wait, n, self.tokens, deficit,
+                    self.refill_rate, 1.0 / self.refill_rate,
+                )
+                first_pass = False
+            await asyncio.sleep(wait)
+            total_slept += wait
+
+    async def report_rpc_duration(self, rpc_duration: float, n_attachments: int) -> None:
+        """Record an attachment-send RPC that just completed.
+
+        Deducts ``n_attachments`` tokens without crediting refill during
+        the upload window. Signal's server checks the bucket at RPC start
+        and does *not* refill during request processing — refill resumes
+        after the response. Crediting upload-time refill causes cumulative
+        drift that eventually triggers 429s.
+
+        Advances ``last_refill`` so the next ``acquire`` / ``_refill``
+        starts counting from this point.
+        """
+        if n_attachments <= 0:
+            return
+
+        async with self._lock:
+            now = time.monotonic()
+            token_before = self.tokens
+            self.tokens = max(0.0, token_before - float(n_attachments))  # clamp: never below zero
+            self.last_refill = now
+        logger.log(
+            logging.INFO if rpc_duration > 10 and n_attachments > 5 else logging.DEBUG,
+            "Signal scheduler: RPC for %d att took %.1fs — "
+            "tokens %.1f → %.1f (deducted=%d, no upload refill credited, refill=%.4fs⁻¹)",
+            n_attachments, rpc_duration,
+            token_before, self.tokens,
+            n_attachments, self.refill_rate,
+        )
+
+    def feedback(self, retry_after: Optional[float], n_attempted: int) -> None:
+        """Apply server feedback after a 429.
+
+        ``retry_after`` is the per-*token* refill window the server
+        reports (None when signal-cli is older than v0.14.3); a positive
+        value recalibrates ``refill_rate`` — the server is authoritative.
+
+        In every case the bucket is emptied and the refill clock restarts
+        now. ``n_attempted`` is accepted but not used by this method.
+        """
+        if retry_after and retry_after > 0:
+            new_rate = 1.0 / float(retry_after)
+            if new_rate != self.refill_rate:
+                logger.info(
+                    "Signal scheduler: calibrating refill_rate to %.4f tokens/sec "
+                    "(server retry_after=%.1fs per token)",
+                    new_rate, retry_after,
+                )
+                self.refill_rate = new_rate
+        self.tokens = 0.0
+        self.last_refill = time.monotonic()
+
+    def state(self) -> dict:
+        """Return current scheduler state for diagnostic logging (read-only).
+
+        Does not advance ``last_refill`` — safe to call from logging paths
+        without perturbing the bucket.
+        """
+        now = time.monotonic()
+        elapsed = now - self.last_refill
+        projected = self.tokens
+        if elapsed > 0 and projected < self.capacity:
+            projected = min(self.capacity, projected + elapsed * self.refill_rate)
+        return {
+            "tokens": round(projected, 1),
+            "capacity": int(self.capacity),
+            "refill_rate": round(self.refill_rate, 4),
+            "refill_seconds_per_token": round(1.0 / self.refill_rate, 1) if self.refill_rate > 0 else float("inf"),
+        }
+
+
+# ---------------------------------------------------------------------------
+# Process-wide singleton
+# ---------------------------------------------------------------------------
+
+_scheduler: Optional[SignalAttachmentScheduler] = None
+
+
+def get_scheduler() -> SignalAttachmentScheduler:
+    """Return the shared scheduler instance, building and logging it on first use."""
+    global _scheduler
+    if _scheduler is not None:
+        return _scheduler
+    created = SignalAttachmentScheduler()
+    _scheduler = created
+    logger.info(
+        "Signal scheduler: created (capacity=%d tokens, refill=%.4f/s ≈ %.1fs/token)",
+        int(created.capacity), created.refill_rate, 1.0 / created.refill_rate,
+    )
+    return created
+
+
+def _reset_scheduler() -> None:
+    """Forget the cached scheduler so the next ``get_scheduler`` call
+    creates a new instance. Test-only — never call from production paths."""
+    global _scheduler
+    _scheduler = None
@@ -9,6 +9,7 @@ Uses slack-bolt (Python) with Socket Mode for:
 """

 import asyncio
+import contextvars
 import json
 import logging
 import os
@@ -21,6 +22,7 @@ try:
    from slack_bolt.async_app import AsyncApp
    from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
    from slack_sdk.web.async_client import AsyncWebClient
+    import aiohttp
    SLACK_AVAILABLE = True
 except ImportError:
    SLACK_AVAILABLE = False
@@ -50,6 +52,16 @@ from gateway.platforms.base import (

 logger = logging.getLogger(__name__)

+# ContextVar carrying the user_id of the slash-command invoker.
+# Set in _handle_slash_command, read in send() to match the correct
+# stashed response_url when multiple users issue commands on the same
+# channel concurrently.  ContextVars propagate to child asyncio.Tasks
+# (Python 3.7+), so the value set in _handle_slash_command's task is
+# visible in _process_message_background's child task.
+_slash_user_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
+    "_slash_user_id", default=None,
+)
+

@dataclass
 class _ThreadContextCache:
@@ -310,6 +322,11 @@ class SlackAdapter(BasePlatformAdapter):
        # Track active assistant thread status indicators so stop_typing can
        # clear them (chat_id → thread_ts).
        self._active_status_threads: Dict[str, str] = {}
+        # Slash-command contexts: stash response_url + user_id so send()
+        # can route the first reply ephemerally.  Keyed by
+        # (channel_id, user_id) to avoid cross-user collisions.
+        # Each value: {"response_url": str, "ts": float}
+        self._slash_command_contexts: Dict[Tuple[str, str], Dict[str, Any]] = {}

    def _describe_slack_api_error(self, response: Any, *, file_obj: Optional[Dict[str, Any]] = None) -> Optional[str]:
        """Convert Slack API auth/permission failures into actionable user-facing text."""
@@ -368,6 +385,103 @@ class SlackAdapter(BasePlatformAdapter):
            )
        return None

+    # ------------------------------------------------------------------
+    # Slash-command ephemeral helpers
+    # ------------------------------------------------------------------
+
+    _SLASH_CTX_TTL = 120.0  # seconds — response_url is valid for 30 min;
+    # we use a much shorter TTL to avoid routing unrelated messages
+    # as ephemeral if the command handler was slow or dropped.
+
+    def _pop_slash_context(
+        self, chat_id: str,
+    ) -> Optional[Dict[str, Any]]:
+        """Return and remove the slash-command context for *chat_id*, if fresh.
+
+        Contexts older than ``_SLASH_CTX_TTL`` seconds are silently discarded.
+
+        Uses the ``_slash_user_id`` ContextVar (set in ``_handle_slash_command``)
+        to match the exact ``(channel_id, user_id)`` key, so a concurrent slash
+        command from another user on the same channel cannot steal this user's
+        ephemeral context.  When the ContextVar is unset, falls back to the
+        first stashed context for *chat_id*, whichever user created it.
+        """
+        now = time.monotonic()
+        # Clean up stale entries on every lookup — dict is small.
+        stale_keys = [
+            k for k, v in self._slash_command_contexts.items()
+            if now - v["ts"] > self._SLASH_CTX_TTL
+        ]
+        for k in stale_keys:
+            self._slash_command_contexts.pop(k, None)
+
+        # Precise match: (channel_id, user_id) from ContextVar.
+        uid = _slash_user_id.get()
+        if uid:
+            return self._slash_command_contexts.pop((chat_id, uid), None)
+
+        # Fallback: channel-only scan, reached only when the ContextVar is unset.
+        # NOTE(review): a non-slash send() on this channel within the TTL would
+        # consume another user's context here — confirm that is intended.
+        match_key = None
+        for key in list(self._slash_command_contexts):
+            if key[0] == chat_id:
+                match_key = key
+                break
+        if match_key is None:
+            return None
+        return self._slash_command_contexts.pop(match_key)
+
+    async def _send_slash_ephemeral(
+        self,
+        ctx: Dict[str, Any],
+        content: str,
+    ) -> "SendResult":
+        """Deliver a slash-command reply through the stashed ``response_url``.
+
+        The POST carries ``replace_original`` so the reply takes the place
+        of the "Running /cmd…" placeholder and stays ephemeral ("Only
+        visible to you").  Slack accepts such POSTs for up to 30 minutes
+        after the slash command was invoked.
+
+        Always returns a successful ``SendResult``: a non-200 response or
+        a raised exception is only logged as a warning, because the user
+        already saw the initial ack and a lost reply here is non-critical.
+        """
+        formatted = self.format_message(content)
+        # Slack's response_url has the same ~40k char limit as chat_postMessage.
+        # It replaces a single ephemeral ack, so multi-chunk delivery isn't
+        # possible: split at MAX_MESSAGE_LENGTH and keep only the first
+        # chunk.  Long responses are rare for command replies.
+        chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
+        payload = {
+            "response_type": "ephemeral",
+            "replace_original": True,
+            "text": chunks[0] if chunks else formatted,
+        }
+
+        try:
+            request_timeout = aiohttp.ClientTimeout(total=10)
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    ctx["response_url"],
+                    json=payload,
+                    timeout=request_timeout,
+                ) as resp:
+                    if resp.status != 200:
+                        body = await resp.text()
+                        logger.warning(
+                            "[Slack] response_url POST returned %s: %s",
+                            resp.status,
+                            body[:200],
+                        )
+        except Exception as post_err:
+            logger.warning(
+                "[Slack] response_url POST failed: %s", post_err,
+            )
+        # Success and failure converge here: delivery problems are non-fatal.
+        return SendResult(success=True, message_id=None)
+
    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
        if not SLACK_AVAILABLE:
@@ -446,12 +560,16 @@ class SlackAdapter(BasePlatformAdapter):
            async def handle_message_event(event, say):
                await self._handle_slack_message(event)

-            # Acknowledge app_mention events to prevent Bolt 404 errors.
-            # The "message" handler above already processes @mentions in
-            # channels, so this is intentionally a no-op to avoid duplicates.
+            # Handle app_mention explicitly. In some Slack app configurations,
+            # channel mentions arrive only as app_mention events rather than the
+            # generic message event. Forward them into the normal message
+            # pipeline so @mentions reliably produce replies.
+            # NOTE: when Slack fires BOTH message and app_mention for the same
+            # @mention, they share the same event ts — the dedup in
+            # _handle_slack_message (MessageDeduplicator) suppresses the second.
            @self._app.event("app_mention")
            async def handle_app_mention(event, say):
-                pass
+                await self._handle_slack_message(event)

            # File lifecycle events can arrive around snippet uploads even when
            # the actual user message is what we care about. Ack them so Slack
@@ -502,7 +620,11 @@ class SlackAdapter(BasePlatformAdapter):

            @self._app.command(_slash_pattern)
            async def handle_hermes_command(ack, command):
-                await ack()
+                slash = (command.get("command") or "").lstrip("/")
+                await ack(
+                    response_type="ephemeral",
+                    text=f"Running `/{slash}`…",
+                )
                await self._handle_slash_command(command)

            # Register Block Kit action handlers for approval buttons
@@ -514,6 +636,15 @@ class SlackAdapter(BasePlatformAdapter):
            ):
                self._app.action(_action_id)(self._handle_approval_action)

+            # Register Block Kit action handlers for slash-confirm buttons
+            # (generic three-option prompts; see tools/slash_confirm.py).
+            for _action_id in (
+                "hermes_confirm_once",
+                "hermes_confirm_always",
+                "hermes_confirm_cancel",
+            ):
+                self._app.action(_action_id)(self._handle_slash_confirm_action)
+
            # Start Socket Mode handler in background
            self._handler = AsyncSocketModeHandler(self._app, app_token, proxy=proxy_url)
            _apply_slack_proxy(self._handler.client, proxy_url)
@@ -565,6 +696,17 @@ class SlackAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")

        try:
+            # Check for a pending slash-command context.  When the user ran a
+            # native slash command (e.g. /q, /stop, /model), the initial ack
+            # already showed an ephemeral "Running /cmd…" message.  If we have
+            # a stashed response_url for this channel, replace that ack with
+            # the actual command reply ephemerally instead of posting publicly.
+            slash_ctx = self._pop_slash_context(chat_id)
+            if slash_ctx:
+                return await self._send_slash_ephemeral(
+                    slash_ctx, content,
+                )
+
            # Convert standard markdown → Slack mrkdwn
            formatted = self.format_message(content)

@@ -592,6 +734,10 @@ class SlackAdapter(BasePlatformAdapter):

                last_result = await self._get_client(chat_id).chat_postMessage(**kwargs)

+            # Clear Slack Assistant status as soon as the final message is posted.
+            if thread_ts:
+                await self.stop_typing(chat_id)
+
            # Track the sent message ts so we can auto-respond to thread
            # replies without requiring @mention.
            sent_ts = last_result.get("ts") if last_result else None
@@ -615,6 +761,42 @@ class SlackAdapter(BasePlatformAdapter):
            logger.error("[Slack] Send error: %s", e, exc_info=True)
            return SendResult(success=False, error=str(e))

+    async def send_private_notice(
+        self,
+        chat_id: str,
+        user_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Post ``content`` as an ephemeral message only ``user_id`` can see.
+
+        Fails fast (``success=False``) when not connected or when either
+        id is empty; any exception from the Slack call is logged and
+        returned as a failed ``SendResult`` rather than raised.
+        """
+        if not self._app:
+            return SendResult(success=False, error="Not connected")
+        if not (chat_id and user_id):
+            return SendResult(success=False, error="chat_id and user_id are required")
+
+        try:
+            request: Dict[str, Any] = {
+                "channel": chat_id,
+                "user": user_id,
+                "text": self.format_message(content),
+                "mrkdwn": True,
+            }
+            thread_ts = self._resolve_thread_ts(reply_to, metadata)
+            if thread_ts:
+                request["thread_ts"] = thread_ts
+            response = await self._get_client(chat_id).chat_postEphemeral(**request)
+            sent_ts = response.get("message_ts") or response.get("ts")
+            return SendResult(success=True, message_id=sent_ts, raw_response=response)
+        except Exception as e:  # pragma: no cover - defensive logging
+            logger.error("[Slack] Ephemeral send error: %s", e, exc_info=True)
+            return SendResult(success=False, error=str(e))
+
    async def edit_message(
        self,
        chat_id: str,
@@ -633,6 +815,8 @@ class SlackAdapter(BasePlatformAdapter):
                ts=message_id,
                text=formatted,
            )
+            if finalize:
+                await self.stop_typing(chat_id)
            return SendResult(success=True, message_id=message_id)
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error(
@@ -673,7 +857,7 @@ class SlackAdapter(BasePlatformAdapter):
            # in an assistant-enabled context. Falls back to reactions.
            logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)

-    async def stop_typing(self, chat_id: str) -> None:
+    async def stop_typing(self, chat_id: str, metadata=None) -> None:
        """Clear the assistant thread status indicator."""
        if not self._app:
            return
@@ -783,6 +967,111 @@ class SlackAdapter(BasePlatformAdapter):

        raise last_exc

+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[Tuple[str, str]],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images as a single Slack message with multiple file uploads.
+
+        Uses ``files_upload_v2`` with its ``file_uploads`` parameter so all
+        images show up attached to one ``initial_comment`` message instead
+        of N separate messages. Falls back to the base per-image loop on
+        any failure.
+
+        The batch limit is 10 file uploads per call (Slack server-side cap).
+        """
+        if not self._app:
+            return
+        if not images:
+            return
+
+        try:
+            import httpx as _httpx
+            from urllib.parse import unquote as _unquote
+            from tools.url_safety import is_safe_url as _is_safe_url
+        except Exception:
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+            return
+
+        thread_ts = self._resolve_thread_ts(None, metadata)
+
+        CHUNK = 10
+        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
+
+        for chunk_idx, chunk in enumerate(chunks):
+            if human_delay > 0 and chunk_idx > 0:
+                await asyncio.sleep(human_delay)
+
+            file_uploads: List[Dict[str, Any]] = []
+            initial_comment_parts: List[str] = []
+            try:
+                async with _httpx.AsyncClient(timeout=30.0, follow_redirects=True) as http_client:
+                    for image_url, alt_text in chunk:
+                        if alt_text:
+                            initial_comment_parts.append(alt_text)
+
+                        if image_url.startswith("file://"):
+                            local_path = _unquote(image_url[7:])
+                            if not os.path.exists(local_path):
+                                logger.warning("[Slack] Skipping missing image: %s", local_path)
+                                continue
+                            file_uploads.append({
+                                "file": local_path,
+                                "filename": os.path.basename(local_path),
+                            })
+                        else:
+                            if not _is_safe_url(image_url):
+                                logger.warning("[Slack] Blocked unsafe image URL in batch")
+                                continue
+                            try:
+                                response = await http_client.get(image_url)
+                                response.raise_for_status()
+                                ext = "png"
+                                ct = response.headers.get("content-type", "")
+                                if "jpeg" in ct or "jpg" in ct:
+                                    ext = "jpg"
+                                elif "gif" in ct:
+                                    ext = "gif"
+                                elif "webp" in ct:
+                                    ext = "webp"
+                                file_uploads.append({
+                                    "content": response.content,
+                                    "filename": f"image_{len(file_uploads)}.{ext}",
+                                })
+                            except Exception as dl_err:
+                                logger.warning(
+                                    "[Slack] Download failed for %s: %s",
+                                    safe_url_for_log(image_url), dl_err,
+                                )
+                                continue
+
+                if not file_uploads:
+                    continue
+
+                initial_comment = "\n".join(initial_comment_parts) if initial_comment_parts else ""
+                logger.info(
+                    "[Slack] Sending %d image(s) in single files_upload_v2 (chunk %d/%d)",
+                    len(file_uploads), chunk_idx + 1, len(chunks),
+                )
+                result = await self._get_client(chat_id).files_upload_v2(
+                    channel=chat_id,
+                    file_uploads=file_uploads,
+                    initial_comment=initial_comment,
+                    thread_ts=thread_ts,
+                )
+                self._record_uploaded_file_thread(chat_id, thread_ts)
+                _ = result
+            except Exception as e:
+                logger.warning(
+                    "[Slack] Multi-image files_upload_v2 failed (chunk %d/%d), falling back to per-image: %s",
+                    chunk_idx + 1, len(chunks), e,
+                    exc_info=True,
+                )
+                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
+
    def _record_uploaded_file_thread(self, chat_id: str, thread_ts: Optional[str]) -> None:
        """Treat successful file uploads as bot participation in a thread."""
        if not thread_ts:
@@ -855,7 +1144,7 @@ class SlackAdapter(BasePlatformAdapter):
            return _ph(f'<{url}|{label}>')

        text = re.sub(
-            r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)',
+            r'(?<!!)\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)',
            _convert_markdown_link,
            text,
        )
@@ -902,9 +1191,11 @@ class SlackAdapter(BasePlatformAdapter):
        )

        # 10) Convert italic: _text_ stays as _text_ (already Slack italic)
-        #     Single *text* → _text_ (Slack italic)
+        #     Single *text* → _text_ (Slack italic), but only when the
+        #     emphasized text touches non-whitespace on both sides so literal
+        #     delimiters like "a * b * c" are preserved.
        text = re.sub(
-            r'(?<!\*)\*([^*\n]+)\*(?!\*)',
+            r'(?<!\*)\*(\S(?:[^*\n]*?\S)?)\*(?!\*)',
            lambda m: _ph(f'_{m.group(1)}_'),
            text,
        )
@@ -1931,6 +2222,168 @@ class SlackAdapter(BasePlatformAdapter):
            logger.error("[Slack] send_exec_approval failed: %s", e, exc_info=True)
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a Block Kit three-option slash-command confirmation prompt."""
+        if not self._app:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            body = message[:2900] + "..." if len(message) > 2900 else message
+            thread_ts = self._resolve_thread_ts(None, metadata)
+            # Encode session_key and confirm_id into the button value so the
+            # callback handler can resolve without extra bookkeeping.
+            value = f"{session_key}|{confirm_id}"
+
+            blocks = [
+                {
+                    "type": "section",
+                    "text": {
+                        "type": "mrkdwn",
+                        "text": f"*{title or 'Confirm'}*\n\n{body}",
+                    },
+                },
+                {
+                    "type": "actions",
+                    "elements": [
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Approve Once"},
+                            "style": "primary",
+                            "action_id": "hermes_confirm_once",
+                            "value": value,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Always Approve"},
+                            "action_id": "hermes_confirm_always",
+                            "value": value,
+                        },
+                        {
+                            "type": "button",
+                            "text": {"type": "plain_text", "text": "Cancel"},
+                            "style": "danger",
+                            "action_id": "hermes_confirm_cancel",
+                            "value": value,
+                        },
+                    ],
+                },
+            ]
+
+            kwargs: Dict[str, Any] = {
+                "channel": chat_id,
+                "text": f"{title or 'Confirm'}: {body[:100]}",
+                "blocks": blocks,
+            }
+            if thread_ts:
+                kwargs["thread_ts"] = thread_ts
+
+            result = await self._get_client(chat_id).chat_postMessage(**kwargs)
+            return SendResult(success=True, message_id=result.get("ts", ""), raw_response=result)
+        except Exception as e:
+            logger.error("[Slack] send_slash_confirm failed: %s", e, exc_info=True)
+            return SendResult(success=False, error=str(e))
+
+    async def _handle_slash_confirm_action(self, ack, body, action) -> None:
+        """Handle a slash-confirm button click from Block Kit."""
+        await ack()
+
+        action_id = action.get("action_id", "")
+        value = action.get("value", "")
+        message = body.get("message", {})
+        msg_ts = message.get("ts", "")
+        channel_id = body.get("channel", {}).get("id", "")
+        user_name = body.get("user", {}).get("name", "unknown")
+        user_id = body.get("user", {}).get("id", "")
+
+        # Authorization — reuse the exec-approval allowlist (SLACK_ALLOWED_USERS); unset/empty or "*" allows everyone.
+        allowed_csv = os.getenv("SLACK_ALLOWED_USERS", "").strip()
+        if allowed_csv:
+            allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
+            if "*" not in allowed_ids and user_id not in allowed_ids:
+                logger.warning(
+                    "[Slack] Unauthorized slash-confirm click by %s (%s) — ignoring",
+                    user_name, user_id,
+                )
+                return
+
+        # Parse session_key|confirm_id back out
+        if "|" not in value:
+            logger.warning("[Slack] Malformed slash-confirm value: %s", value)
+            return
+        session_key, confirm_id = value.split("|", 1)
+
+        choice_map = {
+            "hermes_confirm_once": "once",
+            "hermes_confirm_always": "always",
+            "hermes_confirm_cancel": "cancel",
+        }
+        choice = choice_map.get(action_id, "cancel")
+
+        label_map = {
+            "once": f"✅ Approved once by {user_name}",
+            "always": f"🔒 Always approved by {user_name}",
+            "cancel": f"❌ Cancelled by {user_name}",
+        }
+        decision_text = label_map.get(choice, f"Resolved by {user_name}")
+
+        # Pull original prompt body out of the section block so we can show
+        # the decision inline without losing context.
+        original_text = ""
+        for block in message.get("blocks", []):
+            if block.get("type") == "section":
+                original_text = block.get("text", {}).get("text", "")
+                break
+
+        updated_blocks = [
+            {
+                "type": "section",
+                "text": {
+                    "type": "mrkdwn",
+                    "text": original_text or "Confirmation prompt",
+                },
+            },
+            {
+                "type": "context",
+                "elements": [
+                    {"type": "mrkdwn", "text": decision_text},
+                ],
+            },
+        ]
+
+        try:
+            await self._get_client(channel_id).chat_update(
+                channel=channel_id,
+                ts=msg_ts,
+                text=decision_text,
+                blocks=updated_blocks,
+            )
+        except Exception as e:
+            logger.warning("[Slack] Failed to update slash-confirm message: %s", e)
+
+        # Resolve via the module-level primitive and post any follow-up.
+        try:
+            from tools import slash_confirm as _slash_confirm_mod
+            result_text = await _slash_confirm_mod.resolve(session_key, confirm_id, choice)
+            if result_text:
+                post_kwargs: Dict[str, Any] = {
+                    "channel": channel_id,
+                    "text": result_text,
+                }
+                # Inherit the thread so the reply stays in the same place.
+                thread_ts = message.get("thread_ts") or msg_ts
+                if thread_ts:
+                    post_kwargs["thread_ts"] = thread_ts
+                await self._get_client(channel_id).chat_postMessage(**post_kwargs)
+            logger.info(
+                "Slack button resolved slash-confirm for session %s (choice=%s, user=%s)",
+                session_key, choice, user_name,
+            )
+        except Exception as exc:
+            logger.error("Failed to resolve slash-confirm from Slack button: %s", exc, exc_info=True)
+
    async def _handle_approval_action(self, ack, body, action) -> None:
        """Handle an approval button click from Block Kit."""
        await ack()
@@ -2248,9 +2701,14 @@ class SlackAdapter(BasePlatformAdapter):
            # gateway command dispatcher by prepending the slash.
            text = f"/{slash_name} {text}".strip()

+        # Slack slash commands can originate from DMs or shared channels.
+        # Preserve DM semantics only for DM channel IDs; shared channels must
+        # keep group semantics so different users do not collide into one
+        # session key.
+        is_dm = str(channel_id).startswith("D")
        source = self.build_source(
            chat_id=channel_id,
-            chat_type="dm",  # Slash commands are always in DM-like context
+            chat_type="dm" if is_dm else "group",
            user_id=user_id,
        )

@@ -2261,7 +2719,26 @@ class SlackAdapter(BasePlatformAdapter):
            raw_message=command,
        )

-        await self.handle_message(event)
+        # Stash the Slack response_url so the first reply for this
+        # channel+user can be routed ephemerally (replaces the initial
+        # "Running /cmd…" ack shown by handle_hermes_command).
+        # Only stash for COMMAND events (text starts with "/") — free-form
+        # questions via "/hermes <question>" must produce public replies so
+        # the whole channel can see the agent's answer.
+        response_url = command.get("response_url", "")
+        if response_url and user_id and channel_id and text.startswith("/"):
+            self._slash_command_contexts[(channel_id, user_id)] = {
+                "response_url": response_url,
+                "ts": time.monotonic(),
+            }
+
+        # Set the ContextVar so send() can match the correct stashed
+        # response_url even when multiple users slash concurrently.
+        _slash_user_id_token = _slash_user_id.set(user_id or None)
+        try:
+            await self.handle_message(event)
+        finally:
+            _slash_user_id.reset(_slash_user_id_token)

    def _has_active_session_for_thread(
        self,
@@ -2422,6 +2899,13 @@ class SlackAdapter(BasePlatformAdapter):
            raw = os.getenv("SLACK_FREE_RESPONSE_CHANNELS", "")
        if isinstance(raw, list):
            return {str(part).strip() for part in raw if str(part).strip()}
-        if isinstance(raw, str) and raw.strip():
-            return {part.strip() for part in raw.split(",") if part.strip()}
+        # Coerce non-list scalars (str/int/float) to str before splitting.
+        # A bare numeric YAML value (`free_response_channels: 1234567890`) is
+        # loaded as int and was previously falling through the isinstance(str)
+        # branch to return an empty set.  str() here accepts whatever scalar
+        # the YAML loader hands us without changing existing string/CSV
+        # semantics.
+        s = str(raw).strip() if raw is not None else ""
+        if s:
+            return {part.strip() for part in s.split(",") if part.strip()}
        return set()
@@ -237,14 +237,14 @@ def _wrap_markdown_tables(text: str) -> str:
 class TelegramAdapter(BasePlatformAdapter):
    """
    Telegram bot adapter.
-    
+
    Handles:
    - Receiving messages from users and groups
    - Sending responses with Telegram markdown
    - Forum topics (thread_id support)
    - Media messages
    """
-    
+
    # Telegram message limits
    MAX_MESSAGE_LENGTH = 4096
    # Threshold for detecting Telegram client-side message splits.
@@ -252,7 +252,7 @@ class TelegramAdapter(BasePlatformAdapter):
    _SPLIT_THRESHOLD = 4000
    MEDIA_GROUP_WAIT_SECONDS = 0.8
    _GENERAL_TOPIC_THREAD_ID = "1"
-    
+
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.TELEGRAM)
        self._app: Optional[Application] = None
@@ -286,15 +286,57 @@ class TelegramAdapter(BasePlatformAdapter):
        self._model_picker_state: Dict[str, dict] = {}
        # Approval button state: message_id → session_key
        self._approval_state: Dict[int, str] = {}
+        # Slash-confirm button state: confirm_id → session_key (for /reload-mcp
+        # and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
+        self._slash_confirm_state: Dict[str, str] = {}

-    @staticmethod
-    def _is_callback_user_authorized(user_id: str) -> bool:
+    def _is_callback_user_authorized(
+        self,
+        user_id: str,
+        *,
+        chat_id: Optional[str] = None,
+        chat_type: Optional[str] = None,
+        thread_id: Optional[str] = None,
+        user_name: Optional[str] = None,
+    ) -> bool:
        """Return whether a Telegram inline-button caller may perform gated actions."""
+        normalized_user_id = str(user_id or "").strip()
+        if not normalized_user_id:
+            return False
+
+        runner = getattr(getattr(self, "_message_handler", None), "__self__", None)
+        auth_fn = getattr(runner, "_is_user_authorized", None)
+        if callable(auth_fn):
+            try:
+                from gateway.session import SessionSource
+
+                normalized_chat_type = str(chat_type or "dm").strip().lower() or "dm"
+                if normalized_chat_type == "private":
+                    normalized_chat_type = "dm"
+                elif normalized_chat_type == "supergroup":
+                    normalized_chat_type = "forum" if thread_id is not None else "group"
+
+                source = SessionSource(
+                    platform=Platform.TELEGRAM,
+                    chat_id=str(chat_id or normalized_user_id),
+                    chat_type=normalized_chat_type,
+                    user_id=normalized_user_id,
+                    user_name=str(user_name).strip() if user_name else None,
+                    thread_id=str(thread_id) if thread_id is not None else None,
+                )
+                return bool(auth_fn(source))
+            except Exception:
+                logger.debug(
+                    "[Telegram] Falling back to env-only callback auth for user %s",
+                    normalized_user_id,
+                    exc_info=True,
+                )
+
        allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
        if not allowed_csv:
            return True
        allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
-        return "*" in allowed_ids or user_id in allowed_ids
+        return "*" in allowed_ids or normalized_user_id in allowed_ids

    @classmethod
    def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
@@ -719,6 +761,20 @@ class TelegramAdapter(BasePlatformAdapter):
                    # Persist thread_id to config so we don't recreate on next restart
                    self._persist_dm_topic_thread_id(int(chat_id), topic_name, thread_id)

+                    # Send a seed message so the topic is visible in Telegram's client.
+                    # Empty topics are hidden by the client UI until they contain a message.
+                    try:
+                        await self._bot.send_message(
+                            chat_id=int(chat_id),
+                            message_thread_id=thread_id,
+                            text=f"\U0001f4cc {topic_name}",
+                        )
+                    except Exception as seed_err:
+                        logger.debug(
+                            "[%s] Could not send seed message to topic '%s': %s",
+                            self.name, topic_name, seed_err,
+                        )
+
    async def connect(self) -> bool:
        """Connect to Telegram via polling or webhook.

@@ -994,7 +1050,7 @@ class TelegramAdapter(BasePlatformAdapter):
            self._set_fatal_error("telegram_connect_error", message, retryable=True)
            logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
            return False
-    
+
    async def disconnect(self) -> None:
        """Stop polling/webhook, cancel pending album flushes, and disconnect."""
        pending_media_group_tasks = list(self._media_group_tasks.values())
@@ -1318,6 +1374,7 @@ class TelegramAdapter(BasePlatformAdapter):
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
+        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an inline-keyboard update prompt (Yes / No buttons).

@@ -1335,11 +1392,14 @@ class TelegramAdapter(BasePlatformAdapter):
                    InlineKeyboardButton("✗ No", callback_data="update_prompt:n"),
                ]
            ])
+            thread_id = self._metadata_thread_id(metadata)
+            message_thread_id = self._message_thread_id_for_send(thread_id)
            msg = await self._bot.send_message(
                chat_id=int(chat_id),
                text=text,
                parse_mode=ParseMode.MARKDOWN,
                reply_markup=keyboard,
+                message_thread_id=message_thread_id,
                **self._link_preview_kwargs(),
            )
            return SendResult(success=True, message_id=str(msg.message_id))
@@ -1411,6 +1471,48 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.warning("[%s] send_exec_approval failed: %s", self.name, e)
            return SendResult(success=False, error=str(e))

+    async def send_slash_confirm(
+        self, chat_id: str, title: str, message: str, session_key: str,
+        confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Render a three-button slash-command confirmation prompt."""
+        if not self._bot:
+            return SendResult(success=False, error="Not connected")
+
+        try:
+            # Message body: sent as-is under ParseMode.MARKDOWN (the message
+            # already contains markdown formatting from the gateway primitive).
+            preview = message if len(message) <= 3800 else message[:3800] + "..."
+
+            keyboard = InlineKeyboardMarkup([
+                [
+                    InlineKeyboardButton("✅ Approve Once", callback_data=f"sc:once:{confirm_id}"),
+                    InlineKeyboardButton("🔒 Always Approve", callback_data=f"sc:always:{confirm_id}"),
+                ],
+                [
+                    InlineKeyboardButton("❌ Cancel", callback_data=f"sc:cancel:{confirm_id}"),
+                ],
+            ])
+
+            thread_id = self._metadata_thread_id(metadata)
+            kwargs: Dict[str, Any] = {
+                "chat_id": int(chat_id),
+                "text": preview,
+                "parse_mode": ParseMode.MARKDOWN,
+                "reply_markup": keyboard,
+                **self._link_preview_kwargs(),
+            }
+            message_thread_id = self._message_thread_id_for_send(thread_id)
+            if message_thread_id is not None:
+                kwargs["message_thread_id"] = message_thread_id
+
+            msg = await self._bot.send_message(**kwargs)
+            self._slash_confirm_state[confirm_id] = session_key
+            return SendResult(success=True, message_id=str(msg.message_id))
+        except Exception as e:
+            logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
    async def send_model_picker(
        self,
        chat_id: str,
@@ -1715,6 +1817,12 @@ class TelegramAdapter(BasePlatformAdapter):
        if not query or not query.data:
            return
        data = query.data
+        query_message = getattr(query, "message", None)
+        query_chat_id = getattr(query_message, "chat_id", None)
+        query_chat = getattr(query_message, "chat", None)
+        query_chat_type = getattr(query_chat, "type", None)
+        query_thread_id = getattr(query_message, "message_thread_id", None)
+        query_user_name = getattr(query.from_user, "first_name", None)

        # --- Model picker callbacks ---
        if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
@@ -1736,7 +1844,13 @@ class TelegramAdapter(BasePlatformAdapter):

                # Only authorized users may click approval buttons.
                caller_id = str(getattr(query.from_user, "id", ""))
-                if not self._is_callback_user_authorized(caller_id):
+                if not self._is_callback_user_authorized(
+                    caller_id,
+                    chat_id=query_chat_id,
+                    chat_type=str(query_chat_type) if query_chat_type is not None else None,
+                    thread_id=str(query_thread_id) if query_thread_id is not None else None,
+                    user_name=query_user_name,
+                ):
                    await query.answer(text="⛔ You are not authorized to approve commands.")
                    return

@@ -1779,12 +1893,86 @@ class TelegramAdapter(BasePlatformAdapter):
                    logger.error("Failed to resolve gateway approval from Telegram button: %s", exc)
            return

+        # --- Slash-confirm callbacks (sc:choice:confirm_id) ---
+        if data.startswith("sc:"):
+            parts = data.split(":", 2)
+            if len(parts) == 3:
+                choice = parts[1]  # once, always, cancel
+                confirm_id = parts[2]
+
+                caller_id = str(getattr(query.from_user, "id", ""))
+                if not self._is_callback_user_authorized(
+                    caller_id,
+                    chat_id=query_chat_id,
+                    chat_type=str(query_chat_type) if query_chat_type is not None else None,
+                    thread_id=str(query_thread_id) if query_thread_id is not None else None,
+                    user_name=query_user_name,
+                ):
+                    await query.answer(text="⛔ You are not authorized to answer this prompt.")
+                    return
+
+                session_key = self._slash_confirm_state.pop(confirm_id, None)
+                if not session_key:
+                    await query.answer(text="This prompt has already been resolved.")
+                    return
+
+                label_map = {
+                    "once": "✅ Approved once",
+                    "always": "🔒 Always approve",
+                    "cancel": "❌ Cancelled",
+                }
+                user_display = getattr(query.from_user, "first_name", "User")
+                label = label_map.get(choice, "Resolved")
+
+                await query.answer(text=label)
+
+                try:
+                    await query.edit_message_text(
+                        text=f"{label} by {user_display}",
+                        parse_mode=ParseMode.MARKDOWN,
+                        reply_markup=None,
+                    )
+                except Exception:
+                    pass
+
+                # Resolve via the module-level primitive.  The runner stored
+                # a handler keyed by session_key; we run it on the event
+                # loop and (if it returns a string) send it as a follow-up
+                # message in the same chat.
+                try:
+                    from tools import slash_confirm as _slash_confirm_mod
+                    result_text = await _slash_confirm_mod.resolve(
+                        session_key, confirm_id, choice,
+                    )
+                    if result_text and query.message:
+                        # Inherit the prompt message's thread so the reply
+                        # lands in the same supergroup topic / reply chain.
+                        thread_id = getattr(query.message, "message_thread_id", None)
+                        send_kwargs: Dict[str, Any] = {
+                            "chat_id": int(query.message.chat_id),
+                            "text": result_text,
+                            "parse_mode": ParseMode.MARKDOWN,
+                            **self._link_preview_kwargs(),
+                        }
+                        if thread_id is not None:
+                            send_kwargs["message_thread_id"] = thread_id
+                        await self._bot.send_message(**send_kwargs)
+                except Exception as exc:
+                    logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
+            return
+
        # --- Update prompt callbacks ---
        if not data.startswith("update_prompt:"):
            return
        answer = data.split(":", 1)[1]  # "y" or "n"
        caller_id = str(getattr(query.from_user, "id", ""))
-        if not self._is_callback_user_authorized(caller_id):
+        if not self._is_callback_user_authorized(
+            caller_id,
+            chat_id=query_chat_id,
+            chat_type=str(query_chat_type) if query_chat_type is not None else None,
+            thread_id=str(query_thread_id) if query_thread_id is not None else None,
+            user_name=query_user_name,
+        ):
            await query.answer(text="⛔ You are not authorized to answer update prompts.")
            return
        await query.answer(text=f"Sent '{answer}' to the update process.")
@@ -1844,8 +2032,9 @@ class TelegramAdapter(BasePlatformAdapter):
                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
            
            with open(audio_path, "rb") as audio_file:
-                # .ogg files -> send as voice (round playable bubble)
-                if audio_path.endswith((".ogg", ".opus")):
+                ext = os.path.splitext(audio_path)[1].lower()
+                # .ogg / .opus files -> send as voice (round playable bubble)
+                if ext in (".ogg", ".opus"):
                    _voice_thread = self._metadata_thread_id(metadata)
                    msg = await self._bot.send_voice(
                        chat_id=int(chat_id),
@@ -1854,8 +2043,8 @@ class TelegramAdapter(BasePlatformAdapter):
                        reply_to_message_id=int(reply_to) if reply_to else None,
                        message_thread_id=self._message_thread_id_for_send(_voice_thread),
                    )
-                else:
-                    # .mp3 and others -> send as audio file
+                elif ext in (".mp3", ".m4a"):
+                    # Telegram's Bot API sendAudio only accepts MP3 / M4A.
                    _audio_thread = self._metadata_thread_id(metadata)
                    msg = await self._bot.send_audio(
                        chat_id=int(chat_id),
@@ -1864,6 +2053,16 @@ class TelegramAdapter(BasePlatformAdapter):
                        reply_to_message_id=int(reply_to) if reply_to else None,
                        message_thread_id=self._message_thread_id_for_send(_audio_thread),
                    )
+                else:
+                    # Formats Telegram can't play natively (.wav, .flac, ...)
+                    # — fall back to document delivery instead of raising.
+                    return await self.send_document(
+                        chat_id=chat_id,
+                        file_path=audio_path,
+                        caption=caption,
+                        reply_to=reply_to,
+                        metadata=metadata,
+                    )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
            logger.error(
@@ -1873,7 +2072,118 @@ class TelegramAdapter(BasePlatformAdapter):
                exc_info=True,
            )
            return await super().send_voice(chat_id, audio_path, caption, reply_to)
-    
+
+    async def send_multiple_images(
+        self,
+        chat_id: str,
+        images: List[tuple],
+        metadata: Optional[Dict[str, Any]] = None,
+        human_delay: float = 0.0,
+    ) -> None:
+        """Send a batch of images natively via Telegram's media group API.
+
+        ``images`` holds ``(image_url, alt_text)`` pairs; ``alt_text`` (cut to
+        1024 chars) is the caption. Photos go out via ``send_media_group`` in
+        albums of up to 10. Non-``file://`` URLs flagged by
+        ``_is_animation_url`` are sent first through the base per-image path.
+
+        ``file://`` entries are opened as byte streams (missing files are
+        skipped); other URLs are passed as-is. If a chunk's send raises, only
+        that chunk falls back to the base adapter's per-image loop.
+        """
+        if not self._bot:
+            return
+        if not images:
+            return
+
+        try:
+            from telegram import InputMediaPhoto
+        except Exception as exc:  # pragma: no cover - missing SDK
+            logger.warning(
+                "[%s] InputMediaPhoto unavailable, falling back to per-image send: %s",
+                self.name, exc,
+            )
+            await super().send_multiple_images(chat_id, images, metadata, human_delay)
+            return
+
+        # Peel off animations — they need send_animation, not send_media_group
+        animations: List[tuple] = []
+        photos: List[tuple] = []
+        for image_url, alt_text in images:
+            if not image_url.startswith("file://") and self._is_animation_url(image_url):
+                animations.append((image_url, alt_text))
+            else:
+                photos.append((image_url, alt_text))
+
+        # Animations: route through the base default (per-image send_animation)
+        if animations:
+            await super().send_multiple_images(
+                chat_id, animations, metadata, human_delay=human_delay,
+            )
+
+        if not photos:
+            return
+
+        from urllib.parse import unquote as _unquote
+        _thread = self._metadata_thread_id(metadata)
+        _thread_id = self._message_thread_id_for_send(_thread)
+
+        # Chunk into groups of 10 (Telegram's album limit)
+        CHUNK = 10
+        chunks = [photos[i:i + CHUNK] for i in range(0, len(photos), CHUNK)]
+
+        for chunk_idx, chunk in enumerate(chunks):
+            if human_delay > 0 and chunk_idx > 0:
+                await asyncio.sleep(human_delay)
+
+            media: List[Any] = []
+            opened_files: List[Any] = []
+            try:
+                for image_url, alt_text in chunk:
+                    caption = alt_text[:1024] if alt_text else None
+                    if image_url.startswith("file://"):
+                        local_path = _unquote(image_url[7:])
+                        if not os.path.exists(local_path):
+                            logger.warning(
+                                "[%s] Skipping missing image in media group: %s",
+                                self.name, local_path,
+                            )
+                            continue
+                        fh = open(local_path, "rb")
+                        opened_files.append(fh)
+                        media.append(InputMediaPhoto(media=fh, caption=caption))
+                    else:
+                        media.append(InputMediaPhoto(media=image_url, caption=caption))
+
+                if not media:
+                    continue
+
+                logger.info(
+                    "[%s] Sending media group of %d photo(s) (chunk %d/%d)",
+                    self.name, len(media), chunk_idx + 1, len(chunks),
+                )
+                await self._bot.send_media_group(
+                    chat_id=int(chat_id),
+                    media=media,
+                    message_thread_id=_thread_id,
+                )
+            except Exception as e:
+                logger.warning(
+                    "[%s] send_media_group failed (chunk %d/%d), falling back to per-image: %s",
+                    self.name, chunk_idx + 1, len(chunks), e,
+                    exc_info=True,
+                )
+                # Fallback: send each photo in this chunk individually
+                await super().send_multiple_images(
+                    chat_id, chunk, metadata, human_delay=human_delay,
+                )
+            finally:
+                for fh in opened_files:
+                    try:
+                        fh.close()
+                    except Exception:
+                        pass
+
    async def send_image_file(
        self,
        chat_id: str,
@@ -2040,7 +2350,7 @@ class TelegramAdapter(BasePlatformAdapter):
                )
                # Final fallback: send URL as text
                return await super().send_image(chat_id, image_url, caption, reply_to)
-    
+
    async def send_animation(
        self,
        chat_id: str,
@@ -2102,7 +2412,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    e,
                    exc_info=True,
                )
-    
+
    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
        """Get information about a Telegram chat."""
        if not self._bot:
@@ -2136,7 +2446,7 @@ class TelegramAdapter(BasePlatformAdapter):
                exc_info=True,
            )
            return {"name": str(chat_id), "type": "dm", "error": str(e)}
-    
+
    def format_message(self, content: str) -> str:
        """
        Convert standard markdown to Telegram MarkdownV2 format.
@@ -2308,7 +2618,7 @@ class TelegramAdapter(BasePlatformAdapter):
        text = ''.join(_safe_parts)

        return text
-    
+
    # ── Group mention gating ──────────────────────────────────────────────

    def _telegram_require_mention(self) -> bool:
@@ -2523,7 +2833,7 @@ class TelegramAdapter(BasePlatformAdapter):
        event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
        event.text = self._clean_bot_trigger_text(event.text)
        self._enqueue_text_event(event)
-    
+
    async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming command messages."""
        if not update.message or not update.message.text:
@@ -2533,7 +2843,7 @@ class TelegramAdapter(BasePlatformAdapter):
        
        event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
        await self.handle_message(event)
-    
+
    async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming location/venue pin messages."""
        if not update.message:
@@ -2891,7 +3201,7 @@ class TelegramAdapter(BasePlatformAdapter):
            return

        await self.handle_message(event)
-    
+
    async def _queue_media_group_event(self, media_group_id: str, event: MessageEvent) -> None:
        """Buffer Telegram media-group items so albums arrive as one logical event.

@@ -202,26 +202,22 @@ class WebhookAdapter(BasePlatformAdapter):
        if deliver_type == "github_comment":
            return await self._deliver_github_comment(content, delivery)

-        # Cross-platform delivery — any platform with a gateway adapter
-        if self.gateway_runner and deliver_type in (
-            "telegram",
-            "discord",
-            "slack",
-            "signal",
-            "sms",
-            "whatsapp",
-            "matrix",
-            "mattermost",
-            "homeassistant",
-            "email",
-            "dingtalk",
-            "feishu",
-            "wecom",
-            "wecom_callback",
-            "weixin",
-            "bluebubbles",
-            "qqbot",
-        ):
+        # Cross-platform delivery — any platform with a gateway adapter.
+        # Check both built-in names and plugin-registered platforms.
+        _BUILTIN_DELIVER_PLATFORMS = {
+            "telegram", "discord", "slack", "signal", "sms", "whatsapp",
+            "matrix", "mattermost", "homeassistant", "email", "dingtalk",
+            "feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
+            "qqbot", "yuanbao",
+        }
+        _is_known_platform = deliver_type in _BUILTIN_DELIVER_PLATFORMS
+        if not _is_known_platform:
+            try:
+                from gateway.platform_registry import platform_registry
+                _is_known_platform = platform_registry.is_registered(deliver_type)
+            except Exception:
+                pass
+        if self.gateway_runner and _is_known_platform:
            return await self._deliver_cross_platform(
                deliver_type, content, delivery
            )
@@ -92,6 +92,18 @@ SESSION_EXPIRED_ERRCODE = -14
 RATE_LIMIT_ERRCODE = -2  # iLink frequency limit — backoff and retry
 MESSAGE_DEDUP_TTL_SECONDS = 300

+
+def _is_stale_session_ret(
+    ret: "Optional[int]", errcode: "Optional[int]", errmsg: "Optional[str]",
+) -> bool:
+    """Report whether an iLink ``-2`` reply is a stale-session signal.
+
+    True only when ``ret`` or ``errcode`` equals ``RATE_LIMIT_ERRCODE`` and
+    ``errmsg`` is "unknown error" (any letter case); a missing ``errmsg`` is False."""
+    carries_rate_limit_code = RATE_LIMIT_ERRCODE in (ret, errcode)
+    return carries_rate_limit_code and (errmsg or "").lower() == "unknown error"
+
+
 MEDIA_IMAGE = 1
 MEDIA_VIDEO = 2
 MEDIA_FILE = 3
@@ -1210,6 +1222,17 @@ class WeixinAdapter(BasePlatformAdapter):
        self._mark_connected()
        _LIVE_ADAPTERS[self._token] = self
        logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
+        if self._group_policy != "disabled":
+            logger.warning(
+                "[%s] WEIXIN_GROUP_POLICY=%s is set, but QR-login connects an iLink bot "
+                "identity (e.g. ...@im.bot) which typically cannot be invited into ordinary "
+                "WeChat groups. iLink usually does not deliver ordinary-group events for "
+                "these accounts, so group messages may never reach Hermes regardless of this "
+                "policy. If group delivery doesn't work, the limitation is on the iLink side, "
+                "not in Hermes.",
+                self.name,
+                self._group_policy,
+            )
        return True

    async def disconnect(self) -> None:
@@ -1254,7 +1277,8 @@ class WeixinAdapter(BasePlatformAdapter):
                ret = response.get("ret", 0)
                errcode = response.get("errcode", 0)
                if ret not in (0, None) or errcode not in (0, None):
-                    if ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE:
+                    if (ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE
+                            or _is_stale_session_ret(ret, errcode, response.get("errmsg"))):
                        logger.error("[%s] Session expired; pausing for 10 minutes", self.name)
                        await asyncio.sleep(600)
                        consecutive_failures = 0
@@ -1519,6 +1543,7 @@ class WeixinAdapter(BasePlatformAdapter):
                        is_session_expired = (
                            ret == SESSION_EXPIRED_ERRCODE
                            or errcode == SESSION_EXPIRED_ERRCODE
+                            or _is_stale_session_ret(ret, errcode, resp.get("errmsg"))
                        )
                        # Session expired — strip token and retry once
                        if is_session_expired and not retried_without_token and context_token:
@@ -1595,7 +1620,7 @@ class WeixinAdapter(BasePlatformAdapter):
        _, image_cleaned = self.extract_images(cleaned_content)
        local_files, final_content = self.extract_local_files(image_cleaned)

-        _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
+        _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}
        _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
        _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}

@@ -1896,10 +1896,12 @@ class OwnerCommandMiddleware(InboundMiddleware):
        if cmd not in cls.ALLOWLIST:
            return None, None, False

-        # Sender identity check: bot owner <-> push.from_account == push.bot_owner_id
-        # owner_id = (push or {}).get("bot_owner_id") or ""
-        # is_owner = bool(owner_id) and owner_id == from_account
-        is_owner = True
+        # Sender identity check: bot owner <-> push.from_account == push.bot_owner_id.
+        # The allowlisted commands (/approve, /deny, /stop, /reset, ...) are
+        # privileged — leaking them to non-owners lets any group member approve
+        # a dangerous tool call, kill the owner's task, or wipe session state.
+        owner_id = str((push or {}).get("bot_owner_id") or "").strip()
+        is_owner = bool(owner_id) and owner_id == from_account
        return cmd, cmd_line, is_owner

    async def handle(self, ctx: InboundContext, next_fn) -> None:
@@ -62,6 +62,7 @@ from .config import (
 )
 from .whatsapp_identity import (
    canonical_whatsapp_identifier,
+    normalize_whatsapp_identifier,  # noqa: F401 - re-exported for gateway.session callers
 )
 from utils import atomic_replace

@@ -234,7 +235,7 @@ def build_session_context_prompt(
 ) -> str:
    """
    Build the dynamic system prompt section that tells the agent about its context.
-    
+
    This is injected into the system prompt so the agent knows:
    - Where messages are coming from
    - What platforms are connected
@@ -246,13 +247,23 @@ def build_session_context_prompt(
    Platforms like Discord are excluded because mentions need real IDs.
    Routing still uses the original values (they stay in SessionSource).
    """
-    # Only apply redaction on platforms where IDs aren't needed for mentions
-    redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
+    # Only apply redaction on platforms where IDs aren't needed for mentions.
+    # Check both the hardcoded set (builtins) and the plugin registry.
+    _is_pii_safe = context.source.platform in _PII_SAFE_PLATFORMS
+    if not _is_pii_safe:
+        try:
+            from gateway.platform_registry import platform_registry
+            entry = platform_registry.get(context.source.platform.value)
+            if entry and entry.pii_safe:
+                _is_pii_safe = True
+        except Exception:
+            pass
+    redact_pii = redact_pii and _is_pii_safe
    lines = [
        "## Current Session Context",
        "",
    ]
-    
+
    # Source info
    platform_name = context.source.platform.value.title()
    if context.source.platform == Platform.LOCAL:
@@ -277,7 +288,7 @@ def build_session_context_prompt(
        else:
            desc = src.description
        lines.append(f"**Source:** {platform_name} ({desc})")
-    
+
    # Channel topic (if available - provides context about the channel's purpose)
    if context.source.chat_topic:
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")
@@ -302,7 +313,7 @@ def build_session_context_prompt(
        if redact_pii:
            uid = _hash_sender_id(uid)
        lines.append(f"**User ID:** {uid}")
-    
+
    # Platform-specific behavioral notes
    if context.source.platform == Platform.SLACK:
        lines.append("")
@@ -368,9 +379,9 @@ def build_session_context_prompt(
    for p in context.connected_platforms:
        if p != Platform.LOCAL:
            platforms_list.append(f"{p.value}: Connected ✓")
-    
+
    lines.append(f"**Connected Platforms:** {', '.join(platforms_list)}")
-    
+
    # Home channels
    if context.home_channels:
        lines.append("")
@@ -378,11 +389,11 @@ def build_session_context_prompt(
        for platform, home in context.home_channels.items():
            hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
            lines.append(f"  - {platform.value}: {home.name} (ID: {hc_id})")
-    
+
    # Delivery options for scheduled tasks
    lines.append("")
    lines.append("**Delivery options for scheduled tasks:**")
-    
+
    from hermes_constants import display_hermes_home

    # Origin delivery
@@ -398,15 +409,15 @@ def build_session_context_prompt(
    lines.append(
        f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
    )
-    
+
    # Platform home channels
    for platform, home in context.home_channels.items():
        lines.append(f"- `\"{platform.value}\"` → Home channel ({home.name})")
-    
+
    # Note about explicit targeting
    lines.append("")
    lines.append("*For explicit targeting, use `\"platform:chat_id\"` format if the user provides a specific chat ID.*")
-    
+
    return "\n".join(lines)


@@ -447,6 +458,15 @@ class SessionEntry:
    was_auto_reset: bool = False
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
+
+    # Set by reset_session() when the user explicitly sends /new or /reset.
+    # Consumed once by _handle_message_with_agent to trigger topic/channel
+    # skill re-injection on the first message of the new session.  We can't
+    # reuse was_auto_reset for this because that flag fires the "session
+    # expired due to inactivity" user-facing notice and a misleading
+    # context-note prepend — both wrong for an explicit manual reset.
+    # See issue #6508.
+    is_fresh_reset: bool = False
    
    # Set by the background expiry watcher after it finalizes an expired
    # session (invoking on_session_finalize hooks and evicting the cached
@@ -497,6 +517,7 @@ class SessionEntry:
                if self.last_resume_marked_at
                else None
            ),
+            "is_fresh_reset": self.is_fresh_reset,
        }
        if self.origin:
            result["origin"] = self.origin.to_dict()
@@ -545,6 +566,7 @@ class SessionEntry:
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
            last_resume_marked_at=last_resume_marked_at,
+            is_fresh_reset=data.get("is_fresh_reset", False),
        )


@@ -1121,6 +1143,7 @@ class SessionStore:
                display_name=old_entry.display_name,
                platform=old_entry.platform,
                chat_type=old_entry.chat_type,
+                is_fresh_reset=True,
            )

            self._entries[session_key] = new_entry
@@ -21,6 +21,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional
+from utils import atomic_json_write

 if sys.platform == "win32":
    import msvcrt
@@ -34,6 +35,10 @@ _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
 _GATEWAY_LOCK_FILENAME = "gateway.lock"
 _gateway_lock_handle = None
+# Windows byte-range locks are mandatory for other readers. Lock a byte well
+# past the JSON payload so runtime status / PID readers can still read the file
+# while another process holds the mutual-exclusion lock.
+_WINDOWS_LOCK_OFFSET = 1024 * 1024


 def _get_pid_path() -> Path:
@@ -205,8 +210,7 @@ def _read_json_file(path: Path) -> Optional[dict[str, Any]]:


 def _write_json_file(path: Path, payload: dict[str, Any]) -> None:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(json.dumps(payload))
+    atomic_json_write(path, payload, indent=None, separators=(",", ":"))


 def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
@@ -286,7 +290,7 @@ def _try_acquire_file_lock(handle) -> bool:
            if handle.tell() == 0:
                handle.write("\n")
                handle.flush()
-            handle.seek(0)
+            handle.seek(_WINDOWS_LOCK_OFFSET)
            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
@@ -298,7 +302,7 @@ def _try_acquire_file_lock(handle) -> bool:
 def _release_file_lock(handle) -> None:
    try:
        if _IS_WINDOWS:
-            handle.seek(0)
+            handle.seek(_WINDOWS_LOCK_OFFSET)
            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
        else:
            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
@@ -91,11 +91,20 @@ class GatewayStreamConsumer:
        chat_id: str,
        config: Optional[StreamConsumerConfig] = None,
        metadata: Optional[dict] = None,
+        on_new_message: Optional[callable] = None,
    ):
        self.adapter = adapter
        self.chat_id = chat_id
        self.cfg = config or StreamConsumerConfig()
        self.metadata = metadata
+        # Fired whenever a fresh content bubble is created on the platform
+        # (first-send of a new message, commentary, overflow chunk, or
+        # fallback continuation). The gateway uses this to linearize the
+        # tool-progress bubble: when content resumes after a tool batch,
+        # the next tool.started should open a NEW progress bubble below
+        # the content, not edit the old bubble above it.
+        # Called with no arguments. Exceptions are swallowed.
+        self._on_new_message = on_new_message
        self._queue: queue.Queue = queue.Queue()
        self._accumulated = ""
        self._message_id: Optional[str] = None
@@ -146,6 +155,16 @@ class GatewayStreamConsumer:
        if text:
            self._queue.put((_COMMENTARY, text))

+    def _notify_new_message(self) -> None:
+        """Invoke the optional ``on_new_message`` hook without raising ``Exception``."""
+        hook = self._on_new_message
+        if hook is not None:
+            try:
+                hook()
+            except Exception:
+                # Best-effort notification: record at debug level and move on.
+                logger.debug("on_new_message callback error", exc_info=True)
+
    def _reset_segment_state(self, *, preserve_no_edit: bool = False) -> None:
        if preserve_no_edit and self._message_id == "__no_edit__":
            return
@@ -529,6 +548,9 @@ class GatewayStreamConsumer:
                self._message_id = str(result.message_id)
                self._already_sent = True
                self._last_sent_text = text
+                # Fresh content bubble — close off any stale tool bubble
+                # above so the next tool starts a new bubble below.
+                self._notify_new_message()
                return str(result.message_id)
            else:
                self._edit_supported = False
@@ -661,6 +683,9 @@ class GatewayStreamConsumer:
            sent_any_chunk = True
            last_successful_chunk = chunk
            last_message_id = result.message_id or last_message_id
+            # Each fallback chunk is a fresh platform message — notify
+            # so any stale tool-progress bubble gets closed off.
+            self._notify_new_message()

        self._message_id = last_message_id
        self._already_sent = True
@@ -744,6 +769,11 @@ class GatewayStreamConsumer:
            # tool..."), not the final response. Setting already_sent would cause
            # the final response to be incorrectly suppressed when there are
            # multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
+            if result.success:
+                # Commentary counts as fresh content — close off any
+                # stale tool bubble above it so the next tool starts a
+                # new bubble below.
+                self._notify_new_message()
            return result.success
        except Exception as e:
            logger.error("Commentary send error: %s", e)
@@ -973,6 +1003,11 @@ class GatewayStreamConsumer:
                        # every delta/tool boundary when platforms accept a
                        # message but do not return an editable message id.
                        self._message_id = "__no_edit__"
+                    # Notify the gateway that a fresh content bubble was
+                    # created so any accumulated tool-progress bubble above
+                    # gets closed off — the next tool fires into a new
+                    # bubble below, preserving chronological order.
+                    self._notify_new_message()
                    return True
                else:
                    # Initial send failed — disable streaming for this session
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.11.0"
-__release_date__ = "2026.4.23"
+__version__ = "0.12.0"
+__release_date__ = "2026.4.30"
@@ -0,0 +1,373 @@
+"""
+Top-level argparse construction for the hermes CLI.
+
+Lives in its own module so other modules (e.g. ``relaunch.py``) can
+introspect the parser to discover which flags exist without running the
+``main`` function.
+
+Only the top-level parser and the ``chat`` subparser live here. Every other
+subparser (model, gateway, sessions, …) is built inline in ``main.py``
+because its dispatch is tightly coupled to module-level ``cmd_*`` functions.
+"""
+
+import argparse
+
+
+# `--profile` / `-p` is consumed by ``main._apply_profile_override`` before
+# argparse runs (it sets ``HERMES_HOME`` and strips itself from ``sys.argv``),
+# so it isn't on the parser; listed here to keep relaunch metadata in one file.
+# Entries are (flag, bool); the bool presumably means "takes a value" — TODO confirm.
+PRE_ARGPARSE_INHERITED_FLAGS: list[tuple[str, bool]] = [
+    ("--profile", True),
+    ("-p", True),
+]
+
+
+def _inherited_flag(parser, *args, **kwargs):
+    """Add an argument and tag it for carry-over by ``hermes_cli.relaunch``.
+
+    Forwards ``*args``/``**kwargs`` to ``parser.add_argument`` and sets
+    ``inherit_on_relaunch = True`` on the returned Action, so the relaunch
+    table builder can find the flag by introspection when the CLI re-execs
+    itself (e.g. after ``sessions browse`` picks a session, or after the
+    setup wizard launches chat). Returns the tagged Action.
+    """
+    tagged = parser.add_argument(*args, **kwargs)
+    tagged.inherit_on_relaunch = True
+    return tagged
+
+
+_EPILOGUE = """
+Examples:
+    hermes                        Start interactive chat
+    hermes chat -q "Hello"        Single query mode
+    hermes -c                     Resume the most recent session
+    hermes -c "my project"        Resume a session by name (latest in lineage)
+    hermes --resume <session_id>  Resume a specific session by ID
+    hermes setup                  Run setup wizard
+    hermes logout                 Clear stored authentication
+    hermes auth add <provider>    Add a pooled credential
+    hermes auth list              List pooled credentials
+    hermes auth remove <p> <t>    Remove pooled credential by index, id, or label
+    hermes auth reset <provider>  Clear exhaustion status for a provider
+    hermes model                  Select default model
+    hermes fallback [list]        Show fallback provider chain
+    hermes fallback add           Add a fallback provider (same picker as `hermes model`)
+    hermes fallback remove        Remove a fallback provider from the chain
+    hermes config                 View configuration
+    hermes config edit            Edit config in $EDITOR
+    hermes config set model gpt-4 Set a config value
+    hermes gateway                Run messaging gateway
+    hermes -s hermes-agent-dev,github-auth
+    hermes -w                     Start in isolated git worktree
+    hermes gateway install        Install gateway background service
+    hermes sessions list          List past sessions
+    hermes sessions browse        Interactive session picker
+    hermes sessions rename ID T   Rename/title a session
+    hermes logs                   View agent.log (last 50 lines)
+    hermes logs -f                Follow agent.log in real time
+    hermes logs errors            View errors.log
+    hermes logs --since 1h        Lines from the last hour
+    hermes debug share             Upload debug report for support
+    hermes update                 Update to latest version
+
+For more help on a command:
+    hermes <command> --help
+"""  # used as the top-level parser's epilog= with RawDescriptionHelpFormatter, so this layout is shown as written
+
+
+def build_top_level_parser():
+    """Build the top-level parser, the subparsers action, and the ``chat`` subparser.
+
+    Returns ``(parser, subparsers, chat_parser)``. The caller wires
+    ``chat_parser.set_defaults(func=cmd_chat)`` and continues registering
+    other subparsers via ``subparsers.add_parser(...)``.
+    """
+    parser = argparse.ArgumentParser(
+        prog="hermes",
+        description="Hermes Agent - AI assistant with tool-calling capabilities",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=_EPILOGUE,
+    )
+
+    parser.add_argument(
+        "--version", "-V", action="store_true", help="Show version and exit"
+    )
+    parser.add_argument(
+        "-z",
+        "--oneshot",
+        metavar="PROMPT",
+        default=None,
+        help=(
+            "One-shot mode: send a single prompt and print ONLY the final "
+            "response text to stdout. No banner, no spinner, no tool "
+            "previews, no session_id line. Tools, memory, rules, and "
+            "AGENTS.md in the CWD are loaded as normal; approvals are "
+            "auto-bypassed. Intended for scripts / pipes."
+        ),
+    )
+    # --model / --provider are accepted at the top level so they can pair
+    # with -z without needing the `chat` subcommand.  If neither -z nor a
+    # subcommand consumes them, they fall through harmlessly as None.
+    # Mirrors `hermes chat --model ... --provider ...` semantics.
+    _inherited_flag(
+        parser,
+        "-m",
+        "--model",
+        default=None,
+        help=(
+            "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
+        ),
+    )
+    _inherited_flag(
+        parser,
+        "--provider",
+        default=None,
+        help=(
+            "Provider override for this invocation (e.g. openrouter, anthropic). "
+            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
+        ),
+    )
+    parser.add_argument(
+        "-t",
+        "--toolsets",
+        default=None,
+        help="Comma-separated toolsets to enable for this invocation. Applies to -z/--oneshot and --tui.",
+    )
+    parser.add_argument(
+        "--resume",
+        "-r",
+        metavar="SESSION",
+        default=None,
+        help="Resume a previous session by ID or title",
+    )
+    parser.add_argument(
+        "--continue",
+        "-c",
+        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=None,
+        metavar="SESSION_NAME",
+        help="Resume a session by name, or the most recent if no name given",
+    )
+    parser.add_argument(
+        "--worktree",
+        "-w",
+        action="store_true",
+        default=False,
+        help="Run in an isolated git worktree (for parallel agents)",
+    )
+    _inherited_flag(
+        parser,
+        "--accept-hooks",
+        action="store_true",
+        default=False,
+        help=(
+            "Auto-approve any unseen shell hooks declared in config.yaml "
+            "without a TTY prompt.  Equivalent to HERMES_ACCEPT_HOOKS=1 or "
+            "hooks_auto_accept: true in config.yaml.  Use on CI / headless "
+            "runs that can't prompt."
+        ),
+    )
+    _inherited_flag(
+        parser,
+        "--skills",
+        "-s",
+        action="append",
+        default=None,
+        help="Preload one or more skills for the session (repeat flag or comma-separate)",
+    )
+    _inherited_flag(
+        parser,
+        "--yolo",
+        action="store_true",
+        default=False,
+        help="Bypass all dangerous command approval prompts (use at your own risk)",
+    )
+    _inherited_flag(
+        parser,
+        "--pass-session-id",
+        action="store_true",
+        default=False,
+        help="Include the session ID in the agent's system prompt",
+    )
+    _inherited_flag(
+        parser,
+        "--ignore-user-config",
+        action="store_true",
+        default=False,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
+    )
+    _inherited_flag(
+        parser,
+        "--ignore-rules",
+        action="store_true",
+        default=False,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
+    )
+    _inherited_flag(
+        parser,
+        "--tui",
+        action="store_true",
+        default=False,
+        help="Launch the modern TUI instead of the classic REPL",
+    )
+    _inherited_flag(
+        parser,
+        "--dev",
+        dest="tui_dev",
+        action="store_true",
+        default=False,
+        help="With --tui: run TypeScript sources via tsx (skip dist build)",
+    )
+
+    subparsers = parser.add_subparsers(dest="command", help="Command to run")
+
+    # =========================================================================
+    # chat command
+    # =========================================================================
+    chat_parser = subparsers.add_parser(
+        "chat",
+        help="Interactive chat with the agent",
+        description="Start an interactive chat session with Hermes Agent",
+    )
+    chat_parser.add_argument(
+        "-q", "--query", help="Single query (non-interactive mode)"
+    )
+    chat_parser.add_argument(
+        "--image", help="Optional local image path to attach to a single query"
+    )
+    _inherited_flag(
+        chat_parser,
+        "-m", "--model", help="Model to use (e.g., anthropic/claude-sonnet-4)",
+    )
+    chat_parser.add_argument(
+        "-t", "--toolsets", help="Comma-separated toolsets to enable"
+    )
+    _inherited_flag(
+        chat_parser,
+        "-s",
+        "--skills",
+        action="append",
+        default=argparse.SUPPRESS,
+        help="Preload one or more skills for the session (repeat flag or comma-separate)",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--provider",
+        # No `choices=` here: user-defined providers from config.yaml `providers:`
+        # are also valid values, and runtime resolution (resolve_runtime_provider)
+        # handles validation/error reporting consistently with the top-level
+        # `--provider` flag.
+        default=None,
+        help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
+    )
+    chat_parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Verbose output"
+    )
+    chat_parser.add_argument(
+        "-Q",
+        "--quiet",
+        action="store_true",
+        help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info.",
+    )
+    chat_parser.add_argument(
+        "--resume",
+        "-r",
+        metavar="SESSION_ID",
+        default=argparse.SUPPRESS,
+        help="Resume a previous session by ID (shown on exit)",
+    )
+    chat_parser.add_argument(
+        "--continue",
+        "-c",
+        dest="continue_last",
+        nargs="?",
+        const=True,
+        default=argparse.SUPPRESS,
+        metavar="SESSION_NAME",
+        help="Resume a session by name, or the most recent if no name given",
+    )
+    chat_parser.add_argument(
+        "--worktree",
+        "-w",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Run in an isolated git worktree (for parallel agents on the same repo)",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--accept-hooks",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help=(
+            "Auto-approve any unseen shell hooks declared in config.yaml "
+            "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
+            "hooks_auto_accept: in config.yaml)."
+        ),
+    )
+    chat_parser.add_argument(
+        "--checkpoints",
+        action="store_true",
+        default=False,
+        help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)",
+    )
+    chat_parser.add_argument(
+        "--max-turns",
+        type=int,
+        default=None,
+        metavar="N",
+        help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--yolo",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Bypass all dangerous command approval prompts (use at your own risk)",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--pass-session-id",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Include the session ID in the agent's system prompt",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--ignore-user-config",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--ignore-rules",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
+    )
+    chat_parser.add_argument(
+        "--source",
+        default=None,
+        help="Session source tag for filtering (default: cli). Use 'tool' for third-party integrations that should not appear in user session lists.",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--tui",
+        action="store_true",
+        default=False,
+        help="Launch the modern TUI instead of the classic REPL",
+    )
+    _inherited_flag(
+        chat_parser,
+        "--dev",
+        dest="tui_dev",
+        action="store_true",
+        default=False,
+        help="With --tui: run TypeScript sources via tsx (skip dist build)",
+    )
+
+    return parser, subparsers, chat_parser
@@ -43,7 +43,7 @@ import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
 from hermes_constants import OPENROUTER_BASE_URL
-from utils import atomic_replace
+from utils import atomic_replace, atomic_yaml_write, is_truthy_value

 logger = logging.getLogger(__name__)

@@ -72,6 +72,14 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
 DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
+MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113"  # client_id of the minimax-oauth registry entry
+MINIMAX_OAUTH_SCOPE = "group_id profile model.completion"  # scope requested on /oauth/code
+MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code"  # grant_type used while polling /oauth/token
+MINIMAX_OAUTH_GLOBAL_BASE = "https://api.minimax.io"  # OAuth portal, region "global"
+MINIMAX_OAUTH_CN_BASE = "https://api.minimaxi.com"  # OAuth portal, region "cn"
+MINIMAX_OAUTH_GLOBAL_INFERENCE = "https://api.minimax.io/anthropic"  # inference base URL, region "global"
+MINIMAX_OAUTH_CN_INFERENCE = "https://api.minimaxi.com/anthropic"  # inference base URL, region "cn"
+MINIMAX_OAUTH_REFRESH_SKEW_SECONDS = 60  # refresh once <= 1 min of token lifetime remains
 DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
@@ -126,7 +134,7 @@ class ProviderConfig:
    """Describes a known inference provider."""
    id: str
    name: str
-    auth_type: str  # "oauth_device_code", "oauth_external", or "api_key"
+    auth_type: str  # "oauth_device_code", "oauth_external", "oauth_minimax", or "api_key"
    portal_base_url: str = ""
    inference_base_url: str = ""
    client_id: str = ""
@@ -255,6 +263,17 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("MINIMAX_API_KEY",),
        base_url_env_var="MINIMAX_BASE_URL",
    ),
+    "minimax-oauth": ProviderConfig(
+        id="minimax-oauth",
+        name="MiniMax (OAuth \u00b7 minimax.io)",
+        auth_type="oauth_minimax",
+        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
+        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
+        client_id=MINIMAX_OAUTH_CLIENT_ID,
+        scope=MINIMAX_OAUTH_SCOPE,
+        extra={"region": "global", "cn_portal_base_url": MINIMAX_OAUTH_CN_BASE,
+               "cn_inference_base_url": MINIMAX_OAUTH_CN_INFERENCE},
+    ),
    "anthropic": ProviderConfig(
        id="anthropic",
        name="Anthropic",
@@ -1153,6 +1172,7 @@ def resolve_provider(
        "arcee-ai": "arcee", "arceeai": "arcee",
        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
+        "minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -2460,8 +2480,8 @@ def _resolve_verify(
    tls_state = tls_state if isinstance(tls_state, dict) else {}

    effective_insecure = (
-        bool(insecure) if insecure is not None
-        else bool(tls_state.get("insecure", False))
+        is_truthy_value(insecure, default=False) if insecure is not None
+        else is_truthy_value(tls_state.get("insecure", False), default=False)
    )
    effective_ca = (
        ca_bundle
@@ -3633,7 +3653,7 @@ def _update_config_for_provider(

    config["model"] = model_cfg

-    config_path.write_text(yaml.safe_dump(config, sort_keys=False))
+    atomic_yaml_write(config_path, config, sort_keys=False)
    return config_path


@@ -3692,7 +3712,7 @@ def _reset_config_provider() -> Path:
        model["provider"] = "auto"
        if "base_url" in model:
            model["base_url"] = OPENROUTER_BASE_URL
-    config_path.write_text(yaml.safe_dump(config, sort_keys=False))
+    atomic_yaml_write(config_path, config, sort_keys=False)
    return config_path


@@ -4116,6 +4136,326 @@ def _codex_device_code_login() -> Dict[str, Any]:
    }


+# ==================== MiniMax Portal OAuth ====================
+
+def _minimax_pkce_pair() -> tuple:
+    """Return a fresh ``(code_verifier, S256 code_challenge, state)`` triple for MiniMax OAuth."""
+    from secrets import token_urlsafe
+    # token_urlsafe(64) yields 86 characters, so the [:96] cap never actually trims.
+    code_verifier = token_urlsafe(64)[:96]
+    digest = hashlib.sha256(code_verifier.encode()).digest()
+    # PKCE S256: base64url of the SHA-256 digest with the "=" padding stripped.
+    code_challenge = base64.urlsafe_b64encode(digest).decode().rstrip("=")
+    return code_verifier, code_challenge, token_urlsafe(16)
+
+
+def _minimax_request_user_code(
+    client: httpx.Client, *, portal_base_url: str, client_id: str,
+    code_challenge: str, state: str,
+) -> Dict[str, Any]:
+    """Start the MiniMax user-code flow via POST ``{portal_base_url}/oauth/code``.
+
+    Returns the decoded JSON object, which contains ``user_code``,
+    ``verification_uri`` and ``expired_in`` and echoes ``state``.  Raises
+    AuthError on a non-200 status, a body that is not a JSON object, a missing
+    required field, or a ``state`` different from the one sent.
+    """
+    response = client.post(
+        f"{portal_base_url}/oauth/code",
+        data={"response_type": "code", "client_id": client_id, "scope": MINIMAX_OAUTH_SCOPE,
+              "code_challenge": code_challenge, "code_challenge_method": "S256", "state": state},
+        headers={"Content-Type": "application/x-www-form-urlencoded",
+                 "Accept": "application/json", "x-request-id": str(uuid.uuid4())},
+    )
+    if response.status_code != 200:
+        raise AuthError(
+            f"MiniMax OAuth authorization failed: {response.text or response.reason_phrase}",
+            provider="minimax-oauth", code="authorization_failed",
+        )
+    try:
+        payload = response.json()
+    except ValueError:  # body is not JSON at all, e.g. an HTML error page served with 200
+        payload = None
+    if not isinstance(payload, dict):
+        raise AuthError("MiniMax OAuth returned a malformed (non-JSON-object) response.",
+                        provider="minimax-oauth", code="authorization_incomplete")
+    for field in ("user_code", "verification_uri", "expired_in"):
+        if field not in payload:
+            raise AuthError(f"MiniMax OAuth response missing field: {field}",
+                            provider="minimax-oauth", code="authorization_incomplete")
+    if payload.get("state") != state:  # the server must echo the state we sent
+        raise AuthError("MiniMax OAuth state mismatch (possible CSRF).",
+                        provider="minimax-oauth", code="state_mismatch")
+    return payload
+
+
+def _minimax_poll_token(
+    client: httpx.Client, *, portal_base_url: str, client_id: str,
+    user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
+) -> Dict[str, Any]:
+    """Poll ``{portal_base_url}/oauth/token`` until the user approves or time runs out.
+
+    ``expired_in`` may be an absolute unix-ms timestamp or a duration in
+    seconds.  ``interval_ms`` is the poll interval in milliseconds (2000 when
+    None or 0); polling never runs faster than once every 2 seconds.
+
+    Returns the success payload, whose ``access_token``, ``refresh_token`` and
+    ``expired_in`` are all truthy.  Raises AuthError on a non-200 response, a
+    ``status`` of ``"error"``, an incomplete success payload, or a timeout.
+    """
+    # OpenClaw treats expired_in as a unix-ms timestamp (Date.now() < expireTimeMs).
+    # Defensive parsing: if it's small enough to be a duration, treat as seconds.
+    import time as _time
+    now_ms = int(_time.time() * 1000)
+    if expired_in > now_ms // 2:
+        deadline = expired_in / 1000.0  # looks like a unix-ms timestamp
+    else:
+        deadline = _time.time() + max(1, expired_in)  # duration in seconds from now
+    interval = max(2.0, (interval_ms or 2000) / 1000.0)
+
+    while _time.time() < deadline:
+        response = client.post(
+            f"{portal_base_url}/oauth/token",
+            data={"grant_type": MINIMAX_OAUTH_GRANT_TYPE, "client_id": client_id,
+                  "user_code": user_code, "code_verifier": code_verifier},
+            headers={"Content-Type": "application/x-www-form-urlencoded",
+                     "Accept": "application/json"},
+        )
+        try:
+            payload = response.json() if response.text else {}
+        except Exception:
+            payload = {}
+        if not isinstance(payload, dict):
+            payload = {}  # valid JSON but not an object: nothing usable in it
+
+        # Non-200 is fatal; surface the server's status_msg when one is present.
+        if response.status_code != 200:
+            base_resp = payload.get("base_resp")
+            detail = base_resp.get("status_msg") if isinstance(base_resp, dict) else None
+            raise AuthError(f"MiniMax OAuth error: {detail or response.text or 'unknown'}",
+                            provider="minimax-oauth", code="token_exchange_failed")
+
+        status = payload.get("status")
+        if status == "error":
+            raise AuthError("MiniMax OAuth reported an error. Please try again later.",
+                            provider="minimax-oauth", code="authorization_denied")
+        if status == "success":
+            if not all(payload.get(k) for k in ("access_token", "refresh_token", "expired_in")):
+                raise AuthError(
+                    "MiniMax OAuth success payload missing required token fields.",
+                    provider="minimax-oauth", code="token_incomplete",
+                )
+            return payload
+        # "pending" or any other status -> keep polling
+        _time.sleep(interval)
+
+    raise AuthError("MiniMax OAuth timed out before authorization completed.",
+                    provider="minimax-oauth", code="timeout")
+
+
+def _minimax_save_auth_state(auth_state: Dict[str, Any]) -> None:
+    """Persist MiniMax OAuth state to Hermes auth store (~/.hermes/auth.json)."""
+    with _auth_store_lock():  # hold the store lock across the whole load/modify/save cycle
+        auth_store = _load_auth_store()
+        _save_provider_state(auth_store, "minimax-oauth", auth_state)  # filed under the provider id
+        _save_auth_store(auth_store)
+
+
+def _minimax_oauth_login(
+    *, region: str = "global", open_browser: bool = True,
+    timeout_seconds: float = 15.0,
+) -> Dict[str, Any]:
+    """Run the interactive MiniMax OAuth flow, persist the tokens, return the auth state.
+
+    ``region="cn"`` uses the CN portal/inference URLs from the registry entry, anything
+    else the global ones.  AuthError from the user-code or token step propagates.
+    """
+    pconfig = PROVIDER_REGISTRY["minimax-oauth"]
+    if region == "cn":
+        portal_base_url = pconfig.extra["cn_portal_base_url"]
+        inference_base_url = pconfig.extra["cn_inference_base_url"]
+    else:
+        portal_base_url = pconfig.portal_base_url
+        inference_base_url = pconfig.inference_base_url
+
+    verifier, challenge, state = _minimax_pkce_pair()
+    if _is_remote_session():
+        open_browser = False  # never try to launch a browser in a remote session
+
+    print(f"Starting Hermes login via MiniMax ({region}) OAuth...")
+    print(f"Portal: {portal_base_url}")
+
+    with httpx.Client(timeout=httpx.Timeout(timeout_seconds),
+                      headers={"Accept": "application/json"}) as client:
+        code_data = _minimax_request_user_code(
+            client, portal_base_url=portal_base_url, client_id=pconfig.client_id,
+            code_challenge=challenge, state=state,
+        )
+        verification_url = str(code_data["verification_uri"])
+        user_code = str(code_data["user_code"])
+        print()
+        print("To continue:")
+        print(f"  1. Open: {verification_url}")
+        print(f"  2. If prompted, enter code: {user_code}")
+        if open_browser:
+            if webbrowser.open(verification_url):
+                print("  (Opened browser for verification)")
+            else:
+                print("  Could not open browser automatically -- use the URL above.")
+
+        interval_raw = code_data.get("interval")
+        interval_ms = int(interval_raw) if interval_raw is not None else None
+        print("Waiting for approval...")
+
+        token_data = _minimax_poll_token(
+            client, portal_base_url=portal_base_url, client_id=pconfig.client_id,
+            user_code=user_code, code_verifier=verifier,
+            expired_in=int(code_data["expired_in"]), interval_ms=interval_ms,
+        )
+
+    now = datetime.now(timezone.utc)
+    raw_expiry = int(token_data["expired_in"])
+    # Same defensive parsing as _minimax_poll_token: a value this large cannot be a lifetime
+    # in seconds, so read it as an absolute unix-ms expiry (adding it to "now" would overflow
+    # datetime.fromtimestamp).  NOTE(review): confirm which form MiniMax's token endpoint sends.
+    if raw_expiry > int(now.timestamp() * 1000) // 2:
+        expires_at = raw_expiry / 1000.0
+        expires_in_s = max(0, int(expires_at - now.timestamp()))
+    else:
+        expires_in_s = raw_expiry
+        expires_at = now.timestamp() + expires_in_s
+    auth_state = {
+        "provider": "minimax-oauth", "region": region,
+        "portal_base_url": portal_base_url, "inference_base_url": inference_base_url,
+        "client_id": pconfig.client_id, "scope": MINIMAX_OAUTH_SCOPE,
+        "token_type": token_data.get("token_type", "Bearer"),
+        "access_token": token_data["access_token"], "refresh_token": token_data["refresh_token"],
+        "resource_url": token_data.get("resource_url"), "obtained_at": now.isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
+        "expires_in": expires_in_s,
+    }
+    _minimax_save_auth_state(auth_state)
+    print("\u2713 MiniMax OAuth login successful.")
+    if msg := token_data.get("notification_message"):
+        print(f"Note from MiniMax: {msg}")
+    return auth_state
+
+
+def _refresh_minimax_oauth_state(
+    state: Dict[str, Any], *, timeout_seconds: float = 15.0,
+    force: bool = False,
+) -> Dict[str, Any]:
+    """Refresh the MiniMax OAuth access token if close to expiry (or forced).
+
+    Returns ``state`` itself while more than MINIMAX_OAUTH_REFRESH_SKEW_SECONDS of
+    lifetime remain and ``force`` is false; otherwise exchanges the refresh token,
+    persists the result and returns it as a new dict.  Raises AuthError when no
+    refresh token is stored or the portal does not report a successful refresh.
+    """
+    if not state.get("refresh_token"):
+        raise AuthError("MiniMax OAuth state has no refresh_token; please re-login.",
+                        provider="minimax-oauth", code="no_refresh_token", relogin_required=True)
+    try:
+        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+    except Exception:
+        expires_at = 0.0  # missing/unparseable expiry counts as already expired
+    if not force and (expires_at - time.time()) > MINIMAX_OAUTH_REFRESH_SKEW_SECONDS:
+        return state
+
+    portal_base_url = state["portal_base_url"]
+    with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client:
+        response = client.post(
+            f"{portal_base_url}/oauth/token",
+            data={"grant_type": "refresh_token", "client_id": state["client_id"],
+                  "refresh_token": state["refresh_token"]},
+            headers={"Content-Type": "application/x-www-form-urlencoded",
+                     "Accept": "application/json"},
+        )
+    if response.status_code != 200:
+        body = response.text.lower()
+        relogin = any(m in body for m in
+                      ("invalid_grant", "refresh_token_reused", "invalid_refresh_token"))
+        raise AuthError(
+            f"MiniMax OAuth refresh failed: {response.text or response.reason_phrase}",
+            provider="minimax-oauth", code="refresh_failed", relogin_required=relogin)
+    payload = response.json()
+    if not isinstance(payload, dict) or payload.get("status") != "success":
+        raise AuthError("MiniMax OAuth refresh did not return success.",
+                        provider="minimax-oauth", code="refresh_failed", relogin_required=True)
+    now_dt = datetime.now(timezone.utc)
+    raw_expiry = int(payload["expired_in"])
+    # Same defensive parsing as _minimax_poll_token: a value too large to be a lifetime in
+    # seconds is read as an absolute unix-ms expiry.  NOTE(review): confirm MiniMax's format.
+    if raw_expiry > int(now_dt.timestamp() * 1000) // 2:
+        expires_ts = raw_expiry / 1000.0
+        expires_in_s = max(0, int(expires_ts - now_dt.timestamp()))
+    else:
+        expires_in_s = raw_expiry
+        expires_ts = now_dt.timestamp() + expires_in_s
+    new_state = dict(state)
+    new_state.update({
+        "access_token": payload["access_token"],
+        "refresh_token": payload.get("refresh_token") or state["refresh_token"],  # keep ours if omitted/empty
+        "obtained_at": now_dt.isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_ts, tz=timezone.utc).isoformat(),
+        "expires_in": expires_in_s,
+    })
+    _minimax_save_auth_state(new_state)
+    return new_state
+
+
+def resolve_minimax_oauth_runtime_credentials(
+    *, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
+) -> Dict[str, Any]:
+    """Return {provider, api_key, base_url, source} for minimax-oauth.
+
+    Refreshes the token first when at most ``min_token_ttl_seconds`` of lifetime remain;
+    raises AuthError (relogin_required) when no access token is stored."""
+    state = get_provider_auth_state("minimax-oauth")
+    if not state or not state.get("access_token"):
+        raise AuthError("Not logged into MiniMax OAuth. Run `hermes model` and select MiniMax (OAuth).",
+                        provider="minimax-oauth", code="not_logged_in", relogin_required=True)
+    try:
+        ttl = datetime.fromisoformat(state.get("expires_at", "")).timestamp() - time.time()
+    except Exception:
+        ttl = 0.0  # unknown expiry: force a refresh
+    state = _refresh_minimax_oauth_state(state, force=ttl <= min_token_ttl_seconds)
+    return {"provider": "minimax-oauth", "api_key": state["access_token"],
+            "base_url": state["inference_base_url"].rstrip("/"), "source": "oauth"}
+
+
+def get_minimax_oauth_auth_status() -> Dict[str, Any]:
+    """Summarize the stored MiniMax OAuth login as a status dict.
+
+    ``logged_in`` is False when no access token is stored or ``expires_at`` has
+    passed; an unparseable ``expires_at`` falls back to "access token present"."""
+    state = get_provider_auth_state("minimax-oauth")
+    if not (state and state.get("access_token")):
+        return {"logged_in": False, "provider": "minimax-oauth"}
+    try:
+        expiry_ts = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
+    except Exception:
+        still_valid = bool(state.get("access_token"))
+    else:
+        still_valid = (expiry_ts - time.time()) > 0
+    return {"logged_in": still_valid, "provider": "minimax-oauth",
+            "region": state.get("region", "global"), "expires_at": state.get("expires_at")}
+
+
+def _login_minimax_oauth(args, pconfig: ProviderConfig) -> None:
+    """CLI entry for MiniMax OAuth login; prints the error and exits with status 1 on AuthError."""
+    login_kwargs = {  # missing or falsy attributes on ``args`` fall back to the defaults
+        "region": getattr(args, "region", None) or "global",
+        "open_browser": not getattr(args, "no_browser", False),
+        "timeout_seconds": getattr(args, "timeout", None) or 15.0,
+    }
+    try:
+        _minimax_oauth_login(**login_kwargs)
+    except AuthError as auth_err:
+        print(format_auth_error(auth_err))
+        raise SystemExit(1)
+
+
 def _nous_device_code_login(
    *,
    portal_base_url: Optional[str] = None,
@@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL


 # Providers that support OAuth login in addition to API keys.
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}


 def _get_custom_provider_names() -> list:
@@ -170,7 +170,7 @@ def auth_add_command(args) -> None:
        if provider.startswith(CUSTOM_POOL_PREFIX):
            requested_type = AUTH_TYPE_API_KEY
        else:
-            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY
+            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY

    pool = load_pool(provider)

@@ -333,6 +333,27 @@ def auth_add_command(args) -> None:
        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
        return

+    if provider == "minimax-oauth":
+        from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
+        creds = resolve_minimax_oauth_runtime_credentials()
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["api_key"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:minimax_oauth",
+            access_token=creds["api_key"],
+            base_url=creds.get("base_url"),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
    raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")


@@ -5,6 +5,7 @@ Pure display functions with no HermesCLI state dependency.

 import json
 import logging
+import os
 import shutil
 import subprocess
 import threading
@@ -122,35 +123,36 @@ def get_available_skills() -> Dict[str, List[str]]:
 # Cache update check results for 6 hours to avoid repeated git fetches
 _UPDATE_CHECK_CACHE_SECONDS = 6 * 3600

+# Sentinel returned when we know an update exists but can't count commits
+# (e.g. nix-built hermes — no local git history to count against).
+UPDATE_AVAILABLE_NO_COUNT = -1

-def check_for_updates() -> Optional[int]:
-    """Check how many commits behind origin/main the local repo is.
+_UPSTREAM_REPO_URL = "https://github.com/NousResearch/hermes-agent.git"

-    Does a ``git fetch`` at most once every 6 hours (cached to
-    ``~/.hermes/.update_check``).  Returns the number of commits behind,
-    or ``None`` if the check fails or isn't applicable.
+
+def _check_via_rev(local_rev: str) -> Optional[int]:
+    """Compare an embedded git revision to upstream main via ls-remote.
+
+    Returns 0 if up-to-date, ``UPDATE_AVAILABLE_NO_COUNT`` if behind,
+    or ``None`` on failure.
    """
-    hermes_home = get_hermes_home()
-    repo_dir = hermes_home / "hermes-agent"
-    cache_file = hermes_home / ".update_check"
-
-    # Must be a git repo — fall back to project root for dev installs
-    if not (repo_dir / ".git").exists():
-        repo_dir = Path(__file__).parent.parent.resolve()
-    if not (repo_dir / ".git").exists():
-        return None
-
-    # Read cache
-    now = time.time()
    try:
-        if cache_file.exists():
-            cached = json.loads(cache_file.read_text())
-            if now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS:
-                return cached.get("behind")
+        result = subprocess.run(
+            ["git", "ls-remote", _UPSTREAM_REPO_URL, "refs/heads/main"],
+            capture_output=True, text=True, timeout=10,
+        )
    except Exception:
-        pass
+        return None
+    if result.returncode != 0 or not result.stdout:
+        return None
+    upstream_rev = result.stdout.split()[0]
+    if not upstream_rev:
+        return None
+    return 0 if upstream_rev == local_rev else UPDATE_AVAILABLE_NO_COUNT

-    # Fetch latest refs (fast — only downloads ref metadata, no files)
+
+def _check_via_local_git(repo_dir: Path) -> Optional[int]:
+    """Count commits behind origin/main in a local checkout."""
    try:
        subprocess.run(
            ["git", "fetch", "origin", "--quiet"],
@@ -160,7 +162,6 @@ def check_for_updates() -> Optional[int]:
    except Exception:
        pass  # Offline or timeout — use stale refs, that's fine

-    # Count commits behind
    try:
        result = subprocess.run(
            ["git", "rev-list", "--count", "HEAD..origin/main"],
@@ -168,15 +169,52 @@ def check_for_updates() -> Optional[int]:
            cwd=str(repo_dir),
        )
        if result.returncode == 0:
-            behind = int(result.stdout.strip())
-        else:
-            behind = None
+            return int(result.stdout.strip())
    except Exception:
-        behind = None
+        pass
+    return None

-    # Write cache
+
+def check_for_updates() -> Optional[int]:
+    """Check whether a Hermes update is available.
+
+    Two paths: if ``HERMES_REVISION`` is set (nix builds embed it), compare
+    it to upstream main via ``git ls-remote``. Otherwise look for a local
+    git checkout and count commits behind ``origin/main``.
+
+    Returns the number of commits behind, ``UPDATE_AVAILABLE_NO_COUNT`` (-1)
+    if behind but the count is unknown, ``0`` if up-to-date, or ``None`` if
+    the check failed or doesn't apply. Cached for 6 hours.
+    """
+    hermes_home = get_hermes_home()
+    cache_file = hermes_home / ".update_check"
+    embedded_rev = os.environ.get("HERMES_REVISION") or None
+
+    # Read cache — invalidate if the embedded rev has changed since last check
+    now = time.time()
    try:
-        cache_file.write_text(json.dumps({"ts": now, "behind": behind}))
+        if cache_file.exists():
+            cached = json.loads(cache_file.read_text())
+            if (
+                now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS
+                and cached.get("rev") == embedded_rev
+            ):
+                return cached.get("behind")
+    except Exception:
+        pass
+
+    if embedded_rev:
+        behind = _check_via_rev(embedded_rev)
+    else:
+        repo_dir = hermes_home / "hermes-agent"
+        if not (repo_dir / ".git").exists():
+            repo_dir = Path(__file__).parent.parent.resolve()
+        if not (repo_dir / ".git").exists():
+            return None
+        behind = _check_via_local_git(repo_dir)
+
+    try:
+        cache_file.write_text(json.dumps({"ts": now, "behind": behind, "rev": embedded_rev}))
    except Exception:
        pass

@@ -549,13 +587,23 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    # Update check — use prefetched result if available
    try:
        behind = get_update_result(timeout=0.5)
-        if behind and behind > 0:
-            from hermes_cli.config import recommended_update_command
-            commits_word = "commit" if behind == 1 else "commits"
-            right_lines.append(
-                f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
-                f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
-            )
+        if behind is not None and behind != 0:
+            from hermes_cli.config import get_managed_update_command, recommended_update_command
+            if behind > 0:
+                commits_word = "commit" if behind == 1 else "commits"
+                right_lines.append(
+                    f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
+                    f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
+                )
+            else:
+                # UPDATE_AVAILABLE_NO_COUNT: nix-built hermes; we know an update
+                # exists but not by how much, and we don't know how the user
+                # installed it (nix run, profile, system flake, home-manager).
+                managed_cmd = get_managed_update_command()
+                line = "[bold yellow]⚠ update available[/]"
+                if managed_cmd:
+                    line += f"[dim yellow] — run [bold]{managed_cmd}[/bold][/]"
+                right_lines.append(line)
    except Exception:
        pass  # Never break the banner over an update check

@@ -0,0 +1,181 @@
+"""Shared helpers for attaching Hermes to a local Chrome CDP port."""
+
+from __future__ import annotations
+
+import os
+import platform
+import shlex
+import shutil
+import subprocess
+
+from hermes_constants import get_hermes_home
+
+
+DEFAULT_BROWSER_CDP_PORT = 9222
+DEFAULT_BROWSER_CDP_URL = f"http://127.0.0.1:{DEFAULT_BROWSER_CDP_PORT}"
+
+# Executable paths probed on macOS, in priority order.
+_DARWIN_APPS = (
+    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
+    "/Applications/Chromium.app/Contents/MacOS/Chromium",
+    "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
+    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
+)
+
+# Path components appended to each Windows install root that is probed.
+_WINDOWS_INSTALL_PARTS = (
+    ("Google", "Chrome", "Application", "chrome.exe"),
+    ("Chromium", "Application", "chrome.exe"),
+    ("Chromium", "Application", "chromium.exe"),
+    ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
+    ("Microsoft", "Edge", "Application", "msedge.exe"),
+)
+
+# Executable names resolved via PATH when the system is not macOS/Windows.
+_LINUX_BIN_NAMES = (
+    "google-chrome", "google-chrome-stable", "chromium-browser",
+    "chromium", "brave-browser", "microsoft-edge",
+)
+
+# Executable names resolved via PATH on Windows.
+_WINDOWS_BIN_NAMES = (
+    "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
+    "chrome", "msedge", "brave", "chromium",
+)
+
+
+def get_chrome_debug_candidates(system: str) -> list[str]:
+    """Return paths of installed Chromium-family browsers for *system*.
+
+    ``system`` is a ``platform.system()``-style name.  ``"Darwin"`` checks
+    the fixed application paths; ``"Windows"`` checks PATH and then the
+    known install roots; any other value checks PATH for the Linux binary
+    names and then the Windows install layout under ``/mnt/c`` (presumably
+    so a WSL session can find the host's browser).
+
+    Only paths that exist as files are returned.  Entries keep discovery
+    order and are de-duplicated by normalized path; the callers in this
+    module use the first entry.
+    """
+    candidates: list[str] = []
+    seen: set[str] = set()
+
+    def add(path: str | None) -> None:
+        # Missing env vars and failed PATH lookups arrive as None/"".
+        if not path:
+            return
+        normalized = os.path.normcase(os.path.normpath(path))
+        if normalized in seen or not os.path.isfile(path):
+            return
+        candidates.append(path)
+        seen.add(normalized)
+
+    def add_install_paths(bases: tuple[str | None, ...]) -> None:
+        # Try every known vendor layout under each install root that is set.
+        for base in filter(None, bases):
+            for parts in _WINDOWS_INSTALL_PARTS:
+                add(os.path.join(base, *parts))
+
+    if system == "Darwin":
+        for app in _DARWIN_APPS:
+            add(app)
+        return candidates
+
+    if system == "Windows":
+        for name in _WINDOWS_BIN_NAMES:
+            add(shutil.which(name))
+        add_install_paths((
+            os.environ.get("ProgramFiles"),
+            os.environ.get("ProgramFiles(x86)"),
+            os.environ.get("LOCALAPPDATA"),
+        ))
+        return candidates
+
+    for name in _LINUX_BIN_NAMES:
+        add(shutil.which(name))
+    add_install_paths(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"))
+    return candidates
+
+
+def chrome_debug_data_dir() -> str:
+    """Return the ``chrome-debug`` profile directory under the Hermes home."""
+    return str(get_hermes_home() / "chrome-debug")
+
+
+def _chrome_debug_args(port: int) -> list[str]:
+    """Return the browser flags enabling CDP on *port* with the dedicated profile."""
+    return [
+        f"--remote-debugging-port={port}",
+        f"--user-data-dir={chrome_debug_data_dir()}",
+        "--no-first-run",
+        "--no-default-browser-check",
+    ]
+
+
+def manual_chrome_debug_command(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> str | None:
+    """Return a shell command the user can run to start a CDP-enabled browser.
+
+    The first detected browser is used, quoted with ``list2cmdline`` on
+    Windows and ``shlex.join`` elsewhere.  When nothing is detected, macOS
+    falls back to an ``open -a "Google Chrome"`` invocation and every other
+    system returns ``None``.  ``system`` defaults to ``platform.system()``.
+    """
+    system = system or platform.system()
+    candidates = get_chrome_debug_candidates(system)
+
+    if candidates:
+        argv = [candidates[0], *_chrome_debug_args(port)]
+        return subprocess.list2cmdline(argv) if system == "Windows" else shlex.join(argv)
+
+    if system == "Darwin":
+        data_dir = chrome_debug_data_dir()
+        return (
+            f'open -a "Google Chrome" --args --remote-debugging-port={port} '
+            f'--user-data-dir="{data_dir}" --no-first-run --no-default-browser-check'
+        )
+
+    return None
+
+
+def _detach_kwargs(system: str) -> dict:
+    """Return ``subprocess.Popen`` keyword arguments that detach the child.
+
+    Non-Windows systems get ``start_new_session=True``.  Windows gets the
+    ``DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP`` creation flags; both are
+    read with ``getattr`` so a ``subprocess`` module lacking them yields
+    no extra arguments instead of raising.
+    """
+    if system != "Windows":
+        return {"start_new_session": True}
+    flags = getattr(subprocess, "DETACHED_PROCESS", 0) | getattr(
+        subprocess, "CREATE_NEW_PROCESS_GROUP", 0
+    )
+    return {"creationflags": flags} if flags else {}
+
+
+def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
+    """Start the first detected browser with remote debugging on *port*.
+
+    Returns ``True`` once the process has been spawned and ``False`` when
+    no browser is found or preparing/spawning it fails.  The CDP endpoint
+    is not probed here, so ``True`` does not mean the port is accepting
+    connections yet.
+    """
+    system = system or platform.system()
+    candidates = get_chrome_debug_candidates(system)
+    if not candidates:
+        return False
+
+    try:
+        # Profile-dir creation sits inside the guard so an unwritable Hermes
+        # home yields False like any other launch failure instead of raising.
+        os.makedirs(chrome_debug_data_dir(), exist_ok=True)
+        subprocess.Popen(
+            [candidates[0], *_chrome_debug_args(port)],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            **_detach_kwargs(system),
+        )
+        return True
+    except Exception:
+        return False
@@ -19,6 +19,8 @@ from collections.abc import Callable, Mapping
 from dataclasses import dataclass
 from typing import Any

+from utils import is_truthy_value
+
 # prompt_toolkit is an optional CLI dependency — only needed for
 # SlashCommandCompleter and SlashCommandAutoSuggest.  Gateway and test
 # environments that lack it must still be able to import this module
@@ -66,6 +68,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True),
    CommandDef("history", "Show conversation history", "Session",
               cli_only=True),
+    CommandDef("recap", "Summarize recent activity in this session", "Session"),
    CommandDef("save", "Save the current conversation", "Session",
               cli_only=True),
    CommandDef("retry", "Retry the last message (resend to agent)", "Session"),
@@ -93,6 +96,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
               aliases=("q",), args_hint="<prompt>"),
    CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
               args_hint="<prompt>"),
+    CommandDef("goal", "Set a standing goal Hermes works on across turns until achieved", "Session",
+               args_hint="[text | pause | resume | clear | status]"),
    CommandDef("status", "Show session info", "Session"),
    CommandDef("profile", "Show active profile name and home directory", "Info"),
    CommandDef("sethome", "Set this chat as the home channel", "Session",
@@ -128,6 +133,9 @@ COMMAND_REGISTRY: list[CommandDef] = [
               subcommands=("normal", "fast", "status", "on", "off")),
    CommandDef("skin", "Show or change the display skin/theme", "Configuration",
               cli_only=True, args_hint="[name]"),
+    CommandDef("indicator", "Pick the TUI busy-indicator style", "Configuration",
+               cli_only=True, args_hint="[kaomoji|emoji|unicode|ascii]",
+               subcommands=("kaomoji", "emoji", "unicode", "ascii")),
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
@@ -145,10 +153,20 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
+    CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
+               "Tools & Skills", args_hint="[subcommand]",
+               subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
+    CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
+               "Tools & Skills", args_hint="[subcommand]",
+               subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
+                            "claim", "comment", "complete", "block", "unblock", "archive",
+                            "tail", "dispatch", "context", "init", "gc")),
    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
               cli_only=True),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
               aliases=("reload_mcp",)),
+    CommandDef("reload-skills", "Re-scan ~/.hermes/skills/ for newly installed or removed skills",
+               "Tools & Skills", aliases=("reload_skills",)),
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
               cli_only=True, args_hint="[connect|disconnect|status]",
               subcommands=("connect", "disconnect", "status")),
@@ -302,6 +320,7 @@ ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
        "new",
        "profile",
        "queue",
+        "recap",
        "restart",
        "status",
        "steer",
@@ -358,7 +377,7 @@ def _resolve_config_gates() -> set[str]:
            else:
                val = None
                break
-        if val:
+        if is_truthy_value(val, default=False):
            result.add(cmd.name)
    return result

@@ -821,6 +840,13 @@ def discord_skill_commands_by_category(
 _SLACK_MAX_SLASH_COMMANDS = 50
 _SLACK_NAME_LIMIT = 32
 _SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
+_SLACK_RESERVED_COMMANDS = frozenset({
+    # Built-in Slack slash commands that cannot be registered by apps.
+    # https://slack.com/help/articles/201259356-Use-built-in-slash-commands
+    "me", "status", "away", "dnd", "shrug", "remind", "msg", "feed",
+    "who", "collapse", "expand", "leave", "join", "open", "search",
+    "topic", "mute", "pro", "shortcuts",
+})


 def _sanitize_slack_name(raw: str) -> str:
@@ -847,6 +873,10 @@ def slack_native_slashes() -> list[tuple[str, str, str]]:
    documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work).
    Plugin-registered slash commands are included too.

+    Commands whose sanitized name collides with a Slack built-in
+    (e.g. ``/status``, ``/me``, ``/join``) are silently skipped.  Users
+    can still reach them via ``/hermes <command>``.
+
    Results are clamped to Slack's 50-command limit with duplicate-name
    avoidance. ``/hermes`` is always reserved as the first entry so the
    legacy ``/hermes <subcommand>`` form keeps working for anything that
@@ -864,6 +894,8 @@ def slack_native_slashes() -> list[tuple[str, str, str]]:
        slack_name = _sanitize_slack_name(name)
        if not slack_name or slack_name in seen:
            return
+        if slack_name in _SLACK_RESERVED_COMMANDS:
+            return
        if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
            return
        # Slack description cap is 2000 chars; keep it short.
@@ -73,6 +73,8 @@ _EXTRA_ENV_KEYS = frozenset({
    "QQ_HOME_CHANNEL", "QQ_HOME_CHANNEL_NAME",  # legacy aliases (pre-rename, still read for back-compat)
    "QQ_ALLOWED_USERS", "QQ_GROUP_ALLOWED_USERS", "QQ_ALLOW_ALL_USERS", "QQ_MARKDOWN_SUPPORT",
    "QQ_STT_API_KEY", "QQ_STT_BASE_URL", "QQ_STT_MODEL",
+    "IRC_SERVER", "IRC_PORT", "IRC_NICKNAME", "IRC_CHANNEL",
+    "IRC_USE_TLS", "IRC_SERVER_PASSWORD", "IRC_NICKSERV_PASSWORD",
    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_HOME_CHANNEL_NAME", "MATTERMOST_REPLY_MODE",
@@ -348,7 +350,7 @@ def ensure_hermes_home():
    else:
        home.mkdir(parents=True, exist_ok=True)
        _secure_dir(home)
-        for subdir in ("cron", "sessions", "logs", "memories"):
+        for subdir in ("cron", "sessions", "logs", "logs/curator", "memories"):
            d = home / subdir
            d.mkdir(parents=True, exist_ok=True)
            _secure_dir(d)
@@ -369,6 +371,10 @@ def _ensure_hermes_home_managed(home: Path):
                f"{d} does not exist. "
                "Run 'sudo nixos-rebuild switch' first."
            )
+    # Curator reports dir is a sub-path of logs/; create it if missing.
+    # In managed mode the activation script may not know about this subdir,
+    # so we mkdir it ourselves (it's inside an already-secured logs/ dir).
+    (home / "logs" / "curator").mkdir(parents=True, exist_ok=True)
    # Inside umask(0o007) scope — SOUL.md will be created as 0660
    _ensure_default_soul_md(home)

@@ -451,6 +457,7 @@ DEFAULT_CONFIG = {
        # remains available as a tool regardless of this setting — the routing
        # only controls how inbound user images are presented.
        "image_input_mode": "auto",
+        "disabled_toolsets": [],
    },
    
    "terminal": {
@@ -499,7 +506,8 @@ DEFAULT_CONFIG = {
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
-        # Container resource limits (docker, singularity, modal, daytona — ignored for local/ssh)
+        "vercel_runtime": "node24",
+        # Container resource limits (docker, singularity, modal, daytona, vercel_sandbox — ignored for local/ssh)
        "container_cpu": 1,
        "container_memory": 5120,       # MB (default 5GB)
        "container_disk": 51200,        # MB (default 50GB)
@@ -515,6 +523,16 @@ DEFAULT_CONFIG = {
        # Explicit opt-in: mount the host cwd into /workspace for Docker sessions.
        # Default off because passing host directories into a sandbox weakens isolation.
        "docker_mount_cwd_to_workspace": False,
+        # Explicit opt-in: run the Docker container as the host user's uid:gid
+        # (via `--user`).  When enabled, files written into bind-mounted dirs
+        # (docker_volumes, the persistent workspace, or the auto-mounted cwd)
+        # are owned by your host user instead of root, which avoids needing
+        # `sudo chown` after container runs. Default off to preserve behavior
+        # for images whose entrypoints expect to start as root (e.g. the
+        # bundled Hermes image, which drops to the `hermes` user via gosu).
+        # When on, SETUID/SETGID caps are omitted from the container since
+        # no privilege drop is needed.
+        "docker_run_as_host_user": False,
        # Persistent shell — keep a long-lived bash shell across execute() calls
        # so cwd/env vars/shell variables survive between commands.
        # Enabled by default for non-local backends (SSH); local is always opt-in
@@ -589,6 +607,24 @@ DEFAULT_CONFIG = {
        "max_line_length": 2000,
    },

+    # Tool loop guardrails nudge models when they repeat failed or
+    # non-progressing tool calls. Soft warnings are on by default;
+    # hard stops are opt-in so interactive CLI/TUI sessions keep flowing.
+    "tool_loop_guardrails": {
+        "warnings_enabled": True,
+        "hard_stop_enabled": False,
+        "warn_after": {
+            "exact_failure": 2,
+            "same_tool_failure": 3,
+            "idempotent_no_progress": 2,
+        },
+        "hard_stop_after": {
+            "exact_failure": 5,
+            "same_tool_failure": 8,
+            "idempotent_no_progress": 5,
+        },
+    },
+
    "compression": {
        "enabled": True,
        "threshold": 0.50,            # compress when context usage exceeds this ratio
@@ -696,6 +732,19 @@ DEFAULT_CONFIG = {
            "timeout": 30,
            "extra_body": {},
        },
+        # Curator — skill-usage review fork. Timeout is generous because the
+        # review pass can take several minutes on reasoning models (umbrella
+        # building over hundreds of candidate skills). "auto" = use main chat
+        # model; override via `hermes model` → auxiliary → Curator to route
+        # to a cheaper aux model (e.g. openrouter google/gemini-3-flash-preview).
+        "curator": {
+            "provider": "auto",
+            "model": "",
+            "base_url": "",
+            "api_key": "",
+            "timeout": 600,
+            "extra_body": {},
+        },
    },
    
    "display": {
@@ -715,6 +764,9 @@ DEFAULT_CONFIG = {
        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
+        # TUI busy indicator style: kaomoji (default), emoji, unicode (braille
+        # spinner), or ascii.  Live-swappable via `/indicator <style>`.
+        "tui_status_indicator": "kaomoji",
        "user_message_preview": {  # CLI: how many submitted user-message lines to echo back in scrollback
            "first_lines": 2,
            "last_lines": 2,
@@ -723,6 +775,14 @@ DEFAULT_CONFIG = {
        "tool_progress_command": False,  # Enable /verbose command in messaging gateway
        "tool_progress_overrides": {},  # DEPRECATED — use display.platforms instead
        "tool_preview_length": 0,  # Max chars for tool call previews (0 = no limit, show full paths/commands)
+        # Auto-delete system-notice replies (e.g. "✨ New session started!",
+        # "♻ Restarting gateway…", "⚡ Stopped…") after N seconds on platforms
+        # that support message deletion (currently Telegram; other platforms
+        # ignore and leave the message in place).  Only affects slash-command
+        # replies wrapped with gateway.platforms.base.EphemeralReply — agent
+        # responses and content messages are never touched.  Default 0
+        # (disabled) preserves prior behavior.
+        "ephemeral_system_ttl": 0,
        "platforms": {},  # Per-platform display overrides: {"telegram": {"tool_progress": "all"}, "slack": {"tool_progress": "off"}}
        # Gateway runtime-metadata footer appended to the FINAL message of a turn
        # (disabled by default to keep replies minimal). When enabled, renders
@@ -750,7 +810,7 @@ DEFAULT_CONFIG = {
    # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
    # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
    "tts": {
-        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
+        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "gemini" | "neutts" (local) | "kittentts" (local) | "piper" (local)
        "edge": {
            "voice": "en-US-AriaNeural",
            # Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@@ -780,6 +840,19 @@ DEFAULT_CONFIG = {
            "model": "neuphonic/neutts-air-q4-gguf",  # HuggingFace model repo
            "device": "cpu",  # cpu, cuda, or mps
        },
+        "piper": {
+            # Voice name (e.g. "en_US-lessac-medium") downloaded on first
+            # use, OR an absolute path to a pre-downloaded .onnx file.
+            # Full voice list: https://github.com/OHF-Voice/piper1-gpl/blob/main/docs/VOICES.md
+            "voice": "en_US-lessac-medium",
+            # "voices_dir": "",        # Override voice cache dir; default = ~/.hermes/cache/piper-voices/
+            # "use_cuda": False,       # Requires onnxruntime-gpu
+            # "length_scale": 1.0,     # 2.0 = twice as slow
+            # "noise_scale": 0.667,
+            # "noise_w_scale": 0.8,
+            # "volume": 1.0,
+            # "normalize_audio": True,
+        },
    },
    
    "stt": {
@@ -879,7 +952,23 @@ DEFAULT_CONFIG = {
    # injected at the start of every API call for few-shot priming.
    # Never saved to sessions, logs, or trajectories.
    "prefill_messages_file": "",
-    
+
+    # Goals — persistent cross-turn goals (Ralph-style loop).
+    # After every turn, a lightweight judge call asks the auxiliary model
+    # whether the active /goal is satisfied by the assistant's last
+    # response. If not, Hermes feeds a continuation prompt back into the
+    # same session and keeps working until the goal is done, the turn
+    # budget is exhausted, or the user pauses/clears it. Judge failures
+    # fail OPEN (continue) so a flaky judge never wedges progress — the
+    # turn budget is the real backstop.
+    "goals": {
+        # Max continuation turns before Hermes auto-pauses the goal and
+        # asks the user to /goal resume. Protects against judge false
+        # negatives (goal actually done but judge says continue) and
+        # unbounded model spend on fuzzy / unachievable goals.
+        "max_turns": 20,
+    },
+
    # Skills — external skill directories for sharing skills across tools/agents.
    # Each path is expanded (~, ${VAR}) and resolved.  Read-only — skill creation
    # always goes to ~/.hermes/skills/.
@@ -912,6 +1001,37 @@ DEFAULT_CONFIG = {
        "guard_agent_created": False,
    },

+    # Curator — background skill maintenance.
+    #
+    # Periodically reviews AGENT-CREATED skills (never bundled or
+    # hub-installed) and keeps the collection tidy: marks long-unused skills
+    # as stale, archives genuinely obsolete ones (archive only, never
+    # deletes), and spawns a forked aux-model agent to consolidate overlaps
+    # and patch drift. Runs inactivity-triggered from session start — no
+    # cron daemon.
+    #
+    # See `hermes curator status` for the last run summary.
+    "curator": {
+        "enabled": True,
+        # How long to wait between curator runs (hours).  Default: 7 days.
+        "interval_hours": 24 * 7,
+        # Only run when the agent has been idle at least this long (hours).
+        "min_idle_hours": 2,
+        # Mark a skill as "stale" after this many days without use.
+        "stale_after_days": 30,
+        # Archive a skill (move to skills/.archive/) after this many days
+        # without use. Archived skills are recoverable — no auto-deletion.
+        "archive_after_days": 90,
+        # Pre-run backup: before every real curator pass (dry-run is
+        # skipped), snapshot ~/.hermes/skills/ into
+        # ~/.hermes/skills/.curator_backups/<utc-iso>/skills.tar.gz so the
+        # user can roll back with `hermes curator rollback`.
+        "backup": {
+            "enabled": True,
+            "keep": 5,  # retain last N regular snapshots
+        },
+    },
+
    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
    # This section is only needed for hermes-specific overrides; everything else
    # (apiKey, workspace, peerName, sessions, enabled) comes from the global config.
@@ -975,6 +1095,14 @@ DEFAULT_CONFIG = {
        "mode": "manual",
        "timeout": 60,
        "cron_mode": "deny",
+        # When true, /reload-mcp asks the user to confirm before rebuilding
+        # the MCP tool set for the active session.  Reloading invalidates
+        # the provider prompt cache (tool schemas are baked into the system
+        # prompt), so the next message re-sends full input tokens — this can
+        # be expensive on long-context or high-reasoning models.  Users click
+        # "Always Approve" to silence the prompt permanently; that flips
+        # this key to false.
+        "mcp_reload_confirm": True,
    },

    # Permanently allowed dangerous command patterns (added via "always" approval)
@@ -1027,6 +1155,24 @@ DEFAULT_CONFIG = {
        "max_parallel_jobs": None,
    },

+    # Kanban multi-agent coordination — controls the dispatcher loop that
+    # spawns workers for ready tasks. The dispatcher ticks every N seconds
+    # (default 60), reclaims stale claims, promotes dependency-satisfied
+    # todos to ready, and fires `hermes -p <assignee> chat -q ...` for
+    # each claimable ready task. One dispatcher per profile is sufficient;
+    # running more than one on the same kanban.db will race for claims.
+    "kanban": {
+        # Run the dispatcher inside the gateway process. On by default —
+        # the cost is ~300µs every `dispatch_interval_seconds` when idle,
+        # and gateway is the supervisor users already have. Set to false
+        # only if you run the dispatcher as a separate systemd unit or
+        # don't want the gateway to spawn workers.
+        "dispatch_in_gateway": True,
+        # Seconds between dispatcher ticks (idle or not). Lower = snappier
+        # pickup of newly-ready tasks; higher = less SQL pressure.
+        "dispatch_interval_seconds": 60,
+    },
+
    # execute_code settings — controls the tool used for programmatic tool calls.
    "code_execution": {
        # Execution mode:
@@ -1128,7 +1274,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 22,
+    "_config_version": 23,
 }

 # =============================================================================
@@ -2033,6 +2179,43 @@ OPTIONAL_ENV_VARS = {
        "prompt": "QQ Sandbox Mode",
        "category": "messaging",
    },
+    "IRC_SERVER": {
+        "description": "IRC server hostname (e.g. irc.libera.chat)",
+        "prompt": "IRC server",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
+    "IRC_CHANNEL": {
+        "description": "IRC channel to join (e.g. #hermes)",
+        "prompt": "IRC channel",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
+    "IRC_NICKNAME": {
+        "description": "Bot nickname on IRC (default: hermes-bot)",
+        "prompt": "IRC nickname",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
+    "IRC_SERVER_PASSWORD": {
+        "description": "IRC server password (if required)",
+        "prompt": "IRC server password",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+        "advanced": True,
+    },
+    "IRC_NICKSERV_PASSWORD": {
+        "description": "NickServ password for nick identification",
+        "prompt": "NickServ password",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+        "advanced": True,
+    },
    "GATEWAY_ALLOW_ALL_USERS": {
        "description": "Allow all users to interact with messaging bots (true/false). Default: false.",
        "prompt": "Allow all users (true/false)",
@@ -2195,19 +2378,64 @@ def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
     return missing


-def _set_nested(config: dict, dotted_key: str, value):
+def _set_nested(config, dotted_key: str, value):
     """Set a value at an arbitrarily nested dotted key path.

-    Creates intermediate dicts as needed, e.g. ``_set_nested(c, "a.b.c", 1)``
-    ensures ``c["a"]["b"]["c"] == 1``.
+    Supports both dict and list navigation:
+      _set_nested(c, "a.b.c", 1)     → c["a"]["b"]["c"] = 1
+      _set_nested(c, "a.0.b", 1)     → c["a"][0]["b"] = 1
+      _set_nested(c, "providers.1", "x") → c["providers"][1] = "x"
+
+    Intermediate dicts are created on demand.  List indices are parsed
+    from numeric path segments; the referenced index must already exist
+    (we do not grow lists — the user is navigating into structure they
+    wrote themselves).  If a segment targets a non-container leaf
+    (scalar), the leaf is replaced with a fresh dict so the write can
+    proceed — this preserves the pre-existing behavior for bare scalar
+    overrides (e.g. setting ``a.b.c`` where ``a.b`` was previously a
+    string).
+
+    Guards against #17876: before this fix the code unconditionally
+    replaced any non-dict value (including lists) with ``{}``, silently
+    destroying list-typed config like ``custom_providers`` whenever a
+    caller used an indexed path.
+
+    Raises:
+        TypeError: a segment that addresses a list is not numeric, or the
+            path walks into a value that is neither a dict nor a list.
+        IndexError: a numeric segment is outside the addressed list.
     """
     parts = dotted_key.split(".")
     current = config
+
+    def _list_index(segment: str) -> int:
+        # One shared parser so intermediate and final segments fail alike.
+        try:
+            return int(segment)
+        except (TypeError, ValueError):
+            raise TypeError(
+                f"Cannot navigate into list at key {dotted_key!r}: "
+                f"segment {segment!r} is not a numeric index"
+            ) from None
+
     for part in parts[:-1]:
-        if part not in current or not isinstance(current.get(part), dict):
-            current[part] = {}
-        current = current[part]
-    current[parts[-1]] = value
+        if isinstance(current, list):
+            current = current[_list_index(part)]
+        elif isinstance(current, dict):
+            existing = current.get(part)
+            # Preserve dicts and lists; replace missing/scalar with a fresh dict.
+            if part not in current or not isinstance(existing, (dict, list)):
+                current[part] = {}
+            current = current[part]
+        else:
+            raise TypeError(
+                f"Cannot navigate into {type(current).__name__} at key {dotted_key!r}"
+            )
+    last = parts[-1]
+    if isinstance(current, list):
+        current[_list_index(last)] = value
+    else:
+        current[last] = value


 def get_missing_config_fields() -> List[Dict[str, Any]]:
@@ -2250,7 +2469,17 @@ def get_missing_skill_config_vars() -> List[Dict[str, Any]]:
    except Exception:
        return []

-    all_vars = discover_all_skill_config_vars()
+    try:
+        all_vars = discover_all_skill_config_vars()
+    except Exception as e:
+        # A malformed SKILL.md, unreadable external skill dir, or similar
+        # should never break `hermes update`.  Skill-config prompting is a
+        # post-migration nicety, not a blocker.
+        import logging
+        logging.getLogger(__name__).debug(
+            "discover_all_skill_config_vars failed: %s", e
+        )
+        return []
    if not all_vars:
        return []

@@ -3128,6 +3357,90 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                        "Use `hermes plugins enable <name>` to activate."
                    )

+    # ── Version 22 → 23: seed curator defaults + create logs/curator/ ──
+    # The curator (background skill maintenance) was added in PR #16049, but
+    # existing configs from before that PR (or before the April 2026
+    # unification under `auxiliary.curator`) never wrote the curator section
+    # to disk. The runtime deep-merge in `load_config()` fills defaults at
+    # read time, so the curator *functions*; but users can't see/edit the
+    # settings in their `config.yaml`, and `hermes curator status` has no
+    # stable logs dir to point at until the first run mkdir's it.
+    #
+    # This migration:
+    #   1. Writes the `curator` top-level section to config.yaml (enabled,
+    #      interval_hours, min_idle_hours, stale_after_days, archive_after_days,
+    #      backup) — only keys the user hasn't already overridden.
+    #   2. Writes the `auxiliary.curator` aux-task slot (provider, model,
+    #      base_url, api_key, timeout, extra_body) — canonical slot for
+    #      routing the curator fork to a cheaper aux model.
+    #   3. Creates `~/.hermes/logs/curator/` if missing (belt-and-suspenders
+    #      on top of ensure_hermes_home() — old profiles that predate this
+    #      migration still benefit).
+    if current_ver < 23:
+        try:
+            curator_dir = get_hermes_home() / "logs" / "curator"
+            curator_dir.mkdir(parents=True, exist_ok=True)
+        except Exception as e:
+            results["warnings"].append(f"Could not create {curator_dir}: {e}")
+
+        config = read_raw_config()
+        touched = False
+
+        # (1) Top-level curator section — only add missing keys
+        _curator_defaults = DEFAULT_CONFIG.get("curator", {})
+        raw_curator = config.get("curator")
+        if not isinstance(raw_curator, dict):
+            raw_curator = {}
+        added_curator: List[str] = []
+        for k, v in _curator_defaults.items():
+            if k not in raw_curator:
+                raw_curator[k] = copy.deepcopy(v)
+                added_curator.append(k)
+        if added_curator:
+            config["curator"] = raw_curator
+            touched = True
+
+        # (2) auxiliary.curator task slot
+        _aux_curator_defaults = (
+            DEFAULT_CONFIG.get("auxiliary", {}).get("curator", {})
+        )
+        raw_aux = config.get("auxiliary")
+        if not isinstance(raw_aux, dict):
+            raw_aux = {}
+        raw_aux_curator = raw_aux.get("curator")
+        if not isinstance(raw_aux_curator, dict):
+            raw_aux_curator = {}
+        added_aux: List[str] = []
+        for k, v in _aux_curator_defaults.items():
+            if k not in raw_aux_curator:
+                raw_aux_curator[k] = copy.deepcopy(v)
+                added_aux.append(k)
+        if added_aux:
+            raw_aux["curator"] = raw_aux_curator
+            config["auxiliary"] = raw_aux
+            touched = True
+
+        if touched:
+            save_config(config)
+            if added_curator:
+                results["config_added"].append(
+                    f"curator ({len(added_curator)} default key(s))"
+                )
+                if not quiet:
+                    print(
+                        "  ✓ Seeded curator defaults in config.yaml: "
+                        f"{', '.join(added_curator)}"
+                    )
+            if added_aux:
+                results["config_added"].append(
+                    f"auxiliary.curator ({len(added_aux)} default key(s))"
+                )
+                if not quiet:
+                    print(
+                        "  ✓ Seeded auxiliary.curator defaults in config.yaml: "
+                        f"{', '.join(added_aux)}"
+                    )
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -3400,17 +3713,17 @@ def _preserve_env_ref_templates(current, raw, loaded_expanded=None):


 def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
-    """Move stale root-level provider/base_url into model section.
+    """Move stale root-level provider/base_url/context_length into model section.

-    Some users (or older code) placed ``provider:`` and ``base_url:`` at the
-    config root instead of inside ``model:``.  These root-level keys are only
-    used as a fallback when the corresponding ``model.*`` key is empty — they
-    never override an existing ``model.provider`` or ``model.base_url``.
+    Some users (or older code) placed ``provider:``, ``base_url:``, or
+    ``context_length:`` at the config root instead of inside ``model:``.
+    These root-level keys are only used as a fallback when the corresponding
+    ``model.*`` key is empty — they never override an existing value.
    After migration the root-level keys are removed so they can't cause
    confusion on subsequent loads.
    """
    # Only act if there are root-level keys to migrate
-    has_root = any(config.get(k) for k in ("provider", "base_url"))
+    has_root = any(config.get(k) for k in ("provider", "base_url", "context_length"))
    if not has_root:
        return config

@@ -3420,7 +3733,7 @@ def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
        model = {"default": model} if model else {}
        config["model"] = model

-    for key in ("provider", "base_url"):
+    for key in ("provider", "base_url", "context_length"):
        root_val = config.get(key)
        if root_val and not model.get(key):
            model[key] = root_val
@@ -3445,6 +3758,52 @@ def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    return config


+def cfg_get(cfg: Optional[Dict[str, Any]], *keys: str, default: Any = None) -> Any:
+    """Look up a value by walking ``keys`` through nested dicts.
+
+    Drop-in replacement for chained ``cfg.get("X", {}).get("Y", default)``
+    lookups.  ``default`` is returned when:
+
+      * ``cfg`` itself is not a dict (this includes ``None``);
+      * any key along the path is absent;
+      * a value that still has to be descended into is not a dict (for
+        example, a string where a section was expected).
+
+    A key that is present is returned verbatim, even when its value is
+    ``None`` or otherwise falsy — the same contract as
+    ``dict.get(key, default)``.  When no ``keys`` are given, a dict ``cfg``
+    is returned unchanged.
+
+    The helper is called ``cfg_get`` rather than ``cfg_path`` so that it
+    cannot shadow local variables named ``cfg_path`` that hold the path of
+    the config file.
+
+    Examples:
+        >>> cfg_get({"agent": {"reasoning_effort": "high"}}, "agent", "reasoning_effort")
+        'high'
+        >>> cfg_get({}, "agent", "reasoning_effort", default="medium")
+        'medium'
+        >>> cfg_get({"agent": "oops_a_string"}, "agent", "reasoning_effort", default="low")
+        'low'
+        >>> cfg_get(None, "anything", default=42)
+        42
+        >>> cfg_get({"a": {"b": None}}, "a", "b", default="def")  # explicit None preserved
+        >>> cfg_get({"a": {"b": False}}, "a", "b", default=True)  # falsy values preserved
+        False
+    """
+    if not isinstance(cfg, dict):
+        return default
+    current: Any = cfg
+    for name in keys:
+        # Bail out as soon as the path cannot be followed any further.
+        if not (isinstance(current, dict) and name in current):
+            return default
+        current = current[name]
+    return current
+
+

 def read_raw_config() -> Dict[str, Any]:
    """Read ~/.hermes/config.yaml as-is, without merging defaults or migrating.
@@ -3707,18 +4066,27 @@ def _sanitize_env_lines(lines: list) -> list:

        # Detect concatenated KEY=VALUE pairs on one line.
        # Search for known KEY= patterns at any position in the line.
-        split_positions = []
+        # We collect full needle ranges so we can drop matches that are
+        # fully contained within a longer overlapping needle. Without this,
+        # suffix collisions corrupt the file: e.g. LM_API_KEY= inside
+        # GLM_API_KEY= would otherwise split the line into "G\nLM_API_KEY=...".
+        match_ranges: list[tuple[int, int]] = []
        for key_name in known_keys:
            needle = key_name + "="
            idx = stripped.find(needle)
            while idx >= 0:
-                split_positions.append(idx)
+                match_ranges.append((idx, idx + len(needle)))
                idx = stripped.find(needle, idx + len(needle))

+        split_positions = sorted({
+            s for s, e in match_ranges
+            if not any(
+                s2 <= s and e2 >= e and (s2, e2) != (s, e)
+                for s2, e2 in match_ranges
+            )
+        })
+
        if len(split_positions) > 1:
-            split_positions.sort()
-            # Deduplicate (shouldn't happen, but be safe)
-            split_positions = sorted(set(split_positions))
            for i, pos in enumerate(split_positions):
                end = split_positions[i + 1] if i + 1 < len(split_positions) else len(stripped)
                part = stripped[pos:end].strip()
@@ -4010,12 +4378,13 @@ def get_env_value(key: str) -> Optional[str]:
 # =============================================================================

 def redact_key(key: str) -> str:
-    """Redact an API key for display."""
-    if not key:
-        return color("(not set)", Colors.DIM)
-    if len(key) < 12:
-        return "***"
-    return key[:4] + "..." + key[-4:]
+    """Redact an API key for display.
+
+    Thin wrapper over :func:`agent.redact.mask_secret` — preserves the
+    "(not set)" placeholder in dim color for the empty case.
+    """
+    from agent.redact import mask_secret
+    return mask_secret(key, empty=color("(not set)", Colors.DIM))


 def show_config():
@@ -4095,6 +4464,9 @@ def show_config():
        print(f"  Daytona image: {terminal.get('daytona_image', 'nikolaik/python-nodejs:python3.11-nodejs20')}")
        daytona_key = get_env_value('DAYTONA_API_KEY')
        print(f"  API key:      {'configured' if daytona_key else '(not set)'}")
+    elif terminal.get('backend') == 'vercel_sandbox':
+        print(f"  Vercel runtime: {terminal.get('vercel_runtime', 'node24')}")
+        print(f"  Vercel auth:    {'configured' if get_env_value('VERCEL_OIDC_TOKEN') or (get_env_value('VERCEL_TOKEN') and get_env_value('VERCEL_PROJECT_ID') and get_env_value('VERCEL_TEAM_ID')) else '(not set)'}")
    elif terminal.get('backend') == 'ssh':
        ssh_host = get_env_value('TERMINAL_SSH_HOST')
        ssh_user = get_env_value('TERMINAL_SSH_USER')
@@ -4252,15 +4624,11 @@ def set_config_value(key: str, value: str):
        except Exception:
            user_config = {}
    
-    # Handle nested keys (e.g., "tts.provider")
-    parts = key.split('.')
-    current = user_config
-    
-    for part in parts[:-1]:
-        if part not in current or not isinstance(current.get(part), dict):
-            current[part] = {}
-        current = current[part]
-    
+    # Handle nested keys (e.g., "tts.provider") including numeric list
+    # indices (e.g., "custom_providers.0.api_key").  Delegates to
+    # _set_nested which preserves list-typed nodes; before #17876 the
+    # inline navigation here silently overwrote lists with dicts.
+
    # Convert value to appropriate type
    if value.lower() in ('true', 'yes', 'on'):
        value = True
@@ -4270,8 +4638,8 @@ def set_config_value(key: str, value: str):
        value = int(value)
    elif value.replace('.', '', 1).isdigit():
        value = float(value)
-    
-    current[parts[-1]] = value
+
+    _set_nested(user_config, key, value)
    
    # Write only user config back (not the full merged defaults)
    ensure_hermes_home()
@@ -4287,7 +4655,9 @@ def set_config_value(key: str, value: str):
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
        "terminal.daytona_image": "TERMINAL_DAYTONA_IMAGE",
+        "terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME",
        "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE",
+        "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER",
        "terminal.cwd": "TERMINAL_CWD",
        "terminal.timeout": "TERMINAL_TIMEOUT",
        "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR",
@@ -0,0 +1,418 @@
+"""CLI subcommand: `hermes curator <subcommand>`.
+
+Thin shell around agent/curator.py and tools/skill_usage.py. Renders a status
+table, triggers a run, pauses/resumes, and pins/unpins skills.
+
+This module intentionally has no side effects at import time — main.py wires
+the argparse subparsers on demand.
+"""
+
+from __future__ import annotations
+
+import argparse
+import sys
+from datetime import datetime, timezone
+from typing import Optional
+
+
+def _fmt_ts(ts: Optional[str]) -> str:
+    """Render an ISO-8601 timestamp as a coarse relative age (e.g. ``"5m ago"``).
+
+    Args:
+        ts: Timestamp string as accepted by ``datetime.fromisoformat``.
+            A value without a UTC offset is interpreted as UTC.
+
+    Returns:
+        ``"never"`` for an empty or ``None`` value; the input rendered with
+        ``str()`` when it cannot be parsed; otherwise the age expressed in
+        the largest whole unit among seconds, minutes, hours and days.
+        Timestamps that lie in the future are reported as ``"0s ago"``.
+    """
+    if not ts:
+        return "never"
+    try:
+        dt = datetime.fromisoformat(ts)
+    except (TypeError, ValueError):
+        return str(ts)
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    delta = datetime.now(timezone.utc) - dt
+    # Clamp at zero: a timestamp slightly in the future (clock skew between
+    # writer and reader) would otherwise render as a negative age such as
+    # "-42s ago".
+    secs = max(0, int(delta.total_seconds()))
+    if secs < 60:
+        return f"{secs}s ago"
+    if secs < 3600:
+        return f"{secs // 60}m ago"
+    if secs < 86400:
+        return f"{secs // 3600}h ago"
+    return f"{secs // 86400}d ago"
+
+
+def _print_skill_rows(title: str, rows: list) -> None:
+    """Print a blank line, *title*, then one counter line per skill row.
+
+    Counters are read with ``or 0`` so that a row carrying an explicit
+    ``None`` (key present, value unset) prints as 0 instead of making the
+    ``:3d`` format spec raise ``TypeError``.  The sort keys in
+    ``_cmd_status`` apply the same normalisation.
+    """
+    print(f"\n{title}")
+    for r in rows:
+        activity = r.get("activity_count") or 0
+        uses = r.get("use_count") or 0
+        views = r.get("view_count") or 0
+        patches = r.get("patch_count") or 0
+        last = _fmt_ts(r.get("last_activity_at"))
+        print(
+            f"  {r['name']:40s}  "
+            f"activity={activity:3d}  "
+            f"use={uses:3d}  "
+            f"view={views:3d}  "
+            f"patches={patches:3d}  "
+            f"last_activity={last}"
+        )
+
+
+def _cmd_status(args) -> int:
+    """Print curator state, configured thresholds and skill activity tables.
+
+    Reads the persisted curator state and the agent-created skill report,
+    then prints: the ENABLED/PAUSED/DISABLED line, run bookkeeping, the
+    configured interval and stale/archive thresholds, per-state skill
+    counts, pinned skills, and up to five rows each for "least recently
+    active", "most active" and "least active" skills.
+
+    Args:
+        args: Parsed argparse namespace (unused).
+
+    Returns:
+        Always 0.
+    """
+    from agent import curator
+    from tools import skill_usage
+
+    state = curator.load_state()
+    enabled = curator.is_enabled()
+    paused = state.get("paused", False)
+    last_run = state.get("last_run_at")
+    summary = state.get("last_run_summary") or "(none)"
+    runs = state.get("run_count", 0)
+
+    # PAUSED takes precedence over DISABLED when both apply.
+    status_line = (
+        "ENABLED" if enabled and not paused else
+        "PAUSED" if paused else
+        "DISABLED"
+    )
+    print(f"curator: {status_line}")
+    print(f"  runs:           {runs}")
+    print(f"  last run:       {_fmt_ts(last_run)}")
+    print(f"  last summary:   {summary}")
+    _report = state.get("last_report_path")
+    if _report:
+        print(f"  last report:    {_report}")
+    _ih = curator.get_interval_hours()
+    # Whole multiples of a day read better as "7d" than "168h".
+    _interval_label = (
+        f"{_ih // 24}d" if _ih % 24 == 0 and _ih >= 24
+        else f"{_ih}h"
+    )
+    print(f"  interval:       every {_interval_label}")
+    print(f"  stale after:    {curator.get_stale_after_days()}d unused")
+    print(f"  archive after:  {curator.get_archive_after_days()}d unused")
+
+    rows = skill_usage.agent_created_report()
+    if not rows:
+        print("\nno agent-created skills")
+        return 0
+
+    by_state = {"active": [], "stale": [], "archived": []}
+    pinned = []
+    for r in rows:
+        state_name = r.get("state", "active")
+        by_state.setdefault(state_name, []).append(r)
+        if r.get("pinned"):
+            pinned.append(r["name"])
+
+    print(f"\nagent-created skills: {len(rows)} total")
+    for state_name in ("active", "stale", "archived"):
+        bucket = by_state.get(state_name, [])
+        print(f"  {state_name:10s} {len(bucket)}")
+
+    if pinned:
+        print(f"\npinned ({len(pinned)}): {', '.join(pinned)}")
+
+    active_all = by_state.get("active", [])
+    if not active_all:
+        return 0
+
+    # Show top 5 least-recently-active skills. Views and edits are activity too:
+    # curator should not report a skill as "never used" right after skill_view()
+    # or skill_manage() touched it.
+    least_recent = sorted(
+        active_all,
+        key=lambda r: r.get("last_activity_at") or r.get("created_at") or "",
+    )[:5]
+    _print_skill_rows("least recently active (top 5):", least_recent)
+
+    # Show top 5 most-active and least-active skills by activity_count
+    # (use + view + patch). This is a different signal from
+    # least-recently-active: activity_count reflects frequency,
+    # last_activity_at reflects recency. A skill touched 30 times a year
+    # ago is high-frequency but stale; a skill touched once yesterday is
+    # recent but low-frequency. Both can matter.
+    def _by_frequency(r):
+        return (r.get("activity_count") or 0, r.get("last_activity_at") or "")
+
+    most_active = sorted(active_all, key=_by_frequency, reverse=True)[:5]
+    # Skip the table entirely when no active skill has recorded activity.
+    if (most_active[0].get("activity_count") or 0) > 0:
+        _print_skill_rows("most active (top 5):", most_active)
+
+    least_active = sorted(active_all, key=_by_frequency)[:5]
+    _print_skill_rows("least active (top 5):", least_active)
+
+    return 0
+
+
+def _cmd_run(args) -> int:
+    """Trigger a curator review pass and print what happened.
+
+    Honours ``args.dry_run`` (report only) and ``args.synchronous`` (wait
+    for the LLM pass instead of leaving it in the background).
+
+    Returns:
+        1 when the curator is disabled in config, otherwise 0.
+    """
+    from agent import curator
+
+    if not curator.is_enabled():
+        print("curator: disabled via config; enable with `curator.enabled: true`")
+        return 1
+
+    dry = bool(getattr(args, "dry_run", False))
+    banner = (
+        "curator: running DRY-RUN (report only, no mutations)..."
+        if dry
+        else "curator: running review pass..."
+    )
+    print(banner)
+
+    def _echo(msg: str) -> None:
+        # Summary lines from the review are relayed straight to stdout.
+        print(msg)
+
+    result = curator.run_curator_review(
+        on_summary=_echo,
+        synchronous=bool(args.synchronous),
+        dry_run=dry,
+    )
+
+    auto = result.get("auto_transitions", {})
+    if auto and dry:
+        print(
+            f"auto (preview): {auto.get('checked', 0)} candidate skill(s) "
+            "— no transitions applied in dry-run"
+        )
+    elif auto:
+        print(
+            f"auto: checked={auto.get('checked', 0)} "
+            f"stale={auto.get('marked_stale', 0)} "
+            f"archived={auto.get('archived', 0)} "
+            f"reactivated={auto.get('reactivated', 0)}"
+        )
+
+    if not args.synchronous:
+        print("llm pass running in background — check `hermes curator status` later")
+    if dry:
+        print(
+            "dry-run: no changes applied. When the report lands, read it with "
+            "`hermes curator status` and run `hermes curator run` (no flag) to apply."
+        )
+    return 0
+
+
+def _cmd_pause(args) -> int:
+    """Set the curator's paused flag and confirm on stdout. Always returns 0."""
+    from agent import curator as _curator
+
+    _curator.set_paused(True)
+    print("curator: paused")
+    return 0
+
+
+def _cmd_resume(args) -> int:
+    """Clear the curator's paused flag and confirm on stdout. Always returns 0."""
+    from agent import curator as _curator
+
+    _curator.set_paused(False)
+    print("curator: resumed")
+    return 0
+
+
+def _cmd_pin(args) -> int:
+    """Pin ``args.skill`` so auto-transitions skip it.
+
+    Returns:
+        0 when the skill was pinned, 1 when it is not an agent-created skill.
+    """
+    from tools import skill_usage
+
+    if skill_usage.is_agent_created(args.skill):
+        skill_usage.set_pinned(args.skill, True)
+        print(f"curator: pinned '{args.skill}' (will bypass auto-transitions)")
+        return 0
+
+    print(
+        f"curator: '{args.skill}' is bundled or hub-installed — cannot pin "
+        "(only agent-created skills participate in curation)"
+    )
+    return 1
+
+
+def _cmd_unpin(args) -> int:
+    """Remove the pin from ``args.skill``.
+
+    Returns:
+        0 when the skill was unpinned, 1 when it is not an agent-created skill.
+    """
+    from tools import skill_usage
+
+    if skill_usage.is_agent_created(args.skill):
+        skill_usage.set_pinned(args.skill, False)
+        print(f"curator: unpinned '{args.skill}'")
+        return 0
+
+    print(
+        f"curator: '{args.skill}' is bundled or hub-installed — "
+        "there's nothing to unpin (curator only tracks agent-created skills)"
+    )
+    return 1
+
+
+def _cmd_restore(args) -> int:
+    """Restore ``args.skill`` via ``skill_usage.restore_skill`` and print its message.
+
+    Returns:
+        0 when the restore call reports success, otherwise 1.
+    """
+    from tools import skill_usage
+
+    restored, detail = skill_usage.restore_skill(args.skill)
+    print(f"curator: {detail}")
+    if restored:
+        return 0
+    return 1
+
+
+def _cmd_backup(args) -> int:
+    """Create a user-initiated snapshot of the skills tree.
+
+    Uses the same snapshot routine as the automatic pre-run backup. The
+    optional ``args.reason`` is passed through as the snapshot reason and
+    falls back to ``"manual"`` when missing or empty.
+
+    Returns:
+        0 when a snapshot was created; 1 when backups are disabled or the
+        snapshot call returned ``None``.
+    """
+    from agent import curator_backup
+
+    if not curator_backup.is_enabled():
+        print(
+            "curator: backups are disabled via config "
+            "(`curator.backup.enabled: false`); re-enable to snapshot"
+        )
+        return 1
+
+    label = getattr(args, "reason", None)
+    if not label:
+        label = "manual"
+
+    snap = curator_backup.snapshot_skills(reason=label)
+    if snap is not None:
+        print(f"curator: snapshot created at ~/.hermes/skills/.curator_backups/{snap.name}")
+        return 0
+
+    print("curator: snapshot failed — check logs (backup disabled or IO error)")
+    return 1
+
+
+def _cmd_rollback(args) -> int:
+    """Restore the skills tree from a curator snapshot (newest by default).
+
+    Flags read from *args*:
+      * ``list``: print the available snapshots and exit.
+      * ``backup_id``: restore that specific snapshot instead of the newest.
+      * ``yes``: skip the interactive confirmation prompt.
+
+    The printed notice states that a safety snapshot of the current tree is
+    taken first, which makes a rollback itself undoable.
+
+    Returns:
+        0 on success or after ``--list``; 1 when no snapshot matches, the
+        user declines or aborts the prompt, or the rollback call fails.
+    """
+    from agent import curator_backup
+
+    if getattr(args, "list", False):
+        print(curator_backup.summarize_backups())
+        return 0
+
+    backup_id = getattr(args, "backup_id", None)
+    target_path = curator_backup._resolve_backup(backup_id)
+
+    if target_path is None:
+        # Distinguish "nothing to roll back to" from "that id does not exist".
+        if curator_backup.list_backups():
+            wanted = "id " + repr(backup_id) if backup_id else "your query"
+            print(f"curator: no snapshot matching {wanted}.")
+            print("Available:")
+            print(curator_backup.summarize_backups())
+        else:
+            print(
+                "curator: no snapshots exist yet. Take one with "
+                "`hermes curator backup` or wait for the next curator run."
+            )
+        return 1
+
+    manifest = curator_backup._read_manifest(target_path)
+    print(f"Rollback target: {target_path.name}")
+    if manifest:
+        print(f"  reason:      {manifest.get('reason', '?')}")
+        print(f"  created_at:  {manifest.get('created_at', '?')}")
+        print(f"  skill files: {manifest.get('skill_files', '?')}")
+    print(
+        "\nThis will replace the current ~/.hermes/skills/ tree (a safety "
+        "snapshot of the current state is taken first so this is undoable)."
+    )
+
+    skip_prompt = getattr(args, "yes", False)
+    if not skip_prompt:
+        try:
+            reply = input("Proceed? [y/N] ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            # Ctrl-D / Ctrl-C at the prompt counts as "no".
+            print("\ncancelled")
+            return 1
+        if reply != "y" and reply != "yes":
+            print("cancelled")
+            return 1
+
+    succeeded, detail, _ = curator_backup.rollback(backup_id=target_path.name)
+    if not succeeded:
+        print(f"curator: rollback failed — {detail}")
+        return 1
+    print(f"curator: {detail}")
+    return 0
+
+
+# ---------------------------------------------------------------------------
+# argparse wiring (called from hermes_cli.main)
+# ---------------------------------------------------------------------------
+
+def register_cli(parent: argparse.ArgumentParser) -> None:
+    """Wire the `curator` subcommands onto *parent*.
+
+    *parent* is the parser that main.py obtained from
+    ``subparsers.add_parser("curator", ...)``. Each subcommand stores its
+    handler under ``func``; when no subcommand is given, the default
+    ``func`` prints *parent*'s help and returns 0.
+    """
+    def _show_help(_args) -> int:
+        parent.print_help()
+        return 0
+
+    parent.set_defaults(func=_show_help)
+    subs = parent.add_subparsers(dest="curator_command")
+
+    status_parser = subs.add_parser("status", help="Show curator status and skill stats")
+    status_parser.set_defaults(func=_cmd_status)
+
+    run_parser = subs.add_parser("run", help="Trigger a curator review now")
+    run_parser.add_argument(
+        "--sync", "--synchronous", dest="synchronous", action="store_true",
+        help="Wait for the LLM review pass to finish (default: background thread)",
+    )
+    run_parser.add_argument(
+        "--dry-run", dest="dry_run", action="store_true",
+        help="Report only — no state changes, no archives, no consolidation "
+             "(use this to preview what curator would do)",
+    )
+    run_parser.set_defaults(func=_cmd_run)
+
+    # Subcommands that take no arguments.
+    for cmd_name, cmd_help, handler in (
+        ("pause", "Pause the curator until resumed", _cmd_pause),
+        ("resume", "Resume a paused curator", _cmd_resume),
+    ):
+        subs.add_parser(cmd_name, help=cmd_help).set_defaults(func=handler)
+
+    # Subcommands that take a single positional skill name.
+    for cmd_name, cmd_help, handler in (
+        ("pin", "Pin a skill so the curator never auto-transitions it", _cmd_pin),
+        ("unpin", "Unpin a skill", _cmd_unpin),
+        ("restore", "Restore an archived skill", _cmd_restore),
+    ):
+        skill_parser = subs.add_parser(cmd_name, help=cmd_help)
+        skill_parser.add_argument("skill", help="Skill name")
+        skill_parser.set_defaults(func=handler)
+
+    backup_parser = subs.add_parser(
+        "backup",
+        help="Take a manual tar.gz snapshot of ~/.hermes/skills/ "
+             "(curator also does this automatically before every real run)",
+    )
+    backup_parser.add_argument(
+        "--reason", default=None,
+        help="Free-text label stored in manifest.json (default: 'manual')",
+    )
+    backup_parser.set_defaults(func=_cmd_backup)
+
+    rollback_parser = subs.add_parser(
+        "rollback",
+        help="Restore ~/.hermes/skills/ from a curator snapshot "
+             "(defaults to the newest)",
+    )
+    rollback_parser.add_argument(
+        "--list", action="store_true",
+        help="List available snapshots and exit without restoring",
+    )
+    rollback_parser.add_argument(
+        "--id", dest="backup_id", default=None,
+        help="Snapshot id to restore (see `--list`); default: newest",
+    )
+    rollback_parser.add_argument(
+        "-y", "--yes", action="store_true",
+        help="Skip confirmation prompt",
+    )
+    rollback_parser.set_defaults(func=_cmd_rollback)
+
+
+def cli_main(argv=None) -> int:
+    """Parse *argv* as `hermes curator ...` and dispatch to the chosen handler.
+
+    Usable standalone and as a fallthrough from hermes_cli.main.
+
+    Returns:
+        The handler's return value coerced to ``int`` (a falsy result maps
+        to 0), or 0 after printing help when no handler is attached.
+    """
+    parser = argparse.ArgumentParser(prog="hermes curator")
+    register_cli(parser)
+    namespace = parser.parse_args(argv)
+
+    handler = getattr(namespace, "func", None)
+    if handler is not None:
+        return int(handler(namespace) or 0)
+
+    parser.print_help()
+    return 0
+
+
+# Standalone execution: use cli_main()'s return value as the process exit code.
+if __name__ == "__main__":  # pragma: no cover
+    sys.exit(cli_main())
@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+import importlib.util
 from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path
@@ -30,6 +31,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

 from hermes_cli.colors import Colors, color
 from hermes_cli.models import _HERMES_USER_AGENT
+from hermes_cli.vercel_auth import describe_vercel_auth
 from hermes_constants import OPENROUTER_MODELS_URL
 from utils import base_url_host_matches

@@ -76,6 +78,14 @@ def _system_package_install_cmd(pkg: str) -> str:
    return f"sudo apt install {pkg}"


+def _safe_which(cmd: str) -> str | None:
+    """Return ``shutil.which(cmd)``, or ``None`` if the lookup itself raises.
+
+    Exists so that tests which monkeypatch platform details cannot crash
+    the doctor checks through ``shutil.which``.
+    """
+    try:
+        located = shutil.which(cmd)
+    except Exception:
+        located = None
+    return located
+
+
 def _termux_browser_setup_steps(node_installed: bool) -> list[str]:
    steps: list[str] = []
    step = 1
@@ -293,15 +303,23 @@ def run_doctor(args):

            known_providers: set = set()
            try:
-                from hermes_cli.auth import PROVIDER_REGISTRY
+                from hermes_cli.auth import (
+                    PROVIDER_REGISTRY,
+                    resolve_provider as _resolve_auth_provider,
+                )
                known_providers = set(PROVIDER_REGISTRY.keys()) | {"openrouter", "custom", "auto"}
            except Exception:
+                _resolve_auth_provider = None
                pass
            try:
                from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
-                from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
+                from hermes_cli.providers import (
+                    normalize_provider as _normalize_catalog_provider,
+                    resolve_provider_full as _resolve_provider_full,
+                )
            except Exception:
                _compatible_custom_providers = None
+                _normalize_catalog_provider = None
                _resolve_provider_full = None

            custom_providers = []
@@ -321,17 +339,43 @@ def run_doctor(args):
                if name:
                    known_providers.add("custom:" + name.lower().replace(" ", "-"))

-            canonical_provider = provider
+            valid_provider_ids = set(known_providers)
+            provider_ids_to_accept = {provider} if provider else set()
+            if _normalize_catalog_provider is not None:
+                for known_provider in known_providers:
+                    try:
+                        valid_provider_ids.add(_normalize_catalog_provider(known_provider))
+                    except Exception:
+                        continue
+
+            runtime_provider = provider
+            if (
+                provider
+                and _resolve_auth_provider is not None
+                and provider not in ("auto", "custom")
+            ):
+                try:
+                    runtime_provider = _resolve_auth_provider(provider)
+                    provider_ids_to_accept.add(runtime_provider)
+                except Exception:
+                    runtime_provider = provider
+
+            catalog_provider = provider
            if (
                provider
                and _resolve_provider_full is not None
                and provider not in ("auto", "custom")
            ):
                provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
-                canonical_provider = provider_def.id if provider_def is not None else None
+                catalog_provider = provider_def.id if provider_def is not None else None
+                if catalog_provider is not None:
+                    provider_ids_to_accept.add(catalog_provider)

            if provider and provider != "auto":
-                if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
+                if catalog_provider is None or (
+                    known_providers
+                    and not (provider_ids_to_accept & valid_provider_ids)
+                ):
                    known_list = ", ".join(sorted(known_providers)) if known_providers else "(unavailable)"
                    check_fail(
                        f"model.provider '{provider_raw}' is not a recognised provider",
@@ -344,7 +388,24 @@ def run_doctor(args):
                    )

            # Warn if model is set to a provider-prefixed name on a provider that doesn't use them
-            if default_model and "/" in default_model and canonical_provider and canonical_provider not in ("openrouter", "custom", "auto", "ai-gateway", "kilocode", "opencode-zen", "huggingface", "nous", "lmstudio"):
+            provider_for_policy = runtime_provider or catalog_provider
+            providers_accepting_vendor_slugs = {
+                "openrouter",
+                "custom",
+                "auto",
+                "ai-gateway",
+                "kilocode",
+                "opencode-zen",
+                "huggingface",
+                "lmstudio",
+                "nous",
+            }
+            if (
+                default_model
+                and "/" in default_model
+                and provider_for_policy
+                and provider_for_policy not in providers_accepting_vendor_slugs
+            ):
                check_warn(
                    f"model.default '{default_model}' uses a vendor/model slug but provider is '{provider_raw}'",
                    "(vendor-prefixed slugs belong to aggregators like openrouter)",
@@ -360,20 +421,24 @@ def run_doctor(args):
            # own env-var checks elsewhere in doctor, and get_auth_status()
            # returns a bare {logged_in: False} for anything it doesn't
            # explicitly dispatch, which would produce false positives.
-            if canonical_provider and canonical_provider not in ("auto", "custom", "openrouter"):
+            if runtime_provider and runtime_provider not in ("auto", "custom", "openrouter"):
                try:
                    from hermes_cli.auth import PROVIDER_REGISTRY, get_auth_status
-                    pconfig = PROVIDER_REGISTRY.get(canonical_provider)
+                    pconfig = PROVIDER_REGISTRY.get(runtime_provider)
                    if pconfig and getattr(pconfig, "auth_type", "") == "api_key":
-                        status = get_auth_status(canonical_provider) or {}
-                        configured = bool(status.get("configured") or status.get("logged_in") or status.get("api_key"))
+                        status = get_auth_status(runtime_provider) or {}
+                        configured = bool(
+                            status.get("configured")
+                            or status.get("logged_in")
+                            or status.get("api_key")
+                        )
                        if not configured:
                            check_fail(
-                                f"model.provider '{canonical_provider}' is set but no API key is configured",
+                                f"model.provider '{runtime_provider}' is set but no API key is configured",
                                "(check ~/.hermes/.env or run 'hermes setup')",
                            )
                            issues.append(
-                                f"No credentials found for provider '{canonical_provider}'. "
+                                f"No credentials found for provider '{runtime_provider}'. "
                                f"Run 'hermes setup' or set the provider's API key in {_DHH}/.env, "
                                f"or switch providers with 'hermes config set model.provider <name>'"
                            )
@@ -482,6 +547,7 @@ def run_doctor(args):
            get_nous_auth_status,
            get_codex_auth_status,
            get_gemini_oauth_auth_status,
+            get_minimax_oauth_auth_status,
        )

        nous_status = get_nous_auth_status()
@@ -511,10 +577,17 @@ def run_doctor(args):
            check_ok("Google Gemini OAuth", f"(logged in{suffix})")
        else:
            check_warn("Google Gemini OAuth", "(not logged in)")
+
+        minimax_status = get_minimax_oauth_auth_status()
+        if minimax_status.get("logged_in"):
+            region = minimax_status.get("region", "global")
+            check_ok("MiniMax OAuth", f"(logged in, region={region})")
+        else:
+            check_warn("MiniMax OAuth", "(not logged in)")
    except Exception as e:
        check_warn("Auth provider status", f"(could not check: {e})")

-    if shutil.which("codex"):
+    if _safe_which("codex"):
        check_ok("codex CLI")
    else:
        # Native OAuth uses Hermes' own device-code flow — the Codex CLI is
@@ -732,13 +805,13 @@ def run_doctor(args):
    print(color("◆ External Tools", Colors.CYAN, Colors.BOLD))
    
    # Git
-    if shutil.which("git"):
+    if _safe_which("git"):
        check_ok("git")
    else:
        check_warn("git not found", "(optional)")
    
    # ripgrep (optional, for faster file search)
-    if shutil.which("rg"):
+    if _safe_which("rg"):
        check_ok("ripgrep (rg)", "(faster file search)")
    else:
        check_warn("ripgrep (rg) not found", "(file search uses grep fallback)")
@@ -747,7 +820,7 @@ def run_doctor(args):
    # Docker (optional)
    terminal_env = os.getenv("TERMINAL_ENV", "local")
    if terminal_env == "docker":
-        if shutil.which("docker"):
+        if _safe_which("docker"):
            # Check if docker daemon is running
            try:
                result = subprocess.run(["docker", "info"], capture_output=True, timeout=10)
@@ -762,7 +835,7 @@ def run_doctor(args):
            check_fail("docker not found", "(required for TERMINAL_ENV=docker)")
            issues.append("Install Docker or change TERMINAL_ENV")
    else:
-        if shutil.which("docker"):
+        if _safe_which("docker"):
            check_ok("docker", "(optional)")
        else:
            if _is_termux():
@@ -808,8 +881,52 @@ def run_doctor(args):
            check_fail("daytona SDK not installed", "(pip install daytona)")
            issues.append("Install daytona SDK: pip install daytona")

+    # Vercel Sandbox (if using vercel_sandbox backend)
+    if terminal_env == "vercel_sandbox":
+        runtime = os.getenv("TERMINAL_VERCEL_RUNTIME", "node24").strip() or "node24"
+        from tools.terminal_tool import _SUPPORTED_VERCEL_RUNTIMES
+        if runtime in _SUPPORTED_VERCEL_RUNTIMES:
+            check_ok("Vercel runtime", f"({runtime})")
+        else:
+            supported = ", ".join(_SUPPORTED_VERCEL_RUNTIMES)
+            check_fail("Vercel runtime unsupported", f"({runtime}; use {supported})")
+            issues.append(f"Set TERMINAL_VERCEL_RUNTIME to one of: {supported}")
+
+        disk = os.getenv("TERMINAL_CONTAINER_DISK", "51200").strip()
+        if disk in ("", "0", "51200"):
+            check_ok("Vercel disk setting", "(uses platform default)")
+        else:
+            check_fail("Vercel custom disk unsupported", "(reset terminal.container_disk to 51200)")
+            issues.append("Vercel Sandbox does not support custom container_disk; use the shared default 51200")
+
+        if importlib.util.find_spec("vercel") is not None:
+            check_ok("vercel SDK", "(installed)")
+        else:
+            check_fail("vercel SDK not installed", "(pip install 'hermes-agent[vercel]')")
+            issues.append("Install the Vercel optional dependency: pip install 'hermes-agent[vercel]'")
+
+        auth_status = describe_vercel_auth()
+        if auth_status.ok:
+            check_ok("Vercel auth", f"({auth_status.label})")
+        elif auth_status.label.startswith("partial"):
+            check_fail("Vercel auth incomplete", f"({auth_status.label})")
+            issues.append("Set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID together")
+        else:
+            check_fail("Vercel auth not configured", f"({auth_status.label})")
+            issues.append(
+                "Configure Vercel Sandbox auth with VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID"
+            )
+        for line in auth_status.detail_lines:
+            check_info(f"Vercel auth {line}")
+
+        persistent = os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("1", "true", "yes", "on")
+        if persistent:
+            check_info("Vercel persistence: snapshot filesystem only; live processes do not survive sandbox recreation")
+        else:
+            check_info("Vercel persistence: ephemeral filesystem")
+
    # Node.js + agent-browser (for browser automation tools)
-    if shutil.which("node"):
+    if _safe_which("node"):
        check_ok("Node.js")
        # Check if agent-browser is installed
        agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser"
@@ -835,7 +952,7 @@ def run_doctor(args):
            check_warn("Node.js not found", "(optional, needed for browser tools)")
    
    # npm audit for all Node.js packages
-    if shutil.which("npm"):
+    if _safe_which("npm"):
        npm_dirs = [
            (PROJECT_ROOT, "Browser tools (agent-browser)"),
            (PROJECT_ROOT / "scripts" / "whatsapp-bridge", "WhatsApp bridge"),
@@ -914,10 +1031,16 @@ def run_doctor(args):
        print("  Checking Anthropic API...", end="", flush=True)
        try:
            import httpx
-            from agent.anthropic_adapter import _is_oauth_token, _COMMON_BETAS, _OAUTH_ONLY_BETAS
+            from agent.anthropic_adapter import (
+                _is_oauth_token,
+                _COMMON_BETAS,
+                _OAUTH_ONLY_BETAS,
+                _CONTEXT_1M_BETA,
+            )

            headers = {"anthropic-version": "2023-06-01"}
-            if _is_oauth_token(anthropic_key):
+            is_oauth = _is_oauth_token(anthropic_key)
+            if is_oauth:
                headers["Authorization"] = f"Bearer {anthropic_key}"
                headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
            else:
@@ -927,6 +1050,25 @@ def run_doctor(args):
                headers=headers,
                timeout=10
            )
+            # Reactive recovery: OAuth subscriptions that don't include 1M
+            # context reject the request with 400 "long context beta is not
+            # yet available for this subscription". Retry once with that
+            # beta stripped so the doctor check doesn't falsely report the
+            # Anthropic API as unreachable for those users.
+            if (
+                is_oauth
+                and response.status_code == 400
+                and "long context beta" in response.text.lower()
+                and "not yet available" in response.text.lower()
+            ):
+                headers["anthropic-beta"] = ",".join(
+                    [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA] + list(_OAUTH_ONLY_BETAS)
+                )
+                response = httpx.get(
+                    "https://api.anthropic.com/v1/models",
+                    headers=headers,
+                    timeout=10,
+                )
            if response.status_code == 200:
                print(f"\r  {color('✓', Colors.GREEN)} Anthropic API                           ")
            elif response.status_code == 401:
@@ -33,12 +33,14 @@ def _get_git_commit(project_root: Path) -> str:


 def _redact(value: str) -> str:
-    """Redact all but first 4 and last 4 chars."""
-    if not value:
-        return ""
-    if len(value) < 12:
-        return "***"
-    return value[:4] + "..." + value[-4:]
+    """Redact all but first 4 and last 4 chars.
+
+    Thin wrapper over :func:`agent.redact.mask_secret`. Returns ``""`` for
+    an empty value (matches the historical behavior of this helper —
+    ``hermes dump`` formats empty values as blank, not as ``"(not set)"``).
+    """
+    from agent.redact import mask_secret
+    return mask_secret(value)


 def _gateway_status() -> str:
@@ -10,6 +10,7 @@ import shutil
 import signal
 import subprocess
 import sys
+import textwrap
 from dataclasses import dataclass
 from pathlib import Path

@@ -59,6 +60,13 @@ class GatewayRuntimeSnapshot:
    def has_process_service_mismatch(self) -> bool:
        return self.service_installed and self.running and not self.service_running

+
+@dataclass(frozen=True)
+class ProfileGatewayProcess:
+    """Immutable record pairing a Hermes profile with a gateway PID.
+
+    Instances are built by ``find_profile_gateway_processes`` from each
+    profile's ``gateway.pid`` file.
+    """
+
+    # Profile name (``profile.name`` of the listed profile).
+    profile: str
+    # Profile directory; the PID is read from ``<path>/gateway.pid``.
+    path: Path
+    # PID reported for that profile's gateway; always > 0 when built by
+    # ``find_profile_gateway_processes``.
+    pid: int
+
 def _get_service_pids() -> set:
    """Return PIDs currently managed by systemd or launchd gateway services.

@@ -279,9 +287,11 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
                ["wmic", "process", "get", "ProcessId,CommandLine", "/FORMAT:LIST"],
                capture_output=True,
                text=True,
+                encoding="utf-8",
+                errors="ignore",
                timeout=10,
            )
-            if result.returncode != 0:
+            if result.returncode != 0 or result.stdout is None:
                return []
            current_cmd = ""
            for line in result.stdout.split("\n"):
@@ -369,6 +379,83 @@ def find_gateway_pids(exclude_pids: set | None = None, all_profiles: bool = Fals
    return pids


+def find_profile_gateway_processes(
+    exclude_pids: set | None = None,
+) -> list[ProfileGatewayProcess]:
+    """Map each Hermes profile's ``gateway.pid`` file to a gateway process.
+
+    Args:
+        exclude_pids: PIDs to leave out of the result; ``None`` excludes
+            nothing.
+
+    Returns:
+        One ``ProfileGatewayProcess`` per distinct positive PID reported by
+        ``get_running_pid``, in profile listing order. The list is empty when
+        the status/profile helpers cannot be imported.
+    """
+    # PIDs that must not be reported: caller exclusions, plus every PID that
+    # has already been emitted (so a PID shared by two profiles appears once).
+    skip: set[int] = set(exclude_pids or set())
+    found: list[ProfileGatewayProcess] = []
+    try:
+        from gateway.status import get_running_pid
+        from hermes_cli.profiles import list_profiles
+    except Exception:
+        return found
+
+    for prof in list_profiles():
+        try:
+            running = get_running_pid(prof.path / "gateway.pid", cleanup_stale=False)
+        except Exception:
+            # A failure for one profile must not hide the others.
+            continue
+        if running is None or running <= 0 or running in skip:
+            continue
+        skip.add(running)
+        found.append(ProfileGatewayProcess(profile=prof.name, path=prof.path, pid=running))
+    return found
+
+
+def _gateway_run_args_for_profile(profile: str) -> list[str]:
+    """Build the argv that runs ``gateway run --replace`` for *profile*.
+
+    The ``--profile <name>`` pair is omitted for the ``"default"`` profile.
+    """
+    profile_flag = [] if profile == "default" else ["--profile", profile]
+    return [
+        get_python_path(),
+        "-m",
+        "hermes_cli.main",
+        *profile_flag,
+        "gateway",
+        "run",
+        "--replace",
+    ]
+
+
+def launch_detached_profile_gateway_restart(profile: str, old_pid: int) -> bool:
+    """Relaunch a manually-run profile gateway after its current PID exits.
+
+    Spawns a helper interpreter (``sys.executable -c``) in its own session.
+    The helper probes ``old_pid`` with signal 0 every 0.2 s for up to 120 s,
+    then starts the command from ``_gateway_run_args_for_profile(profile)``.
+    The relaunch happens once polling stops, whether the old process was
+    found gone or the deadline passed. A ``PermissionError`` from the probe is
+    treated as "still running".
+
+    Args:
+        profile: Profile name used to build the relaunch command.
+        old_pid: PID of the gateway that is expected to exit.
+
+    Returns:
+        ``False`` when ``old_pid`` is not positive or spawning the helper
+        raises ``OSError``; ``True`` once the helper has been started.
+    """
+    if old_pid <= 0:
+        return False
+
+    # Source of the helper script. argv[1] is the PID to wait on; the
+    # remaining argv entries are the command to launch afterwards.
+    watcher = textwrap.dedent(
+        """
+        import os
+        import subprocess
+        import sys
+        import time
+
+        pid = int(sys.argv[1])
+        cmd = sys.argv[2:]
+        deadline = time.monotonic() + 120
+        while time.monotonic() < deadline:
+            try:
+                os.kill(pid, 0)
+            except ProcessLookupError:
+                break
+            except PermissionError:
+                pass
+            time.sleep(0.2)
+        subprocess.Popen(
+            cmd,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            start_new_session=True,
+        )
+        """
+    ).strip()
+
+    try:
+        # New session and discarded output: the helper is meant to keep
+        # running independently of this process.
+        subprocess.Popen(
+            [sys.executable, "-c", watcher, str(old_pid), *_gateway_run_args_for_profile(profile)],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL,
+            start_new_session=True,
+        )
+    except OSError:
+        return False
+    return True
+
+
 def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
    selected_system = _select_systemd_scope(system)
    unit_exists = get_systemd_unit_path(system=selected_system).exists()
@@ -830,6 +917,22 @@ def _user_dbus_socket_path() -> Path:
    return Path(xdg) / "bus"


+def _user_systemd_private_socket_path() -> Path:
+    """Return the per-user systemd private socket path.
+
+    The path is ``<runtime dir>/systemd/private``, where the runtime dir is
+    ``$XDG_RUNTIME_DIR`` when set and non-empty, else ``/run/user/<uid>``.
+    The path is returned whether or not the socket exists.
+    """
+    runtime_dir = os.environ.get("XDG_RUNTIME_DIR")
+    if not runtime_dir:
+        runtime_dir = f"/run/user/{os.getuid()}"
+    return Path(runtime_dir, "systemd", "private")
+
+
+def _user_systemd_socket_ready() -> bool:
+    """Report whether a user-scope systemd control socket exists.
+
+    Either the user D-Bus socket or the per-user systemd private socket
+    counts. Some distros expose only the private socket, and
+    ``systemctl --user`` can still work there, so preflight checks accept
+    either one.
+    """
+    if _user_dbus_socket_path().exists():
+        return True
+    return _user_systemd_private_socket_path().exists()
+
+
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -853,28 +956,29 @@ def _ensure_user_systemd_env() -> None:


 def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
-    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
+    """Poll for the user systemd runtime socket(s), up to ``timeout`` seconds.

-    Linger-enabled user@.service can take a second or two to spawn the socket
-    after ``loginctl enable-linger`` runs.  Returns True once the socket exists.
+    Linger-enabled user@.service can take a second or two to spawn its control
+    socket(s) after ``loginctl enable-linger`` runs. Returns True once either
+    the user D-Bus socket or the per-user systemd private socket exists.
    """
    import time

    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
-        if _user_dbus_socket_path().exists():
+        if _user_systemd_socket_ready():
            _ensure_user_systemd_env()
            return True
        time.sleep(0.2)
-    return _user_dbus_socket_path().exists()
+    return _user_systemd_socket_ready()


 def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
-    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
+    """Ensure ``systemctl --user`` will reach the user-scope systemd instance.

-    No-op when the bus socket is already there (the common case on desktops
-    and linger-enabled servers).  On fresh SSH sessions where the socket is
-    missing:
+    No-op when the user D-Bus socket or per-user systemd private socket is
+    already there (the common case on desktops and linger-enabled servers). On
+    fresh SSH sessions where both are missing:

    * If linger is already enabled, wait briefly for user@.service to spawn
      the socket.
@@ -888,8 +992,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
    systemd operations and surface the message to the user.
    """
    _ensure_user_systemd_env()
-    bus_path = _user_dbus_socket_path()
-    if bus_path.exists():
+    if _user_systemd_socket_ready():
        return

    import getpass
@@ -903,7 +1006,7 @@ def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
        # Linger is on but socket still missing — unusual; fall through to error.
        _raise_user_systemd_unavailable(
            username,
-            reason="User D-Bus socket is missing even though linger is enabled.",
+            reason="User systemd control sockets are missing even though linger is enabled.",
            fix_hint=(
                f"  systemctl start user@{os.getuid()}.service\n"
                "  (may require sudo; try again after the command succeeds)"
@@ -2350,7 +2453,11 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
    # Exit with code 1 if gateway fails to connect any platform,
    # so systemd Restart=on-failure will retry on transient errors
    verbosity = None if quiet else verbose
-    success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
+    try:
+        success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
+    except KeyboardInterrupt:
+        print("\nGateway stopped.")
+        return
    if not success:
        sys.exit(1)

@@ -2743,15 +2850,77 @@ _PLATFORMS = [
        ],
    },
 ]
+
+
+def _all_platforms() -> list[dict]:
+    """Return every platform offered in the setup menus.
+
+    The result starts with shallow copies of the built-in ``_PLATFORMS``
+    dicts, followed by one adapted dict per ``platform_registry`` entry whose
+    name is not already a built-in key. Adapted dicts carry ``key``,
+    ``label``, ``emoji``, ``token_var`` (first required env var, or ``""``),
+    ``install_hint`` and the source entry under ``_registry_entry``.
+
+    Plugin discovery runs on every call so plugin platforms appear in
+    ``hermes setup gateway`` without the gateway running. A discovery failure
+    is logged at debug level; if the registry cannot be imported, only the
+    built-ins are returned.
+    """
+    # Populate the registry so plugin platforms are visible. Idempotent.
+    # Bundled platform plugins (``kind: platform``) auto-load unconditionally;
+    # user-installed plugins under ~/.hermes/plugins/ still require opt-in via
+    # ``plugins.enabled`` (untrusted code).
+    try:
+        from hermes_cli.plugins import discover_plugins
+        discover_plugins()
+    except Exception as e:
+        logger.debug("plugin discovery failed during platform enumeration: %s", e)
+
+    result = [dict(builtin) for builtin in _PLATFORMS]
+    builtin_keys = {item["key"] for item in result}
+
+    try:
+        from gateway.platform_registry import platform_registry
+    except Exception:
+        return result
+
+    result.extend(
+        {
+            "key": entry.name,
+            "label": entry.label,
+            "emoji": entry.emoji,
+            "token_var": entry.required_env[0] if entry.required_env else "",
+            "install_hint": entry.install_hint,
+            "_registry_entry": entry,
+        }
+        for entry in platform_registry.all_entries()
+        if entry.name not in builtin_keys  # built-in already covers it
+    )
+    return result


 def _platform_status(platform: dict) -> str:
    """Return a plain-text status string for a platform.

    Returns uncolored text so it can safely be embedded in
-    simple_term_menu items (ANSI codes break width calculation).
+    curses menu items (ANSI codes break width calculation).
    """
-    token_var = platform["token_var"]
+    entry = platform.get("_registry_entry")
+    if entry is not None:
+        configured = False
+        # Prefer is_connected (checks both env and config.yaml) over
+        # check_fn (typically just dependency / env presence).
+        if entry.is_connected is not None:
+            try:
+                from gateway.config import PlatformConfig
+                synthetic = PlatformConfig(enabled=True)
+                configured = bool(entry.is_connected(synthetic))
+            except Exception:
+                configured = False
+        if not configured:
+            try:
+                configured = bool(entry.check_fn())
+            except Exception:
+                configured = False
+        return "configured" if configured else "not configured"
+
+    token_var = platform.get("token_var", "")
+    if not token_var:
+        return "not configured"
    val = get_env_value(token_var)
    if token_var == "WHATSAPP_ENABLED":
        if val and val.lower() == "true":
@@ -3277,6 +3446,12 @@ def _setup_weixin():
        print_warning("  Direct messages disabled.")

    print()
+    print_info("  Note: QR login connects an iLink bot identity (e.g. ...@im.bot), not a")
+    print_info("  scriptable personal WeChat account. Ordinary WeChat groups typically cannot")
+    print_info("  invite an @im.bot identity, and iLink does not deliver ordinary-group events")
+    print_info("  to most bot accounts. The settings below only apply when iLink actually")
+    print_info("  delivers group events for your account type — otherwise DM remains the only")
+    print_info("  working channel regardless of this choice.")
    group_choices = [
        "Disable group chats (recommended)",
        "Allow all group chats",
@@ -3290,12 +3465,12 @@ def _setup_weixin():
    elif group_idx == 1:
        save_env_value("WEIXIN_GROUP_POLICY", "open")
        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", "")
-        print_warning("  All group chats enabled.")
+        print_warning("  All group chats enabled (only takes effect if iLink delivers group events).")
    else:
-        allow_groups = prompt("  Allowed group chat IDs (comma-separated)", "", password=False).replace(" ", "")
+        allow_groups = prompt("  Allowed group chat IDs (comma-separated, not member user IDs)", "", password=False).replace(" ", "")
        save_env_value("WEIXIN_GROUP_POLICY", "allowlist")
        save_env_value("WEIXIN_GROUP_ALLOWED_USERS", allow_groups)
-        print_success("  Group allowlist saved.")
+        print_success("  Group allowlist saved (only takes effect if iLink delivers group events).")

    if user_id:
        print()
@@ -3703,6 +3878,71 @@ def _setup_signal():
    print_info(f"  Groups: {'enabled' if get_env_value('SIGNAL_GROUP_ALLOWED_USERS') else 'disabled'}")


+def _builtin_setup_fn(key: str):
+    """Look up the interactive setup function for a built-in platform key.
+
+    ``hermes_cli.setup`` is imported inside the function to avoid a circular
+    import (that module imports from this one for the remaining bespoke
+    flows).
+
+    Args:
+        key: Platform key such as ``"telegram"`` or ``"signal"``.
+
+    Returns:
+        The matching setup callable, or ``None`` when *key* has no built-in
+        setup flow.
+    """
+    from hermes_cli import setup as _s
+
+    handlers = {
+        "telegram": _s._setup_telegram,
+        "discord": _s._setup_discord,
+        "slack": _s._setup_slack,
+        "matrix": _s._setup_matrix,
+        "mattermost": _s._setup_mattermost,
+        "bluebubbles": _s._setup_bluebubbles,
+        "webhooks": _s._setup_webhooks,
+        "signal": _setup_signal,
+        "whatsapp": _setup_whatsapp,
+        "weixin": _setup_weixin,
+        "dingtalk": _setup_dingtalk,
+        "feishu": _setup_feishu,
+        "wecom": _setup_wecom,
+        "qqbot": _setup_qqbot,
+    }
+    return handlers.get(key)
+
+
+def _configure_platform(platform: dict) -> None:
+    """Run the interactive setup flow for one platform dict.
+
+    The first applicable option wins:
+      1. ``setup_fn`` supplied by the plugin's registry entry.
+      2. The built-in setup function for the platform key.
+      3. ``_setup_standard_platform`` when the dict has a ``vars`` schema.
+      4. Printed env-var / config hints for plugins without a setup helper.
+
+    Bundled platform plugins (e.g. IRC) auto-load, so no plugin enable step
+    happens here. User-installed platform plugins under ~/.hermes/plugins/
+    must already be in ``plugins.enabled`` before they appear in this menu.
+    """
+    entry = platform.get("_registry_entry")
+
+    # 1. Plugin-provided helper takes precedence over everything else.
+    if entry is not None and entry.setup_fn is not None:
+        entry.setup_fn()
+        return
+
+    builtin = _builtin_setup_fn(platform["key"])
+    if builtin is not None:
+        # 2. Bespoke built-in flow.
+        builtin()
+    elif platform.get("vars"):
+        # 3. Generic flow driven by the ``vars`` schema.
+        _setup_standard_platform(platform)
+    else:
+        # 4. Plugin with no setup helper — show env-var instructions.
+        name = platform.get("label", platform["key"])
+        icon = platform.get("emoji", "🔌")
+        print()
+        print(color(f"  ─── {icon} {name} Setup ───", Colors.CYAN))
+        env_vars = entry.required_env if entry else []
+        if env_vars:
+            print_info(f"  Set these env vars in ~/.hermes/.env: {', '.join(env_vars)}")
+        else:
+            print_info(f"  Configure {name} in config.yaml under gateway.platforms.{platform['key']}")
+        hint = platform.get("install_hint")
+        if hint:
+            print_info(f"  {hint}")
+
+
 def gateway_setup():
    """Interactive setup for messaging platforms + gateway service."""
    if is_managed():
@@ -3755,42 +3995,36 @@ def gateway_setup():
        print()
        print_header("Messaging Platforms")

-        menu_items = []
-        for plat in _PLATFORMS:
-            status = _platform_status(plat)
-            menu_items.append(f"{plat['label']}  ({status})")
+        platforms = _all_platforms()
+
+        menu_items = [
+            f"{p['emoji']} {p['label']}  ({_platform_status(p)})"
+            for p in platforms
+        ]
        menu_items.append("Done")

        choice = prompt_choice("Select a platform to configure:", menu_items, len(menu_items) - 1)
-
-        if choice == len(_PLATFORMS):
+        if choice == len(platforms):
            break

-        platform = _PLATFORMS[choice]
-
-        if platform["key"] == "whatsapp":
-            _setup_whatsapp()
-        elif platform["key"] == "signal":
-            _setup_signal()
-        elif platform["key"] == "weixin":
-            _setup_weixin()
-        elif platform["key"] == "dingtalk":
-            _setup_dingtalk()
-        elif platform["key"] == "feishu":
-            _setup_feishu()
-        elif platform["key"] == "qqbot":
-            _setup_qqbot()
-        elif platform["key"] == "wecom":
-            _setup_wecom()
-        else:
-            _setup_standard_platform(platform)
+        _configure_platform(platforms[choice])

    # ── Post-setup: offer to install/restart gateway ──
+    # Consider any platform (built-in or plugin) where the user has made
+    # meaningful progress.  ``_platform_status`` already handles plugin
+    # entries via their check_fn and per-platform dual-states like
+    # WhatsApp's "enabled, not paired".
+    def _is_progress(status: str) -> bool:
+        s = status.lower()
+        return not (
+            s == "not configured"
+            or s.startswith("partially")
+            or s.startswith("plugin disabled")
+        )
+
    any_configured = any(
-        bool(get_env_value(p["token_var"]))
-        for p in _PLATFORMS
-        if p["key"] != "whatsapp"
-    ) or (get_env_value("WHATSAPP_ENABLED") or "").lower() == "true"
+        _is_progress(_platform_status(p)) for p in _all_platforms()
+    )

    if any_configured:
        print()
@@ -0,0 +1,535 @@
+"""Persistent session goals — the Ralph loop for Hermes.
+
+A goal is a free-form user objective that stays active across turns. After
+each turn completes, a small judge call asks an auxiliary model "is this
+goal satisfied by the assistant's last response?". If not, Hermes feeds a
+continuation prompt back into the same session and keeps working until the
+goal is done, turn budget is exhausted, the user pauses/clears it, or the
+user sends a new message (which takes priority and pauses the goal loop).
+
+State is persisted in SessionDB's ``state_meta`` table keyed by
+``goal:<session_id>`` so ``/resume`` picks it up.
+
+Design notes / invariants:
+
+- The continuation prompt is just a normal user message appended to the
+  session via ``run_conversation``. No system-prompt mutation, no toolset
+  swap — prompt caching stays intact.
+- Judge failures are fail-OPEN: ``continue``. A broken judge must not wedge
+  progress; the turn budget is the backstop.
+- When a real user message arrives mid-loop it preempts the continuation
+  prompt and also pauses the goal loop for that turn (we still re-judge
+  after, so if the user's message happens to complete the goal the judge
+  will say ``done``).
+- This module has zero hard dependency on ``cli.HermesCLI`` or the gateway
+  runner — both wire the same ``GoalManager`` in.
+
+Nothing in this module touches the agent's system prompt or toolset.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+import time
+from dataclasses import dataclass, asdict
+from typing import Any, Dict, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Constants & defaults
+# ──────────────────────────────────────────────────────────────────────
+
+DEFAULT_MAX_TURNS = 20
+DEFAULT_JUDGE_TIMEOUT = 30.0
+# Cap how much of the last response + recent messages we send to the judge.
+_JUDGE_RESPONSE_SNIPPET_CHARS = 4000
+
+
+CONTINUATION_PROMPT_TEMPLATE = (
+    "[Continuing toward your standing goal]\n"
+    "Goal: {goal}\n\n"
+    "Continue working toward this goal. Take the next concrete step. "
+    "If you believe the goal is complete, state so explicitly and stop. "
+    "If you are blocked and need input from the user, say so clearly and stop."
+)
+
+
+JUDGE_SYSTEM_PROMPT = (
+    "You are a strict judge evaluating whether an autonomous agent has "
+    "achieved a user's stated goal. You receive the goal text and the "
+    "agent's most recent response. Your only job is to decide whether "
+    "the goal is fully satisfied based on that response.\n\n"
+    "A goal is DONE only when:\n"
+    "- The response explicitly confirms the goal was completed, OR\n"
+    "- The response clearly shows the final deliverable was produced, OR\n"
+    "- The response explains the goal is unachievable / blocked / needs "
+    "user input (treat this as DONE with reason describing the block).\n\n"
+    "Otherwise the goal is NOT done — CONTINUE.\n\n"
+    "Reply ONLY with a single JSON object on one line:\n"
+    '{\"done\": <true|false>, \"reason\": \"<one-sentence rationale>\"}'
+)
+
+
+JUDGE_USER_PROMPT_TEMPLATE = (
+    "Goal:\n{goal}\n\n"
+    "Agent's most recent response:\n{response}\n\n"
+    "Is the goal satisfied?"
+)
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Dataclass
+# ──────────────────────────────────────────────────────────────────────
+
+
+@dataclass
+class GoalState:
+    """Per-session goal record that round-trips through JSON."""
+
+    goal: str
+    status: str = "active"          # active | paused | done | cleared
+    turns_used: int = 0
+    max_turns: int = DEFAULT_MAX_TURNS
+    created_at: float = 0.0
+    last_turn_at: float = 0.0
+    last_verdict: Optional[str] = None        # "done" | "continue" | "skipped"
+    last_reason: Optional[str] = None
+    paused_reason: Optional[str] = None       # why we auto-paused (budget, etc.)
+
+    def to_json(self) -> str:
+        """Serialize all fields to a JSON object string (non-ASCII kept as-is)."""
+        payload = asdict(self)
+        return json.dumps(payload, ensure_ascii=False)
+
+    @classmethod
+    def from_json(cls, raw: str) -> "GoalState":
+        """Rebuild a state from JSON produced by ``to_json``.
+
+        Missing keys fall back to the field defaults. Numeric fields that are
+        missing or falsy (``None``, ``0``, ``""``) also fall back to their
+        default before being coerced with ``int`` / ``float``.
+        """
+        payload = json.loads(raw)
+
+        def _number(cast, key, default):
+            # ``or default`` also replaces explicit null / zero values.
+            return cast(payload.get(key, default) or default)
+
+        return cls(
+            goal=payload.get("goal", ""),
+            status=payload.get("status", "active"),
+            turns_used=_number(int, "turns_used", 0),
+            max_turns=_number(int, "max_turns", DEFAULT_MAX_TURNS),
+            created_at=_number(float, "created_at", 0.0),
+            last_turn_at=_number(float, "last_turn_at", 0.0),
+            last_verdict=payload.get("last_verdict"),
+            last_reason=payload.get("last_reason"),
+            paused_reason=payload.get("paused_reason"),
+        )
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Persistence (SessionDB state_meta)
+# ──────────────────────────────────────────────────────────────────────
+
+
+def _meta_key(session_id: str) -> str:
+    """Return the ``state_meta`` key under which a session's goal is stored."""
+    return "goal:{}".format(session_id)
+
+
+_DB_CACHE: Dict[str, Any] = {}
+
+
+def _get_session_db() -> Optional[Any]:
+    """Return the cached SessionDB for the current HERMES_HOME, creating it once.
+
+    SessionDB has no built-in singleton, and opening a new connection per
+    /goal call would thrash the file, so one instance is cached per
+    ``hermes_home`` path; a profile switch therefore gets its own entry.
+
+    Returns:
+        The SessionDB instance, or ``None`` when the imports, home lookup or
+        construction fail. Failures are logged at debug level, not raised,
+        so tests and non-standard launchers can still use the GoalManager.
+    """
+    try:
+        from hermes_constants import get_hermes_home
+        from hermes_state import SessionDB
+
+        home_key = str(get_hermes_home())
+    except Exception as exc:  # pragma: no cover
+        logger.debug("GoalManager: SessionDB bootstrap failed (%s)", exc)
+        return None
+
+    instance = _DB_CACHE.get(home_key)
+    if instance is None:
+        try:
+            instance = SessionDB()
+        except Exception as exc:  # pragma: no cover
+            logger.debug("GoalManager: SessionDB() raised (%s)", exc)
+            return None
+        _DB_CACHE[home_key] = instance
+    return instance
+
+
+def load_goal(session_id: str) -> Optional[GoalState]:
+    """Load the goal stored for ``session_id``.
+
+    None means: empty id, no DB, nothing stored, or an unreadable/unparsable value.
+    """
+    db = _get_session_db() if session_id else None
+    if db is None:
+        return None
+    try:
+        raw = db.get_meta(_meta_key(session_id))
+    except Exception as exc:
+        # Read failures are logged at debug level; parse failures below get a warning.
+        logger.debug("GoalManager: get_meta failed: %s", exc)
+        return None
+    try:
+        return GoalState.from_json(raw) if raw else None
+    except Exception as exc:
+        logger.warning("GoalManager: could not parse stored goal for %s: %s", session_id, exc)
+        return None
+
+
+def save_goal(session_id: str, state: GoalState) -> None:
+    """Write ``state`` to SessionDB under the session's goal key.
+
+    Silently does nothing without a session id or DB; write errors are debug-logged.
+    """
+    db = _get_session_db() if session_id else None
+    if db is not None:
+        try:
+            db.set_meta(_meta_key(session_id), state.to_json())
+        except Exception as exc:
+            logger.debug("GoalManager: set_meta failed: %s", exc)
+
+
+def clear_goal(session_id: str) -> None:
+    """Flag the stored goal as ``cleared``; the record is kept for audit."""
+    stored = load_goal(session_id)
+    # Nothing stored (or nothing loadable) means there is nothing to flag.
+    if stored is not None:
+        stored.status = "cleared"
+        save_goal(session_id, stored)
+
+
+# ──────────────────────────────────────────────────────────────────────
+# Judge
+# ──────────────────────────────────────────────────────────────────────
+
+
+def _truncate(text: str, limit: int) -> str:
+    """Cap ``text`` at ``limit`` chars and flag the cut; falsy input gives ``""``."""
+    if not text:
+        return ""
+    over_limit = len(text) > limit
+    return text[:limit] + "… [truncated]" if over_limit else text
+
+
+_JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL)  # lazy match; unused by the parser below, kept for importers
+
+
+def _parse_judge_response(raw: str) -> Tuple[bool, str]:
+    """Parse the judge's reply into ``(done, reason)``.
+
+    Empty or non-JSON replies fail open to ``(False, "<explanation>")``.
+    """
+    if not raw:
+        return False, "judge returned empty response"
+
+    text = raw.strip()
+
+    # Strip markdown code fences the model may wrap JSON in.
+    if text.startswith("```"):
+        text = text.strip("`")
+        # Peel off a leading json/JSON/etc tag line, but never a line that already opens the payload.
+        first_line, newline, rest = text.partition("\n")
+        if newline and not first_line.lstrip().startswith(("{", "[")):
+            text = rest
+
+    # First try: parse the whole blob.
+    data: Optional[Dict[str, Any]] = None
+    try:
+        data = json.loads(text)
+    except Exception:
+        # Second try: decode the first embedded object; raw_decode honours nesting and quoted braces.
+        decoder, start = json.JSONDecoder(), text.find("{")
+        while start != -1 and data is None:
+            try:
+                data, _ = decoder.raw_decode(text, start)
+            except Exception:
+                start = text.find("{", start + 1)
+
+    if not isinstance(data, dict):
+        return False, f"judge reply was not JSON: {_truncate(raw, 200)!r}"
+
+    done_val = data.get("done")
+    if isinstance(done_val, str):
+        done = done_val.strip().lower() in ("true", "yes", "1", "done")
+    else:
+        done = bool(done_val)
+    reason = str(data.get("reason") or "").strip()
+    if not reason:
+        reason = "no reason provided"
+    return done, reason
+
+
+def judge_goal(
+    goal: str,
+    last_response: str,
+    *,
+    timeout: float = DEFAULT_JUDGE_TIMEOUT,
+) -> Tuple[str, str]:
+    """Ask the auxiliary model whether ``last_response`` satisfies ``goal``.
+
+    Returns ``(verdict, reason)``. The verdict is ``"skipped"`` for a blank
+    goal, ``"done"`` when the judge says so, and ``"continue"`` otherwise.
+
+    Fail-open by design: a blank response, a missing or unconfigured
+    auxiliary client, or a failed API call all yield ``("continue", "...")``,
+    so a broken judge never wedges progress — the turn budget is the backstop.
+    """
+    if not goal.strip():
+        return "skipped", "empty goal"
+    if not last_response.strip():
+        # Nothing substantive came back this turn, so there is nothing to judge.
+        return "continue", "empty response (nothing to evaluate)"
+
+    try:
+        from agent.auxiliary_client import get_text_auxiliary_client
+    except Exception as exc:
+        logger.debug("goal judge: auxiliary client import failed: %s", exc)
+        return "continue", "auxiliary client unavailable"
+
+    try:
+        client, model = get_text_auxiliary_client("goal_judge")
+    except Exception as exc:
+        logger.debug("goal judge: get_text_auxiliary_client failed: %s", exc)
+        return "continue", "auxiliary client unavailable"
+
+    if client is None or not model:
+        return "continue", "no auxiliary client configured"
+
+    # Goal and response are both clipped before they enter the prompt.
+    messages = [
+        {"role": "system", "content": JUDGE_SYSTEM_PROMPT},
+        {
+            "role": "user",
+            "content": JUDGE_USER_PROMPT_TEMPLATE.format(
+                goal=_truncate(goal, 2000),
+                response=_truncate(last_response, _JUDGE_RESPONSE_SNIPPET_CHARS),
+            ),
+        },
+    ]
+
+    try:
+        resp = client.chat.completions.create(
+            model=model, messages=messages, temperature=0, max_tokens=200, timeout=timeout
+        )
+    except Exception as exc:
+        logger.info("goal judge: API call failed (%s) — falling through to continue", exc)
+        return "continue", f"judge error: {type(exc).__name__}"
+
+    try:
+        raw = resp.choices[0].message.content or ""
+    except Exception:
+        raw = ""  # an unexpected response shape is treated as an empty reply
+
+    done, reason = _parse_judge_response(raw)
+    verdict = "done" if done else "continue"
+    logger.info("goal judge: verdict=%s reason=%s", verdict, _truncate(reason, 120))
+    return verdict, reason
+
+
+# ──────────────────────────────────────────────────────────────────────
+# GoalManager — the orchestration surface CLI + gateway talk to
+# ──────────────────────────────────────────────────────────────────────
+
+
+class GoalManager:
+    """Per-session goal state + continuation decisions.
+
+    The CLI and gateway each hold one ``GoalManager`` per live session.
+
+    Methods:
+
+    - ``set(goal)`` — start a new standing goal.
+    - ``clear()`` — remove the active goal.
+    - ``pause()`` / ``resume()`` — explicit user controls.
+    - ``mark_done(reason)`` — record the goal as finished.
+    - ``status_line()`` — printable one-liner.
+    - ``evaluate_after_turn(last_response)`` — call the judge, update state,
+      and return a decision dict the caller uses to drive the next turn.
+    - ``next_continuation_prompt()`` — the canonical user-role message to
+      feed back into ``run_conversation``.
+    """
+
+    def __init__(self, session_id: str, *, default_max_turns: int = DEFAULT_MAX_TURNS):
+        """Bind to ``session_id`` and load whatever goal is stored for it."""
+        self.session_id = session_id
+        self.default_max_turns = int(default_max_turns or DEFAULT_MAX_TURNS)
+        self._state: Optional[GoalState] = load_goal(session_id)
+
+    # --- introspection ------------------------------------------------
+
+    @property
+    def state(self) -> Optional[GoalState]:
+        """The current ``GoalState``, or None when no goal is held."""
+        return self._state
+
+    def is_active(self) -> bool:
+        """True only while a goal exists and its status is ``"active"``."""
+        return self._state is not None and self._state.status == "active"
+
+    def has_goal(self) -> bool:
+        """True while a goal exists and is either active or paused."""
+        return self._state is not None and self._state.status in ("active", "paused")
+
+    def status_line(self) -> str:
+        """Render status, turn usage and goal text as a printable one-liner."""
+        s = self._state
+        if s is None or s.status in ("cleared",):
+            return "No active goal. Set one with /goal <text>."
+        turns = f"{s.turns_used}/{s.max_turns} turns"
+        if s.status == "active":
+            return f"⊙ Goal (active, {turns}): {s.goal}"
+        if s.status == "paused":
+            extra = f" — {s.paused_reason}" if s.paused_reason else ""
+            return f"⏸ Goal (paused, {turns}{extra}): {s.goal}"
+        if s.status == "done":
+            return f"✓ Goal done ({turns}): {s.goal}"
+        return f"Goal ({s.status}, {turns}): {s.goal}"
+
+    # --- mutation -----------------------------------------------------
+
+    def set(self, goal: str, *, max_turns: Optional[int] = None) -> GoalState:
+        """Start and persist a new active goal; raise ValueError if ``goal`` is blank."""
+        goal = (goal or "").strip()
+        if not goal:
+            raise ValueError("goal text is empty")
+        state = GoalState(
+            goal=goal,
+            status="active",
+            turns_used=0,
+            max_turns=int(max_turns) if max_turns else self.default_max_turns,
+            created_at=time.time(),
+            last_turn_at=0.0,
+        )
+        self._state = state
+        save_goal(self.session_id, state)
+        return state
+
+    def pause(self, reason: str = "user-paused") -> Optional[GoalState]:
+        """Mark the goal paused with ``reason`` and persist it; None if there is no goal."""
+        if not self._state:
+            return None
+        self._state.status = "paused"
+        self._state.paused_reason = reason
+        save_goal(self.session_id, self._state)
+        return self._state
+
+    def resume(self, *, reset_budget: bool = True) -> Optional[GoalState]:
+        """Reactivate the goal, by default zeroing ``turns_used``; None if there is no goal."""
+        if not self._state:
+            return None
+        self._state.status = "active"
+        self._state.paused_reason = None
+        if reset_budget:
+            self._state.turns_used = 0
+        save_goal(self.session_id, self._state)
+        return self._state
+
+    def clear(self) -> None:
+        """Persist the goal as ``cleared``, then drop it from this manager."""
+        if self._state is None:
+            return
+        self._state.status = "cleared"
+        save_goal(self.session_id, self._state)
+        self._state = None
+
+    def mark_done(self, reason: str) -> None:
+        """Record the goal as done, with ``reason`` as the last verdict's reason."""
+        if not self._state:
+            return
+        self._state.status = "done"
+        self._state.last_verdict = "done"
+        self._state.last_reason = reason
+        save_goal(self.session_id, self._state)
+
+    # --- the main entry point called after every turn -----------------
+
+    def evaluate_after_turn(
+        self,
+        last_response: str,
+        *,
+        user_initiated: bool = True,
+    ) -> Dict[str, Any]:
+        """Run the judge and update state. Return a decision dict.
+
+        ``user_initiated`` distinguishes a real user prompt (True) from a
+        continuation prompt we fed ourselves (False). It is not consulted:
+        both increment ``turns_used`` because both consume model budget.
+
+        Decision keys:
+          - ``status``: current goal status after update
+          - ``should_continue``: bool — caller should fire another turn
+          - ``continuation_prompt``: str or None
+          - ``verdict``: "done" | "continue" | "skipped" | "inactive"
+          - ``reason``: str
+          - ``message``: user-visible one-liner to print/send
+        """
+        def decide(status, verdict, reason, message, *, prompt=None, should_continue=False):
+            # Single place that fixes the decision dict's keys and their order.
+            return {
+                "status": status,
+                "should_continue": should_continue,
+                "continuation_prompt": prompt,
+                "verdict": verdict,
+                "reason": reason,
+                "message": message,
+            }
+
+        state = self._state
+        if state is None or state.status != "active":
+            return decide(state.status if state else None, "inactive", "no active goal", "")
+
+        # Count the turn that just finished.
+        state.turns_used += 1
+        state.last_turn_at = time.time()
+
+        # Keep the judge's own verdict so "skipped" is reported, not masked as "continue".
+        verdict, reason = judge_goal(state.goal, last_response)
+        state.last_verdict = verdict
+        state.last_reason = reason
+        used = f"{state.turns_used}/{state.max_turns}"
+
+        if verdict == "done":
+            state.status = "done"
+            save_goal(self.session_id, state)
+            return decide("done", "done", reason, f"✓ Goal achieved: {reason}")
+
+        if state.turns_used >= state.max_turns:
+            state.status = "paused"
+            state.paused_reason = f"turn budget exhausted ({used})"
+            save_goal(self.session_id, state)
+            message = (
+                f"⏸ Goal paused — {used} turns used. "
+                "Use /goal resume to keep going, or /goal clear to stop."
+            )
+            return decide("paused", verdict, reason, message)
+
+        save_goal(self.session_id, state)
+        message = f"↻ Continuing toward goal ({used}): {reason}"
+        return decide(
+            "active", verdict, reason, message,
+            prompt=self.next_continuation_prompt(), should_continue=True,
+        )
+
+    def next_continuation_prompt(self) -> Optional[str]:
+        """Continuation prompt for the active goal, or None when no goal is active."""
+        if not self._state or self._state.status != "active":
+            return None
+        return CONTINUATION_PROMPT_TEMPLATE.format(goal=self._state.goal)
+
+
+__all__ = [  # names exported by a star-import of this module
+    "GoalState",
+    "GoalManager",
+    "CONTINUATION_PROMPT_TEMPLATE",
+    "DEFAULT_MAX_TURNS",
+    "load_goal",
+    "save_goal",
+    "clear_goal",
+    "judge_goal",
+]
@@ -16,6 +16,7 @@ import time
 from typing import Any, Dict, List, Optional, Tuple

 from hermes_cli.config import (
+    cfg_get,
    load_config,
    save_config,
    get_env_value,
@@ -716,7 +717,7 @@ def cmd_mcp_configure(args):

    # Update config
    config = load_config()
-    server_entry = config.get("mcp_servers", {}).get(name, {})
+    server_entry = cfg_get(config, "mcp_servers", name, default={})

    if len(chosen) == total:
        # All selected → remove include/exclude (register all)
@@ -96,6 +96,7 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "kimi-coding",
    "kimi-coding-cn",
    "minimax",
+    "minimax-oauth",
    "minimax-cn",
    "alibaba",
    "qwen-oauth",
@@ -539,6 +539,7 @@ def resolve_display_context_length(
    api_key: str = "",
    model_info: Optional[ModelInfo] = None,
    custom_providers: list | None = None,
+    config_context_length: int | None = None,
 ) -> Optional[int]:
    """Resolve the context length to show in /model output.

@@ -565,6 +566,7 @@ def resolve_display_context_length(
            api_key=api_key or "",
            provider=provider or None,
            custom_providers=custom_providers,
+            config_context_length=config_context_length,
        )
        if ctx:
            return int(ctx)
@@ -889,14 +891,19 @@ def switch_model(
    if not validation.get("accepted"):
        override = False
        if user_providers:
-            for up in user_providers:
-                if isinstance(up, dict) and up.get("provider") == target_provider:
-                    cfg_models = up.get("models", [])
-                    if new_model in cfg_models or any(
-                        m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
-                    ):
+            # user_providers is a dict: {provider_slug: config_dict}
+            for slug, cfg in user_providers.items():
+                if slug == target_provider:
+                    cfg_models = cfg.get("models", {})
+                    # Direct membership works for dict (keys) and list (strings)
+                    if new_model in cfg_models:
                        override = True
                        break
+                    # Also accept if models is a list of dicts with 'name' field
+                    if isinstance(cfg_models, list):
+                        if any(m.get("name") == new_model for m in cfg_models if isinstance(m, dict)):
+                            override = True
+                            break
        if override:
            validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
        else:
@@ -1018,6 +1025,37 @@ def list_authenticated_providers(
    results: List[dict] = []
    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
    seen_mdev_ids: set = set()  # prevent duplicate entries for aliases (e.g. kimi-coding + kimi-coding-cn)
+    # Effective base URLs of every built-in row we emit (normalized lower+rstrip).
+    # Section 4 uses this to hide ``custom_providers`` entries that point at the
+    # same endpoint as a built-in (e.g. a user-defined "my-dashscope" on
+    # https://coding-intl.dashscope.aliyuncs.com/v1 collides with the built-in
+    # alibaba-coding-plan row when DASHSCOPE_API_KEY is present). Fixes #16970.
+    _builtin_endpoints: set = set()
+
+    def _norm_url(url: str) -> str:
+        return str(url or "").strip().rstrip("/").lower()
+
+    def _record_builtin_endpoint(slug: str) -> None:
+        """Record the effective base URL for a built-in provider row.
+
+        Prefers the live env-override (e.g. DASHSCOPE_BASE_URL) over the
+        static inference_base_url so the dedup matches what a user typing
+        that URL into custom_providers would actually hit."""
+        try:
+            from hermes_cli.auth import PROVIDER_REGISTRY as _reg
+        except Exception:
+            return
+        pcfg = _reg.get(slug)
+        if not pcfg:
+            return
+        url = ""
+        if getattr(pcfg, "base_url_env_var", ""):
+            url = os.environ.get(pcfg.base_url_env_var, "") or ""
+        if not url:
+            url = getattr(pcfg, "inference_base_url", "") or ""
+        normed = _norm_url(url)
+        if normed:
+            _builtin_endpoints.add(normed)

    data = fetch_models_dev()

@@ -1124,6 +1162,7 @@ def list_authenticated_providers(
        })
        seen_slugs.add(slug.lower())
        seen_mdev_ids.add(mdev_id)
+        _record_builtin_endpoint(slug)

    # --- 2. Check Hermes-only providers (nous, openai-codex, copilot, opencode-go) ---
    from hermes_cli.providers import HERMES_OVERLAYS
@@ -1238,6 +1277,7 @@ def list_authenticated_providers(
        })
        seen_slugs.add(pid.lower())
        seen_slugs.add(hermes_slug.lower())
+        _record_builtin_endpoint(hermes_slug)

    # --- 2b. Cross-check canonical provider list ---
    # Catches providers that are in CANONICAL_PROVIDERS but weren't found
@@ -1317,6 +1357,7 @@ def list_authenticated_providers(
            "source": "canonical",
        })
        seen_slugs.add(_cp.slug.lower())
+        _record_builtin_endpoint(_cp.slug)

    # --- 3. User-defined endpoints from config ---
    # Track (name, base_url) of what section 3 emits so section 4 can skip
@@ -1376,14 +1417,17 @@ def list_authenticated_providers(
                        models_list = list(fb)

            # Prefer the endpoint's live /models list when credentials are
-            # available. This keeps OpenAI-compatible relays (for example CRS)
-            # in sync when the server catalog changes without requiring the
-            # user to mirror every model into config.yaml.
+            # available, unless the provider explicitly opts out via
+            # discover_models: false (e.g. dedicated endpoints that expose
+            # the entire aggregator catalog via /models).
            api_key = str(ep_cfg.get("api_key", "") or "").strip()
            if not api_key:
                key_env = str(ep_cfg.get("key_env", "") or "").strip()
                api_key = os.environ.get(key_env, "").strip() if key_env else ""
-            if api_url and api_key:
+            discover = ep_cfg.get("discover_models", True)
+            if isinstance(discover, str):
+                discover = discover.lower() not in ("false", "no", "0")
+            if api_url and api_key and discover:
                try:
                    from hermes_cli.models import fetch_api_models
                    live_models = fetch_api_models(api_key, api_url)
@@ -1467,7 +1511,14 @@ def list_authenticated_providers(
                    current_base_url
                    and api_url == current_base_url.strip().rstrip("/")
                ):
-                    slug = current_provider or custom_provider_slug(display_name)
+                    # Guard against bare "custom" slug left by a prior
+                    # failed switch — always resolve to the canonical
+                    # custom:<name> form.  (GH #17478)
+                    slug = (
+                        current_provider
+                        if current_provider and current_provider != "custom"
+                        else custom_provider_slug(display_name)
+                    )
                else:
                    slug = custom_provider_slug(display_name)
                groups[group_key] = {
@@ -1526,6 +1577,15 @@ def list_authenticated_providers(
            )
            if _pair_key[0] and _pair_key[1] and _pair_key in _section3_emitted_pairs:
                continue
+            # Skip if a built-in row (sections 1/2/2b) already represents this
+            # endpoint. Fixes #16970: a user-defined "my-dashscope" pointing at
+            # https://coding-intl.dashscope.aliyuncs.com/v1 duplicates the
+            # built-in alibaba-coding-plan row whenever DASHSCOPE_API_KEY is
+            # set. The built-in row carries the curated model list, correct
+            # auth wiring, and canonical slug — keep it and hide the shadow.
+            _grp_url_norm = _pair_key[1]
+            if _grp_url_norm and _grp_url_norm in _builtin_endpoints:
+                continue
            results.append({
                "slug": slug,
                "name": grp["name"],
@@ -40,6 +40,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-sonnet-4.5",     ""),
    ("anthropic/claude-haiku-4.5",      ""),
    ("openrouter/elephant-alpha",       "free"),
+    ("openrouter/owl-alpha",            "free"),
    ("openai/gpt-5.5",                  ""),
    ("openai/gpt-5.4-mini",             ""),
    ("xiaomi/mimo-v2.5-pro",             ""),
@@ -288,6 +289,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "MiniMax-M2.1",
        "MiniMax-M2",
    ],
+    "minimax-oauth": [
+        "MiniMax-M2.7",
+        "MiniMax-M2.7-highspeed",
+    ],
    "minimax-cn": [
        "MiniMax-M2.7",
        "MiniMax-M2.5",
@@ -769,7 +774,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
    ProviderEntry("lmstudio",       "LM Studio",                "LM Studio (local desktop app with built-in model server)"),
-    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
@@ -788,6 +792,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
    ProviderEntry("stepfun",        "StepFun Step Plan",       "StepFun Step Plan (agent/coding models via Step Plan API)"),
    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
+    ProviderEntry("minimax-oauth",  "MiniMax (OAuth)",          "MiniMax via OAuth browser login (Coding Plan, minimax.io)"),
    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
    ProviderEntry("ollama-cloud",   "Ollama Cloud",             "Ollama Cloud (cloud-hosted open models — ollama.com)"),
@@ -798,6 +803,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
    ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
    ProviderEntry("azure-foundry",  "Azure Foundry",            "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
+    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway"),
 ]

 # Derived dicts — used throughout the codebase
@@ -831,6 +837,9 @@ _PROVIDER_ALIASES = {
    "gmicloud": "gmi",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
+    "minimax-portal": "minimax-oauth",
+    "minimax-global": "minimax-oauth",
+    "minimax_oauth": "minimax-oauth",
    "claude": "anthropic",
    "claude-code": "anthropic",
    "deep-seek": "deepseek",
@@ -2026,28 +2035,56 @@ def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
        return None

    headers: dict[str, str] = {"anthropic-version": "2023-06-01"}
-    if _is_oauth_token(token):
+    is_oauth = _is_oauth_token(token)
+    if is_oauth:
        headers["Authorization"] = f"Bearer {token}"
-        from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS
+        from agent.anthropic_adapter import _COMMON_BETAS, _OAUTH_ONLY_BETAS, _CONTEXT_1M_BETA
        headers["anthropic-beta"] = ",".join(_COMMON_BETAS + _OAUTH_ONLY_BETAS)
    else:
        headers["x-api-key"] = token

-    req = urllib.request.Request(
-        "https://api.anthropic.com/v1/models",
-        headers=headers,
-    )
-    try:
+    def _do_request(h: dict[str, str]):
+        req = urllib.request.Request(
+            "https://api.anthropic.com/v1/models",
+            headers=h,
+        )
        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            data = json.loads(resp.read().decode())
-            models = [m["id"] for m in data.get("data", []) if m.get("id")]
-            # Sort: latest/largest first (opus > sonnet > haiku, higher version first)
-            return sorted(models, key=lambda m: (
-                "opus" not in m,      # opus first
-                "sonnet" not in m,    # then sonnet
-                "haiku" not in m,     # then haiku
-                m,                    # alphabetical within tier
-            ))
+            return json.loads(resp.read().decode())
+
+    try:
+        try:
+            data = _do_request(headers)
+        except urllib.error.HTTPError as http_err:
+            # Reactive recovery for OAuth subscriptions that reject the 1M
+            # context beta with 400 "long context beta is not yet available
+            # for this subscription". Retry once without the beta; re-raise
+            # anything else so the outer except logs it.
+            if (
+                is_oauth
+                and http_err.code == 400
+            ):
+                try:
+                    body_text = http_err.read().decode(errors="ignore").lower()
+                except Exception:
+                    body_text = ""
+                if "long context beta" in body_text and "not yet available" in body_text:
+                    headers["anthropic-beta"] = ",".join(
+                        [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
+                        + list(_OAUTH_ONLY_BETAS)
+                    )
+                    data = _do_request(headers)
+                else:
+                    raise
+            else:
+                raise
+        models = [m["id"] for m in data.get("data", []) if m.get("id")]
+        # Sort: latest/largest first (opus > sonnet > haiku, higher version first)
+        return sorted(models, key=lambda m: (
+            "opus" not in m,      # opus first
+            "sonnet" not in m,    # then sonnet
+            "haiku" not in m,     # then haiku
+            m,                    # alphabetical within tier
+        ))
    except Exception as e:
        import logging
        logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)
@@ -3,7 +3,8 @@
 Bypasses cli.py entirely.  No banner, no spinner, no session_id line,
 no stderr chatter.  Just the agent's final text to stdout.

-Toolsets = whatever the user has configured for "cli" in `hermes tools`.
+Toolsets = explicit --toolsets when provided, otherwise whatever the user has
+configured for "cli" in `hermes tools`.
 Rules / memory / AGENTS.md / preloaded skills = same as a normal chat turn.
 Approvals = auto-bypassed (HERMES_YOLO_MODE=1 is set for the call).
 Working directory = the user's CWD (AGENTS.md etc. resolve from there as usual).
@@ -28,10 +29,103 @@ from contextlib import redirect_stderr, redirect_stdout
 from typing import Optional


+def _normalize_toolsets(toolsets: object = None) -> list[str] | None:
+    """Flatten ``toolsets`` into a list of trimmed, non-empty names.
+
+    Accepts a comma-separated string or any iterable of names (each string
+    item may itself be comma-separated); other values are stringified.
+    Returns None when nothing usable remains.
+    """
+    if not toolsets:
+        return None
+    if isinstance(toolsets, (str, bytes)) or not hasattr(toolsets, "__iter__"):
+        toolsets = [toolsets]  # a lone value; bytes must not be split into ints
+    names: list[str] = []
+    for item in toolsets:
+        parts = item.split(",") if isinstance(item, str) else [str(item)]
+        names.extend(part.strip() for part in parts)
+    return [name for name in names if name] or None
+
+
+def _validate_explicit_toolsets(toolsets: object = None) -> tuple[list[str] | None, str | None]:
+    """Resolve explicit ``--toolsets`` names into ``(valid_names, error_message)``.
+
+    ``(None, None)``: nothing was requested, or a validated ``all``/``*`` entry
+    was present. ``(None, message)``: an error for the caller to report. Names
+    are tried as toolsets, retried after plugin discovery, then matched against
+    MCP servers in the raw config; unknown or disabled ones go to stderr.
+    """
+    requested = _normalize_toolsets(toolsets)
+    if requested is None:
+        return None, None
+
+    try:
+        from toolsets import validate_toolset
+    except Exception as exc:
+        return None, f"hermes -z: failed to validate --toolsets: {exc}\n"
+
+    accepted = [name for name in requested if validate_toolset(name)]
+    pending = [name for name in requested if name not in accepted]
+
+    if pending:
+        # Retry the leftovers once plugins have been discovered.
+        try:
+            from hermes_cli.plugins import discover_plugins
+            discover_plugins()
+            from_plugins = [name for name in pending if validate_toolset(name)]
+        except Exception:
+            from_plugins = []
+        accepted += from_plugins
+        pending = [name for name in pending if name not in from_plugins]
+
+    wildcards = {"all", "*"}
+    if not wildcards.isdisjoint(accepted):
+        extras = [name for name in requested if name not in wildcards]
+        if extras:
+            sys.stderr.write(
+                "hermes -z: --toolsets all enables every toolset; "
+                f"ignoring additional entries: {', '.join(extras)}\n"
+            )
+        return None, None
+
+    enabled_mcp: set[str] = set()
+    disabled_mcp: set[str] = set()
+    if pending:
+        try:
+            from hermes_cli.config import read_raw_config
+            from hermes_cli.tools_config import _parse_enabled_flag
+
+            servers = read_raw_config().get("mcp_servers")
+            for name, server_cfg in (servers if isinstance(servers, dict) else {}).items():
+                if isinstance(server_cfg, dict):
+                    is_on = _parse_enabled_flag(server_cfg.get("enabled", True), default=True)
+                    (enabled_mcp if is_on else disabled_mcp).add(str(name))
+        except Exception:
+            enabled_mcp, disabled_mcp = set(), set()
+
+    from_mcp = [name for name in pending if name in enabled_mcp]
+    switched_off = [name for name in pending if name in disabled_mcp]
+    unknown = [name for name in pending if not (name in enabled_mcp or name in disabled_mcp)]
+    valid = accepted + from_mcp
+
+    if unknown:
+        sys.stderr.write(f"hermes -z: ignoring unknown --toolsets entries: {', '.join(unknown)}\n")
+    if switched_off:
+        sys.stderr.write(
+            "hermes -z: ignoring disabled MCP servers (set enabled: true in config.yaml to use): "
+            f"{', '.join(switched_off)}\n"
+        )
+
+    if valid:
+        return valid, None
+    return None, "hermes -z: --toolsets did not contain any valid toolsets.\n"
+
+
 def run_oneshot(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
+    toolsets: object = None,
 ) -> int:
    """Execute a single prompt and print only the final content block.

@@ -42,6 +136,7 @@ def run_oneshot(
        provider: Optional provider override. Falls back to
            HERMES_INFERENCE_PROVIDER env var, then config.yaml's model.provider,
            then "auto".
+        toolsets: Optional comma-separated string or iterable of toolsets.

    Returns the exit code.  Caller should sys.exit() with the return.
    """
@@ -65,6 +160,12 @@ def run_oneshot(
        )
        return 2

+    explicit_toolsets, toolsets_error = _validate_explicit_toolsets(toolsets)
+    if toolsets_error:
+        sys.stderr.write(toolsets_error)
+        return 2
+    use_config_toolsets = _normalize_toolsets(toolsets) is None
+
    # Auto-approve any shell / tool approvals.  Non-interactive by
    # definition — a prompt would hang forever.
    os.environ["HERMES_YOLO_MODE"] = "1"
@@ -77,7 +178,13 @@ def run_oneshot(

    try:
        with redirect_stdout(devnull), redirect_stderr(devnull):
-            response = _run_agent(prompt, model=model, provider=provider)
+            response = _run_agent(
+                prompt,
+                model=model,
+                provider=provider,
+                toolsets=explicit_toolsets,
+                use_config_toolsets=use_config_toolsets,
+            )
    finally:
        try:
            devnull.close()
@@ -96,6 +203,8 @@ def _run_agent(
    prompt: str,
    model: Optional[str] = None,
    provider: Optional[str] = None,
+    toolsets: object = None,
+    use_config_toolsets: bool = True,
 ) -> str:
    """Build an AIAgent exactly like a normal CLI chat turn would, then
    run a single conversation.  Returns the final response string."""
@@ -168,9 +277,12 @@ def _run_agent(
        explicit_base_url=explicit_base_url_from_alias,
    )

-    # Pull in whatever toolsets the user has enabled for "cli".
-    # sorted() gives stable ordering; set→list for AIAgent's signature.
-    toolsets_list = sorted(_get_platform_tools(cfg, "cli"))
+    # Pull in explicit toolsets when provided; otherwise use whatever the user
+    # has enabled for "cli". sorted() gives stable ordering for config-derived
+    # sets; explicit values preserve user order.
+    toolsets_list = _normalize_toolsets(toolsets)
+    if toolsets_list is None and use_config_toolsets:
+        toolsets_list = sorted(_get_platform_tools(cfg, "cli"))

    agent = AIAgent(
        api_key=runtime.get("api_key"),
@@ -44,6 +44,40 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([


 def platform_label(key: str, default: str = "") -> str:
-    """Return the display label for a platform key, or *default*."""
+    """Return the display label for a platform key, or *default*.
+
+    Checks the static PLATFORMS dict first, then the plugin platform
+    registry for dynamically registered platforms.
+    """
    info = PLATFORMS.get(key)
-    return info.label if info is not None else default
+    if info is not None:
+        return info.label
+    # Check plugin registry
+    try:
+        from gateway.platform_registry import platform_registry
+        entry = platform_registry.get(key)
+        if entry:
+            return f"{entry.emoji}  {entry.label}" if entry.emoji else entry.label
+    except Exception:
+        pass
+    return default
+
+
+def get_all_platforms() -> "OrderedDict[str, PlatformInfo]":
+    """Return a copy of PLATFORMS with plugin-registered platforms appended.
+
+    Built-in keys are never overwritten and registry errors are swallowed.
+    tools_config and skills_config should build platform menus from this.
+    """
+    combined = OrderedDict(PLATFORMS)
+    try:
+        from gateway.platform_registry import platform_registry
+        for plugin in platform_registry.plugin_entries():
+            if plugin.name not in combined:
+                display = f"{plugin.emoji}  {plugin.label}" if plugin.emoji else plugin.label
+                combined[plugin.name] = PlatformInfo(
+                    label=display, default_toolset=f"hermes-{plugin.name}"
+                )
+    except Exception:
+        pass
+    return combined
@@ -33,11 +33,15 @@ so plugin-defined tools appear alongside the built-in tools.

 from __future__ import annotations

+import asyncio
 import importlib
 import importlib.metadata
 import importlib.util
+import inspect
 import logging
+import os
 import sys
+import threading
 import types
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -45,6 +49,20 @@ from typing import Any, Callable, Dict, List, Optional, Set, Union

 from hermes_constants import get_hermes_home
 from utils import env_var_enabled
+from hermes_cli.config import cfg_get
+
+
+def get_bundled_plugins_dir() -> Path:
+    """Return the directory that holds the plugins bundled with Hermes.
+
+    A non-empty ``HERMES_BUNDLED_PLUGINS`` (set by the Nix wrapper / packaged
+    installs) takes precedence and is returned as a ``Path`` unchanged;
+    otherwise the ``plugins`` directory two levels above this module is used.
+    """
+    override = os.environ.get("HERMES_BUNDLED_PLUGINS")
+    if not override:
+        return Path(__file__).resolve().parents[1] / "plugins"
+    return Path(override)

 try:
    import yaml
@@ -115,7 +133,7 @@ def _get_disabled_plugins() -> set:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        disabled = config.get("plugins", {}).get("disabled", [])
+        disabled = cfg_get(config, "plugins", "disabled", default=[])
        return set(disabled) if isinstance(disabled, list) else set()
    except Exception:
        return set()
@@ -155,7 +173,7 @@ def _get_enabled_plugins() -> Optional[set]:
 # Data classes
 # ---------------------------------------------------------------------------

-_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"}
+_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive", "platform"}


@dataclass
@@ -181,6 +199,11 @@ class PluginManifest:
    #              Selection via ``<category>.provider`` config key; the
    #              category's own discovery system handles loading and the
    #              general scanner skips these.
+    # ``platform``: gateway messaging platform adapter (e.g. IRC). Bundled
+    #              platform plugins auto-load so every shipped platform is
+    #              available out of the box; user-installed platform plugins
+    #              in ~/.hermes/plugins/ are still gated by ``plugins.enabled``
+    #              (untrusted code).
    kind: str = "standalone"
    # Registry key — path-derived, used by ``plugins.enabled``/``disabled``
    # lookups and by ``hermes plugins list``. For a flat plugin at
@@ -444,6 +467,62 @@ class PluginContext:
            self.manifest.name, provider.name,
        )

+    # -- platform adapter registration ---------------------------------------
+
+    def register_platform(
+        self,
+        name: str,
+        label: str,
+        adapter_factory: Callable,
+        check_fn: Callable,
+        validate_config: Callable | None = None,
+        required_env: list | None = None,
+        install_hint: str = "",
+        **entry_kwargs: Any,
+    ) -> None:
+        """Register a gateway platform adapter.
+
+        The adapter_factory receives a ``PlatformConfig`` and returns a
+        ``BasePlatformAdapter`` subclass instance.  The gateway calls
+        ``check_fn()`` before instantiation to verify dependencies.
+
+        Extra keyword arguments are forwarded to ``PlatformEntry`` (e.g.
+        ``setup_fn``, ``emoji``, ``allowed_users_env``, ``platform_hint``).
+        Unknown keys raise TypeError from the dataclass constructor.
+
+        Example::
+
+            ctx.register_platform(
+                name="irc",
+                label="IRC",
+                adapter_factory=lambda cfg: IRCAdapter(cfg),
+                check_fn=lambda: True,
+                emoji="💬",
+                setup_fn=irc_interactive_setup,
+            )
+        """
+        from gateway.platform_registry import platform_registry, PlatformEntry
+
+        entry_kwargs.setdefault("plugin_name", self.manifest.name)
+        entry = PlatformEntry(
+            name=name,
+            label=label,
+            adapter_factory=adapter_factory,
+            check_fn=check_fn,
+            validate_config=validate_config,
+            required_env=required_env or [],
+            install_hint=install_hint,
+            source="plugin",
+            **entry_kwargs,
+        )
+        platform_registry.register(entry)
+        self._manager._plugin_platform_names.add(name)
+        logger.debug(
+            "Plugin %s registered platform: %s",
+            self.manifest.name,
+            name,
+        )
+
    # -- hook registration --------------------------------------------------

    def register_hook(self, hook_name: str, callback: Callable) -> None:
@@ -522,6 +601,7 @@ class PluginManager:
        self._plugins: Dict[str, LoadedPlugin] = {}
        self._hooks: Dict[str, List[Callable]] = {}
        self._plugin_tool_names: Set[str] = set()
+        self._plugin_platform_names: Set[str] = set()
        self._cli_commands: Dict[str, dict] = {}
        self._context_engine = None  # Set by a plugin via register_context_engine()
        self._plugin_commands: Dict[str, dict] = {}  # Slash commands registered by plugins
@@ -564,16 +644,19 @@ class PluginManager:
        #   - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
        #
        # ``memory/`` and ``context_engine/`` are skipped at the top level —
-        # they have their own discovery systems. Porting those to the
-        # category-namespace ``kind: exclusive`` model is a future PR.
-        repo_plugins = Path(__file__).resolve().parent.parent / "plugins"
+        # they have their own discovery systems. ``platforms/`` is a category
+        # holding platform adapters (scanned one level deeper below).
+        repo_plugins = get_bundled_plugins_dir()
        manifests.extend(
            self._scan_directory(
                repo_plugins,
                source="bundled",
-                skip_names={"memory", "context_engine"},
+                skip_names={"memory", "context_engine", "platforms"},
            )
        )
+        manifests.extend(
+            self._scan_directory(repo_plugins / "platforms", source="bundled")
+        )

        # 2. User plugins (~/.hermes/plugins/)
        user_dir = get_hermes_home() / "plugins"
@@ -630,7 +713,11 @@ class PluginManager:
            # just work. Selection among them (e.g. which image_gen backend
            # services calls) is driven by ``<category>.provider`` config,
            # enforced by the tool wrapper.
-            if manifest.kind == "backend" and manifest.source == "bundled":
+            #
+            # Bundled platform plugins (gateway adapters like IRC) auto-load
+            # for the same reason: every platform Hermes ships must be
+            # available out of the box without the user having to opt in.
+            if manifest.source == "bundled" and manifest.kind in ("backend", "platform"):
                self._load_plugin(manifest)
                continue

@@ -1142,6 +1229,55 @@ def get_plugin_command_handler(name: str) -> Optional[Callable]:
    return entry["handler"] if entry else None


+_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS = 30.0
+
+
+def resolve_plugin_command_result(result: Any) -> Any:
+    """Resolve a plugin command return value, awaiting async handlers when needed.
+
+    Sync CLI/TUI dispatch sites call plugin handlers from plain functions.
+    Non-awaitable values are returned unchanged.  An awaitable is driven with
+    ``asyncio.run`` directly when no loop is running in this thread; inside an
+    active loop it is driven on a helper thread with its own loop, bounded by
+    ``_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS`` so a hung handler cannot wedge the
+    terminal (raises ``TimeoutError``; the daemon thread is left running).
+    """
+    if not inspect.isawaitable(result):
+        return result
+
+    async def _drive() -> Any:
+        # asyncio.run() rejects non-coroutine awaitables (Futures, objects
+        # with __await__), so wrap whatever we were given in a coroutine.
+        return await result
+
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        return asyncio.run(_drive())
+
+    outcome: Dict[str, Any] = {}
+    failure: Dict[str, BaseException] = {}
+    done = threading.Event()
+
+    def _runner() -> None:
+        try:
+            outcome["value"] = asyncio.run(_drive())
+        except BaseException as exc:  # pragma: no cover - re-raised below
+            failure["exc"] = exc
+        finally:
+            done.set()
+
+    threading.Thread(target=_runner, name="hermes-plugin-command-await", daemon=True).start()
+    if not done.wait(timeout=_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS):
+        raise TimeoutError(
+            "Plugin command async handler did not complete within "
+            f"{_PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS:.0f}s"
+        )
+    if "exc" in failure:
+        raise failure["exc"]
+    return outcome.get("value")
+
+
 def get_plugin_commands() -> Dict[str, dict]:
    """Return the full plugin commands dict (name → {handler, description, plugin}).

@@ -15,12 +15,18 @@ import shutil
 import subprocess
 import sys
 from pathlib import Path
-from typing import Optional
+from typing import Any, Optional

 from hermes_constants import get_hermes_home
+from hermes_cli.config import cfg_get

 logger = logging.getLogger(__name__)

+
+class PluginOperationError(Exception):
+    """Recoverable plugin install/update failure (CLI exits; HTTP maps to 4xx)."""
+
+
 # Minimum manifest version this installer understands.
 # Plugins may declare ``manifest_version: 1`` in plugin.yaml;
 # future breaking changes to the manifest schema bump this.
@@ -149,6 +155,24 @@ def _copy_example_files(plugin_dir: Path, console) -> None:
                )


+def _missing_requires_env_names(manifest: dict) -> list[str]:
+    """Return the declared ``requires_env`` names that ``get_env_value`` finds unset."""
+    declared = manifest.get("requires_env") or []
+    if not declared:
+        return []
+
+    from hermes_cli.config import get_env_value
+
+    # Entries are either bare names or dicts with a non-empty ``name``;
+    # anything else is skipped.  Declaration order is preserved.
+    names = [
+        item if isinstance(item, str) else item["name"]
+        for item in declared
+        if isinstance(item, str) or (isinstance(item, dict) and item.get("name"))
+    ]
+    return [name for name in names if name and not get_env_value(name)]
+
+
 def _prompt_plugin_env_vars(manifest: dict, console) -> None:
    """Prompt for required environment variables declared in plugin.yaml.

@@ -282,6 +306,95 @@ def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
 # ---------------------------------------------------------------------------


+def _install_plugin_core(identifier: str, *, force: bool) -> tuple[Path, dict, str]:
+    """Clone Git plugin into ``~/.hermes/plugins``.
+
+    Returns ``(target_dir, installed_manifest, canonical_name)``.
+    Raises ``PluginOperationError`` on failure.
+    """
+    import tempfile
+
+    try:
+        git_url = _resolve_git_url(identifier)
+    except ValueError as e:
+        raise PluginOperationError(str(e)) from e
+
+    plugins_dir = _plugins_dir()
+
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp_target = Path(tmp) / "plugin"
+
+        try:
+            result = subprocess.run(
+                ["git", "clone", "--depth", "1", git_url, str(tmp_target)],
+                capture_output=True,
+                text=True,
+                timeout=60,
+            )
+        except FileNotFoundError as e:
+            raise PluginOperationError(
+                "git is not installed or not in PATH.",
+            ) from e
+        except subprocess.TimeoutExpired as e:
+            raise PluginOperationError(
+                "Git clone timed out after 60 seconds.",
+            ) from e
+
+        if result.returncode != 0:
+            err = (result.stderr or result.stdout or "").strip()
+            raise PluginOperationError(f"Git clone failed:\n{err}")
+
+        manifest = _read_manifest(tmp_target)
+        plugin_name = manifest.get("name") or _repo_name_from_url(git_url)
+
+        try:
+            target = _sanitize_plugin_name(plugin_name, plugins_dir)
+        except ValueError as e:
+            raise PluginOperationError(str(e)) from e
+
+        mv = manifest.get("manifest_version")
+        if mv is not None:
+            try:
+                mv_int = int(mv)
+            except (ValueError, TypeError):
+                raise PluginOperationError(
+                    f"Plugin '{plugin_name}' has invalid manifest_version "
+                    f"'{mv}' (expected an integer).",
+                ) from None
+            if mv_int > _SUPPORTED_MANIFEST_VERSION:
+                from hermes_cli.config import recommended_update_command
+
+                raise PluginOperationError(
+                    f"Plugin '{plugin_name}' requires manifest_version {mv}, "
+                    f"but this installer only supports up to {_SUPPORTED_MANIFEST_VERSION}. "
+                    f"Run {recommended_update_command()} to update Hermes.",
+                ) from None
+
+        if target.exists():
+            if not force:
+                raise PluginOperationError(
+                    f"Plugin '{plugin_name}' already exists. Use force reinstall "
+                    f"or run `hermes plugins update {plugin_name}`.",
+                )
+            shutil.rmtree(target)
+
+        shutil.move(str(tmp_target), str(target))
+
+    has_yaml = (target / "plugin.yaml").exists() or (target / "plugin.yml").exists()
+    if not has_yaml and not (target / "__init__.py").exists():
+        logger.warning(
+            "%s has no plugin.yaml / __init__.py; may not be a valid plugin",
+            plugin_name,
+        )
+
+    from rich.console import Console
+
+    _copy_example_files(target, Console())
+    installed_manifest = _read_manifest(target)
+    installed_name = installed_manifest.get("name") or target.name
+    return target, installed_manifest, installed_name
+
+
 def cmd_install(
    identifier: str,
    force: bool = False,
@@ -292,7 +405,6 @@ def cmd_install(
    After install, prompt "Enable now? [y/N]" unless *enable* is provided
    (True = auto-enable without prompting, False = install disabled).
    """
-    import tempfile
    from rich.console import Console

    console = Console()
@@ -303,114 +415,41 @@ def cmd_install(
        console.print(f"[red]Error:[/red] {e}")
        sys.exit(1)

-    # Warn about insecure / local URL schemes
    if git_url.startswith(("http://", "file://")):
        console.print(
            "[yellow]Warning:[/yellow] Using insecure/local URL scheme. "
-            "Consider using https:// or git@ for production installs."
+            "Consider using https:// or git@ for production installs.",
        )

-    plugins_dir = _plugins_dir()
+    console.print(f"[dim]Cloning {git_url}...[/dim]")

-    # Clone into a temp directory first so we can read plugin.yaml for the name
-    with tempfile.TemporaryDirectory() as tmp:
-        tmp_target = Path(tmp) / "plugin"
-        console.print(f"[dim]Cloning {git_url}...[/dim]")
+    try:
+        target, installed_manifest, installed_name = _install_plugin_core(
+            identifier,
+            force=force,
+        )
+    except PluginOperationError as e:
+        console.print(f"[red]Error:[/red] {e}")
+        sys.exit(1)

-        try:
-            result = subprocess.run(
-                ["git", "clone", "--depth", "1", git_url, str(tmp_target)],
-                capture_output=True,
-                text=True,
-                timeout=60,
-            )
-        except FileNotFoundError:
-            console.print("[red]Error:[/red] git is not installed or not in PATH.")
-            sys.exit(1)
-        except subprocess.TimeoutExpired:
-            console.print("[red]Error:[/red] Git clone timed out after 60 seconds.")
-            sys.exit(1)
-
-        if result.returncode != 0:
-            console.print(
-                f"[red]Error:[/red] Git clone failed:\n{result.stderr.strip()}"
-            )
-            sys.exit(1)
-
-        # Read manifest
-        manifest = _read_manifest(tmp_target)
-        plugin_name = manifest.get("name") or _repo_name_from_url(git_url)
-
-        # Sanitize plugin name against path traversal
-        try:
-            target = _sanitize_plugin_name(plugin_name, plugins_dir)
-        except ValueError as e:
-            console.print(f"[red]Error:[/red] {e}")
-            sys.exit(1)
-
-        # Check manifest_version compatibility
-        mv = manifest.get("manifest_version")
-        if mv is not None:
-            try:
-                mv_int = int(mv)
-            except (ValueError, TypeError):
-                console.print(
-                    f"[red]Error:[/red] Plugin '{plugin_name}' has invalid "
-                    f"manifest_version '{mv}' (expected an integer)."
-                )
-                sys.exit(1)
-            if mv_int > _SUPPORTED_MANIFEST_VERSION:
-                from hermes_cli.config import recommended_update_command
-                console.print(
-                    f"[red]Error:[/red] Plugin '{plugin_name}' requires manifest_version "
-                    f"{mv}, but this installer only supports up to {_SUPPORTED_MANIFEST_VERSION}.\n"
-                    f"Run [bold]{recommended_update_command()}[/bold] to get a newer installer."
-                )
-                sys.exit(1)
-
-        if target.exists():
-            if not force:
-                console.print(
-                    f"[red]Error:[/red] Plugin '{plugin_name}' already exists at {target}.\n"
-                    f"Use [bold]--force[/bold] to remove and reinstall, or "
-                    f"[bold]hermes plugins update {plugin_name}[/bold] to pull latest."
-                )
-                sys.exit(1)
-            console.print(f"[dim]  Removing existing {plugin_name}...[/dim]")
-            shutil.rmtree(target)
-
-        # Move from temp to final location
-        shutil.move(str(tmp_target), str(target))
-
-    # Validate it looks like a plugin
-    if not (target / "plugin.yaml").exists() and not (target / "__init__.py").exists():
+    if not (target / "plugin.yaml").exists() and not (target / "plugin.yml").exists() and not (
+        target / "__init__.py"
+    ).exists():
        console.print(
-            f"[yellow]Warning:[/yellow] {plugin_name} doesn't contain plugin.yaml "
-            f"or __init__.py. It may not be a valid Hermes plugin."
+            f"[yellow]Warning:[/yellow] {installed_name} doesn't contain plugin.yaml "
+            f"or __init__.py. It may not be a valid Hermes plugin.",
        )

-    # Copy .example files to their real names (e.g. config.yaml.example → config.yaml)
-    _copy_example_files(target, console)
-
-    # Re-read manifest from installed location (for env var prompting)
-    installed_manifest = _read_manifest(target)
-
-    # Prompt for required environment variables before showing after-install docs
    _prompt_plugin_env_vars(installed_manifest, console)

    _display_after_install(target, identifier)

-    # Determine the canonical plugin name for enable-list bookkeeping.
-    installed_name = installed_manifest.get("name") or target.name
-
-    # Decide whether to enable: explicit flag > interactive prompt > default off
    should_enable = enable
    if should_enable is None:
-        # Interactive prompt unless stdin isn't a TTY (scripted install).
        if sys.stdin.isatty() and sys.stdout.isatty():
            try:
                answer = input(
-                    f"  Enable '{installed_name}' now? [y/N]: "
+                    f"  Enable '{installed_name}' now? [y/N]: ",
                ).strip().lower()
                should_enable = answer in ("y", "yes")
            except (EOFError, KeyboardInterrupt):
@@ -426,12 +465,12 @@ def cmd_install(
        _save_enabled_set(enabled)
        _save_disabled_set(disabled)
        console.print(
-            f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled."
+            f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled.",
        )
    else:
        console.print(
            f"[dim]Plugin installed but not enabled. "
-            f"Run `hermes plugins enable {installed_name}` to activate.[/dim]"
+            f"Run `hermes plugins enable {installed_name}` to activate.[/dim]",
        )

    console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]")
@@ -461,36 +500,22 @@ def cmd_update(name: str) -> None:

    console.print(f"[dim]Updating {name}...[/dim]")

-    try:
-        result = subprocess.run(
-            ["git", "pull", "--ff-only"],
-            capture_output=True,
-            text=True,
-            timeout=60,
-            cwd=str(target),
-        )
-    except FileNotFoundError:
-        console.print("[red]Error:[/red] git is not installed or not in PATH.")
-        sys.exit(1)
-    except subprocess.TimeoutExpired:
-        console.print("[red]Error:[/red] Git pull timed out after 60 seconds.")
-        sys.exit(1)
-
-    if result.returncode != 0:
-        console.print(f"[red]Error:[/red] Git pull failed:\n{result.stderr.strip()}")
+    ok, output = _git_pull_plugin_dir(target)
+    if not ok:
+        console.print(f"[red]Error:[/red] {output}")
        sys.exit(1)

    # Copy any new .example files
    _copy_example_files(target, console)

-    output = result.stdout.strip()
-    if "Already up to date" in output:
+    out = output.strip()
+    if "Already up to date" in out:
        console.print(
            f"[green]✓[/green] Plugin [bold]{name}[/bold] is already up to date."
        )
    else:
        console.print(f"[green]✓[/green] Plugin [bold]{name}[/bold] updated.")
-        console.print(f"[dim]{output}[/dim]")
+        console.print(f"[dim]{out}[/dim]")


 def cmd_remove(name: str) -> None:
@@ -519,7 +544,7 @@ def _get_disabled_set() -> set:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        disabled = config.get("plugins", {}).get("disabled", [])
+        disabled = cfg_get(config, "plugins", "disabled", default=[])
        return set(disabled) if isinstance(disabled, list) else set()
    except Exception:
        return set()
@@ -629,10 +654,9 @@ def _plugin_exists(name: str) -> bool:
            manifest = _read_manifest(child)
            if manifest.get("name") == name:
                return True
-    # Bundled: <repo>/plugins/<name>/
-    from pathlib import Path as _P
-    import hermes_cli
-    repo_plugins = _P(hermes_cli.__file__).resolve().parent.parent / "plugins"
+    # Bundled: <repo>/plugins/<name>/ (or HERMES_BUNDLED_PLUGINS on Nix).
+    from hermes_cli.plugins import get_bundled_plugins_dir
+    repo_plugins = get_bundled_plugins_dir()
    if repo_plugins.is_dir():
        candidate = repo_plugins / name
        if candidate.is_dir() and (
@@ -659,8 +683,8 @@ def _discover_all_plugins() -> list:
    seen: dict = {}  # name -> (name, version, description, source, path)

    # Bundled (<repo>/plugins/<name>/), excluding memory/ and context_engine/
-    import hermes_cli
-    repo_plugins = Path(hermes_cli.__file__).resolve().parent.parent / "plugins"
+    from hermes_cli.plugins import get_bundled_plugins_dir
+    repo_plugins = get_bundled_plugins_dir()
    for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")):
        if not base.is_dir():
            continue
@@ -763,7 +787,7 @@ def _get_current_memory_provider() -> str:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        return config.get("memory", {}).get("provider", "") or ""
+        return cfg_get(config, "memory", "provider", default="") or ""
    except Exception:
        return ""

@@ -773,7 +797,7 @@ def _get_current_context_engine() -> str:
    try:
        from hermes_cli.config import load_config
        config = load_config()
-        return config.get("context", {}).get("engine", "compressor") or "compressor"
+        return cfg_get(config, "context", "engine", default="compressor") or "compressor"
    except Exception:
        return "compressor"

@@ -1244,6 +1268,247 @@ def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
    print()


+def dashboard_install_plugin(
+    identifier: str,
+    *,
+    force: bool,
+    enable: bool,
+) -> dict[str, Any]:
+    """Install a plugin without prompting, on behalf of the web dashboard.
+
+    Args:
+        identifier: Plugin reference handed to ``_resolve_git_url`` and
+            ``_install_plugin_core``.
+        force: Forwarded unchanged to ``_install_plugin_core``.
+        enable: When true, add the plugin to the enabled set and drop it
+            from the disabled set after a successful install.
+
+    Returns:
+        A JSON-serializable dict. ``{"ok": False, "error": ...}`` when the
+        core installer raises ``PluginOperationError``; otherwise an
+        ``"ok": True`` payload with the plugin name, warnings, missing env
+        var names, the ``after-install.md`` path (or ``None``) and the
+        requested ``enabled`` flag.
+    """
+    warnings: list[str] = []
+    try:
+        resolved_url = _resolve_git_url(identifier)
+    except ValueError:
+        # No URL to inspect, so no scheme warning; the install is still tried.
+        resolved_url = ""
+    if resolved_url.startswith(("http://", "file://")):
+        warnings.append(
+            "Insecure URL scheme; prefer https:// or git@ for production installs.",
+        )
+
+    try:
+        install_dir, manifest, plugin_name = _install_plugin_core(
+            identifier,
+            force=force,
+        )
+    except PluginOperationError as exc:
+        return {"ok": False, "error": str(exc)}
+
+    missing_env = _missing_requires_env_names(manifest)
+
+    if enable:
+        allowed = _get_enabled_set()
+        denied = _get_disabled_set()
+        allowed.add(plugin_name)
+        denied.discard(plugin_name)
+        _save_enabled_set(allowed)
+        _save_disabled_set(denied)
+
+    notes_file = install_dir / "after-install.md"
+    after_install_path: str | None = str(notes_file) if notes_file.exists() else None
+
+    return {
+        "ok": True,
+        "plugin_name": plugin_name,
+        "warnings": warnings,
+        "missing_env": missing_env,
+        "after_install_path": after_install_path,
+        "enabled": enable,
+    }
+
+
+def _get_plugin_toolset_key(name: str) -> Optional[str]:
+    """Look up the toolset key that plugin *name* registers its tools under.
+
+    Two sources are tried in order, each on a best-effort basis (any
+    exception simply moves on to the next source):
+
+    1. The plugin manager: the tools recorded for the first loaded plugin
+       whose manifest name or manager key equals *name*.
+    2. The ``provides_tools`` list in the manifest found under the bundled
+       or user plugin directory.
+
+    In both cases the tool registry supplies the toolset of the first tool
+    that has one. Returns ``None`` when the registry cannot be imported or
+    no tool yields a toolset.
+    """
+    try:
+        from tools.registry import registry
+    except Exception:
+        return None
+
+    def _first_toolset(tool_names):
+        # First non-empty toolset among the given tool names, else None.
+        for tool_name in tool_names:
+            entry = registry.get_entry(tool_name)
+            if entry and entry.toolset:
+                return entry.toolset
+        return None
+
+    # Source 1: tools the plugin manager recorded for this plugin.
+    try:
+        from hermes_cli.plugins import discover_plugins, get_plugin_manager
+        discover_plugins()  # idempotent — ensures plugins are loaded
+        for plugin_key, loaded in get_plugin_manager()._plugins.items():
+            if loaded.manifest.name == name or plugin_key == name:
+                toolset = _first_toolset(loaded.tools_registered)
+                if toolset is not None:
+                    return toolset
+                break  # only the first matching plugin is consulted
+    except Exception:
+        pass
+
+    # Source 2: provides_tools declared in the on-disk manifest.
+    try:
+        from hermes_cli.plugins import get_bundled_plugins_dir
+        for base in (get_bundled_plugins_dir(), _plugins_dir()):
+            if not base.is_dir():
+                continue
+            plugin_dir = base / name
+            if not plugin_dir.is_dir():
+                continue
+            declared = _read_manifest(plugin_dir).get("provides_tools") or []
+            toolset = _first_toolset(declared)
+            if toolset is not None:
+                return toolset
+    except Exception:
+        pass
+
+    return None
+
+
+def _toggle_plugin_toolset(name: str, *, enable: bool) -> None:
+    """Add or remove a plugin's toolset key in every ``platform_toolsets`` list.
+
+    Does nothing when ``_get_plugin_toolset_key`` finds no toolset for the
+    plugin. Entries of ``platform_toolsets`` that are not lists are left
+    untouched. The config is saved only if something actually changed.
+    """
+    toolset_key = _get_plugin_toolset_key(name)
+    if not toolset_key:
+        return
+
+    from hermes_cli.config import load_config, save_config
+
+    config = load_config()
+    platform_toolsets = config.get("platform_toolsets")
+    if not isinstance(platform_toolsets, dict):
+        # Missing or malformed section: start from an empty mapping.
+        platform_toolsets = {}
+        config["platform_toolsets"] = platform_toolsets
+
+    changed = False
+    for toolsets in platform_toolsets.values():
+        if not isinstance(toolsets, list):
+            continue
+        present = toolset_key in toolsets
+        if enable and not present:
+            toolsets.append(toolset_key)
+            changed = True
+        elif not enable and present:
+            toolsets.remove(toolset_key)
+            changed = True
+
+    # Enabling with no platform entries at all: seed a "cli" entry.
+    if enable and not changed and not platform_toolsets:
+        platform_toolsets["cli"] = [toolset_key]
+        changed = True
+
+    if changed:
+        save_config(config)
+
+
+def dashboard_set_agent_plugin_enabled(name: str, *, enabled: bool) -> dict[str, Any]:
+    """Move a plugin between the enabled and disabled sets.
+
+    After updating both sets, the plugin's toolset (if it has one) is
+    toggled in ``platform_toolsets`` through ``_toggle_plugin_toolset``.
+
+    Returns ``{"ok": False, "error": ...}`` for an unknown plugin, otherwise
+    ``{"ok": True, "name": ..., "unchanged": bool}``. ``unchanged`` is true
+    when the sets already reflected the requested state, in which case
+    nothing is saved or toggled.
+    """
+    if not _plugin_exists(name):
+        return {"ok": False, "error": f"Plugin '{name}' is not installed or bundled."}
+
+    allowed = _get_enabled_set()
+    denied = _get_disabled_set()
+
+    if enabled:
+        already_there = name in allowed and name not in denied
+    else:
+        already_there = name not in allowed and name in denied
+    if already_there:
+        return {"ok": True, "name": name, "unchanged": True}
+
+    if enabled:
+        allowed.add(name)
+        denied.discard(name)
+    else:
+        allowed.discard(name)
+        denied.add(name)
+
+    _save_enabled_set(allowed)
+    _save_disabled_set(denied)
+    _toggle_plugin_toolset(name, enable=bool(enabled))
+    return {"ok": True, "name": name, "unchanged": False}
+
+
+def _user_installed_plugin_dir(name: str) -> Optional[Path]:
+    """Return the user plugin directory for *name*, or ``None``.
+
+    ``None`` is returned when ``_sanitize_plugin_name`` rejects the name with
+    ``ValueError`` or when the resulting path is not an existing directory.
+    """
+    root = _plugins_dir()
+    try:
+        candidate = _sanitize_plugin_name(name, root)
+    except ValueError:
+        return None
+    if candidate.is_dir():
+        return candidate
+    return None
+
+
+def dashboard_update_user_plugin(name: str) -> dict[str, Any]:
+    """Pull updates for a user-installed plugin via ``_git_pull_plugin_dir``.
+
+    Returns ``{"ok": False, "error": ...}`` when the plugin directory is
+    missing, has no ``.git`` entry, or the pull fails. On success, example
+    files are copied with ``_copy_example_files`` and the pull output is
+    returned; ``unchanged`` is true when that output contains
+    "Already up to date".
+    """
+
+    def _failure(message: str) -> dict[str, Any]:
+        return {"ok": False, "error": message}
+
+    checkout = _user_installed_plugin_dir(name)
+    if checkout is None:
+        return _failure(f"Plugin '{name}' was not found under {_plugins_dir()}.")
+
+    if not (checkout / ".git").exists():
+        return _failure(f"Plugin '{name}' is not a git checkout; cannot pull updates.")
+
+    pulled, output = _git_pull_plugin_dir(checkout)
+    if not pulled:
+        return _failure(output)
+
+    from rich.console import Console
+
+    _copy_example_files(checkout, Console())
+    return {
+        "ok": True,
+        "name": name,
+        "output": output,
+        "unchanged": "Already up to date" in output,
+    }
+
+
+def _git_pull_plugin_dir(target: Path) -> tuple[bool, str]:
+    """Run ``git pull --ff-only`` inside *target*.
+
+    Returns ``(True, stdout)`` on a zero exit code. Otherwise returns
+    ``(False, message)`` where the message is stderr, else stdout, else a
+    generic fallback. A missing ``git`` binary and the 60-second timeout are
+    reported the same way instead of raising.
+    """
+    try:
+        proc = subprocess.run(
+            ["git", "pull", "--ff-only"],
+            cwd=str(target),
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+    except FileNotFoundError:
+        return False, "git is not installed or not in PATH."
+    except subprocess.TimeoutExpired:
+        return False, "Git pull timed out after 60 seconds."
+
+    if proc.returncode == 0:
+        return True, proc.stdout.strip()
+
+    # Prefer stderr for the failure message; fall back to stdout.
+    failure = (proc.stderr or "").strip() or proc.stdout.strip()
+    return False, failure or "git pull failed."
+
+
+def dashboard_remove_user_plugin(name: str) -> dict[str, Any]:
+    """Delete a user-installed plugin directory for the dashboard.
+
+    Plugins that ``_discover_all_plugins`` reports with source ``"bundled"``
+    are refused. Only a directory resolved by ``_user_installed_plugin_dir``
+    is ever deleted.
+
+    Returns:
+        ``{"ok": True, "name": name}`` on success, otherwise
+        ``{"ok": False, "error": ...}``. Filesystem failures during deletion
+        are reported in the error dict rather than raised, matching how every
+        other failure in this function is surfaced.
+    """
+    plugins_dir = _plugins_dir()
+    is_bundled = any(
+        plugin_name == name and source == "bundled"
+        for plugin_name, _ver, _desc, source, _path in _discover_all_plugins()
+    )
+    if is_bundled:
+        return {"ok": False, "error": "Bundled plugins cannot be removed from the dashboard."}
+
+    target = _user_installed_plugin_dir(name)
+    if target is None:
+        return {
+            "ok": False,
+            "error": f"Plugin '{name}' was not found under {plugins_dir}.",
+        }
+
+    try:
+        shutil.rmtree(target)
+    except OSError as exc:
+        # Permission problems, files in use, etc. must not escape as an
+        # exception from a function whose contract is a result dict.
+        return {"ok": False, "error": f"Failed to remove plugin '{name}': {exc}"}
+    return {"ok": True, "name": name}
+
+
 def plugins_command(args) -> None:
    """Dispatch hermes plugins subcommands."""
    action = getattr(args, "plugins_action", None)
@@ -11,7 +11,7 @@ zero migration needed.
 Usage::

    hermes profile create coder          # fresh profile + bundled skills
-    hermes profile create coder --clone  # also copy config, .env, SOUL.md
+    hermes profile create coder --clone  # also copy config, .env, SOUL.md, skills
    hermes profile create coder --clone-all  # full copy of source profile
    coder chat                           # use via wrapper alias
    hermes -p coder chat                 # or via flag
@@ -71,6 +71,29 @@ _CLONE_ALL_STRIP = [
    "processes.json",
 ]

+
+def _clone_all_copytree_ignore(source_dir: Path):
+    """Build a ``shutil.copytree`` ignore callback that skips root ``profiles/``.
+
+    The returned callable ignores an entry named ``profiles`` only while
+    copytree is visiting *source_dir* itself; in every other directory it
+    ignores nothing. This keeps ``--clone-all`` from copying the sibling
+    profiles tree into the new profile (which would nest
+    ``.../profiles/.../profiles/...``), in line with the export path, which
+    excludes ``profiles`` via ``_DEFAULT_EXPORT_EXCLUDE_ROOT``.
+    """
+    root = source_dir.resolve()
+
+    def _ignore(directory: str, names: List[str]) -> List[str]:
+        try:
+            at_root = Path(directory).resolve() == root
+        except (OSError, ValueError):
+            # Unresolvable directory: ignore nothing.
+            return []
+        if not at_root:
+            return []
+        return [entry for entry in names if entry == "profiles"]
+
+    return _ignore
+
+
 # Directories/files to exclude when exporting the default (~/.hermes) profile.
 # The default profile contains infrastructure (repo checkout, worktrees, DBs,
 # caches, binaries) that named profiles don't have.  We exclude those so the
@@ -388,7 +411,8 @@ def create_profile(
    clone_all:
        If True, do a full copytree of the source (all state).
    clone_config:
-        If True, copy only config files (config.yaml, .env, SOUL.md).
+        If True, copy config files (config.yaml, .env, SOUL.md), installed
+        skills, and selected profile identity files from the source profile.
    no_alias:
        If True, skip wrapper script creation.

@@ -424,8 +448,12 @@ def create_profile(
            )

    if clone_all and source_dir:
-        # Full copy of source profile
-        shutil.copytree(source_dir, profile_dir)
+        # Full copy of source profile (exclude sibling ~/.hermes/profiles/)
+        shutil.copytree(
+            source_dir,
+            profile_dir,
+            ignore=_clone_all_copytree_ignore(source_dir),
+        )
        # Strip runtime files
        for stale in _CLONE_ALL_STRIP:
            (profile_dir / stale).unlink(missing_ok=True)
@@ -442,6 +470,14 @@ def create_profile(
                if src.exists():
                    shutil.copy2(src, profile_dir / filename)

+            # Clone installed skills from the source profile. The dashboard's
+            # "clone from default" flow is expected to preserve both bundled
+            # and user-installed skills so the new profile immediately has the
+            # same agent capabilities as the source profile.
+            source_skills = source_dir / "skills"
+            if source_skills.is_dir():
+                shutil.copytree(source_skills, profile_dir / "skills", dirs_exist_ok=True)
+
            # Clone memory and other subdirectory files
            for relpath in _CLONE_SUBDIR_FILES:
                src = source_dir / relpath
@@ -111,6 +111,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="anthropic_messages",
        base_url_env_var="MINIMAX_BASE_URL",
    ),
+    "minimax-oauth": HermesOverlay(
+        transport="anthropic_messages",
+        auth_type="oauth_external",
+        base_url_override="https://api.minimax.io/anthropic",
+    ),
    "minimax-cn": HermesOverlay(
        transport="anthropic_messages",
        base_url_env_var="MINIMAX_CN_BASE_URL",
@@ -580,6 +585,12 @@ def resolve_custom_provider(
    if not requested:
        return None

+    # If the stored provider is the bare string "custom" (corrupt state
+    # from a prior model-switch bug), fall back to the first custom
+    # provider entry so existing configs self-heal.  (GH #17478)
+    bare_custom_fallback = requested == "custom"
+    first_valid = None
+
    for entry in custom_providers:
        if not isinstance(entry, dict):
            continue
@@ -594,6 +605,10 @@ def resolve_custom_provider(
        if not display_name or not api_url:
            continue

+        # Stash the first valid entry for bare-"custom" fallback
+        if first_valid is None:
+            first_valid = (display_name, api_url)
+
        slug = custom_provider_slug(display_name)
        if requested not in {display_name.lower(), slug}:
            continue
@@ -609,6 +624,21 @@ def resolve_custom_provider(
            source="user-config",
        )

+    # Self-heal: bare "custom" matched nothing — return first valid entry
+    if bare_custom_fallback and first_valid:
+        dname, aurl = first_valid
+        slug = custom_provider_slug(dname)
+        return ProviderDef(
+            id=slug,
+            name=dname,
+            transport="openai_chat",
+            api_key_env_vars=(),
+            base_url=aurl,
+            is_aggregator=False,
+            auth_type="api_key",
+            source="user-config",
+        )
+
    return None


@@ -0,0 +1,149 @@
+"""
+Unified self-relaunch for Hermes CLI.
+
+Preserves critical flags (--tui, --dev, --profile, --model, etc.) across
+process replacement so that ``hermes sessions browse`` or post-setup relaunch
+doesn't silently drop the user's UI mode or other preferences.
+
+Also works when ``hermes`` is not on PATH (e.g. ``nix run`` or ``python -m``).
+"""
+
+import os
+import shutil
+import sys
+from typing import Optional, Sequence
+
+from hermes_cli._parser import (
+    PRE_ARGPARSE_INHERITED_FLAGS,
+    build_top_level_parser,
+)
+
+
+def _build_inherited_flag_table() -> list[tuple[str, bool]]:
+    """Collect the ``(option_string, takes_value)`` pairs that survive a relaunch.
+
+    The top-level and chat parsers returned by ``build_top_level_parser`` are
+    inspected; an action contributes its option strings when it carries a
+    truthy ``inherit_on_relaunch`` attribute. Duplicate pairs are dropped,
+    first occurrence wins, and ``PRE_ARGPARSE_INHERITED_FLAGS`` is appended
+    at the end as-is.
+    """
+    parser, _subparsers, chat_parser = build_top_level_parser()
+
+    inheritable_actions = (
+        action
+        for p in (parser, chat_parser)
+        for action in p._actions
+        # Positionals have no option strings and can never be inherited.
+        if action.option_strings and getattr(action, "inherit_on_relaunch", False)
+    )
+
+    table: list[tuple[str, bool]] = []
+    seen: set[tuple[str, bool]] = set()
+    for action in inheritable_actions:
+        takes_value = action.nargs != 0  # store_true/false set nargs=0
+        for opt in action.option_strings:
+            pair = (opt, takes_value)
+            if pair in seen:
+                continue
+            seen.add(pair)
+            table.append(pair)
+
+    table.extend(PRE_ARGPARSE_INHERITED_FLAGS)
+    return table
+
+
+_INHERITED_FLAGS_TABLE = _build_inherited_flag_table()
+
+
+def _extract_inherited_flags(argv: Sequence[str]) -> list[str]:
+    """Return the subset of *argv* that a self-relaunched hermes should keep.
+
+    Both ``--flag=value`` and ``--flag value`` spellings are recognised. For
+    the separated form, the following argument is carried along only when
+    the flag takes a value and that argument does not start with ``-``.
+    """
+    # First table entry wins when a flag is listed more than once.
+    takes_value_by_flag: dict[str, bool] = {}
+    for flag, takes_value in _INHERITED_FLAGS_TABLE:
+        takes_value_by_flag.setdefault(flag, takes_value)
+
+    carried: list[str] = []
+    skip_next = False
+    for index, arg in enumerate(argv):
+        if skip_next:
+            # This argument was already consumed as the previous flag's value.
+            skip_next = False
+            continue
+
+        if "=" in arg:
+            if arg.split("=", 1)[0] in takes_value_by_flag:
+                carried.append(arg)
+            continue
+
+        if arg not in takes_value_by_flag:
+            continue
+        carried.append(arg)
+        if not takes_value_by_flag[arg]:
+            continue
+        has_next = index + 1 < len(argv)
+        if has_next and not argv[index + 1].startswith("-"):
+            carried.append(argv[index + 1])
+            skip_next = True
+    return carried
+
+
+def resolve_hermes_bin() -> Optional[str]:
+    """Locate the executable to use when relaunching hermes.
+
+    Resolution order:
+      1. ``sys.argv[0]`` when it is an absolute path to an executable file.
+      2. ``sys.argv[0]`` made absolute, when it names an existing executable
+         file and does not start with ``-``.
+      3. ``shutil.which("hermes")``.
+
+    Returns ``None`` when nothing matches; the caller should then fall back
+    to ``python -m hermes_cli.main``.
+    """
+    candidate = sys.argv[0]
+
+    # Absolute path to an executable (covers nix store, venv wrappers, etc.)
+    is_abs_executable = (
+        os.path.isabs(candidate)
+        and os.path.isfile(candidate)
+        and os.access(candidate, os.X_OK)
+    )
+    if is_abs_executable:
+        return candidate
+
+    # Relative path — resolve against CWD
+    if not candidate.startswith("-") and os.path.isfile(candidate):
+        resolved = os.path.abspath(candidate)
+        if os.access(resolved, os.X_OK):
+            return resolved
+
+    # PATH lookup; normalise a falsy result to None.
+    return shutil.which("hermes") or None
+
+
+def build_relaunch_argv(
+    extra_args: Sequence[str],
+    *,
+    preserve_inherited: bool = True,
+    original_argv: Optional[Sequence[str]] = None,
+) -> list[str]:
+    """Assemble the argv used to replace the current process with hermes.
+
+    The result is, in order: the launcher (the resolved hermes binary, or
+    ``sys.executable -m hermes_cli.main`` when none is found), the inherited
+    flags, then *extra_args*.
+
+    Args:
+        extra_args: Arguments to append (e.g. ``["--resume", id]``).
+        preserve_inherited: Whether to carry over the flags selected by
+            ``_extract_inherited_flags``.
+        original_argv: The argv to scan for those flags; defaults to
+            ``sys.argv[1:]``.
+    """
+    launcher = resolve_hermes_bin()
+    argv = [launcher] if launcher else [sys.executable, "-m", "hermes_cli.main"]
+
+    scanned = list(sys.argv[1:]) if original_argv is None else list(original_argv)
+    if preserve_inherited:
+        argv += _extract_inherited_flags(scanned)
+
+    argv += list(extra_args)
+    return argv
+
+
+def relaunch(
+    extra_args: Sequence[str],
+    *,
+    preserve_inherited: bool = True,
+    original_argv: Optional[Sequence[str]] = None,
+) -> None:
+    """Exec a fresh hermes in place of the current process.
+
+    The command line comes from ``build_relaunch_argv`` with the same
+    arguments; its first element is the program handed to ``os.execvp``.
+    """
+    command = build_relaunch_argv(
+        extra_args,
+        preserve_inherited=preserve_inherited,
+        original_argv=original_argv,
+    )
+    program = command[0]
+    os.execvp(program, command)
@@ -358,11 +358,20 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
        return None
    if not requested_norm.startswith("custom:"):
        try:
-            auth_mod.resolve_provider(requested_norm)
+            canonical = auth_mod.resolve_provider(requested_norm)
        except AuthError:
            pass
        else:
-            return None
+            # A user-declared ``custom_providers`` entry whose name matches
+            # only an *alias* (``kimi`` → built-in ``kimi-coding``) is the
+            # user's intended target — alias rewriting would otherwise hijack
+            # the request.  We only defer to the built-in when the raw name is
+            # the canonical provider itself (``nous``, ``openrouter``, …) so
+            # accidentally shadowing a canonical provider still resolves to
+            # the built-in. See tests/hermes_cli/test_runtime_provider_resolution.py
+            # ``test_named_custom_provider_does_not_shadow_builtin_provider``.
+            if (canonical or "").strip().lower() == requested_norm:
+                return None

    config = load_config()
    
@@ -391,7 +400,14 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
                        "api_key": resolved_api_key,
                        "model": entry.get("default_model", ""),
                    }
-                    api_mode = _parse_api_mode(entry.get("api_mode"))
+                    # The v11→v12 migration writes the API mode under the new
+                    # ``transport`` field, but hand-edited configs may still
+                    # use the legacy ``api_mode`` spelling.  Accept both —
+                    # the runtime normaliser ``_normalize_custom_provider_entry``
+                    # already does, so without this lift every migrated config
+                    # silently downgrades codex_responses / anthropic_messages
+                    # providers to chat_completions in the resolved runtime.
+                    api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
                    if api_mode:
                        result["api_mode"] = api_mode
                    return result
@@ -409,7 +425,7 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
                            "api_key": resolved_api_key,
                            "model": entry.get("default_model", ""),
                        }
-                        api_mode = _parse_api_mode(entry.get("api_mode"))
+                        api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
                        if api_mode:
                            result["api_mode"] = api_mode
                        return result
@@ -1070,6 +1086,20 @@ def resolve_runtime_provider(
            logger.info("Qwen OAuth credentials failed; "
                        "falling through to next provider.")

+    if provider == "minimax-oauth":
+        pconfig = PROVIDER_REGISTRY.get(provider)
+        if pconfig and pconfig.auth_type == "oauth_minimax":
+            from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
+            creds = resolve_minimax_oauth_runtime_credentials()
+            return {
+                "provider": provider,
+                "api_mode": "anthropic_messages",
+                "base_url": creds["base_url"],
+                "api_key": creds["api_key"],
+                "source": creds.get("source", "oauth"),
+                "requested_provider": requested_provider,
+            }
+
    if provider == "google-gemini-cli":
        try:
            creds = resolve_gemini_oauth_runtime_credentials()
@@ -0,0 +1,316 @@
+"""Session recap — summarize what's happened in the current session.
+
+Inspired by Claude Code's `/recap` command (v2.1.114, April 2026), which
+shows a one-line summary of what happened while a terminal was unfocused
+so users juggling multiple sessions can re-orient quickly.
+
+Source: https://code.claude.com/docs/en/whats-new/2026-w17
+
+Differences from Claude Code:
+    - Pure local computation from the in-memory conversation history. No
+      LLM call, no auxiliary model, no prompt-cache invalidation. A
+      recap should be instant and free.
+    - Works unchanged on CLI and every gateway platform (Telegram,
+      Discord, Slack, …) because both call into the same ``build_recap``
+      helper. Claude Code only shows this on the CLI.
+    - Tailored to hermes-agent's tool vocabulary (``terminal``, ``patch``,
+      ``write_file``, ``delegate_task``, ``browser_*``, ``web_*``) — the
+      recap surfaces which classes of work were most active.
+"""
+from __future__ import annotations
+
+import os
+from collections import Counter
+from typing import Any, Iterable, List, Mapping, Optional, Sequence, Tuple
+
+# How many recent user/assistant turns we consider "recent activity".
+_RECENT_TURN_WINDOW = 20
+
+# How many characters of the latest user prompt to show.
+_PROMPT_PREVIEW_CHARS = 140
+
+# How many characters of the latest assistant text to show.
+_ASSISTANT_PREVIEW_CHARS = 200
+
+# How many recently-touched files to list.
+_MAX_FILES_LISTED = 5
+
+# Tool names that touch a file (reads as well as edits), mapped to the
+# argument key that holds the path.
+_FILE_EDIT_TOOLS: Mapping[str, str] = {
+    "write_file": "path",
+    "patch": "path",
+    "read_file": "path",
+    "skill_manage": "file_path",
+    "skill_view": "file_path",
+}
+
+
+def _coerce_text(value: Any) -> str:
+    """Reduce a message ``content`` value to plain text.
+
+    ``None`` becomes ``""`` and a string is returned unchanged. A list is
+    treated as content blocks: string blocks are kept verbatim, mapping
+    blocks contribute their non-empty string ``"text"`` value, every other
+    block is dropped, and the kept pieces are joined with newlines. Any
+    other value is passed through ``str()``.
+    """
+    if value is None:
+        return ""
+    if isinstance(value, str):
+        return value
+    if not isinstance(value, list):
+        return str(value)
+
+    def _block_text(block: Any) -> Optional[str]:
+        # Text carried by one content block, or None when it has none.
+        if isinstance(block, str):
+            return block
+        if isinstance(block, Mapping):
+            text = block.get("text")
+            if isinstance(text, str) and text:
+                return text
+        return None
+
+    pieces = (_block_text(block) for block in value)
+    return "\n".join(piece for piece in pieces if piece is not None)
+
+
+def _tool_call_name_and_args(tool_call: Any) -> Tuple[str, Mapping[str, Any]]:
+    """Pull ``(name, arguments)`` out of one tool_call entry.
+
+    ``arguments`` may already be a mapping or may be a JSON string. The
+    result's second element is always a mapping: it is ``{}`` when the
+    arguments are missing, fail to parse, or parse to something that is not
+    a mapping. A malformed *tool_call* yields ``("", {})``.
+    """
+    if not isinstance(tool_call, Mapping):
+        return "", {}
+    function = tool_call.get("function") or {}
+    if not isinstance(function, Mapping):
+        return "", {}
+
+    name = str(function.get("name") or "")
+    arguments = function.get("arguments")
+
+    if isinstance(arguments, str) and arguments:
+        import json
+
+        try:
+            arguments = json.loads(arguments)
+        except Exception:
+            return name, {}
+
+    if isinstance(arguments, Mapping):
+        return name, arguments
+    return name, {}
+
+
+def _iter_assistant_tool_calls(
+    messages: Sequence[Mapping[str, Any]],
+) -> Iterable[Tuple[str, Mapping[str, Any]]]:
+    """Yield ``(name, arguments)`` for every named tool call made by the assistant.
+
+    Non-mapping messages, non-assistant messages, ``tool_calls`` values that
+    are not lists, and calls without a name are skipped.
+    """
+    for msg in messages:
+        from_assistant = isinstance(msg, Mapping) and msg.get("role") == "assistant"
+        if not from_assistant:
+            continue
+        calls = msg.get("tool_calls") or []
+        if not isinstance(calls, list):
+            continue
+        parsed = (_tool_call_name_and_args(call) for call in calls)
+        yield from ((name, args) for name, args in parsed if name)
+
+
+def _count_visible_turns(
+    messages: Sequence[Mapping[str, Any]],
+) -> Tuple[int, int, int]:
+    """Count messages by role.
+
+    Returns ``(user_count, assistant_count, tool_count)``. Entries that are
+    not mappings, and roles other than those three, are not counted.
+    """
+    roles = [msg.get("role") for msg in messages if isinstance(msg, Mapping)]
+
+    def _tally(wanted: str) -> int:
+        return sum(1 for role in roles if role == wanted)
+
+    return _tally("user"), _tally("assistant"), _tally("tool")
+
+
+def _latest_user_prompt(
+    messages: Sequence[Mapping[str, Any]],
+) -> Optional[str]:
+    """Return the stripped text of the newest user message that has any.
+
+    User messages whose content flattens to an empty string are skipped.
+    Returns ``None`` when no user message carries text.
+    """
+    user_texts = (
+        _coerce_text(msg.get("content")).strip()
+        for msg in reversed(messages)
+        if isinstance(msg, Mapping) and msg.get("role") == "user"
+    )
+    return next((text for text in user_texts if text), None)
+
+
+def _latest_assistant_text(
+    messages: Sequence[Mapping[str, Any]],
+) -> Optional[str]:
+    """Return the stripped text of the newest assistant message that has any.
+
+    Assistant messages whose content flattens to an empty string are
+    skipped. Returns ``None`` when no assistant message carries text.
+    """
+    assistant_texts = (
+        _coerce_text(msg.get("content")).strip()
+        for msg in reversed(messages)
+        if isinstance(msg, Mapping) and msg.get("role") == "assistant"
+    )
+    return next((text for text in assistant_texts if text), None)
+
+
+def _recent_window(
+    messages: Sequence[Mapping[str, Any]], window: int = _RECENT_TURN_WINDOW
+) -> List[Mapping[str, Any]]:
+    """Return the tail of ``messages`` holding at most ``window`` turns.
+
+    Only user and assistant messages count as turns. The slice starts at the
+    ``window``-th such message counted from the end, so other messages that
+    follow it are included too. When there are fewer than ``window`` turns
+    the whole history is returned as a new list.
+    """
+    turn_positions = [
+        index
+        for index, msg in enumerate(messages)
+        if isinstance(msg, Mapping) and msg.get("role") in ("user", "assistant")
+    ]
+    # A window below 1 behaves like 1: the slice starts at the latest turn.
+    needed = max(window, 1)
+    if len(turn_positions) < needed:
+        return list(messages)
+    start = turn_positions[-needed]
+    return list(messages[start:])
+
+
+def _shortened_path(path: str) -> str:
+    """Render *path* compactly for display.
+
+    The path is made absolute (with ``~`` expanded) and then shown as ``.``
+    when it is the current directory, relative to the current directory when
+    it lies beneath it, with a ``~/`` prefix when it lies beneath the home
+    directory, and as the absolute path otherwise. An empty path, or any
+    failure while resolving, returns the input unchanged.
+    """
+    if not path:
+        return path
+    try:
+        absolute = os.path.abspath(os.path.expanduser(path))
+        cwd = os.getcwd()
+        if absolute == cwd:
+            return "."
+
+        cwd_prefix = cwd + os.sep
+        if absolute.startswith(cwd_prefix):
+            return absolute[len(cwd_prefix):]
+
+        home_prefix = os.path.expanduser("~") + os.sep
+        if absolute.startswith(home_prefix):
+            return "~/" + absolute[len(home_prefix):]
+
+        return absolute
+    except Exception:
+        return path
+
+
+def _summarise_tool_activity(
+    tool_calls: Sequence[Tuple[str, Mapping[str, Any]]],
+) -> Tuple[List[Tuple[str, int]], List[str]]:
+    """Return ``(tool_counts_sorted, recently_touched_files)``.
+
+    ``tool_counts_sorted`` lists every tool with its call count, ordered by
+    descending count and then by name; callers truncate it for display.
+
+    ``recently_touched_files`` lists the display form of each distinct path
+    passed to a tool in ``_FILE_EDIT_TOOLS``, most recent first. Paths are
+    de-duplicated on their display form, so different spellings of the same
+    file (``./a.py`` and ``a.py``) are listed once.
+    """
+    counter: Counter[str] = Counter()
+    files_seen: List[str] = []
+    shown: set[str] = set()
+    # Walk newest to oldest so the first sighting of a file is its most
+    # recent use; the counts do not depend on the order.
+    for name, args in reversed(list(tool_calls)):
+        counter[name] += 1
+        arg_key = _FILE_EDIT_TOOLS.get(name)
+        if not arg_key:
+            continue
+        path = args.get(arg_key)
+        if not (isinstance(path, str) and path):
+            continue
+        display = _shortened_path(path)
+        if display in shown:
+            continue
+        shown.add(display)
+        files_seen.append(display)
+    tool_counts = sorted(counter.items(), key=lambda kv: (-kv[1], kv[0]))
+    return tool_counts, files_seen
+
+
+def _truncate(text: str, limit: int) -> str:
+    """Collapse whitespace in *text* and cut it to at most *limit* characters.
+
+    Runs of whitespace (including newlines) become single spaces so the
+    result is a compact one-liner. Text longer than *limit* is cut and ends
+    with ``…``, which counts towards the limit. A *limit* of zero or less
+    yields an empty string.
+    """
+    text = " ".join(text.split())  # collapse newlines for a compact one-liner
+    if len(text) <= limit:
+        return text
+    if limit <= 0:
+        # Without this guard the slice end below would be negative and keep
+        # nearly the whole text.
+        return ""
+    return text[: limit - 1].rstrip() + "…"
+
+
+def build_recap(
+    messages: Sequence[Mapping[str, Any]],
+    *,
+    session_title: Optional[str] = None,
+    session_id: Optional[str] = None,
+    platform: Optional[str] = None,
+) -> str:
+    """Build a multi-line recap of recent activity.
+
+    Inputs:
+        messages: the full conversation history as a list of
+            chat-completion-style dicts (``role``, ``content``,
+            ``tool_calls``, …).
+        session_title: optional human title, shown in the header.
+        session_id: optional session id; its first 8 characters are shown
+            in the header when no title is given.
+        platform: optional hint (``"cli"``, ``"telegram"``, …). Does not
+            change behavior today but is accepted for forward compat.
+
+    Every figure on the "Recent" line, including the tool-result count,
+    describes the recent window returned by ``_recent_window``; session-wide
+    user/assistant totals are added in parentheses when they differ.
+
+    The output is plain text designed to render well in both a terminal
+    (with 80-col wrapping) and a gateway message bubble.
+    """
+    _ = platform  # reserved for future use
+    lines: List[str] = []
+
+    header_bits: List[str] = ["Session recap"]
+    if session_title:
+        header_bits.append(f"— {session_title}")
+    elif session_id:
+        header_bits.append(f"— {session_id[:8]}")
+    lines.append(" ".join(header_bits))
+
+    if not messages:
+        lines.append("  (nothing to recap — no messages yet)")
+        return "\n".join(lines)
+
+    users, assistants, _total_tool_msgs = _count_visible_turns(messages)
+    window = _recent_window(messages)
+    win_users, win_assistants, win_tool_msgs = _count_visible_turns(window)
+
+    scope = (
+        f"{win_users} user turn{'s' if win_users != 1 else ''} / "
+        f"{win_assistants} assistant repl{'ies' if win_assistants != 1 else 'y'}"
+    )
+    if (users, assistants) != (win_users, win_assistants):
+        scope += f" (of {users}/{assistants} total)"
+    # Report the window's tool results so the whole line has one scope.
+    lines.append(
+        f"  Recent: {scope}, {win_tool_msgs} tool result{'s' if win_tool_msgs != 1 else ''}"
+    )
+
+    tool_calls = list(_iter_assistant_tool_calls(window))
+    tool_counts, files = _summarise_tool_activity(tool_calls)
+    if tool_counts:
+        top = ", ".join(f"{name}×{count}" for name, count in tool_counts[:5])
+        extra = len(tool_counts) - 5
+        if extra > 0:
+            top += f" (+{extra} more)"
+        lines.append(f"  Tools used: {top}")
+    if files:
+        shown = files[:_MAX_FILES_LISTED]
+        extra = len(files) - len(shown)
+        entry = ", ".join(shown)
+        if extra > 0:
+            entry += f" (+{extra} more)"
+        lines.append(f"  Files touched: {entry}")
+
+    latest_user = _latest_user_prompt(window)
+    if latest_user:
+        lines.append(f"  Last ask: {_truncate(latest_user, _PROMPT_PREVIEW_CHARS)}")
+
+    latest_reply = _latest_assistant_text(window)
+    if latest_reply:
+        lines.append(f"  Last reply: {_truncate(latest_reply, _ASSISTANT_PREVIEW_CHARS)}")
+
+    if len(lines) == 2:
+        # Only the header + scope line — nothing substantive to show.
+        lines.append("  (no assistant activity yet in this window)")
+
+    return "\n".join(lines)
+
+
+__all__ = ["build_recap"]
@@ -12,6 +12,7 @@ Config files are stored in ~/.hermes/ for easy access.
 """

 import importlib.util
+import json
 import logging
 import os
 import shutil
@@ -131,6 +132,7 @@ def _set_reasoning_effort(config: Dict[str, Any], effort: str) -> None:

 # Import config helpers
 from hermes_cli.config import (
+    cfg_get,
    DEFAULT_CONFIG,
    get_hermes_home,
    get_config_path,
@@ -138,6 +140,7 @@ from hermes_cli.config import (
    load_config,
    save_config,
    save_env_value,
+    remove_env_value,
    get_env_value,
    ensure_hermes_home,
 )
@@ -441,7 +444,7 @@ def _print_setup_summary(config: dict, hermes_home):
            tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY"))

    # TTS — show configured provider
-    tts_provider = config.get("tts", {}).get("provider", "edge")
+    tts_provider = cfg_get(config, "tts", "provider", default="edge")
    if subscription_features.tts.managed_by_nous:
        tool_status.append(("Text-to-Speech (OpenAI via Nous subscription)", True, None))
    elif tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"):
@@ -480,7 +483,7 @@ def _print_setup_summary(config: dict, hermes_home):

    if subscription_features.modal.managed_by_nous:
        tool_status.append(("Modal Execution (Nous subscription)", True, None))
-    elif config.get("terminal", {}).get("backend") == "modal":
+    elif cfg_get(config, "terminal", "backend") == "modal":
        if subscription_features.modal.direct_override:
            tool_status.append(("Modal Execution (direct Modal)", True, None))
        else:
@@ -654,6 +657,102 @@ def _prompt_container_resources(config: dict):
        pass


+def _prompt_vercel_sandbox_settings(config: dict):
+    """Prompt for Vercel Sandbox settings without exposing unsupported disk sizing.
+
+    Interactively collects the runtime, snapshot persistence, CPU and memory
+    values and stores them under ``config["terminal"]`` (created if missing).
+    ``container_disk`` is never prompted for; it is always set to 51200.
+    Afterwards prompts for the Vercel access token, project ID and team ID and
+    passes each non-empty answer to ``save_env_value``.
+
+    Args:
+        config: Setup configuration dict; mutated in place.
+    """
+    terminal = config.setdefault("terminal", {})
+
+    print()
+    print_info("Vercel Sandbox settings:")
+    print_info("  Filesystem persistence uses Vercel snapshots.")
+    print_info("  Snapshots restore files only; live processes do not continue after sandbox recreation.")
+
+    # Function-scope import: the supported runtime list comes from tools.terminal_tool.
+    from tools.terminal_tool import _SUPPORTED_VERCEL_RUNTIMES
+
+    current_runtime = terminal.get("vercel_runtime") or "node24"
+    supported_label = ", ".join(_SUPPORTED_VERCEL_RUNTIMES)
+    # A blank (or whitespace-only) answer keeps the current runtime.
+    runtime = prompt(f"  Runtime ({supported_label})", current_runtime).strip() or current_runtime
+    if runtime not in _SUPPORTED_VERCEL_RUNTIMES:
+        print_warning(f"Unsupported Vercel runtime '{runtime}', keeping {current_runtime}.")
+        # The current runtime is only kept if it is itself supported; otherwise "node24" is used.
+        runtime = current_runtime if current_runtime in _SUPPORTED_VERCEL_RUNTIMES else "node24"
+    terminal["vercel_runtime"] = runtime
+    save_env_value("TERMINAL_VERCEL_RUNTIME", runtime)
+
+    current_persist = terminal.get("container_persistent", True)
+    persist_label = "yes" if current_persist else "no"
+    # Only "yes"/"true"/"y"/"1" (case-insensitive) enable persistence; any other answer disables it.
+    terminal["container_persistent"] = prompt(
+        "  Persist filesystem with snapshots? (yes/no)", persist_label
+    ).lower() in ("yes", "true", "y", "1")
+
+    current_cpu = terminal.get("container_cpu", 1)
+    cpu_str = prompt("  CPU cores", str(current_cpu))
+    try:
+        terminal["container_cpu"] = float(cpu_str)
+    except ValueError:
+        # Non-numeric input: container_cpu is not written.
+        pass
+
+    current_mem = terminal.get("container_memory", 5120)
+    mem_str = prompt("  Memory in MB (5120 = 5GB)", str(current_mem))
+    try:
+        terminal["container_memory"] = int(mem_str)
+    except ValueError:
+        # Non-integer input: container_memory is not written.
+        pass
+
+    # Warn only when a custom size (anything other than 0 or 51200) was configured;
+    # the value is forced to 51200 either way.
+    if terminal.get("container_disk", 51200) not in (0, 51200):
+        print_warning("Vercel Sandbox does not support custom disk sizing; resetting container_disk to 51200.")
+    terminal["container_disk"] = 51200
+
+    print()
+    print_info("Vercel authentication:")
+    print_info("  Use a long-lived Vercel access token plus project/team IDs.")
+    linked_project = _read_nearest_vercel_project()
+    if linked_project:
+        print_info("  Found defaults in nearest .vercel/project.json.")
+
+    # NOTE(review): presumably clears a previously stored OIDC token so the
+    # access token prompted for below is the one in effect — confirm.
+    remove_env_value("VERCEL_OIDC_TOKEN")
+    token = prompt("    Vercel access token", get_env_value("VERCEL_TOKEN") or "", password=True)
+    # Values from the link file are offered as defaults only when the
+    # corresponding env value is unset or empty.
+    project = prompt(
+        "    Vercel project ID",
+        get_env_value("VERCEL_PROJECT_ID") or linked_project.get("projectId", ""),
+    )
+    team = prompt(
+        "    Vercel team ID",
+        get_env_value("VERCEL_TEAM_ID") or linked_project.get("orgId", ""),
+    )
+    # Empty answers are not saved.
+    if token:
+        save_env_value("VERCEL_TOKEN", token)
+    if project:
+        save_env_value("VERCEL_PROJECT_ID", project)
+    if team:
+        save_env_value("VERCEL_TEAM_ID", team)
+
+
+def _read_nearest_vercel_project(start: Path | None = None) -> dict[str, str]:
+    """Read project/team defaults from the nearest Vercel link file.
+
+    Walks from ``start`` (default: the current working directory) up through
+    its parent directories and reads the first ``.vercel/project.json`` found.
+
+    Args:
+        start: File or directory to start from; a file is replaced by its
+            parent directory.
+
+    Returns:
+        A dict containing ``projectId`` and/or ``orgId`` when those keys hold
+        non-blank strings. Empty if no link file is found, or if the nearest
+        one cannot be read, is not valid JSON, or is not a JSON object.
+    """
+    current = (start or Path.cwd()).resolve()
+    if current.is_file():
+        current = current.parent
+
+    for directory in (current, *current.parents):
+        project_file = directory / ".vercel" / "project.json"
+        if not project_file.exists():
+            continue
+        # The search stops at the nearest link file even when it is unusable;
+        # directories further up are not consulted.
+        try:
+            data = json.loads(project_file.read_text(encoding="utf-8"))
+        except (OSError, json.JSONDecodeError):
+            return {}
+        if not isinstance(data, dict):
+            return {}
+        # Keep only keys whose value is a non-blank string; the value itself
+        # is returned as stored (not stripped).
+        return {
+            key: value
+            for key, value in {
+                "projectId": data.get("projectId"),
+                "orgId": data.get("orgId"),
+            }.items()
+            if isinstance(value, str) and value.strip()
+        }
+    return {}
+
+
 # Tool categories and provider config are now in tools_config.py (shared
 # between `hermes tools` and `hermes setup tools`).

@@ -1179,7 +1278,7 @@ def setup_terminal_backend(config: dict):
    print_info(f"   Guide: {_DOCS_BASE}/developer-guide/environments")
    print()

-    current_backend = config.get("terminal", {}).get("backend", "local")
+    current_backend = cfg_get(config, "terminal", "backend", default="local")
    is_linux = _platform.system() == "Linux"

    # Build backend choices with descriptions
@@ -1189,11 +1288,12 @@ def setup_terminal_backend(config: dict):
        "Modal - serverless cloud sandbox",
        "SSH - run on a remote machine",
        "Daytona - persistent cloud development environment",
+        "Vercel Sandbox - cloud microVM with snapshot filesystem persistence",
    ]
-    idx_to_backend = {0: "local", 1: "docker", 2: "modal", 3: "ssh", 4: "daytona"}
-    backend_to_idx = {"local": 0, "docker": 1, "modal": 2, "ssh": 3, "daytona": 4}
+    idx_to_backend = {0: "local", 1: "docker", 2: "modal", 3: "ssh", 4: "daytona", 5: "vercel_sandbox"}
+    backend_to_idx = {"local": 0, "docker": 1, "modal": 2, "ssh": 3, "daytona": 4, "vercel_sandbox": 5}

-    next_idx = 5
+    next_idx = 6
    if is_linux:
        terminal_choices.append("Singularity/Apptainer - HPC-friendly container")
        idx_to_backend[next_idx] = "singularity"
@@ -1228,7 +1328,7 @@ def setup_terminal_backend(config: dict):
        print_info(
            "  the agent starts. CLI mode always starts in the current directory."
        )
-        current_cwd = config.get("terminal", {}).get("cwd", "")
+        current_cwd = cfg_get(config, "terminal", "cwd", default="")
        cwd = prompt("  Messaging working directory", current_cwd or str(Path.home()))
        if cwd:
            config["terminal"]["cwd"] = cwd
@@ -1259,9 +1359,7 @@ def setup_terminal_backend(config: dict):
            print_info(f"Docker found: {docker_bin}")

        # Docker image
-        current_image = config.get("terminal", {}).get(
-            "docker_image", "nikolaik/python-nodejs:python3.11-nodejs20"
-        )
+        current_image = cfg_get(config, "terminal", "docker_image", default="nikolaik/python-nodejs:python3.11-nodejs20")
        image = prompt("  Docker image", current_image)
        config["terminal"]["docker_image"] = image
        save_env_value("TERMINAL_DOCKER_IMAGE", image)
@@ -1281,9 +1379,7 @@ def setup_terminal_backend(config: dict):
        else:
            print_info(f"Found: {sing_bin}")

-        current_image = config.get("terminal", {}).get(
-            "singularity_image", "docker://nikolaik/python-nodejs:python3.11-nodejs20"
-        )
+        current_image = cfg_get(config, "terminal", "singularity_image", default="docker://nikolaik/python-nodejs:python3.11-nodejs20")
        image = prompt("  Container image", current_image)
        config["terminal"]["singularity_image"] = image
        save_env_value("TERMINAL_SINGULARITY_IMAGE", image)
@@ -1302,7 +1398,7 @@ def setup_terminal_backend(config: dict):
            get_nous_subscription_features(config).nous_auth_present
            and is_managed_tool_gateway_ready("modal")
        )
-        modal_mode = normalize_modal_mode(config.get("terminal", {}).get("modal_mode"))
+        modal_mode = normalize_modal_mode(cfg_get(config, "terminal", "modal_mode"))
        use_managed_modal = False
        if managed_modal_available:
            modal_choices = [
@@ -1439,15 +1535,46 @@ def setup_terminal_backend(config: dict):
                print_success("    Configured")

        # Daytona image
-        current_image = config.get("terminal", {}).get(
-            "daytona_image", "nikolaik/python-nodejs:python3.11-nodejs20"
-        )
+        current_image = cfg_get(config, "terminal", "daytona_image", default="nikolaik/python-nodejs:python3.11-nodejs20")
        image = prompt("  Sandbox image", current_image)
        config["terminal"]["daytona_image"] = image
        save_env_value("TERMINAL_DAYTONA_IMAGE", image)

        _prompt_container_resources(config)

+    elif selected_backend == "vercel_sandbox":
+        print_success("Terminal backend: Vercel Sandbox")
+        print_info("Cloud microVM sandboxes with snapshot-backed filesystem persistence.")
+        print_info("Requires the optional SDK: pip install 'hermes-agent[vercel]'")
+
+        try:
+            __import__("vercel")
+        except ImportError:
+            print_info("Installing vercel SDK...")
+            import subprocess
+
+            uv_bin = shutil.which("uv")
+            if uv_bin:
+                result = subprocess.run(
+                    [uv_bin, "pip", "install", "--python", sys.executable, "vercel"],
+                    capture_output=True,
+                    text=True,
+                )
+            else:
+                result = subprocess.run(
+                    [sys.executable, "-m", "pip", "install", "vercel"],
+                    capture_output=True,
+                    text=True,
+                )
+            if result.returncode == 0:
+                print_success("vercel SDK installed")
+            else:
+                print_warning("Install failed — run manually: pip install 'hermes-agent[vercel]'")
+                if result.stderr:
+                    print_info(f"  Error: {result.stderr.strip().splitlines()[-1]}")
+
+        _prompt_vercel_sandbox_settings(config)
+
    elif selected_backend == "ssh":
        print_success("Terminal backend: SSH")
        print_info("Run commands on a remote machine via SSH.")
@@ -1501,6 +1628,8 @@ def setup_terminal_backend(config: dict):
    save_env_value("TERMINAL_ENV", selected_backend)
    if selected_backend == "modal":
        save_env_value("TERMINAL_MODAL_MODE", config["terminal"].get("modal_mode", "auto"))
+    if selected_backend == "vercel_sandbox":
+        save_env_value("TERMINAL_VERCEL_RUNTIME", config["terminal"].get("vercel_runtime", "node24"))
    save_config(config)
    print()
    print_success(f"Terminal backend set to: {selected_backend}")
@@ -1545,7 +1674,7 @@ def setup_agent_settings(config: dict):

    # ── Max Iterations ──
    current_max = get_env_value("HERMES_MAX_ITERATIONS") or str(
-        config.get("agent", {}).get("max_turns", 90)
+        cfg_get(config, "agent", "max_turns", default=90)
    )
    print_info("Maximum tool-calling iterations per conversation.")
    print_info("Higher = more complex tasks, but costs more tokens.")
@@ -1573,7 +1702,7 @@ def setup_agent_settings(config: dict):
    print_info("  all     — Show every tool call with a short preview")
    print_info("  verbose — Full args, results, and debug logs")

-    current_mode = config.get("display", {}).get("tool_progress", "all")
+    current_mode = cfg_get(config, "display", "tool_progress", default="all")
    mode = prompt("Tool progress mode", current_mode)
    if mode.lower() in ("off", "new", "all", "verbose"):
        if "display" not in config:
@@ -1593,7 +1722,7 @@ def setup_agent_settings(config: dict):

    config.setdefault("compression", {})["enabled"] = True

-    current_threshold = config.get("compression", {}).get("threshold", 0.50)
+    current_threshold = cfg_get(config, "compression", "threshold", default=0.50)
    threshold_str = prompt("Compression threshold (0.5-0.95)", str(current_threshold))
    try:
        threshold = float(threshold_str)
@@ -2075,80 +2204,7 @@ def _setup_mattermost():
    home_channel = prompt("Home channel ID (leave empty to set later with /set-home)")
    if home_channel:
        save_env_value("MATTERMOST_HOME_CHANNEL", home_channel)
-
-
-def _setup_whatsapp():
-    """Configure WhatsApp bridge."""
-    print_header("WhatsApp")
-    existing = get_env_value("WHATSAPP_ENABLED")
-    if existing:
-        print_info("WhatsApp: already enabled")
-        return
-
-    print_info("WhatsApp connects via a built-in bridge (Baileys).")
-    print_info("Requires Node.js. Run 'hermes whatsapp' for guided setup.")
-    print()
-    if prompt_yes_no("Enable WhatsApp now?", True):
-        save_env_value("WHATSAPP_ENABLED", "true")
-        print_success("WhatsApp enabled")
-        print_info("Run 'hermes whatsapp' to choose your mode (separate bot number")
-        print_info("or personal self-chat) and pair via QR code.")
-
-
-def _setup_weixin():
-    """Configure Weixin (personal WeChat) via iLink Bot API QR login."""
-    from hermes_cli.gateway import _setup_weixin as _gateway_setup_weixin
-    _gateway_setup_weixin()
-
-
-def _setup_signal():
-    """Configure Signal via gateway setup."""
-    from hermes_cli.gateway import _setup_signal as _gateway_setup_signal
-    _gateway_setup_signal()
-
-
-def _setup_email():
-    """Configure Email via gateway setup."""
-    from hermes_cli.gateway import _setup_email as _gateway_setup_email
-    _gateway_setup_email()
-
-
-def _setup_sms():
-    """Configure SMS (Twilio) via gateway setup."""
-    from hermes_cli.gateway import _setup_sms as _gateway_setup_sms
-    _gateway_setup_sms()
-
-
-def _setup_dingtalk():
-    """Configure DingTalk via gateway setup."""
-    from hermes_cli.gateway import _setup_dingtalk as _gateway_setup_dingtalk
-    _gateway_setup_dingtalk()
-
-
-def _setup_feishu():
-    """Configure Feishu / Lark via gateway setup."""
-    from hermes_cli.gateway import _setup_feishu as _gateway_setup_feishu
-    _gateway_setup_feishu()
-
-
-def _setup_yuanbao():
-    """Configure Yuanbao via gateway setup."""
-    from hermes_cli.gateway import _setup_yuanbao as _gateway_setup_yuanbao
-    _gateway_setup_yuanbao()
-
-
-def _setup_wecom():
-    """Configure WeCom (Enterprise WeChat) via gateway setup."""
-    from hermes_cli.gateway import _setup_wecom as _gateway_setup_wecom
-    _gateway_setup_wecom()
-
-
-def _setup_wecom_callback():
-    """Configure WeCom Callback (self-built app) via gateway setup."""
-    from hermes_cli.gateway import _setup_wecom_callback as _gw_setup
-    _gw_setup()
-
-
+    print_info("   Open config in your editor:  hermes config edit")


 def _setup_bluebubbles():
@@ -2266,49 +2322,27 @@ def _setup_webhooks():
    print_info("   https://hermes-agent.nousresearch.com/docs/user-guide/messaging/webhooks/#configuring-routes")
    print()
    print_info("   Open config in your editor:  hermes config edit")
-
-
-# Platform registry for the gateway checklist
-_GATEWAY_PLATFORMS = [
-    ("Telegram", "TELEGRAM_BOT_TOKEN", _setup_telegram),
-    ("Discord", "DISCORD_BOT_TOKEN", _setup_discord),
-    ("Slack", "SLACK_BOT_TOKEN", _setup_slack),
-    ("Signal", "SIGNAL_HTTP_URL", _setup_signal),
-    ("Email", "EMAIL_ADDRESS", _setup_email),
-    ("SMS (Twilio)", "TWILIO_ACCOUNT_SID", _setup_sms),
-    ("Matrix", "MATRIX_ACCESS_TOKEN", _setup_matrix),
-    ("Mattermost", "MATTERMOST_TOKEN", _setup_mattermost),
-    ("WhatsApp", "WHATSAPP_ENABLED", _setup_whatsapp),
-    ("DingTalk", "DINGTALK_CLIENT_ID", _setup_dingtalk),
-    ("Feishu / Lark", "FEISHU_APP_ID", _setup_feishu),
-    ("Yuanbao", "YUANBAO_APP_ID", _setup_yuanbao),
-    ("WeCom (Enterprise WeChat)", "WECOM_BOT_ID", _setup_wecom),
-    ("WeCom Callback (Self-Built App)", "WECOM_CALLBACK_CORP_ID", _setup_wecom_callback),
-    ("Weixin (WeChat)", "WEIXIN_ACCOUNT_ID", _setup_weixin),
-    ("BlueBubbles (iMessage)", "BLUEBUBBLES_SERVER_URL", _setup_bluebubbles),
-    ("QQ Bot", "QQ_APP_ID", _setup_qqbot),
-    ("Webhooks (GitHub, GitLab, etc.)", "WEBHOOK_ENABLED", _setup_webhooks),
-]
+    print_info("   Open config in your editor:  hermes config edit")


 def setup_gateway(config: dict):
    """Configure messaging platform integrations."""
+    from hermes_cli.gateway import _all_platforms, _platform_status, _configure_platform
+
    print_header("Messaging Platforms")
    print_info("Connect to messaging platforms to chat with Hermes from anywhere.")
    print_info("Toggle with Space, confirm with Enter.")
    print()

-    # Build checklist items, pre-selecting already-configured platforms
+    platforms = _all_platforms()
+
+    # Build checklist, pre-selecting already-configured platforms.
    items = []
    pre_selected = []
-    for i, (name, env_var, _func) in enumerate(_GATEWAY_PLATFORMS):
-        # Matrix has two possible env vars
-        is_configured = bool(get_env_value(env_var))
-        if name == "Matrix" and not is_configured:
-            is_configured = bool(get_env_value("MATRIX_PASSWORD"))
-        label = f"{name}  (configured)" if is_configured else name
-        items.append(label)
-        if is_configured:
+    for i, plat in enumerate(platforms):
+        status = _platform_status(plat)
+        items.append(f"{plat['emoji']} {plat['label']}  ({status})")
+        if status == "configured":
            pre_selected.append(i)

    selected = prompt_checklist("Select platforms to configure:", items, pre_selected)
@@ -2318,28 +2352,22 @@ def setup_gateway(config: dict):
        return

    for idx in selected:
-        name, _env_var, setup_func = _GATEWAY_PLATFORMS[idx]
-        setup_func()
+        _configure_platform(platforms[idx])

    # ── Gateway Service Setup ──
-    any_messaging = (
-        get_env_value("TELEGRAM_BOT_TOKEN")
-        or get_env_value("DISCORD_BOT_TOKEN")
-        or get_env_value("SLACK_BOT_TOKEN")
-        or get_env_value("SIGNAL_HTTP_URL")
-        or get_env_value("EMAIL_ADDRESS")
-        or get_env_value("TWILIO_ACCOUNT_SID")
-        or get_env_value("MATTERMOST_TOKEN")
-        or get_env_value("MATRIX_ACCESS_TOKEN")
-        or get_env_value("MATRIX_PASSWORD")
-        or get_env_value("WHATSAPP_ENABLED")
-        or get_env_value("DINGTALK_CLIENT_ID")
-        or get_env_value("FEISHU_APP_ID")
-        or get_env_value("WECOM_BOT_ID")
-        or get_env_value("WEIXIN_ACCOUNT_ID")
-        or get_env_value("BLUEBUBBLES_SERVER_URL")
-        or get_env_value("QQ_APP_ID")
-        or get_env_value("WEBHOOK_ENABLED")
+    # Count any platform (built-in or plugin) the user configured during this
+    # setup pass — reuses ``_platform_status`` so plugin platforms like IRC
+    # are picked up without another hard-coded env-var list.
+    def _is_progress(status: str) -> bool:
+        """Return True unless ``status`` is an unconfigured/partial/disabled state.
+
+        The comparison is case-insensitive: a status equal to "not configured",
+        or starting with "partially" or "plugin disabled", yields False. Any
+        other status string (including an empty one) yields True.
+        """
+        s = status.lower()
+        return not (
+            s == "not configured"
+            or s.startswith("partially")
+            or s.startswith("plugin disabled")
+        )
+
+    any_messaging = any(
+        _is_progress(_platform_status(p)) for p in _all_platforms()
    )
    if any_messaging:
        print()
@@ -2601,21 +2629,26 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
        return "configured"

    elif section_key == "terminal":
-        backend = config.get("terminal", {}).get("backend", "local")
+        backend = cfg_get(config, "terminal", "backend", default="local")
        return f"backend: {backend}"

    elif section_key == "agent":
-        max_turns = config.get("agent", {}).get("max_turns", 90)
+        max_turns = cfg_get(config, "agent", "max_turns", default=90)
        return f"max turns: {max_turns}"

    elif section_key == "gateway":
-        platforms = [
-            _gateway_platform_short_label(label)
-            for label, env_var, _ in _GATEWAY_PLATFORMS
-            if get_env_value(env_var)
+        from hermes_cli.gateway import _all_platforms, _platform_status
+        # Count any non-empty status other than the "not configured" sentinel —
+        # platforms like WhatsApp ("enabled, not paired"), Matrix ("configured
+        # + E2EE"), and Signal ("partially configured") all indicate the user
+        # has already started setup and we shouldn't force the section to rerun.
+        configured = [
+            _gateway_platform_short_label(plat["label"])
+            for plat in _all_platforms()
+            if _platform_status(plat) and _platform_status(plat) != "not configured"
        ]
-        if platforms:
-            return ", ".join(platforms)
+        if configured:
+            return ", ".join(configured)
        return None  # No platforms configured — section must run

    elif section_key == "tools":
@@ -3120,33 +3153,14 @@ def run_setup_wizard(args):
    _offer_launch_chat()


-def _resolve_hermes_chat_argv() -> Optional[list[str]]:
-    """Resolve argv for launching ``hermes chat`` in a fresh process."""
-    hermes_bin = shutil.which("hermes")
-    if hermes_bin:
-        return [hermes_bin, "chat"]
-
-    try:
-        if importlib.util.find_spec("hermes_cli") is not None:
-            return [sys.executable, "-m", "hermes_cli.main", "chat"]
-    except Exception:
-        pass
-
-    return None
-
-
 def _offer_launch_chat():
    """Prompt the user to jump straight into chat after setup."""
    print()
    if not prompt_yes_no("Launch hermes chat now?", True):
        return

-    chat_argv = _resolve_hermes_chat_argv()
-    if not chat_argv:
-        print_info("Could not relaunch Hermes automatically. Run 'hermes chat' manually.")
-        return
-
-    os.execvp(chat_argv[0], chat_argv)
+    from hermes_cli.relaunch import relaunch
+    relaunch(["chat"])


 def _run_first_time_quick_setup(config: dict, hermes_home, is_existing: bool):
@@ -13,7 +13,7 @@ Config stored in ~/.hermes/config.yaml under:
 """
 from typing import List, Optional, Set

-from hermes_cli.config import load_config, save_config
+from hermes_cli.config import cfg_get, load_config, save_config
 from hermes_cli.colors import Colors, color
 from hermes_cli.platforms import PLATFORMS as _PLATFORMS

@@ -30,7 +30,7 @@ def get_disabled_skills(config: dict, platform: Optional[str] = None) -> Set[str
    global_disabled = set(skills_cfg.get("disabled", []))
    if platform is None:
        return global_disabled
-    platform_disabled = skills_cfg.get("platform_disabled", {}).get(platform)
+    platform_disabled = cfg_get(skills_cfg, "platform_disabled", platform)
    if platform_disabled is None:
        return global_disabled
    return set(platform_disabled)
@@ -68,7 +68,7 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      welcome: "Welcome message"          # Shown at CLI startup
      goodbye: "Goodbye! ⚕"              # Shown on exit
      response_label: " ⚕ Hermes "       # Response box header label
-      prompt_symbol: "❯ "                # Input prompt symbol
+      prompt_symbol: "❯"                 # Input prompt symbol (bare token; renderers add trailing space)
      help_header: "(^_^)? Commands"      # /help header text

    # Tool prefix: character for tool output lines (default: ┊)
@@ -190,7 +190,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! ⚕",
            "response_label": " ⚕ Hermes ",
-            "prompt_symbol": "❯ ",
+            "prompt_symbol": "❯",
            "help_header": "(^_^)? Available Commands",
        },
        "tool_prefix": "┊",
@@ -242,7 +242,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Ares Agent! Type your message or /help for commands.",
            "goodbye": "Farewell, warrior! ⚔",
            "response_label": " ⚔ Ares ",
-            "prompt_symbol": "⚔ ❯ ",
+            "prompt_symbol": "⚔",
            "help_header": "(⚔) Available Commands",
        },
        "tool_prefix": "╎",
@@ -301,7 +301,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! ⚕",
            "response_label": " ⚕ Hermes ",
-            "prompt_symbol": "❯ ",
+            "prompt_symbol": "❯",
            "help_header": "[?] Available Commands",
        },
        "tool_prefix": "┊",
@@ -340,7 +340,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! ⚕",
            "response_label": " ⚕ Hermes ",
-            "prompt_symbol": "❯ ",
+            "prompt_symbol": "❯",
            "help_header": "(^_^)? Available Commands",
        },
        "tool_prefix": "┊",
@@ -377,7 +377,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! ⚕",
            "response_label": " ⚕ Hermes ",
-            "prompt_symbol": "❯ ",
+            "prompt_symbol": "❯",
            "help_header": "[?] Available Commands",
        },
        "tool_prefix": "│",
@@ -414,7 +414,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Hermes Agent! Type your message or /help for commands.",
            "goodbye": "Goodbye! \u2695",
            "response_label": " \u2695 Hermes ",
-            "prompt_symbol": "\u276f ",
+            "prompt_symbol": "\u276f",
            "help_header": "(^_^)? Available Commands",
        },
        "tool_prefix": "\u250a",
@@ -467,7 +467,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Poseidon Agent! Type your message or /help for commands.",
            "goodbye": "Fair winds! Ψ",
            "response_label": " Ψ Poseidon ",
-            "prompt_symbol": "Ψ ❯ ",
+            "prompt_symbol": "Ψ",
            "help_header": "(Ψ) Available Commands",
        },
        "tool_prefix": "│",
@@ -539,7 +539,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Sisyphus Agent! Type your message or /help for commands.",
            "goodbye": "The boulder waits. ◉",
            "response_label": " ◉ Sisyphus ",
-            "prompt_symbol": "◉ ❯ ",
+            "prompt_symbol": "◉",
            "help_header": "(◉) Available Commands",
        },
        "tool_prefix": "│",
@@ -612,7 +612,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "welcome": "Welcome to Charizard Agent! Type your message or /help for commands.",
            "goodbye": "Flame out! ✦",
            "response_label": " ✦ Charizard ",
-            "prompt_symbol": "✦ ❯ ",
+            "prompt_symbol": "✦",
            "help_header": "(✦) Available Commands",
        },
        "tool_prefix": "│",
@@ -780,12 +780,21 @@ def init_skin_from_config(config: dict) -> None:
 # =============================================================================


-def get_active_prompt_symbol(fallback: str = "❯ ") -> str:
-    """Get the interactive prompt symbol from the active skin."""
+def get_active_prompt_symbol(fallback: str = "❯") -> str:
+    """Return the interactive prompt symbol with a single trailing space.
+
+    Skins store ``prompt_symbol`` as a bare token (no spaces). The trailing
+    space is appended here so callers can drop it straight into a rendered
+    prompt without hand-rolling whitespace.
+    """
    try:
-        return get_active_skin().get_branding("prompt_symbol", fallback)
+        raw = get_active_skin().get_branding("prompt_symbol", fallback)
    except Exception:
-        return fallback
+        raw = fallback
+
+    cleaned = (raw or fallback).strip()
+
+    return f"{cleaned or fallback.strip()} "



@@ -18,6 +18,7 @@ for reinstall when scopes/commands change.
 from __future__ import annotations

 import json
+import os
 import sys
 from pathlib import Path

@@ -128,7 +129,7 @@ def slack_manifest_command(args) -> int:

                target = Path(get_hermes_home()) / "slack-manifest.json"
            except Exception:
-                target = Path.home() / ".hermes" / "slack-manifest.json"
+                target = Path(os.environ.get("HERMES_HOME") or str(Path.home() / ".hermes")) / "slack-manifest.json"
        else:
            target = Path(write_target).expanduser()
        target.parent.mkdir(parents=True, exist_ok=True)
--- a/Show More
+++ b/Show More