feat: provider modules — ProviderProfile ABC, 30 providers, fetch_models, transport single-path

feat: provider modules — ProviderProfile ABC, 29 providers, fetch_models, transport single-path. Introduces providers/ as the single source of truth for every inference provider. All 29 providers are declared, with their data cross-checked against auth.py, runtime_provider.py and auxiliary_client.py. Rebased onto main (30307a980). Incorporates post-salvage fixes from 56724147e (gmi aux model google/gemini-3.1-flash-lite-preview, already set in providers/gmi.py).
2026-04-29 20:10:09 +05:30
908 changed files with 11120 additions and 119223 deletions
@@ -5,15 +5,7 @@

 # Dependencies
 node_modules
-**/node_modules
 .venv
-**/.venv
-
-# Built artifacts that are regenerated inside the image.  Excluded so local
-# rebuilds on the developer's machine don't invalidate the npm-install layer
-# that now depends on the full ui-tui/packages/hermes-ink/ tree being present.
-ui-tui/dist/
-ui-tui/packages/hermes-ink/dist/

 # CI/CD
 .github
@@ -398,19 +398,3 @@ IMAGE_TOOLS_DEBUG=false
 # Override STT provider endpoints (for proxies or self-hosted instances)
 # GROQ_BASE_URL=https://api.groq.com/openai/v1
 # STT_OPENAI_BASE_URL=https://api.openai.com/v1
-
-# =============================================================================
-# MICROSOFT TEAMS INTEGRATION
-# =============================================================================
-# Register a Bot in Azure: https://dev.botframework.com/ → "Register a bot"
-# Or use Azure Portal: Azure Active Directory → App registrations → New registration
-# Then add the bot to Teams via the Bot Framework or App Studio.
-#
-# TEAMS_CLIENT_ID=                     # Azure AD App (client) ID
-# TEAMS_CLIENT_SECRET=                 # Azure AD client secret value
-# TEAMS_TENANT_ID=                     # Azure AD tenant ID (or "common" for multi-tenant)
-# TEAMS_ALLOWED_USERS=                 # Comma-separated AAD object IDs or UPNs
-# TEAMS_ALLOW_ALL_USERS=false          # Set true to skip the allowlist
-# TEAMS_HOME_CHANNEL=                  # Default channel/chat ID for cron delivery
-# TEAMS_HOME_CHANNEL_NAME=             # Display name for the home channel
-# TEAMS_PORT=3978                      # Webhook listen port (Bot Framework default)
@@ -1,18 +1,8 @@
 name: 'Setup Nix'
-description: 'Install Nix and configure Cachix binary cache'
-
-inputs:
-  cachix-auth-token:
-    description: 'Cachix auth token (enables push). Omit for read-only.'
-    required: false
-    default: ''
+description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'

 runs:
  using: composite
  steps:
    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
-    - uses: cachix/cachix-action@1eb2ef646ac0255473d23a5907ad7b04ce94065c # v17
-      with:
-        name: hermes-agent
-        authToken: ${{ inputs.cachix-auth-token }}
-      continue-on-error: true
+    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
@@ -76,16 +76,6 @@ jobs:
        run: |
          mkdir -p _site/docs
          cp -r website/build/* _site/docs/
-          # llms.txt / llms-full.txt are also published at the site root
-          # (https://hermes-agent.nousresearch.com/llms.txt) because some
-          # agents and IDE plugins probe the classic root-level path rather
-          # than /docs/llms.txt. Same file, two URLs, one source of truth.
-          if [ -f website/build/llms.txt ]; then
-            cp website/build/llms.txt _site/llms.txt
-          fi
-          if [ -f website/build/llms-full.txt ]; then
-            cp website/build/llms-full.txt _site/llms-full.txt
-          fi

      - name: Upload artifact
        uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa  # v3
@@ -0,0 +1,68 @@
+name: Nix Lockfile Check
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-check-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Resolve head SHA
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
+      - name: Check lockfile hashes
+        id: check
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      - name: Post sticky PR comment (stale)
+        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
+
+      - name: Clear sticky PR comment (resolved)
+        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Fail if stale
+        if: steps.check.outputs.stale == 'true'
+        run: exit 1
@@ -1,13 +1,6 @@
 name: Nix Lockfile Fix

 on:
-  push:
-    branches: [main]
-    paths:
-      - 'ui-tui/package-lock.json'
-      - 'ui-tui/package.json'
-      - 'web/package-lock.json'
-      - 'web/package.json'
  workflow_dispatch:
    inputs:
      pr_number:
@@ -26,105 +19,9 @@ concurrency:
  cancel-in-progress: false

 jobs:
-  # ── Auto-fix on main ───────────────────────────────────────────────
-  # Fires when a push to main touches package.json or package-lock.json
-  # in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
-  # update commit directly to main so Nix builds never stay broken.
-  #
-  # Safety invariants:
-  #   1. The fix commit only touches nix/*.nix files, which are NOT in
-  #      the paths filter above, so this cannot re-trigger itself.
-  #   2. An explicit file-whitelist check before commit aborts if
-  #      fix-lockfiles ever modifies unexpected files.
-  #   3. Job-level concurrency with cancel-in-progress: true ensures
-  #      back-to-back pushes collapse to the newest; ref: main checkout
-  #      always operates on the latest branch state.
-  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
-  #      triggers downstream nix.yml verification.
-  auto-fix-main:
-    if: github.event_name == 'push'
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    concurrency:
-      group: auto-fix-main
-      cancel-in-progress: true
-    steps:
-      - name: Generate GitHub App token
-        id: app-token
-        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
-        with:
-          app-id: ${{ secrets.APP_ID }}
-          private-key: ${{ secrets.APP_PRIVATE_KEY }}
-
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-        with:
-          ref: main
-          token: ${{ steps.app-token.outputs.token }}
-
-      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles -- --apply
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-
-          # Ensure only nix files were modified — prevents accidental
-          # self-triggering if fix-lockfiles ever touches package files.
-          unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
-          if [ -n "$unexpected" ]; then
-            echo "::error::Unexpected modified files: $unexpected"
-            exit 1
-          fi
-
-          # Record the base SHA before committing — used to detect package
-          # file changes if we need to rebase after a non-fast-forward push.
-          BASE_SHA="$(git rev-parse HEAD)"
-
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/tui.nix nix/web.nix
-          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
-            -m "Source: $GITHUB_SHA" \
-            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
-
-          # Retry push with rebase in case main advanced with an unrelated
-          # commit during the nix build. Without this, a non-fast-forward
-          # rejection silently loses the fix. If package files changed during
-          # the rebase, abort — a fresh auto-fix run will handle the new state.
-          for attempt in 1 2 3; do
-            if git push origin HEAD:main; then
-              exit 0
-            fi
-            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
-            git fetch origin main
-
-            # If package files changed between our base and the new main,
-            # our computed hashes are stale. Abort and let the next triggered
-            # run recompute from the correct package-lock state.
-            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
-              'ui-tui/package-lock.json' 'ui-tui/package.json' \
-              'web/package-lock.json' 'web/package.json' || true)"
-            if [ -n "$pkg_changed" ]; then
-              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
-              exit 0
-            fi
-
-            git rebase origin/main
-          done
-          echo "::error::Failed to push after 3 rebase attempts"
-          exit 1
-
-  # ── PR fix (manual / checkbox) ─────────────────────────────────────
-  # Existing behavior: run on manual dispatch OR when a task-list
-  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
  fix:
+    # Run on manual dispatch OR when a task-list checkbox in the sticky
+    # lockfile-check comment flips from `[ ]` to `[x]`.
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment'
@@ -202,12 +99,10 @@ jobs:
          fetch-depth: 0

      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}

      - name: Apply lockfile hashes
        id: apply
-        run: nix run .#fix-lockfiles
+        run: nix run .#fix-lockfiles -- --apply

      - name: Commit & push
        if: steps.apply.outputs.changed == 'true'
@@ -7,7 +7,6 @@ on:

 permissions:
  contents: read
-  pull-requests: write

 concurrency:
  group: nix-${{ github.ref }}
@@ -23,95 +22,12 @@ jobs:
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
      - uses: ./.github/actions/nix-setup
-        with:
-          cachix-auth-token: ${{ secrets.CACHIX_AUTH_TOKEN }}
-
-      - name: Resolve head SHA
-        if: github.event_name == 'pull_request'
-        id: sha
-        shell: bash
-        run: |
-          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
-          echo "full=$FULL" >> "$GITHUB_OUTPUT"
-          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
-
      - name: Check flake
-        id: flake
        if: runner.os == 'Linux'
-        continue-on-error: true
        run: nix flake check --print-build-logs
-
      - name: Build package
-        id: build
        if: runner.os == 'Linux'
-        continue-on-error: true
        run: nix build --print-build-logs
-
-      # When the real Nix build fails, run a targeted diagnostic to see if
-      # the failure is specifically a stale npm lockfile hash in one of the
-      # known npm subpackages (tui / web).  This avoids surfacing a generic
-      # "build failed" message when the fix is a single known command.
-      - name: Diagnose npm lockfile hashes
-        id: hash_check
-        if: (steps.flake.outcome == 'failure' || steps.build.outcome == 'failure') && runner.os == 'Linux'
-        continue-on-error: true
-        env:
-          LINK_SHA: ${{ steps.sha.outputs.full }}
-        run: nix run .#fix-lockfiles -- --check
-
-      # If fix-lockfiles itself crashes (infrastructure blip, cache throttle,
-      # etc.) it won't set stale=true/false.  Treat that as a distinct failure
-      # mode rather than silently ignoring it.
-      - name: Fail if hash check crashed without reporting
-        if: steps.hash_check.outcome == 'failure' && steps.hash_check.outputs.stale != 'true' && steps.hash_check.outputs.stale != 'false'
-        run: |
-          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
-          exit 1
-
-      - name: Post sticky PR comment (stale hashes)
-        if: steps.hash_check.outputs.stale == 'true' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          message: |
-            ### ⚠️ npm lockfile hash out of date
-
-            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
-
-            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
-
-            ${{ steps.hash_check.outputs.report }}
-
-            #### Apply the fix
-
-            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
-            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
-            - Or locally: `nix run .#fix-lockfiles` and commit the diff
-
-      # Clear the sticky comment when either the build passed outright (no
-      # hash check needed) or the hash check explicitly returned stale=false
-      # (build failed for a non-hash reason).
-      - name: Clear sticky PR comment (resolved)
-        if: |
-          github.event_name == 'pull_request' &&
-          runner.os == 'Linux' &&
-          (steps.hash_check.outputs.stale == 'false' ||
-           (steps.flake.outcome == 'success' && steps.build.outcome == 'success'))
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          delete: true
-
-      - name: Final fail if build or flake failed
-        if: steps.flake.outcome == 'failure' || steps.build.outcome == 'failure'
-        run: |
-          if [ "${{ steps.hash_check.outputs.stale }}" == "true" ]; then
-            echo "::error::Nix build failed due to stale npm lockfile hash. Run: nix run .#fix-lockfiles"
-          else
-            echo "::error::Nix build/flake check failed. See logs above."
-          fi
-          exit 1
-
      - name: Evaluate flake (macOS)
        if: runner.os == 'macOS'
        run: nix flake show --json > /dev/null
@@ -38,7 +38,7 @@ hermes-agent/
 │   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
 │   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
 │   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
-│   └── builtin_hooks/    # Extension point for always-registered gateway hooks (none shipped)
+│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
 ├── plugins/              # Plugin system (see "Plugins" section below)
 │   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
 │   ├── context_engine/   # Context-engine plugins
@@ -494,7 +494,7 @@ branding:
  agent_name: "My Agent"
  welcome: "Welcome message"
  response_label: " ⚔ Agent "
-  prompt_symbol: "⚔"
+  prompt_symbol: "⚔ ❯ "

 tool_prefix: "╎"             # Tool output line prefix
 ```
@@ -14,7 +14,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-    build-essential curl nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -28,26 +28,10 @@ WORKDIR /opt/hermes
 # ---------- Layer-cached dependency install ----------
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
-#
-# ui-tui/packages/hermes-ink/ is copied IN FULL (not just its manifests)
-# because it is referenced as a `file:` workspace dependency from
-# ui-tui/package.json.  Copying the tree up front lets npm resolve the
-# workspace to real content instead of stopping at a bare package.json.
 COPY package.json package-lock.json ./
 COPY web/package.json web/package-lock.json web/
 COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
-COPY ui-tui/packages/hermes-ink/ ui-tui/packages/hermes-ink/
-
-# `npm_config_install_links=false` forces npm to install `file:` deps as
-# symlinks (the npm 10+ default) even on Debian's older bundled npm 9.x,
-# which defaults to `install-links=true` and installs file deps as *copies*.
-# The host-side package-lock.json is generated with a newer npm that uses
-# symlinks, so an install-as-copy produces a hidden node_modules/.package-lock.json
-# that permanently disagrees with the root lock on the @hermes/ink entry.
-# That disagreement trips the TUI launcher's `_tui_need_npm_install()`
-# check on every startup and triggers a runtime `npm install` that then
-# fails with EACCES (node_modules/ is root-owned from build time).
-ENV npm_config_install_links=false
+COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
@@ -1,505 +0,0 @@
-# Hermes Agent v0.12.0 (v2026.4.30)
-
-**Release Date:** April 30, 2026
-**Since v0.11.0:** 1,096 commits · 550 merged PRs · 1,270 files changed · 217,776 insertions · 213 community contributors (including co-authors)
-
-> The Curator release — Hermes Agent now maintains itself. An autonomous background Curator grades, prunes, and consolidates your skill library on its own schedule. The self-improvement loop that reviews what to save got a substantial upgrade. Four new inference providers, an 18th messaging platform, a 19th via Teams plugin, native Spotify + Google Meet integrations, ComfyUI and TouchDesigner-MCP moved from optional to bundled-by-default, and a ~57% cut to visible TUI cold start.
-
---
-
-## ✨ Highlights
-
- **Autonomous Curator** — `hermes curator` runs as a background agent on the gateway's cron ticker (7-day cycle default). It grades your skill library, consolidates related skills, prunes dead ones, and writes per-run reports to `logs/curator/run.json` + `REPORT.md`. Archived skills are classified consolidated-vs-pruned via model + heuristic. Defense-in-depth gates protect bundled/hub skills from mutation. Unified under `auxiliary.curator` — pick the curator's model in `hermes model`, manage it from the dashboard. `hermes curator status` ranks skills by usage (most-used / least-used). ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277), [#17307](https://github.com/NousResearch/hermes-agent/pull/17307), [#17941](https://github.com/NousResearch/hermes-agent/pull/17941), [#17868](https://github.com/NousResearch/hermes-agent/pull/17868), [#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
-
- **Self-improvement loop — substantially upgraded** — The background review fork (the core of Hermes' self-improvement: after each turn it decides what memories/skills to save or update) is now class-first (rubric-based rather than free-form), active-update biased (prefers the skill the agent just loaded), handles `references/`/`templates/` sub-files, and properly inherits the parent's live runtime (provider, model, credentials actually propagate). Restricted to memory + skills toolsets so it can't sprawl. Memory providers shut down cleanly. Prior-turn tool messages excluded from the summary so the fork sees a clean context. ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026), [#17213](https://github.com/NousResearch/hermes-agent/pull/17213), [#16099](https://github.com/NousResearch/hermes-agent/pull/16099), [#16569](https://github.com/NousResearch/hermes-agent/pull/16569), [#16204](https://github.com/NousResearch/hermes-agent/pull/16204), [#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
-
- **Skill integrations — major expansion** — **ComfyUI v5** with official CLI + REST + hardware-gated local install, moved from optional to **built-in by default** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734)). **TouchDesigner-MCP** bundled by default, expanded with GLSL, post-FX, audio, geometry, and 9 new reference docs ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753), [#16624](https://github.com/NousResearch/hermes-agent/pull/16624), [#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @kshitijk4poor + @SHL0MS). **Humanizer** skill ports a text-cleaner that strips AI-isms ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787)). **claude-design** HTML artifact skill + design-md (Google DESIGN.md spec) + airtable salvage + `skill_manage` edits in `external_dirs` + direct-URL skill install + `/reload-skills` slash command. ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358), [#14876](https://github.com/NousResearch/hermes-agent/pull/14876), [#16291](https://github.com/NousResearch/hermes-agent/pull/16291), [#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#16323](https://github.com/NousResearch/hermes-agent/pull/16323), [#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
-
- **LM Studio — first-class provider** — upgraded from a custom-endpoint alias to a full-blown native provider: dedicated auth, `hermes doctor` checks, reasoning transport, live `/models` listing. (Salvage of @kshitijk4poor's #17061.) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
-
- **Four more new inference providers** — **GMI Cloud** (first-class, salvage of #11955 — @isaachuangGMICLOUD), **Azure AI Foundry** with auto-detection, **MiniMax OAuth** with PKCE browser flow (salvage #15203), **Tencent Tokenhub** (salvage of #16860). ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663), [#15845](https://github.com/NousResearch/hermes-agent/pull/15845), [#17524](https://github.com/NousResearch/hermes-agent/pull/17524), [#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
-
- **Pluggable gateway platforms + Microsoft Teams** — the gateway is now a plugin host. Drop-in messaging adapters live outside the core, and Microsoft Teams is the first plugin-shipped platform. (Salvage of #17664.) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751), [#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
-
- **Tencent 元宝 (Yuanbao) — 18th messaging platform** — native gateway adapter with text + media delivery. ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424))
-
- **Spotify — native tools + bundled skill + wizard** — 7 tools (play, search, queue, playlists, devices) behind PKCE OAuth, interactive setup wizard, bundled skill, surfacing in `hermes tools`, cron usage documented. ([#15121](https://github.com/NousResearch/hermes-agent/pull/15121), [#15130](https://github.com/NousResearch/hermes-agent/pull/15130), [#15154](https://github.com/NousResearch/hermes-agent/pull/15154), [#15180](https://github.com/NousResearch/hermes-agent/pull/15180))
-
- **Google Meet plugin** — join calls, transcribe, speak, follow up. Realtime OpenAI transport + Node bot server, full pipeline bundled as a plugin. ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364))
-
- **`hermes -z` one-shot mode + `hermes update --check`** — non-interactive `hermes -z <prompt>` with `--model`/`--provider`/`HERMES_INFERENCE_MODEL`. `hermes update --check` preflight. Opt-in pre-update HERMES_HOME backup. ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702), [#15704](https://github.com/NousResearch/hermes-agent/pull/15704), [#15841](https://github.com/NousResearch/hermes-agent/pull/15841), [#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
-
- **Models dashboard tab + in-browser model config** — rich per-model analytics, switch main + auxiliary models from the dashboard. ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745), [#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
-
- **Remote model catalog manifest** — OpenRouter + Nous Portal model catalogs are now pulled from a remote manifest so new models show up without a release. ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
-
- **Native multimodal image routing** — images now route based on the model's actual vision capability rather than provider defaults. ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
-
- **Gateway media parity** — native multi-image sending across Telegram, Discord, Slack, Mattermost, Email, and Signal; centralized audio routing with FLAC support + Telegram document fallback. ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909), [#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
-
- **TUI catches up to (and past) the classic CLI** — LaTeX rendering (@austinpickett), `/reload` .env hot-reload, pluggable busy-indicator styles (@OutThisLife, #13610), opt-in auto-resume of last session, expanded light-terminal auto-detection, session delete from `/resume` picker with `d`, modified mouse-wheel line scroll, and a `/mouse` toggle that kills ConPTY's phantom mouse injection (@kevin-ho). ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175), [#17286](https://github.com/NousResearch/hermes-agent/pull/17286), [#17150](https://github.com/NousResearch/hermes-agent/pull/17150), [#17130](https://github.com/NousResearch/hermes-agent/pull/17130), [#17113](https://github.com/NousResearch/hermes-agent/pull/17113), [#17668](https://github.com/NousResearch/hermes-agent/pull/17668), [#17669](https://github.com/NousResearch/hermes-agent/pull/17669), [#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
-
- **Observability + achievements plugins** — bundled Langfuse observability plugin (salvage #16845) + bundled hermes-achievements plugin that scans full session history. ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917), [#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
-
- **TTS provider registry + Piper local TTS** — pluggable `tts.providers.<name>` registry; Piper ships as a native local TTS provider. (Closes #8508.) ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843), [#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
-
- **Vercel Sandbox backend** — Vercel sandboxes as an execute_code/terminal backend (@kshitijk4poor). ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
-
- **Secret redaction off by default** — default flipped to off. Prevents the long-standing patch-corruption incidents where fake secret-shaped substrings mangled tool outputs. Opt in via `redaction.enabled: true` when you need it. ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
-
- **Cold-start performance** — visible TUI cold start cut **~57%** via lazy agent init (@OutThisLife), lazy imports of OpenAI / Anthropic / Firecrawl / account_usage, mtime-cached `load_config()`, memoized `get_tool_definitions()` with TTL-cached `check_fn` results, precompiled dangerous-command patterns. ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190), [#17046](https://github.com/NousResearch/hermes-agent/pull/17046), [#17041](https://github.com/NousResearch/hermes-agent/pull/17041), [#17098](https://github.com/NousResearch/hermes-agent/pull/17098), [#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
-
- **Configurable prompt cache TTL** — `prompt_caching.cache_ttl` (5m default, 1h opt-in — cost savings for bursty sessions that keep cache warm). Salvage of #12659. ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
-
---
-
-## 🧠 Autonomous Curator & Self-Improvement Loop
-
-### Curator — autonomous skill maintenance
- **`hermes curator` as a background agent** — runs on the gateway's cron ticker, 7-day cycle by default, umbrella-first prompt, inherits parent config, unbounded iterations ([#17277](https://github.com/NousResearch/hermes-agent/pull/17277) — issue #7816)
- **Per-run reports** — `logs/curator/run.json` + `REPORT.md` per cycle ([#17307](https://github.com/NousResearch/hermes-agent/pull/17307))
- **Consolidated vs pruned classification** — archived skills split with model + heuristic ([#17941](https://github.com/NousResearch/hermes-agent/pull/17941))
- **`hermes curator status`** — ranks skills by usage, shows most-used and least-used ([#18033](https://github.com/NousResearch/hermes-agent/pull/18033))
- **Unified under `auxiliary.curator`** — pick the model in `hermes model`, configure from the dashboard ([#17868](https://github.com/NousResearch/hermes-agent/pull/17868))
- **Documentation** — dedicated curator feature page on the docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
- Fix: seed defaults on update, create `logs/curator/` directory, defer fire import ([#17927](https://github.com/NousResearch/hermes-agent/pull/17927))
- Fix: scan nested archive subdirs in `restore_skill` (@0xDevNinja) ([#17951](https://github.com/NousResearch/hermes-agent/pull/17951))
- Fix: use actual skill activity in curator status (@y0shua1ee) ([#17953](https://github.com/NousResearch/hermes-agent/pull/17953))
- Fix: `skill_manage` refuses writes on pinned skills; pinning now blocks curator writes ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562), [#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
- Fix: `bump_use()` wired into skill invocation + preload + skill_view (salvage #17782) ([#17932](https://github.com/NousResearch/hermes-agent/pull/17932))
-
-### Self-improvement loop (background review fork)
- **Class-first skill-review prompt** — rubric-based grading rather than free-form "should this update" ([#16026](https://github.com/NousResearch/hermes-agent/pull/16026))
- **Active-update bias** — prefers updating skills the agent just loaded, handles `references/` + `templates/` sub-files ([#17213](https://github.com/NousResearch/hermes-agent/pull/17213))
- **Fork inherits parent's live runtime** — provider, model, credentials actually propagate now ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
- **Scoped toolsets** — review fork restricted to memory + skills (no shell, no web) ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
- **Clean shutdown** — background review memory providers exit properly (salvage #15289) ([#16204](https://github.com/NousResearch/hermes-agent/pull/16204))
- **Clean context** — prior-history tool messages excluded from review summary (salvage #14967) ([#15057](https://github.com/NousResearch/hermes-agent/pull/15057))
-
---
-
-## 🧩 Skills Ecosystem
-
-### Skill integrations — newly bundled or promoted
- **ComfyUI v5** — official CLI + REST + hardware-gated local install; **moved from optional to built-in** ([#17610](https://github.com/NousResearch/hermes-agent/pull/17610), [#17631](https://github.com/NousResearch/hermes-agent/pull/17631), [#17734](https://github.com/NousResearch/hermes-agent/pull/17734), [#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
- **TouchDesigner-MCP** — **bundled by default** ([#16753](https://github.com/NousResearch/hermes-agent/pull/16753) — @kshitijk4poor), expanded with GLSL, post-FX, audio, geometry references ([#16624](https://github.com/NousResearch/hermes-agent/pull/16624)), 9 new reference docs ([#16768](https://github.com/NousResearch/hermes-agent/pull/16768) — @SHL0MS)
- **Humanizer** — strips AI-isms from text ([#16787](https://github.com/NousResearch/hermes-agent/pull/16787))
- **claude-design** — HTML artifact skill with disambiguation from other design skills ([#16358](https://github.com/NousResearch/hermes-agent/pull/16358))
- **design-md** — Google's DESIGN.md spec skill ([#14876](https://github.com/NousResearch/hermes-agent/pull/14876))
- **airtable** — salvaged skill + skill API keys wired into `.env` (#15838) ([#16291](https://github.com/NousResearch/hermes-agent/pull/16291))
- **pretext** — creative browser demos with @chenglou/pretext ([#17259](https://github.com/NousResearch/hermes-agent/pull/17259))
- **spike** + **sketch** — throwaway experiments + HTML mockups, adapted from gsd-build ([#17421](https://github.com/NousResearch/hermes-agent/pull/17421))
-
-### Skills UX
- **Install skills from a direct HTTP(S) URL** — `hermes skills install <url>` ([#16323](https://github.com/NousResearch/hermes-agent/pull/16323))
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
- **`hermes skills list`** shows enabled/disabled status ([#16129](https://github.com/NousResearch/hermes-agent/pull/16129))
- **`skill_manage` refuses writes on pinned skills** ([#17562](https://github.com/NousResearch/hermes-agent/pull/17562))
- **`skill_manage` edits external_dirs skills in place** (salvage #9966) ([#17512](https://github.com/NousResearch/hermes-agent/pull/17512), [#17289](https://github.com/NousResearch/hermes-agent/pull/17289))
- Fix: inline-shell rendering in `skill_view` ([#15376](https://github.com/NousResearch/hermes-agent/pull/15376))
- Fix: exclude `.archive/` from skill index walk (salvage #17639) ([#17931](https://github.com/NousResearch/hermes-agent/pull/17931))
- Fix: dedicated docs page per bundled + optional skill ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929))
- Fix: `google-workspace` shared HERMES_HOME helper + ship deps as optional extra ([#15405](https://github.com/NousResearch/hermes-agent/pull/15405))
- Fix: auto-wrap ASCII-art code blocks in generated skill pages ([#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
- Point agent at `hermes-agent` skill + docs site for Hermes questions ([#16535](https://github.com/NousResearch/hermes-agent/pull/16535))
-
---
-
-## 🏗️ Core Agent & Architecture
-
-### Provider & Model Support
-
-#### New providers
- **GMI Cloud** — first-class API-key provider on par with Arcee/Kilocode/Xiaomi (salvage of #11955 — @isaachuangGMICLOUD) ([#16663](https://github.com/NousResearch/hermes-agent/pull/16663))
- **Azure AI Foundry** — auto-detection, full wiring ([#15845](https://github.com/NousResearch/hermes-agent/pull/15845))
- **LM Studio** — upgraded from custom-endpoint alias to first-class provider: dedicated auth, doctor checks, reasoning transport, live `/models` (salvage of #17061 — @kshitijk4poor) ([#17102](https://github.com/NousResearch/hermes-agent/pull/17102))
- **MiniMax OAuth** — PKCE browser flow with full OAuth integration (salvage #15203) ([#17524](https://github.com/NousResearch/hermes-agent/pull/17524))
- **Tencent Tokenhub** — new provider (salvage of #16860) ([#16960](https://github.com/NousResearch/hermes-agent/pull/16960))
-
-#### Model catalog
- **Remote model catalog manifest** — OpenRouter + Nous Portal catalogs pulled from remote manifest so new models show up without a release ([#16033](https://github.com/NousResearch/hermes-agent/pull/16033))
- `openai/gpt-5.5` and `gpt-5.5-pro` added to OpenRouter + Nous Portal ([#15343](https://github.com/NousResearch/hermes-agent/pull/15343))
- `deepseek-v4-pro` and `deepseek-v4-flash` added ([#14934](https://github.com/NousResearch/hermes-agent/pull/14934))
- `qwen3.6-plus` added to Alibaba-supported models ([#16896](https://github.com/NousResearch/hermes-agent/pull/16896))
- Gemini free-tier keys blocked at setup with 429 guidance surfacing ([#15100](https://github.com/NousResearch/hermes-agent/pull/15100))
-
-#### Model configuration
- **Configurable `prompt_caching.cache_ttl`** — 5m default, 1h opt-in (salvage #12659) ([#15065](https://github.com/NousResearch/hermes-agent/pull/15065))
- `/fast` whitelist broadened to all OpenAI + Anthropic models ([#16883](https://github.com/NousResearch/hermes-agent/pull/16883))
- `auxiliary.extra_body.reasoning` translates into Codex Responses API ([#17004](https://github.com/NousResearch/hermes-agent/pull/17004))
- `hermes fallback` command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
-
-### Agent Loop & Conversation
- **Native multimodal image routing** — based on model vision capability, not provider defaults ([#16506](https://github.com/NousResearch/hermes-agent/pull/16506))
- **Delegate `child_timeout_seconds` default bumped to 600s** ([#14809](https://github.com/NousResearch/hermes-agent/pull/14809))
- **Diagnostic dump when subagent times out with 0 API calls** ([#15105](https://github.com/NousResearch/hermes-agent/pull/15105))
- **Gateway busts cached agent on compression/context_length config edits** ([#17008](https://github.com/NousResearch/hermes-agent/pull/17008))
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
- `/reload-mcp` awareness — rebuild cached agents + prompt-cache cost confirmation ([#17729](https://github.com/NousResearch/hermes-agent/pull/17729))
- Fix: repair CamelCase + `_tool` suffix tool-call emissions ([#15124](https://github.com/NousResearch/hermes-agent/pull/15124))
- Fix: retry on `json.JSONDecodeError` instead of treating as local validation error ([#15107](https://github.com/NousResearch/hermes-agent/pull/15107))
- Fix: handle unescaped control chars in `tool_call.arguments` ([#15356](https://github.com/NousResearch/hermes-agent/pull/15356))
- Fix: ordering in `_copy_reasoning_content_for_api` — cross-provider reasoning isolation (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749))
- Fix: inject empty `reasoning_content` for DeepSeek/Kimi `tool_calls` unconditionally (@Zjianru) ([#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
- Fix: persist streamed `reasoning_content` on assistant turns (#16844) ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
- Fix: cancel coroutine on timeout so worker thread exits; full traceback on tool failure ([#17428](https://github.com/NousResearch/hermes-agent/pull/17428))
- Fix: isolate `get_tool_definitions` quiet_mode cache + dedup LCM injection (#17335) ([#17889](https://github.com/NousResearch/hermes-agent/pull/17889))
- Fix: serialize concurrent `hermes_tools` RPC calls from `execute_code` (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
- Fix: rename `[SYSTEM:` → `[IMPORTANT:` in all user-injected markers (dodges Azure content filter) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
-
-### Compression
- **Retry summary on main model for unknown errors before giving up** ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774))
- **Notify users when configured aux model fails even if main-model fallback recovers** ([#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
- `/compress` wrapped in `_busy_command` to block input during compression ([#15388](https://github.com/NousResearch/hermes-agent/pull/15388))
- Fix: reserve system + tools headroom when aux binds threshold ([#15631](https://github.com/NousResearch/hermes-agent/pull/15631))
- Fix: use text-char sum for multimodal token estimation in `_find_tail_cut_by_tokens` ([#16369](https://github.com/NousResearch/hermes-agent/pull/16369))
-
-### Session, Memory & State
- **Trigram FTS5 index for CJK search, replace LIKE fallback** (@alt-glitch) ([#16651](https://github.com/NousResearch/hermes-agent/pull/16651))
- **Index `tool_name` + `tool_calls` in FTS5, with repair + migration** (salvages #16866) ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
- **Checkpoints: auto-prune orphan and stale shadow repos at startup** ([#16303](https://github.com/NousResearch/hermes-agent/pull/16303))
- **Memory providers notified on mid-process session_id rotation** (#6672) ([#17409](https://github.com/NousResearch/hermes-agent/pull/17409))
- Fix: quote underscored terms in FTS5 query sanitization ([#16915](https://github.com/NousResearch/hermes-agent/pull/16915))
- Fix: resolve viking_read 500/412 on file URIs + pseudo-summary URIs (salvage #5886) ([#17869](https://github.com/NousResearch/hermes-agent/pull/17869))
- Fix: skip external-provider sync on interrupted turns ([#15395](https://github.com/NousResearch/hermes-agent/pull/15395))
- Fix: close embedded Hindsight async client cleanly (salvage #14605) ([#16209](https://github.com/NousResearch/hermes-agent/pull/16209))
- Fix: pass session transcript to `shutdown_memory_provider` on gateway + CLI (#15165) ([#16571](https://github.com/NousResearch/hermes-agent/pull/16571))
- Fix: write-origin metadata seam ([#15346](https://github.com/NousResearch/hermes-agent/pull/15346))
- Fix: preserve symlinks during atomic file writes ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
- Refactor: remove `flush_memories` entirely ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
-
-### Auxiliary models
- Fix: surface auxiliary failures in UI (previously silent) ([#15324](https://github.com/NousResearch/hermes-agent/pull/15324))
- Fix: surface title-gen auxiliary failures instead of silently dropping ([#16371](https://github.com/NousResearch/hermes-agent/pull/16371))
- Fix: generalize unsupported-parameter detector and harden `max_tokens` retry ([#15633](https://github.com/NousResearch/hermes-agent/pull/15633))
-
---
-
-## 📱 Messaging Platforms (Gateway)
-
-### New Platforms
- **Microsoft Teams (19th platform)** — as a plugin, + xdist collision guard ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
- **Yuanbao (Tencent 元宝, 18th platform)** — native adapter with text + media delivery ([#16298](https://github.com/NousResearch/hermes-agent/pull/16298), [#17424](https://github.com/NousResearch/hermes-agent/pull/17424), [#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
-
-### Pluggable Gateway Platforms
- **Drop-in messaging adapters** — the gateway is now a plugin host for platforms (salvage of #17664) ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
-
-### Telegram
- **Chat allowlists for groups and forums** (@web3blind) ([#15027](https://github.com/NousResearch/hermes-agent/pull/15027))
- **Send fresh finals for stale preview streams** (port openclaw#72038) ([#16261](https://github.com/NousResearch/hermes-agent/pull/16261))
- **Render markdown tables as row-group bullets + prompt hint** ([#16997](https://github.com/NousResearch/hermes-agent/pull/16997))
- Document fallback in centralized audio routing ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
-
-### Discord
- **Opt-in toolsets + ID injection + tool split + Feishu wiring** (salvage #15457, #15458) ([#15610](https://github.com/NousResearch/hermes-agent/pull/15610), [#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
- Fix: coerce `limit` parameter to int before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
-
-### Slack
- **Register every gateway command as a native slash (Discord/Telegram parity)** ([#16164](https://github.com/NousResearch/hermes-agent/pull/16164))
- **`strict_mention` config** — prevents thread auto-engagement ([#16193](https://github.com/NousResearch/hermes-agent/pull/16193))
- **`channel_skill_bindings`** — bind specific skills to specific Slack channels ([#16283](https://github.com/NousResearch/hermes-agent/pull/16283))
-
-### Signal
- **Native formatting** — markdown → bodyRanges, reply quotes, reactions ([#17417](https://github.com/NousResearch/hermes-agent/pull/17417))
- Native multi-image sending ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
-
-### Feishu / Mattermost / Email / Signal
- All participate in **native multi-image sending** ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
-
-### Gateway Core
- **Centralized audio routing + FLAC support + Telegram doc fallback** ([#17833](https://github.com/NousResearch/hermes-agent/pull/17833))
- **Native multi-image sending** across Telegram, Discord, Slack, Mattermost, Email, Signal ([#17909](https://github.com/NousResearch/hermes-agent/pull/17909))
- **Make hygiene hard message limit configurable** ([#17000](https://github.com/NousResearch/hermes-agent/pull/17000))
- **Opt-in runtime-metadata footer on final replies** ([#17026](https://github.com/NousResearch/hermes-agent/pull/17026))
- **`pre_gateway_dispatch` hook** — plugins can intercept before dispatch ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
- **`pre_approval_request` / `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
- Fix: timeouts — guard `load_config()` call against runtime exceptions ([#16318](https://github.com/NousResearch/hermes-agent/pull/16318))
- Fix: support passing handler tools via registry ([#15613](https://github.com/NousResearch/hermes-agent/pull/15613))
-
---
-
-## 🔧 Tool System
-
-### Plugin-first architecture
- **Pluggable gateway platforms** — platforms can ship as plugins ([#17751](https://github.com/NousResearch/hermes-agent/pull/17751))
- **Microsoft Teams as first plugin-shipped platform** ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
- **`pre_gateway_dispatch` hook** ([#15050](https://github.com/NousResearch/hermes-agent/pull/15050))
- **`pre_approval_request` + `post_approval_response` hooks** ([#16776](https://github.com/NousResearch/hermes-agent/pull/16776))
- **`duration_ms` on `post_tool_call`** (inspired by Claude Code 2.1.119) ([#15429](https://github.com/NousResearch/hermes-agent/pull/15429))
- **Bundled plugins**: Spotify ([#15174](https://github.com/NousResearch/hermes-agent/pull/15174)), Google Meet ([#16364](https://github.com/NousResearch/hermes-agent/pull/16364)), Langfuse observability ([#16917](https://github.com/NousResearch/hermes-agent/pull/16917)), hermes-achievements ([#17754](https://github.com/NousResearch/hermes-agent/pull/17754))
- **Page-scoped plugin slots for built-in dashboard pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
-
-### Browser
- **CDP supervisor** — dialog detection + response + cross-origin iframe eval ([#14540](https://github.com/NousResearch/hermes-agent/pull/14540))
- **Auto-spawn local Chromium for LAN/localhost URLs** when cloud provider is configured ([#16136](https://github.com/NousResearch/hermes-agent/pull/16136))
-
-### Execute code / Terminal
- **Vercel Sandbox backend** for `execute_code` / terminal (@kshitijk4poor) ([#17445](https://github.com/NousResearch/hermes-agent/pull/17445))
- **Collapse subagent `task_id`s to shared container** ([#16177](https://github.com/NousResearch/hermes-agent/pull/16177))
- **Docker: run container as host user** to avoid root-owned bind mounts (@benbarclay) ([#17305](https://github.com/NousResearch/hermes-agent/pull/17305))
- Fix: safely quote `~/` subpaths in wrapped `cd` commands ([#15394](https://github.com/NousResearch/hermes-agent/pull/15394))
- Fix: close file descriptor in `LocalEnvironment._update_cwd` ([#17300](https://github.com/NousResearch/hermes-agent/pull/17300))
- Fix: SSH — prevent tar from overwriting remote home dir permissions ([#17898](https://github.com/NousResearch/hermes-agent/pull/17898), [#17867](https://github.com/NousResearch/hermes-agent/pull/17867))
-
-### Image generation
- See Provider section for updates; no new image providers this window.
-
-### TTS / Voice
- **Pluggable TTS provider registry** under `tts.providers.<name>` ([#17843](https://github.com/NousResearch/hermes-agent/pull/17843))
- **Piper** as native local TTS provider (closes #8508) ([#17885](https://github.com/NousResearch/hermes-agent/pull/17885))
- **Voice mode CLI parity in the TUI** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
- Fix: vision — use HERMES_HOME-based cache dir instead of cwd ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
-
-### Cron
- **Honor `hermes tools` config for the cron platform** ([#14798](https://github.com/NousResearch/hermes-agent/pull/14798))
- **Per-job `workdir`** — project-aware cron runs ([#15110](https://github.com/NousResearch/hermes-agent/pull/15110))
- **`context_from` field** — chain cron job outputs ([#15606](https://github.com/NousResearch/hermes-agent/pull/15606))
- Fix: promote `croniter` to a core dependency ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
-
-### Web search
- **Expose `limit` for `web_search`** ([#16934](https://github.com/NousResearch/hermes-agent/pull/16934))
-
-### Maps
- Fix: include seconds in timezone UTC offset output ([#16300](https://github.com/NousResearch/hermes-agent/pull/16300))
-
-### Approvals
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
- Perf: precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
-
-### ACP
- **Advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
-
-### API Server
- **POST `/v1/runs/{run_id}/stop`** (salvage of #15656) ([#15842](https://github.com/NousResearch/hermes-agent/pull/15842))
- **Expose run status for external UIs** (#17085) ([#17458](https://github.com/NousResearch/hermes-agent/pull/17458))
-
-### Nix
- **Declarative plugin installation for NixOS module** (@alt-glitch) ([#15953](https://github.com/NousResearch/hermes-agent/pull/15953))
- Fix: use `--rebuild` in fix-lockfiles to bypass cached FOD store paths ([#15444](https://github.com/NousResearch/hermes-agent/pull/15444))
- Fix: `extraPackages` now actually works via per-user profile ([#17047](https://github.com/NousResearch/hermes-agent/pull/17047))
- Fix: refresh web/ npm-deps hash to unblock main builds ([#17174](https://github.com/NousResearch/hermes-agent/pull/17174))
- Fix: replace magic-nix-cache with Cachix ([#17928](https://github.com/NousResearch/hermes-agent/pull/17928))
-
---
-
-## 🖥️ TUI
-
-### New features
- **LaTeX rendering** (@austinpickett) ([#17175](https://github.com/NousResearch/hermes-agent/pull/17175))
- **`/reload` .env hot-reload** — ported from the classic CLI ([#17286](https://github.com/NousResearch/hermes-agent/pull/17286))
- **Pluggable busy-indicator styles** (@OutThisLife, #13610) ([#17150](https://github.com/NousResearch/hermes-agent/pull/17150))
- **Opt-in auto-resume of the most recent session** (@OutThisLife) ([#17130](https://github.com/NousResearch/hermes-agent/pull/17130))
- **Expanded light-terminal auto-detection** — `HERMES_TUI_THEME` + background hex (@OutThisLife) ([#17113](https://github.com/NousResearch/hermes-agent/pull/17113))
- **Delete sessions from `/resume` picker with `d`** (@OutThisLife) ([#17668](https://github.com/NousResearch/hermes-agent/pull/17668))
- **Line-by-line scroll on modified mouse wheel** (@OutThisLife) ([#17669](https://github.com/NousResearch/hermes-agent/pull/17669))
- **Delete queued message while editing with ctrl-x / cancel with esc** (@OutThisLife) ([#16707](https://github.com/NousResearch/hermes-agent/pull/16707))
- **Per-section visibility for the details accordion** (@OutThisLife) ([#14968](https://github.com/NousResearch/hermes-agent/pull/14968))
- **Voice mode CLI parity** — VAD loop + TTS + crash forensics ([#14810](https://github.com/NousResearch/hermes-agent/pull/14810))
- **Contextual first-touch hints ported to TUI** — `/busy`, `/verbose` ([#16054](https://github.com/NousResearch/hermes-agent/pull/16054))
- **Mini help menu on `?` in the input field** (@ethernet8023) ([#18043](https://github.com/NousResearch/hermes-agent/pull/18043))
-
-### Fixes
- Fix: proactive mouse disable on ConPTY + `/mouse` toggle command (@kevin-ho, WSL2 ghost-mouse fix) ([#15488](https://github.com/NousResearch/hermes-agent/pull/15488))
- Fix: restore skills search RPC ([#15870](https://github.com/NousResearch/hermes-agent/pull/15870))
- Perf: cache text measurements across yoga flex re-passes ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
- Perf: stabilize long-session scrolling ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
- Perf: lazily seed virtual history heights ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
- Perf: cut visible cold start ~57% with lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
-
---
-
-## 🖱️ CLI & User Experience
-
-### New commands
- **`hermes -z <prompt>`** — non-interactive one-shot mode ([#15702](https://github.com/NousResearch/hermes-agent/pull/15702))
- **`hermes -z` with `--model` / `--provider` / `HERMES_INFERENCE_MODEL`** ([#15704](https://github.com/NousResearch/hermes-agent/pull/15704))
- **`hermes update --check`** preflight flag ([#15841](https://github.com/NousResearch/hermes-agent/pull/15841))
- **`hermes fallback`** command for managing fallback providers ([#16052](https://github.com/NousResearch/hermes-agent/pull/16052))
- **`/busy`** slash command for busy input mode ([#15382](https://github.com/NousResearch/hermes-agent/pull/15382))
- **`/busy` input mode 'steer'** as a third option ([#16279](https://github.com/NousResearch/hermes-agent/pull/16279))
- **`/btw` as alias for `/background`** ([#16053](https://github.com/NousResearch/hermes-agent/pull/16053))
- **`/reload-skills`** slash command (salvage #17670) ([#17744](https://github.com/NousResearch/hermes-agent/pull/17744))
- **Surface `/queue`, `/bg`, `/steer` in agent-running placeholder** ([#16118](https://github.com/NousResearch/hermes-agent/pull/16118))
-
-### Setup / onboarding
- **Auto-reconfigure on existing installs** ([#15879](https://github.com/NousResearch/hermes-agent/pull/15879))
- **Contextual first-touch hints for `/busy` and `/verbose`** ([#16046](https://github.com/NousResearch/hermes-agent/pull/16046))
- **Cost-saving tips from the April 30 tip-of-the-day** ([#17841](https://github.com/NousResearch/hermes-agent/pull/17841))
- **Hyperlink startup banner title to the latest GitHub Release** ([#14945](https://github.com/NousResearch/hermes-agent/pull/14945))
-
-### Update / backup
- **Snapshot pairing data before `git pull`** ([#16383](https://github.com/NousResearch/hermes-agent/pull/16383))
- **Auto-backup HERMES_HOME before `hermes update`** (opt-in, off by default) ([#16539](https://github.com/NousResearch/hermes-agent/pull/16539), [#16566](https://github.com/NousResearch/hermes-agent/pull/16566))
- **Exclude `checkpoints/` from backups** ([#16572](https://github.com/NousResearch/hermes-agent/pull/16572))
- **Exclude SQLite WAL/SHM/journal sidecars from backups** ([#16576](https://github.com/NousResearch/hermes-agent/pull/16576))
- **Installer FHS layout for root installs on Linux** ([#15608](https://github.com/NousResearch/hermes-agent/pull/15608))
- Fix: kill stale dashboards instead of warning ([#17832](https://github.com/NousResearch/hermes-agent/pull/17832))
- Fix: show correct update status on nix-built hermes ([#17550](https://github.com/NousResearch/hermes-agent/pull/17550))
-
-### Slash-command housekeeping
- Refactor: drop `/provider`, `/plan` handler, and clean up slash registry ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
- Refactor: drop `persist_session` plumbing + fix broken `/btw` mid-turn bypass ([#16075](https://github.com/NousResearch/hermes-agent/pull/16075))
-
-### OpenClaw migration (for folks coming from OpenClaw)
- **Hardened OpenClaw import** — plan-first apply, redaction, pre-migration backup ([#16911](https://github.com/NousResearch/hermes-agent/pull/16911))
- Fix: case-preserving brand rewrite + one-time `~/.openclaw` residue banner ([#16327](https://github.com/NousResearch/hermes-agent/pull/16327))
- Fix: resolve `openclaw` workspace files from `agents.defaults.workspace` ([#16879](https://github.com/NousResearch/hermes-agent/pull/16879))
- Fix: resolve model aliases against real OpenClaw catalog schema (salvage #16778) ([#16977](https://github.com/NousResearch/hermes-agent/pull/16977))
-
---
-
-## 📊 Web Dashboard
-
- **Models tab** — rich per-model analytics ([#17745](https://github.com/NousResearch/hermes-agent/pull/17745))
- **Configure main + auxiliary models from the Models page** ([#17802](https://github.com/NousResearch/hermes-agent/pull/17802))
- **Dashboard Chat tab — xterm.js + JSON-RPC sidecar** (supersedes #12710 + #13379, @OutThisLife) ([#14890](https://github.com/NousResearch/hermes-agent/pull/14890))
- **Dashboard layout refresh** (@austinpickett) ([#14899](https://github.com/NousResearch/hermes-agent/pull/14899))
- **`--stop` and `--status` flags** on the dashboard CLI ([#17840](https://github.com/NousResearch/hermes-agent/pull/17840))
- **Page-scoped plugin slots for built-in pages** ([#15658](https://github.com/NousResearch/hermes-agent/pull/15658))
- Fix: replace all buttons with design-system buttons ([#17007](https://github.com/NousResearch/hermes-agent/pull/17007))
-
---
-
-## ⚡ Performance
-
- **TUI visible cold start cut ~57%** via lazy agent init ([#17190](https://github.com/NousResearch/hermes-agent/pull/17190))
- **Lazy-import OpenAI, Anthropic, Firecrawl, account_usage** ([#17046](https://github.com/NousResearch/hermes-agent/pull/17046))
- **mtime-cache `load_config()` and `read_raw_config()`** ([#17041](https://github.com/NousResearch/hermes-agent/pull/17041))
- **Memoize `get_tool_definitions()` + TTL-cache `check_fn` results** ([#17098](https://github.com/NousResearch/hermes-agent/pull/17098))
- **Precompile DANGEROUS_PATTERNS and HARDLINE_PATTERNS** ([#17206](https://github.com/NousResearch/hermes-agent/pull/17206))
- **Cache Ink text measurements across yoga flex re-passes** ([#14818](https://github.com/NousResearch/hermes-agent/pull/14818))
- **Stabilize long-session scrolling** ([#15926](https://github.com/NousResearch/hermes-agent/pull/15926))
- **Lazily seed virtual history heights** ([#16523](https://github.com/NousResearch/hermes-agent/pull/16523))
-
---
-
-## 🔒 Security & Reliability
-
- **Secret redaction off by default** — stops corrupting patches / API payloads with fake-key substitutions. Opt in via `redaction.enabled: true` ([#16794](https://github.com/NousResearch/hermes-agent/pull/16794))
- **`[SYSTEM:` → `[IMPORTANT:`** in all user-injected markers (Azure content filter dodge) ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
- **Hardline blocklist for unrecoverable commands** ([#15878](https://github.com/NousResearch/hermes-agent/pull/15878))
- **Canonical `mask_secret` helper; fix status.py DIM drift** ([#17207](https://github.com/NousResearch/hermes-agent/pull/17207))
- **Sweep expired paste.rs uploads on a real timer** ([#16431](https://github.com/NousResearch/hermes-agent/pull/16431))
- **Preserve symlinks during atomic file writes** ([#16980](https://github.com/NousResearch/hermes-agent/pull/16980))
- **Probe `/dev/tty` by opening it, not bare existence** ([#17024](https://github.com/NousResearch/hermes-agent/pull/17024))
-
---
-
-## 🐛 Notable Bug Fixes
-
-This window includes 360 `fix:` PRs. Selected highlights from across the stack:
-
- **Background review fork inherits parent's live runtime** — provider/model/creds now propagate correctly ([#16099](https://github.com/NousResearch/hermes-agent/pull/16099))
- **Hindsight configurable `HINDSIGHT_TIMEOUT` env var** ([#15077](https://github.com/NousResearch/hermes-agent/pull/15077))
- **Tools: normalize numeric entries + clear stale `no_mcp` in `_save_platform_tools`** ([#15607](https://github.com/NousResearch/hermes-agent/pull/15607))
- **MCP: rewrite `definitions` refs to `$defs` in input schemas** — closes provider-side 400s
- **Azure content filter compatibility** — renamed `[SYSTEM:` markers so Azure's content filter stops flagging them ([#16114](https://github.com/NousResearch/hermes-agent/pull/16114))
- **Vision cache uses HERMES_HOME instead of cwd** ([#17719](https://github.com/NousResearch/hermes-agent/pull/17719))
- **FTS5 search** — tool_name + tool_calls indexing with repair + migration ([#16914](https://github.com/NousResearch/hermes-agent/pull/16914))
- **Streaming reasoning persists on assistant turns** ([#16892](https://github.com/NousResearch/hermes-agent/pull/16892))
- **execute_code concurrent RPC serialization** (#17770) ([#17894](https://github.com/NousResearch/hermes-agent/pull/17894), [#17902](https://github.com/NousResearch/hermes-agent/pull/17902))
- **Background reviewer scoped to memory + skills toolsets** — no more accidental web/shell escapes ([#16569](https://github.com/NousResearch/hermes-agent/pull/16569))
- **Compression recovery** — retry on main before giving up; notify user when aux fails ([#16774](https://github.com/NousResearch/hermes-agent/pull/16774), [#16775](https://github.com/NousResearch/hermes-agent/pull/16775))
- **`croniter` promoted to a core dependency** ([#17577](https://github.com/NousResearch/hermes-agent/pull/17577))
- **Discord tool `limit` parameter coerced to int** before `min()` call ([#16319](https://github.com/NousResearch/hermes-agent/pull/16319))
- **Yuanbao messaging platform entrance fix** ([#16880](https://github.com/NousResearch/hermes-agent/pull/16880))
- **ACP advertise and forward image prompts** ([#18030](https://github.com/NousResearch/hermes-agent/pull/18030))
- **DeepSeek / Kimi reasoning content isolation** across cross-provider histories (@Zjianru) ([#15749](https://github.com/NousResearch/hermes-agent/pull/15749), [#15762](https://github.com/NousResearch/hermes-agent/pull/15762))
- **Preserve reasoning_content replay on DeepSeek v4 + Kimi/Moonshot thinking** ([#18045](https://github.com/NousResearch/hermes-agent/pull/18045))
-
-The vast majority of the 360 fixes landed in the streaming/compression/tool-calling paths across all providers — DeepSeek, Kimi, Moonshot, GLM, Qwen, MiniMax, Gemini, Anthropic, OpenAI — alongside TUI polish (resize, scroll, sticky-prompt) and gateway platform-specific edge cases.
-
---
-
-## 🧪 Testing & CI
-
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
- **Microsoft Teams xdist collision guard** — prevents worker collisions when Teams platform tests run in parallel ([#17828](https://github.com/NousResearch/hermes-agent/pull/17828))
- Chore: remove unused imports and dead locals (ruff F401, F841) ([#17010](https://github.com/NousResearch/hermes-agent/pull/17010))
-
---
-
-## 📚 Documentation
-
- **Curator feature page** added to docs site ([#17563](https://github.com/NousResearch/hermes-agent/pull/17563))
- **Document pin also blocking `skill_manage` writes** ([#17578](https://github.com/NousResearch/hermes-agent/pull/17578))
- **Direct-URL skill install documented** across features, reference, guide, and `hermes-agent` skill ([#16355](https://github.com/NousResearch/hermes-agent/pull/16355))
- **Hooks tutorial — build a BOOT.md startup checklist** (replaces the removed built-in hook) ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202))
- **ComfyUI docs: ask local vs cloud FIRST before hardware check** ([#17612](https://github.com/NousResearch/hermes-agent/pull/17612))
- **Obliteratus skill: link YouTube video guide in SKILL.md** ([#15808](https://github.com/NousResearch/hermes-agent/pull/15808))
- Per-skill docs pages generated for bundled + optional skills; ASCII art code blocks auto-wrapped ([#14929](https://github.com/NousResearch/hermes-agent/pull/14929), [#16497](https://github.com/NousResearch/hermes-agent/pull/16497))
-
---
-
-## ⚖️ Removed / Reverted
-
- **Kanban multi-profile collaboration board** — landed in #16081, reverted in [#16098](https://github.com/NousResearch/hermes-agent/pull/16098) while the design is reworked
- **computer-use cua-driver** — 3 preparatory PRs landed, then were reverted in [#16927](https://github.com/NousResearch/hermes-agent/pull/16927)
- **BOOT.md built-in hook** removed ([#17093](https://github.com/NousResearch/hermes-agent/pull/17093)); the hooks tutorial ([#17202](https://github.com/NousResearch/hermes-agent/pull/17202)) shows how to build the same workflow yourself with a shell hook
- **`/provider` + `/plan` slash commands dropped** ([#15047](https://github.com/NousResearch/hermes-agent/pull/15047))
- **`flush_memories` removed entirely** ([#15696](https://github.com/NousResearch/hermes-agent/pull/15696))
-
---
-
-## 👥 Contributors
-
-### Core
- **@teknium1** (Teknium)
-
-### Top Community Contributors (by merged PR count since v0.11.0)
-
- **@OutThisLife** (Brooklyn) — 52 PRs · TUI — light-terminal detection + pluggable busy styles + auto-resume + session-delete from /resume + mouse-wheel scrolling + xterm.js dashboard Chat tab + cold-start cut + accordion polish
- **@kshitijk4poor** — 12 PRs · LM Studio first-class provider (salvage), Vercel Sandbox backend, GMI Cloud salvage, bundled-by-default touchdesigner-mcp, many tool-call / reasoning fixes
- **@helix4u** — 10 PRs · MCP schema robustness, assorted stability fixes
- **@alt-glitch** — 8 PRs · trigram FTS5 CJK search, declarative Nix plugin install, matrix/feishu hints and fixes
- **@ethernet8023** — 4 PRs
- **@austinpickett** — 4 PRs · LaTeX rendering in TUI, dashboard layout refresh
- **@benbarclay** — 3 PRs · Docker run-as-host-user so bind mounts don't get root-owned
- **@vominh1919** — 2 PRs
- **@stephenschoettler** — 2 PRs
- **@kevin-ho** — ConPTY mouse-injection fix (#15488)
- **@Zjianru** — cross-provider reasoning_content isolation + DeepSeek/Kimi empty-reasoning injection (#15749, #15762)
- **@web3blind** — Telegram chat allowlists for groups and forums (#15027)
- **@SHL0MS** — 9 new TouchDesigner-MCP reference docs (#16768)
- **@0xDevNinja** — curator `restore_skill` nested-archive fix (#17951)
- **@y0shua1ee** — curator `use` activity fix (#17953)
-
-### Also contributing
-Salvaged or co-authored work from **@isaachuangGMICLOUD** (GMI Cloud), earlier upstream PRs from the original author of each salvage chain, and a long tail of one-shot fixes, documentation nudges, and skill contributions from the community.
-
-### All Contributors (alphabetical, excluding @teknium1)
-
-@0xbyt4, @0xharryriddle, @0xDevNinja, @0z1-ghb, @5park1e, @A-FdL-Prog, @aj-nt, @akhater, @alblez, @alexg0bot,
-@alexzhu0, @AllardQuek, @alt-glitch, @amanning3390, @amanuel2, @AndreKurait, @andrewhosf, @Andy283, @andyylin,
-@angel12, @AntAISecurityLab, @ash, @austinpickett, @badgerbees, @BadTechBandit, @Bartok9, @beenherebefore,
-@beesrsj2500, @BeliefanX, @benbarclay, @benjaminsehl, @BlackishGreen33, @bloodcarter, @BlueBirdBack,
-@briandevans, @brooklynnicholson, @bsgdigital, @buray, @bwjoke, @camaragon, @cdanis, @cgarwood82,
-@charles-brooks, @chen1749144759, @chengoak, @ching-kaching, @Contentment003111, @crayfish-ai, @CruxExperts,
-@cyclingwithelephants, @dandaka, @danklynn, @ddupont808, @dhabibi, @difujia, @dimitrovi, @dlkakbs,
-@dontcallmejames, @EKKOLearnAI, @emozilla, @ericnicolaides, @Erosika, @ethernet8023, @exiao, @Feranmi10,
-@flobo3, @foxion37, @georgeglessner, @georgex8001, @ghostmfr, @H-Ali13381, @HangGlidersRule, @harryplusplus,
-@haru398801, @heathley, @hejuntt1014, @hekaru-agent, @helix4u, @Heltman, @HenkDz, @heyitsaamir, @hharry11,
-@hhhonzik, @hhuang91, @HiddenPuppy, @htsh, @iamagenius00, @in-liberty420, @innocarpe, @irispillars, @iRonin,
-@isaachuangGMICLOUD, @Ito-69, @j3ffffff, @jackjin1997, @jakubkrcmar, @Jason2031, @JayGwod, @jerome-benoit,
-@johnncenae, @Kailigithub, @keiravoss94, @kevin-ho, @knockyai, @konsisumer, @kshitijk4poor, @kunlabs, @l0hde,
-@Leihb, @leoneparise, @LeonSGP43, @liizfq, @liuhao1024, @loongzhao, @lsdsjy, @luyao618, @ma-pony, @Magaav,
-@MagicRay1217, @math0r-be, @MattMaximo, @maxims-oss, @MaxyMoos, @maymuneth, @mcndjxlefnd, @memosr,
-@MestreY0d4-Uninter, @mewwts, @Mirac1eSky, @MorAlekss, @mrhwick, @mrunmayee17, @mssteuer, @Nanako0129,
-@nazirulhafiy, @Nerijusas, @Nicecsh, @nicoloboschi, @nightq, @ningfangbin, @octo-patch, @Octopus,
-@OutThisLife, @Paperclip, @pein892, @perlowja, @prasadus92, @qike-ms, @qiyin-code, @Readon, @ReginaldasR,
-@revaraver, @rfilgueiras, @rmoen, @romanornr, @rugvedS07, @rylena, @samrusani, @Sanjays2402, @sasha-id,
-@Satoshi-agi, @scheidti, @scotttrinh, @season179, @SeeYangZhi, @sgaofen, @shamork, @shannonsands, @SHL0MS,
-@simbam99, @Societus, @socrates1024, @Sonoyunchu, @sprmn24, @stephenschoettler, @tangyuanjc, @TechPrototyper,
-@tekgnosis-net, @ThomassJonax, @tmimmanuel, @tochukwuada, @Tosko4, @Tranquil-Flow, @twozle, @txbxxx,
-@UgwujaGeorge, @Versun, @vlwkaos, @voidborne-d, @vominh1919, @Wang-tianhao, @Wangshengyang2004, @web3blind,
-@westers, @Wysie, @xandersbell, @xiahu88988, @XieNBi, @xinbenlv, @xnbi, @y0shua1ee, @yatesjalex, @yes999zc,
-@yeyitech, @Yoimex, @YueLich, @Yukipukii1, @zhiyanliu, @zicochaos, @Zjianru, @zkl2333, @zons-zhaozhy,
-@ztexydt-cqh.
-
-Also: @Siddharth Balyan, @YuShu.
-
---
-
-**Full Changelog**: [v2026.4.23...v2026.4.30](https://github.com/NousResearch/hermes-agent/compare/v2026.4.23...v2026.4.30)
@@ -0,0 +1,632 @@
+"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
+
+This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
+backend. Each request starts a short-lived ACP session, sends the formatted
+conversation as a single prompt, collects text chunks, and converts the result
+back into the minimal shape Hermes expects from an OpenAI client.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import queue
+import re
+import shlex
+import subprocess
+import threading
+import time
+from collections import deque
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+from agent.file_safety import get_read_block_error, is_write_denied
+from agent.redact import redact_sensitive_text
+
+ACP_MARKER_BASE_URL = "acp://copilot"
+_DEFAULT_TIMEOUT_SECONDS = 900.0
+
+_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
+_TOOL_CALL_JSON_RE = re.compile(
+    r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}",
+    re.DOTALL,
+)
+
+
+def _resolve_command() -> str:
+    return (
+        os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
+        or os.getenv("COPILOT_CLI_PATH", "").strip()
+        or "copilot"
+    )
+
+
+def _resolve_args() -> list[str]:
+    raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
+    if not raw:
+        return ["--acp", "--stdio"]
+    return shlex.split(raw)
+
+
+def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "error": {
+            "code": code,
+            "message": message,
+        },
+    }
+
+
+def _permission_denied(message_id: Any) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "result": {
+            "outcome": {
+                "outcome": "cancelled",
+            }
+        },
+    }
+
+
+def _format_messages_as_prompt(
+    messages: list[dict[str, Any]],
+    model: str | None = None,
+    tools: list[dict[str, Any]] | None = None,
+    tool_choice: Any = None,
+) -> str:
+    sections: list[str] = [
+        "You are being used as the active ACP agent backend for Hermes.",
+        "Use ACP capabilities to complete tasks.",
+        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
+        "If no tool is needed, answer normally.",
+    ]
+    if model:
+        sections.append(f"Hermes requested model hint: {model}")
+
+    if isinstance(tools, list) and tools:
+        tool_specs: list[dict[str, Any]] = []
+        for t in tools:
+            if not isinstance(t, dict):
+                continue
+            fn = t.get("function") or {}
+            if not isinstance(fn, dict):
+                continue
+            name = fn.get("name")
+            if not isinstance(name, str) or not name.strip():
+                continue
+            tool_specs.append(
+                {
+                    "name": name.strip(),
+                    "description": fn.get("description", ""),
+                    "parameters": fn.get("parameters", {}),
+                }
+            )
+        if tool_specs:
+            sections.append(
+                "Available tools (OpenAI function schema). "
+                "When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
+                "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+                + json.dumps(tool_specs, ensure_ascii=False)
+            )
+
+    if tool_choice is not None:
+        sections.append(
+            f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}"
+        )
+
+    transcript: list[str] = []
+    for message in messages:
+        if not isinstance(message, dict):
+            continue
+        role = str(message.get("role") or "unknown").strip().lower()
+        if role == "tool":
+            role = "tool"
+        elif role not in {"system", "user", "assistant"}:
+            role = "context"
+
+        content = message.get("content")
+        rendered = _render_message_content(content)
+        if not rendered:
+            continue
+
+        label = {
+            "system": "System",
+            "user": "User",
+            "assistant": "Assistant",
+            "tool": "Tool",
+            "context": "Context",
+        }.get(role, role.title())
+        transcript.append(f"{label}:\n{rendered}")
+
+    if transcript:
+        sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
+
+    sections.append("Continue the conversation from the latest user request.")
+    return "\n\n".join(
+        section.strip() for section in sections if section and section.strip()
+    )
+
+
+def _render_message_content(content: Any) -> str:
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content.strip()
+    if isinstance(content, dict):
+        if "text" in content:
+            return str(content.get("text") or "").strip()
+        if "content" in content and isinstance(content.get("content"), str):
+            return str(content.get("content") or "").strip()
+        return json.dumps(content, ensure_ascii=True)
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str) and text.strip():
+                    parts.append(text.strip())
+        return "\n".join(parts).strip()
+    return str(content).strip()
+
+
def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
    """Split a model reply into tool calls and the remaining plain text.

    ``<tool_call>{...}</tool_call>`` blocks are looked for first. Only when
    none of them yields a tool call are bare JSON objects in OpenAI
    function-call shape tried as a fallback. Every matched span is removed
    from the returned text, whether or not it produced a tool call.

    Args:
        text: Raw reply text. A non-string or blank value yields ``([], "")``.

    Returns:
        ``(tool_calls, cleaned_text)``. Each tool call is a
        ``SimpleNamespace`` with ``id``, ``call_id``, ``response_item_id``,
        ``type`` and ``function`` (itself carrying ``name`` and
        ``arguments``, the latter always a string).
    """
    if not isinstance(text, str) or not text.strip():
        return [], ""

    extracted: list[SimpleNamespace] = []
    consumed_spans: list[tuple[int, int]] = []

    def _add_tool_call(obj: Any) -> None:
        """Append ``obj`` to ``extracted`` when it has the function-call shape."""
        if not isinstance(obj, dict):
            return
        fn = obj.get("function")
        if not isinstance(fn, dict):
            return
        fn_name = fn.get("name")
        if not isinstance(fn_name, str) or not fn_name.strip():
            return
        fn_args = fn.get("arguments", "{}")
        if not isinstance(fn_args, str):
            # Arguments are always handed on as a JSON string.
            fn_args = json.dumps(fn_args, ensure_ascii=False)
        call_id = obj.get("id")
        if not isinstance(call_id, str) or not call_id.strip():
            call_id = f"acp_call_{len(extracted) + 1}"

        extracted.append(
            SimpleNamespace(
                id=call_id,
                call_id=call_id,
                response_item_id=None,
                type="function",
                function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
            )
        )

    for m in _TOOL_CALL_BLOCK_RE.finditer(text):
        try:
            parsed = json.loads(m.group(1))
        except (ValueError, RecursionError):
            parsed = None
        _add_tool_call(parsed)
        consumed_spans.append((m.start(), m.end()))

    # Only try bare-JSON fallback when no XML blocks were found.
    if not extracted:
        decoder = json.JSONDecoder()
        resume_at = 0
        for m in _TOOL_CALL_JSON_RE.finditer(text):
            if m.start() < resume_at:
                # This match starts inside an object that was already parsed.
                continue
            # The regex only locates where a candidate object starts; its
            # non-greedy tail stops at the first "}" followed by "}", which
            # truncates objects whose arguments contain nested braces.
            # Decoding from the match start recovers the complete object.
            try:
                parsed, end = decoder.raw_decode(text, m.start())
            except (ValueError, RecursionError):
                parsed, end = None, m.end()
            _add_tool_call(parsed)
            end = max(end, m.end())
            consumed_spans.append((m.start(), end))
            resume_at = end

    if not consumed_spans:
        return extracted, text.strip()

    # Merge overlapping or touching spans so each character is cut once.
    consumed_spans.sort()
    merged: list[tuple[int, int]] = []
    for start, end in consumed_spans:
        if not merged or start > merged[-1][1]:
            merged.append((start, end))
        else:
            merged[-1] = (merged[-1][0], max(merged[-1][1], end))

    # Keep the text that lies between the consumed spans.
    parts: list[str] = []
    cursor = 0
    for start, end in merged:
        if cursor < start:
            parts.append(text[cursor:start])
        cursor = max(cursor, end)
    if cursor < len(text):
        parts.append(text[cursor:])

    cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
    return extracted, cleaned
+
+
+def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
+    candidate = Path(path_text)
+    if not candidate.is_absolute():
+        raise PermissionError("ACP file-system paths must be absolute.")
+    resolved = candidate.resolve()
+    root = Path(cwd).resolve()
+    try:
+        resolved.relative_to(root)
+    except ValueError as exc:
+        raise PermissionError(
+            f"Path '{resolved}' is outside the session cwd '{root}'."
+        ) from exc
+    return resolved
+
+
+class _ACPChatCompletions:
+    def __init__(self, client: CopilotACPClient):
+        self._client = client
+
+    def create(self, **kwargs: Any) -> Any:
+        return self._client._create_chat_completion(**kwargs)
+
+
class _ACPChatNamespace:
    """Holder exposing a ``completions`` attribute for a ``CopilotACPClient``."""

    def __init__(self, client: CopilotACPClient):
        completions = _ACPChatCompletions(client)
        self.completions = completions
+
+
+class CopilotACPClient:
+    """Minimal OpenAI-client-compatible facade for Copilot ACP."""
+
+    def __init__(
+        self,
+        *,
+        api_key: str | None = None,
+        base_url: str | None = None,
+        default_headers: dict[str, str] | None = None,
+        acp_command: str | None = None,
+        acp_args: list[str] | None = None,
+        acp_cwd: str | None = None,
+        command: str | None = None,
+        args: list[str] | None = None,
+        **_: Any,
+    ):
+        self.api_key = api_key or "copilot-acp"
+        self.base_url = base_url or ACP_MARKER_BASE_URL
+        self._default_headers = dict(default_headers or {})
+        self._acp_command = acp_command or command or _resolve_command()
+        self._acp_args = list(acp_args or args or _resolve_args())
+        self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
+        self.chat = _ACPChatNamespace(self)
+        self.is_closed = False
+        self._active_process: subprocess.Popen[str] | None = None
+        self._active_process_lock = threading.Lock()
+
+    def close(self) -> None:
+        proc: subprocess.Popen[str] | None
+        with self._active_process_lock:
+            proc = self._active_process
+            self._active_process = None
+        self.is_closed = True
+        if proc is None:
+            return
+        try:
+            proc.terminate()
+            proc.wait(timeout=2)
+        except Exception:
+            try:
+                proc.kill()
+            except Exception:
+                pass
+
+    def _create_chat_completion(
+        self,
+        *,
+        model: str | None = None,
+        messages: list[dict[str, Any]] | None = None,
+        timeout: float | None = None,
+        tools: list[dict[str, Any]] | None = None,
+        tool_choice: Any = None,
+        **_: Any,
+    ) -> Any:
+        """Send one chat request through the ACP subprocess.
+
+        The messages, tools and tool choice are rendered into a single prompt,
+        run via ``_run_prompt``, and the reply is wrapped in a
+        ``SimpleNamespace`` exposing ``choices``, ``usage`` and ``model``.
+        Token usage is always reported as zero.  Extra keyword arguments are
+        accepted and ignored.
+
+        ``timeout`` may be ``None`` (module default), a plain number, or an
+        object carrying numeric ``read``/``write``/``connect``/``pool``/
+        ``timeout`` attributes (e.g. ``httpx.Timeout``), in which case the
+        largest numeric component is used.
+        """
+        rendered_prompt = _format_messages_as_prompt(
+            messages or [],
+            model=model,
+            tools=tools,
+            tool_choice=tool_choice,
+        )
+
+        # Collapse whatever the caller passed into a single number of seconds.
+        if timeout is None:
+            budget_seconds = _DEFAULT_TIMEOUT_SECONDS
+        elif isinstance(timeout, (int, float)):
+            budget_seconds = float(timeout)
+        else:
+            # Structured timeout: take the largest component so the subprocess
+            # gets enough wall-clock time for the whole response.
+            components: list[float] = []
+            for attr_name in ("read", "write", "connect", "pool", "timeout"):
+                component = getattr(timeout, attr_name, None)
+                if isinstance(component, (int, float)):
+                    components.append(float(component))
+            if components:
+                budget_seconds = max(components)
+            else:
+                budget_seconds = _DEFAULT_TIMEOUT_SECONDS
+
+        raw_reply, thoughts = self._run_prompt(
+            rendered_prompt,
+            timeout_seconds=budget_seconds,
+        )
+        parsed_calls, visible_text = _extract_tool_calls_from_text(raw_reply)
+        thoughts_or_none = thoughts or None
+
+        message = SimpleNamespace(
+            content=visible_text,
+            tool_calls=parsed_calls,
+            reasoning=thoughts_or_none,
+            reasoning_content=thoughts_or_none,
+            reasoning_details=None,
+        )
+        only_choice = SimpleNamespace(
+            message=message,
+            finish_reason="tool_calls" if parsed_calls else "stop",
+        )
+        zero_usage = SimpleNamespace(
+            prompt_tokens=0,
+            completion_tokens=0,
+            total_tokens=0,
+            prompt_tokens_details=SimpleNamespace(cached_tokens=0),
+        )
+        return SimpleNamespace(
+            choices=[only_choice],
+            usage=zero_usage,
+            model=model or "copilot-acp",
+        )
+
+    def _run_prompt(
+        self, prompt_text: str, *, timeout_seconds: float
+    ) -> tuple[str, str]:
+        """Run a single prompt through a fresh Copilot ACP subprocess.
+
+        Spawns the configured command, performs the ``initialize`` →
+        ``session/new`` → ``session/prompt`` JSON-RPC exchange over the
+        child's stdin/stdout, and collects the streamed chunks that
+        ``_handle_server_message`` appends while the prompt request is
+        pending.  The subprocess is always shut down via ``close()`` before
+        returning or raising.
+
+        Args:
+            prompt_text: The fully rendered prompt to send.
+            timeout_seconds: Budget applied to *each* of the three requests
+                individually (the deadline is computed per request).
+
+        Returns:
+            ``(response_text, reasoning_text)`` — the concatenated message
+            chunks and thought chunks.
+
+        Raises:
+            RuntimeError: The command could not be started, pipes are
+                missing, the agent returned a JSON-RPC error, no session id
+                was returned, or the process exited early with stderr output.
+            TimeoutError: No matching response arrived before the deadline
+                (also raised when the process exits without a response and
+                without stderr output).
+        """
+        try:
+            proc = subprocess.Popen(
+                [self._acp_command] + self._acp_args,
+                stdin=subprocess.PIPE,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                bufsize=1,
+                cwd=self._acp_cwd,
+            )
+        except FileNotFoundError as exc:
+            raise RuntimeError(
+                f"Could not start Copilot ACP command '{self._acp_command}'. "
+                "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
+            ) from exc
+
+        if proc.stdin is None or proc.stdout is None:
+            proc.kill()
+            raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
+
+        self.is_closed = False
+        with self._active_process_lock:
+            self._active_process = proc
+
+        inbox: queue.Queue[dict[str, Any]] = queue.Queue()
+        # Only the last 40 stderr lines are kept, for error reporting.
+        stderr_tail: deque[str] = deque(maxlen=40)
+
+        def _stdout_reader() -> None:
+            if proc.stdout is None:
+                return
+            for line in proc.stdout:
+                try:
+                    inbox.put(json.loads(line))
+                except Exception:
+                    # Non-JSON output is forwarded as-is so it is not lost.
+                    inbox.put({"raw": line.rstrip("\n")})
+
+        def _stderr_reader() -> None:
+            if proc.stderr is None:
+                return
+            for line in proc.stderr:
+                stderr_tail.append(line.rstrip("\n"))
+
+        out_thread = threading.Thread(target=_stdout_reader, daemon=True)
+        err_thread = threading.Thread(target=_stderr_reader, daemon=True)
+        out_thread.start()
+        err_thread.start()
+
+        next_id = 0
+        # How long to let a reader thread finish flushing buffered output
+        # once the subprocess has exited.
+        exit_drain_seconds = 0.5
+
+        def _request(
+            method: str,
+            params: dict[str, Any],
+            *,
+            text_parts: list[str] | None = None,
+            reasoning_parts: list[str] | None = None,
+        ) -> Any:
+            nonlocal next_id
+            next_id += 1
+            request_id = next_id
+            payload = {
+                "jsonrpc": "2.0",
+                "id": request_id,
+                "method": method,
+                "params": params,
+            }
+            assert proc.stdin is not None  # always set: Popen(stdin=PIPE)
+            proc.stdin.write(json.dumps(payload) + "\n")
+            proc.stdin.flush()
+
+            # Monotonic clock: wall-clock adjustments must not shorten or
+            # extend the wait.
+            deadline = time.monotonic() + timeout_seconds
+            reader_flushed = False
+            while time.monotonic() < deadline:
+                exited = proc.poll() is not None
+                if exited and not reader_flushed:
+                    # The agent may have written its reply immediately before
+                    # exiting; give the stdout reader a moment to enqueue it
+                    # instead of discarding whatever is still in flight.
+                    out_thread.join(timeout=exit_drain_seconds)
+                    reader_flushed = True
+                try:
+                    if exited:
+                        msg = inbox.get_nowait()
+                    else:
+                        msg = inbox.get(timeout=0.1)
+                except queue.Empty:
+                    if exited:
+                        break
+                    continue
+
+                try:
+                    handled = self._handle_server_message(
+                        msg,
+                        process=proc,
+                        cwd=self._acp_cwd,
+                        text_parts=text_parts,
+                        reasoning_parts=reasoning_parts,
+                    )
+                except OSError:
+                    if exited:
+                        # A leftover agent request cannot be answered once
+                        # the process is gone; report the early exit below.
+                        break
+                    raise
+                if handled:
+                    continue
+
+                if msg.get("id") != request_id:
+                    continue
+                if "error" in msg:
+                    err = msg.get("error") or {}
+                    raise RuntimeError(
+                        f"Copilot ACP {method} failed: {err.get('message') or err}"
+                    )
+                return msg.get("result")
+
+            if proc.poll() is not None:
+                # Let the stderr reader catch up so the reported tail is
+                # as complete as possible.
+                err_thread.join(timeout=exit_drain_seconds)
+            stderr_text = "\n".join(stderr_tail).strip()
+            if proc.poll() is not None and stderr_text:
+                raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
+            raise TimeoutError(
+                f"Timed out waiting for Copilot ACP response to {method}."
+            )
+
+        try:
+            _request(
+                "initialize",
+                {
+                    "protocolVersion": 1,
+                    "clientCapabilities": {
+                        "fs": {
+                            "readTextFile": True,
+                            "writeTextFile": True,
+                        }
+                    },
+                    "clientInfo": {
+                        "name": "hermes-agent",
+                        "title": "Hermes Agent",
+                        "version": "0.0.0",
+                    },
+                },
+            )
+            session = (
+                _request(
+                    "session/new",
+                    {
+                        "cwd": self._acp_cwd,
+                        "mcpServers": [],
+                    },
+                )
+                or {}
+            )
+            session_id = str(session.get("sessionId") or "").strip()
+            if not session_id:
+                raise RuntimeError("Copilot ACP did not return a sessionId.")
+
+            # Chunks are accumulated only while the prompt request is pending.
+            text_parts: list[str] = []
+            reasoning_parts: list[str] = []
+            _request(
+                "session/prompt",
+                {
+                    "sessionId": session_id,
+                    "prompt": [
+                        {
+                            "type": "text",
+                            "text": prompt_text,
+                        }
+                    ],
+                },
+                text_parts=text_parts,
+                reasoning_parts=reasoning_parts,
+            )
+            return "".join(text_parts), "".join(reasoning_parts)
+        finally:
+            self.close()
+
+    def _handle_server_message(
+        self,
+        msg: dict[str, Any],
+        *,
+        process: subprocess.Popen[str],
+        cwd: str,
+        text_parts: list[str] | None,
+        reasoning_parts: list[str] | None,
+    ) -> bool:
+        """Process a message initiated by the ACP agent.
+
+        Messages carrying a string ``method`` are treated as agent-initiated
+        and consumed here; anything else (e.g. a response to one of our own
+        requests) is left for the caller.
+
+        * ``session/update`` — message/thought chunk text is appended to
+          ``text_parts`` / ``reasoning_parts`` when those lists are given.
+        * ``session/request_permission`` — answered with ``_permission_denied``.
+        * ``fs/read_text_file`` / ``fs/write_text_file`` — served from disk
+          after the path and access checks; failures are reported back as
+          JSON-RPC error ``-32602``.
+        * any other method — answered with JSON-RPC error ``-32601``.
+
+        Args:
+            msg: Decoded JSON message from the agent's stdout.
+            process: The agent subprocess; replies are written to its stdin.
+            cwd: Directory that file paths are checked against.
+            text_parts: Accumulator for ``agent_message_chunk`` text, or None.
+            reasoning_parts: Accumulator for ``agent_thought_chunk`` text, or
+                None.
+
+        Returns:
+            True if the message was consumed here, False if it carries no
+            string ``method``.
+        """
+        method = msg.get("method")
+        if not isinstance(method, str):
+            return False
+
+        if method == "session/update":
+            params = msg.get("params") or {}
+            update = params.get("update") or {}
+            kind = str(update.get("sessionUpdate") or "").strip()
+            content = update.get("content") or {}
+            chunk_text = ""
+            if isinstance(content, dict):
+                chunk_text = str(content.get("text") or "")
+            if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
+                text_parts.append(chunk_text)
+            elif (
+                kind == "agent_thought_chunk"
+                and chunk_text
+                and reasoning_parts is not None
+            ):
+                reasoning_parts.append(chunk_text)
+            return True
+
+        # Every remaining method needs a reply; without stdin there is no way
+        # to send one, so the message is consumed silently.
+        if process.stdin is None:
+            return True
+
+        message_id = msg.get("id")
+        params = msg.get("params") or {}
+
+        if method == "session/request_permission":
+            response = _permission_denied(message_id)
+        elif method == "fs/read_text_file":
+            try:
+                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                block_error = get_read_block_error(str(path))
+                if block_error:
+                    raise PermissionError(block_error)
+                # A missing file is reported as empty content, not an error.
+                content = path.read_text() if path.exists() else ""
+                line = params.get("line")
+                limit = params.get("limit")
+                # ``line`` is 1-based.  ``limit`` caps the number of lines
+                # returned and applies whether or not ``line`` was supplied.
+                start = line - 1 if isinstance(line, int) and line > 1 else 0
+                has_limit = isinstance(limit, int) and limit > 0
+                if start or has_limit:
+                    lines = content.splitlines(keepends=True)
+                    end = start + limit if has_limit else None
+                    content = "".join(lines[start:end])
+                if content:
+                    content = redact_sensitive_text(content)
+                response = {
+                    "jsonrpc": "2.0",
+                    "id": message_id,
+                    "result": {
+                        "content": content,
+                    },
+                }
+            except Exception as exc:
+                response = _jsonrpc_error(message_id, -32602, str(exc))
+        elif method == "fs/write_text_file":
+            try:
+                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                if is_write_denied(str(path)):
+                    raise PermissionError(
+                        f"Write denied: '{path}' is a protected system/credential file."
+                    )
+                path.parent.mkdir(parents=True, exist_ok=True)
+                path.write_text(str(params.get("content") or ""))
+                response = {
+                    "jsonrpc": "2.0",
+                    "id": message_id,
+                    "result": None,
+                }
+            except Exception as exc:
+                response = _jsonrpc_error(message_id, -32602, str(exc))
+        else:
+            response = _jsonrpc_error(
+                message_id,
+                -32601,
+                f"ACP client method '{method}' is not supported by Hermes yet.",
+            )
+
+        process.stdin.write(json.dumps(response) + "\n")
+        process.stdin.flush()
+        return True
@@ -112,17 +112,6 @@ def main() -> None:
    import acp
    from .server import HermesACPAgent

-    # MCP tool discovery from config.yaml — run before asyncio.run() so
-    # it's safe to use blocking waits.  (ACP also registers per-session
-    # MCP servers dynamically via asyncio.to_thread inside the event
-    # loop; that path is unaffected.)  Moved from model_tools.py module
-    # scope to avoid freezing the gateway's loop on lazy import (#16856).
-    try:
-        from tools.mcp_tool import discover_mcp_tools
-        discover_mcp_tools()
-    except Exception:
-        logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
-
    agent = HermesACPAgent()
    try:
        asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
@@ -3,7 +3,6 @@
 from __future__ import annotations

 import asyncio
-import contextvars
 import logging
 import os
 from collections import defaultdict, deque
@@ -13,7 +12,6 @@ from typing import Any, Deque, Optional
 import acp
 from acp.schema import (
    AgentCapabilities,
-    AgentMessageChunk,
    AuthenticateResponse,
    AvailableCommand,
    AvailableCommandsUpdate,
@@ -31,7 +29,6 @@ from acp.schema import (
    McpServerStdio,
    ModelInfo,
    NewSessionResponse,
-    PromptCapabilities,
    PromptResponse,
    ResumeSessionResponse,
    SetSessionConfigOptionResponse,
@@ -47,7 +44,6 @@ from acp.schema import (
    TextContentBlock,
    UnstructuredCommandInput,
    Usage,
-    UserMessageChunk,
 )

 # AuthMethodAgent was renamed from AuthMethod in agent-client-protocol 0.9.0
@@ -91,69 +87,17 @@ def _extract_text(
        | EmbeddedResourceContentBlock
    ],
 ) -> str:
-    """Extract plain text from ACP content blocks for display/commands."""
+    """Extract plain text from ACP content blocks."""
    parts: list[str] = []
    for block in prompt:
        if isinstance(block, TextContentBlock):
            parts.append(block.text)
        elif hasattr(block, "text"):
            parts.append(str(block.text))
+        # Non-text blocks are ignored for now.
    return "\n".join(parts)


-def _image_block_to_openai_part(block: ImageContentBlock) -> dict[str, Any] | None:
-    """Convert an ACP image content block to OpenAI-style multimodal content."""
-    data = str(getattr(block, "data", "") or "").strip()
-    uri = str(getattr(block, "uri", "") or "").strip()
-    mime_type = str(getattr(block, "mime_type", "") or "image/png").strip() or "image/png"
-
-    if data:
-        url = data if data.startswith("data:") else f"data:{mime_type};base64,{data}"
-    elif uri:
-        url = uri
-    else:
-        return None
-
-    return {"type": "image_url", "image_url": {"url": url}}
-
-
-def _content_blocks_to_openai_user_content(
-    prompt: list[
-        TextContentBlock
-        | ImageContentBlock
-        | AudioContentBlock
-        | ResourceContentBlock
-        | EmbeddedResourceContentBlock
-    ],
-) -> str | list[dict[str, Any]]:
-    """Convert ACP prompt blocks into a Hermes/OpenAI-compatible user content payload."""
-    parts: list[dict[str, Any]] = []
-    text_parts: list[str] = []
-
-    for block in prompt:
-        if isinstance(block, TextContentBlock):
-            if block.text:
-                parts.append({"type": "text", "text": block.text})
-                text_parts.append(block.text)
-            continue
-        if isinstance(block, ImageContentBlock):
-            image_part = _image_block_to_openai_part(block)
-            if image_part is not None:
-                parts.append(image_part)
-            continue
-
-    if not parts:
-        return _extract_text(prompt)
-
-    # Keep pure text prompts as strings so slash-command handling and text-only
-    # providers keep the exact legacy path. Switch to structured content only
-    # when an actual non-text block is present.
-    if all(part.get("type") == "text" for part in parts):
-        return "\n".join(text_parts)
-
-    return parts
-
-
 class HermesACPAgent(acp.Agent):
    """ACP Agent implementation wrapping Hermes AIAgent."""

@@ -164,8 +108,6 @@ class HermesACPAgent(acp.Agent):
        "context": "Show conversation context info",
        "reset": "Clear conversation history",
        "compact": "Compress conversation context",
-        "steer": "Inject guidance into the currently running agent turn",
-        "queue": "Queue a prompt to run after the current turn finishes",
        "version": "Show Hermes version",
    }

@@ -195,16 +137,6 @@ class HermesACPAgent(acp.Agent):
            "name": "compact",
            "description": "Compress conversation context",
        },
-        {
-            "name": "steer",
-            "description": "Inject guidance into the currently running agent turn",
-            "input_hint": "guidance for the active turn",
-        },
-        {
-            "name": "queue",
-            "description": "Queue a prompt to run after the current turn finishes",
-            "input_hint": "prompt to run next",
-        },
        {
            "name": "version",
            "description": "Show Hermes version",
@@ -419,7 +351,6 @@ class HermesACPAgent(acp.Agent):
            agent_info=Implementation(name="hermes-agent", version=HERMES_VERSION),
            agent_capabilities=AgentCapabilities(
                load_session=True,
-                prompt_capabilities=PromptCapabilities(image=True),
                session_capabilities=SessionCapabilities(
                    fork=SessionForkCapabilities(),
                    list=SessionListCapabilities(),
@@ -445,78 +376,6 @@ class HermesACPAgent(acp.Agent):

    # ---- Session management -------------------------------------------------

-    @staticmethod
-    def _history_message_text(message: dict[str, Any]) -> str:
-        """Extract displayable text from a persisted OpenAI-style message."""
-        content = message.get("content")
-        if isinstance(content, str):
-            return content.strip()
-        if isinstance(content, list):
-            parts: list[str] = []
-            for item in content:
-                if isinstance(item, dict):
-                    text = item.get("text")
-                    if isinstance(text, str):
-                        parts.append(text)
-                    elif item.get("type") == "text" and isinstance(item.get("content"), str):
-                        parts.append(item["content"])
-                elif isinstance(item, str):
-                    parts.append(item)
-            return "\n".join(part.strip() for part in parts if part and part.strip()).strip()
-        return ""
-
-    @staticmethod
-    def _history_message_update(
-        *,
-        role: str,
-        text: str,
-    ) -> UserMessageChunk | AgentMessageChunk | None:
-        """Build an ACP history replay update for a user/assistant message."""
-        block = TextContentBlock(type="text", text=text)
-        if role == "user":
-            return UserMessageChunk(
-                session_update="user_message_chunk",
-                content=block,
-            )
-        if role == "assistant":
-            return AgentMessageChunk(
-                session_update="agent_message_chunk",
-                content=block,
-            )
-        return None
-
-    async def _replay_session_history(self, state: SessionState) -> None:
-        """Send persisted user/assistant history to clients during session/load.
-
-        Zed's ACP history UI calls ``session/load`` after the user picks an item
-        from the Agents sidebar. The agent must then replay the full conversation
-        as ``user_message_chunk`` / ``agent_message_chunk`` notifications; merely
-        restoring server-side state makes Hermes remember context, but leaves the
-        editor looking like a clean thread.
-        """
-        if not self._conn or not state.history:
-            return
-
-        for message in state.history:
-            role = str(message.get("role") or "")
-            if role not in {"user", "assistant"}:
-                continue
-            text = self._history_message_text(message)
-            if not text:
-                continue
-            update = self._history_message_update(role=role, text=text)
-            if update is None:
-                continue
-            try:
-                await self._conn.session_update(session_id=state.session_id, update=update)
-            except Exception:
-                logger.warning(
-                    "Failed to replay ACP history for session %s",
-                    state.session_id,
-                    exc_info=True,
-                )
-                return
-
    async def new_session(
        self,
        cwd: str,
@@ -545,7 +404,6 @@ class HermesACPAgent(acp.Agent):
            return None
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Loaded session %s", session_id)
-        await self._replay_session_history(state)
        self._schedule_available_commands_update(session_id)
        return LoadSessionResponse(models=self._build_model_state(state))

@@ -562,16 +420,12 @@ class HermesACPAgent(acp.Agent):
            state = self.session_manager.create_session(cwd=cwd)
        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Resumed session %s", state.session_id)
-        await self._replay_session_history(state)
        self._schedule_available_commands_update(state.session_id)
        return ResumeSessionResponse(models=self._build_model_state(state))

    async def cancel(self, session_id: str, **kwargs: Any) -> None:
        state = self.session_manager.get_session(session_id)
        if state and state.cancel_event:
-            with state.runtime_lock:
-                if state.is_running and state.current_prompt_text:
-                    state.interrupted_prompt_text = state.current_prompt_text
            state.cancel_event.set()
            try:
                if getattr(state, "agent", None) and hasattr(state.agent, "interrupt"):
@@ -662,51 +516,11 @@ class HermesACPAgent(acp.Agent):
            return PromptResponse(stop_reason="refusal")

        user_text = _extract_text(prompt).strip()
-        user_content = _content_blocks_to_openai_user_content(prompt)
-        has_content = bool(user_text) or (
-            isinstance(user_content, list) and bool(user_content)
-        )
-        if not has_content:
+        if not user_text:
            return PromptResponse(stop_reason="end_turn")

-        # /steer on an idle session has no in-flight tool call to inject into.
-        # Rewrite it so the payload runs as a normal user prompt, matching the
-        # gateway's behavior (gateway/run.py ~L4898). Two sub-cases:
-        #   1. Zed-interrupt salvage — a prior prompt was cancelled by the
-        #      client right before /steer arrived; replay it with the steer
-        #      text attached as explicit correction/guidance so the user's
-        #      in-flight work isn't lost.
-        #   2. Plain idle — no prior work to salvage; just run the steer
-        #      payload as a regular prompt. Without this, _cmd_steer would
-        #      silently append to state.queued_prompts and respond with
-        #      "No active turn — queued for the next turn", which looks like
-        #      /queue even though the user never typed /queue.
-        if isinstance(user_content, str) and user_text.startswith("/steer"):
-            steer_text = user_text.split(maxsplit=1)[1].strip() if len(user_text.split(maxsplit=1)) > 1 else ""
-            interrupted_prompt = ""
-            rewrite_idle = False
-            with state.runtime_lock:
-                if not state.is_running and steer_text:
-                    if state.interrupted_prompt_text:
-                        interrupted_prompt = state.interrupted_prompt_text
-                        state.interrupted_prompt_text = ""
-                    else:
-                        rewrite_idle = True
-            if interrupted_prompt:
-                user_text = (
-                    f"{interrupted_prompt}\n\n"
-                    f"User correction/guidance after interrupt: {steer_text}"
-                )
-                user_content = user_text
-            elif rewrite_idle:
-                user_text = steer_text
-                user_content = steer_text
-
-        # Intercept slash commands — handle locally without calling the LLM.
-        # Slash commands are text-only; if the client included images/resources,
-        # send the whole multimodal prompt to the agent instead of treating it as
-        # an ACP command.
-        if isinstance(user_content, str) and user_text.startswith("/"):
+        # Intercept slash commands — handle locally without calling the LLM
+        if user_text.startswith("/"):
            response_text = self._handle_slash_command(user_text, state)
            if response_text is not None:
                if self._conn:
@@ -714,24 +528,6 @@ class HermesACPAgent(acp.Agent):
                    await self._conn.session_update(session_id, update)
                return PromptResponse(stop_reason="end_turn")

-        # If Zed sends another regular prompt while the same ACP session is
-        # still running, queue it instead of racing two AIAgent loops against
-        # the same state.history. /steer and /queue are handled above and can
-        # land immediately.
-        with state.runtime_lock:
-            if state.is_running:
-                queued_text = user_text or "[Image attachment]"
-                state.queued_prompts.append(queued_text)
-                depth = len(state.queued_prompts)
-                if self._conn:
-                    update = acp.update_agent_message_text(
-                        f"Queued for the next turn. ({depth} queued)"
-                    )
-                    await self._conn.session_update(session_id, update)
-                return PromptResponse(stop_reason="end_turn")
-            state.is_running = True
-            state.current_prompt_text = user_text or "[Image attachment]"
-
        logger.info("Prompt on session %s: %s", session_id, user_text[:100])

        conn = self._conn
@@ -778,22 +574,6 @@ class HermesACPAgent(acp.Agent):

        def _run_agent() -> dict:
            nonlocal previous_approval_cb, previous_interactive
-            # Bind HERMES_SESSION_KEY for this session so per-session caches
-            # (e.g. the interactive sudo password cache in tools.terminal_tool)
-            # scope to the ACP session rather than leaking across sessions
-            # that land on the same reused executor thread. This call runs
-            # inside a contextvars.copy_context() below, so the ContextVar
-            # write is isolated from other concurrent ACP sessions.
-            try:
-                from gateway.session_context import (
-                    clear_session_vars,
-                    set_session_vars,
-                )
-                session_tokens = set_session_vars(session_key=session_id)
-            except Exception:
-                session_tokens = None
-                clear_session_vars = None  # type: ignore[assignment]
-                logger.debug("Could not set ACP session context", exc_info=True)
            if approval_cb:
                try:
                    from tools import terminal_tool as _terminal_tool
@@ -807,10 +587,9 @@ class HermesACPAgent(acp.Agent):
            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
-                    user_message=user_content,
+                    user_message=user_text,
                    conversation_history=state.history,
                    task_id=session_id,
-                    persist_user_message=user_text or "[Image attachment]",
                )
                return result
            except Exception as e:
@@ -828,24 +607,11 @@ class HermesACPAgent(acp.Agent):
                        _terminal_tool.set_approval_callback(previous_approval_cb)
                    except Exception:
                        logger.debug("Could not restore approval callback", exc_info=True)
-                if session_tokens is not None and clear_session_vars is not None:
-                    try:
-                        clear_session_vars(session_tokens)
-                    except Exception:
-                        logger.debug("Could not clear ACP session context", exc_info=True)

        try:
-            # Wrap the executor call in a fresh copy of the current context so
-            # concurrent ACP sessions on the shared ThreadPoolExecutor don't
-            # stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
-            # particular — used by the interactive sudo password cache scope).
-            ctx = contextvars.copy_context()
-            result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
+            result = await loop.run_in_executor(_executor, _run_agent)
        except Exception:
            logger.exception("Executor error for session %s", session_id)
-            with state.runtime_lock:
-                state.is_running = False
-                state.current_prompt_text = ""
            return PromptResponse(stop_reason="end_turn")

        if result.get("messages"):
@@ -871,28 +637,6 @@ class HermesACPAgent(acp.Agent):
            update = acp.update_agent_message_text(final_response)
            await conn.session_update(session_id, update)

-        # Mark this turn idle before draining queued work so recursive prompt()
-        # calls can acquire the session. Queued turns are intentionally run as
-        # normal follow-up user prompts, preserving role alternation and history.
-        with state.runtime_lock:
-            state.is_running = False
-            state.current_prompt_text = ""
-
-        while True:
-            with state.runtime_lock:
-                if not state.queued_prompts:
-                    break
-                next_prompt = state.queued_prompts.pop(0)
-            if conn:
-                await conn.session_update(
-                    session_id,
-                    acp.update_user_message_text(next_prompt),
-                )
-            await self.prompt(
-                prompt=[TextContentBlock(type="text", text=next_prompt)],
-                session_id=session_id,
-            )
-
        usage = None
        if any(result.get(key) is not None for key in ("prompt_tokens", "completion_tokens", "total_tokens")):
            usage = Usage(
@@ -970,8 +714,6 @@ class HermesACPAgent(acp.Agent):
            "context": self._cmd_context,
            "reset": self._cmd_reset,
            "compact": self._cmd_compact,
-            "steer": self._cmd_steer,
-            "queue": self._cmd_queue,
            "version": self._cmd_version,
        }.get(cmd)

@@ -1068,16 +810,10 @@ class HermesACPAgent(acp.Agent):
            if not hasattr(agent, "_compress_context"):
                return "Context compression not available for this agent."

-            from agent.model_metadata import estimate_request_tokens_rough
+            from agent.model_metadata import estimate_messages_tokens_rough

            original_count = len(state.history)
-            # Include system prompt + tool schemas so the figure reflects real
-            # request pressure, not a transcript-only underestimate (#6217).
-            _sys_prompt = getattr(agent, "_cached_system_prompt", "") or ""
-            _tools = getattr(agent, "tools", None) or None
-            approx_tokens = estimate_request_tokens_rough(
-                state.history, system_prompt=_sys_prompt, tools=_tools
-            )
+            approx_tokens = estimate_messages_tokens_rough(state.history)
            original_session_db = getattr(agent, "_session_db", None)

            try:
@@ -1097,13 +833,7 @@ class HermesACPAgent(acp.Agent):
            self.session_manager.save_session(state.session_id)

            new_count = len(state.history)
-            _sys_prompt_after = getattr(agent, "_cached_system_prompt", "") or _sys_prompt
-            _tools_after = getattr(agent, "tools", None) or _tools
-            new_tokens = estimate_request_tokens_rough(
-                state.history,
-                system_prompt=_sys_prompt_after,
-                tools=_tools_after,
-            )
+            new_tokens = estimate_messages_tokens_rough(state.history)
            return (
                f"Context compressed: {original_count} -> {new_count} messages\n"
                f"~{approx_tokens:,} -> ~{new_tokens:,} tokens"
@@ -1111,34 +841,6 @@ class HermesACPAgent(acp.Agent):
        except Exception as e:
            return f"Compression failed: {e}"

-    def _cmd_steer(self, args: str, state: SessionState) -> str:
-        steer_text = args.strip()
-        if not steer_text:
-            return "Usage: /steer <guidance>"
-
-        if state.is_running and hasattr(state.agent, "steer"):
-            try:
-                if state.agent.steer(steer_text):
-                    preview = steer_text[:80] + ("..." if len(steer_text) > 80 else "")
-                    return f"⏩ Steer queued for the active turn: {preview}"
-            except Exception as exc:
-                logger.warning("ACP steer failed for session %s: %s", state.session_id, exc)
-                return f"⚠️ Steer failed: {exc}"
-
-        with state.runtime_lock:
-            state.queued_prompts.append(steer_text)
-            depth = len(state.queued_prompts)
-        return f"No active turn — queued for the next turn. ({depth} queued)"
-
-    def _cmd_queue(self, args: str, state: SessionState) -> str:
-        queued_text = args.strip()
-        if not queued_text:
-            return "Usage: /queue <prompt>"
-        with state.runtime_lock:
-            state.queued_prompts.append(queued_text)
-            depth = len(state.queued_prompts)
-        return f"Queued for the next turn. ({depth} queued)"
-
    def _cmd_version(self, args: str, state: SessionState) -> str:
        return f"Hermes Agent v{HERMES_VERSION}"

@@ -26,33 +26,6 @@ from typing import Any, Dict, List, Optional
 logger = logging.getLogger(__name__)


-def _win_path_to_wsl(path: str) -> str | None:
-    """Convert a Windows drive path to its WSL /mnt/<drive>/... equivalent."""
-    match = re.match(r"^([A-Za-z]):[\\/](.*)$", path)
-    if not match:
-        return None
-    drive = match.group(1).lower()
-    tail = match.group(2).replace("\\", "/")
-    return f"/mnt/{drive}/{tail}"
-
-
-def _translate_acp_cwd(cwd: str) -> str:
-    """Translate Windows ACP cwd values when Hermes itself is running in WSL.
-
-    Windows ACP clients can launch ``hermes acp`` inside WSL while still sending
-    editor workspaces as Windows drive paths such as ``E:\\Projects``. Store
-    and execute against the WSL mount path so agents, tools, and persisted ACP
-    sessions all agree on the usable workspace. Native Linux/macOS keeps the
-    original cwd unchanged.
-    """
-    from hermes_constants import is_wsl
-
-    if not is_wsl():
-        return cwd
-    translated = _win_path_to_wsl(str(cwd))
-    return translated if translated is not None else cwd
-
-
 def _normalize_cwd_for_compare(cwd: str | None) -> str:
    raw = str(cwd or ".").strip()
    if not raw:
@@ -61,9 +34,11 @@ def _normalize_cwd_for_compare(cwd: str | None) -> str:

    # Normalize Windows drive paths into the equivalent WSL mount form so
    # ACP history filters match the same workspace across Windows and WSL.
-    translated = _win_path_to_wsl(expanded)
-    if translated is not None:
-        expanded = translated
+    match = re.match(r"^([A-Za-z]):[\\/](.*)$", expanded)
+    if match:
+        drive = match.group(1).lower()
+        tail = match.group(2).replace("\\", "/")
+        expanded = f"/mnt/{drive}/{tail}"
    elif re.match(r"^/mnt/[A-Za-z]/", expanded):
        expanded = f"/mnt/{expanded[5].lower()}/{expanded[7:]}"

@@ -121,18 +96,12 @@ def _acp_stderr_print(*args, **kwargs) -> None:


 def _register_task_cwd(task_id: str, cwd: str) -> None:
-    """Bind a task/session id to the editor's working directory for tools.
-
-    Zed can launch Hermes from a Windows workspace while the ACP process runs
-    inside WSL. In that case ACP sends cwd as e.g. ``E:\\Projects\\POTI``;
-    local tools need the WSL mount equivalent or subprocess creation fails
-    before the command can run.
-    """
+    """Bind a task/session id to the editor's working directory for tools."""
    if not task_id:
        return
    try:
        from tools.terminal_tool import register_task_env_overrides
-        register_task_env_overrides(task_id, {"cwd": _translate_acp_cwd(cwd)})
+        register_task_env_overrides(task_id, {"cwd": cwd})
    except Exception:
        logger.debug("Failed to register ACP task cwd override", exc_info=True)

@@ -176,11 +145,6 @@ class SessionState:
    model: str = ""
    history: List[Dict[str, Any]] = field(default_factory=list)
    cancel_event: Any = None  # threading.Event
-    is_running: bool = False
-    queued_prompts: List[str] = field(default_factory=list)
-    runtime_lock: Any = field(default_factory=Lock)
-    current_prompt_text: str = ""
-    interrupted_prompt_text: str = ""


 class SessionManager:
@@ -211,7 +175,6 @@ class SessionManager:
        """Create a new session with a unique ID and a fresh AIAgent."""
        import threading

-        cwd = _translate_acp_cwd(cwd)
        session_id = str(uuid.uuid4())
        agent = self._make_agent(session_id=session_id, cwd=cwd)
        state = SessionState(
@@ -254,7 +217,6 @@ class SessionManager:
        """Deep-copy a session's history into a new session."""
        import threading

-        cwd = _translate_acp_cwd(cwd)
        original = self.get_session(session_id)  # checks DB too
        if original is None:
            return None
@@ -356,7 +318,6 @@ class SessionManager:

    def update_cwd(self, session_id: str, cwd: str) -> Optional[SessionState]:
        """Update the working directory for a session and its tool overrides."""
-        cwd = _translate_acp_cwd(cwd)
        state = self.get_session(session_id)  # checks DB too
        if state is None:
            return None
@@ -20,27 +20,12 @@ from pathlib import Path

 from hermes_constants import get_hermes_home
 from typing import Any, Dict, List, Optional, Tuple
-from utils import base_url_host_matches, normalize_proxy_env_vars
+from utils import normalize_proxy_env_vars

-# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
-# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
-# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
-# read_claude_code_credentials_from_keychain) are all on cold user-triggered
-# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
-# the module after the first call and returns None on ImportError.
-_anthropic_sdk: Any = ...  # sentinel — None means "tried and missing"
-
-
-def _get_anthropic_sdk():
-    """Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
-    global _anthropic_sdk
-    if _anthropic_sdk is ...:
-        try:
-            import anthropic as _sdk
-            _anthropic_sdk = _sdk
-        except ImportError:
-            _anthropic_sdk = None
-    return _anthropic_sdk
+try:
+    import anthropic as _anthropic_sdk
+except ImportError:
+    _anthropic_sdk = None  # type: ignore[assignment]

 logger = logging.getLogger(__name__)

@@ -217,33 +202,19 @@ def _forbids_sampling_params(model: str) -> bool:


 # Beta headers for enhanced features (sent with ALL auth types).
-# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
+# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
 # beta headers are still accepted (harmless no-op) but not required. Kept
 # here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
 # that still gate on the headers continue to get the enhanced features.
-#
-# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
-# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
-# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
-# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
-# at 200K even though model_metadata.py advertises 1M. The header is a harmless
-# no-op on endpoints where 1M is GA.
-#
-# Migration guide: remove these if you no longer support ≤4.5 models or once
-# Bedrock/Azure promote 1M to GA.
+# Migration guide: remove these if you no longer support ≤4.5 models.
 _COMMON_BETAS = [
    "interleaved-thinking-2025-05-14",
    "fine-grained-tool-streaming-2025-05-14",
-    "context-1m-2025-08-07",
 ]
 # MiniMax's Anthropic-compatible endpoints fail tool-use requests when
 # the fine-grained tool streaming beta is present.  Omit it so tool calls
 # fall back to the provider's default response path.
 _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
-# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
-# Bearer-auth (MiniMax) endpoints since they host their own models and
-# unknown Anthropic beta headers risk request rejection.
-_CONTEXT_1M_BETA = "context-1m-2025-08-07"

 # Fast mode beta — enables the ``speed: "fast"`` request parameter for
 # significantly higher output token throughput on Opus 4.6 (~2.5x).
@@ -365,88 +336,6 @@ def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")


-# Model-name prefixes that identify the Kimi / Moonshot family.  Covers
-# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
-# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
-# Matched case-insensitively against the post-``normalize_model_name`` form,
-# so a caller's ``provider/vendor/model`` slug is handled the same as a
-# bare name.
-_KIMI_FAMILY_MODEL_PREFIXES = (
-    "kimi-", "kimi_",
-    "moonshot-", "moonshot_",
-    "k1.", "k1-",
-    "k2.", "k2-",
-    "k25", "k2.5",
-)
-
-
-def _model_name_is_kimi_family(model: str | None) -> bool:
-    if not isinstance(model, str):
-        return False
-    m = model.strip().lower()
-    if not m:
-        return False
-    # Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
-    if "/" in m:
-        m = m.rsplit("/", 1)[-1]
-    return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
-
-
-def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
-    """Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.
-
-    Broader than ``_is_kimi_coding_endpoint`` — matches:
-
-    - Kimi's official ``/coding`` URL (legacy check, preserved)
-    - Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host
-    - Custom or proxied endpoints whose *model* name is in the Kimi / Moonshot
-      family (``kimi-*``, ``moonshot-*``, ``k1.*``, ``k2.*``, …).  Users with
-      ``api_mode: anthropic_messages`` on a private gateway fronting Kimi
-      fall into this branch — the upstream still enforces Kimi's thinking
-      semantics (reasoning_content required on every replayed tool-call
-      message) regardless of the gateway's hostname.
-
-    Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
-    preserve unsigned reasoning_content-derived thinking blocks on replay.
-    See hermes-agent#13848, #17057.
-    """
-    if _is_kimi_coding_endpoint(base_url):
-        return True
-    for _domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn"):
-        if base_url_host_matches(base_url or "", _domain):
-            return True
-    if _model_name_is_kimi_family(model):
-        return True
-    return False
-
-
-def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
-    """Return True for DeepSeek's Anthropic-compatible endpoint.
-
-    DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
-    but, when thinking mode is enabled, requires the ``thinking`` blocks
-    from prior assistant turns to round-trip on subsequent requests — the
-    generic third-party path strips them and triggers HTTP 400::
-
-        The content[].thinking in the thinking mode must be passed back
-        to the API.
-
-    Per DeepSeek's published compatibility matrix the blocks are unsigned
-    (no Anthropic-proprietary signature, no ``redacted_thinking`` support),
-    so this endpoint is handled with the same strip-signed / keep-unsigned
-    policy used for Kimi's ``/coding`` endpoint.  The match is pinned to
-    the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
-    base URL (which never reaches this adapter) is not misclassified.
-    See hermes-agent#16748.
-    """
-    if not base_url_host_matches(base_url or "", "api.deepseek.com"):
-        return False
-    normalized = _normalize_base_url_text(base_url)
-    if not normalized:
-        return False
-    return "/anthropic" in normalized.rstrip("/").lower()
-
-
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -461,45 +350,20 @@ def _requires_bearer_auth(base_url: str | None) -> bool:
    return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))


-def _common_betas_for_base_url(
-    base_url: str | None,
-    *,
-    drop_context_1m_beta: bool = False,
-) -> list[str]:
+def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    """Return the beta headers that are safe for the configured endpoint.

    MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
    that include Anthropic's ``fine-grained-tool-streaming`` beta — every
    tool-use message triggers a connection error.  Strip that beta for
    Bearer-auth endpoints while keeping all other betas intact.
-
-    The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
-    endpoints — MiniMax hosts its own models, not Claude, so the header is
-    irrelevant at best and risks request rejection at worst.
-
-    ``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
-    otherwise-unrelated endpoints. The OAuth retry path flips this flag after
-    a subscription rejects the beta with
-    "The long context beta is not yet available for this subscription" so
-    subsequent requests in the same session don't repeat the probe. See the
-    reactive recovery loop in ``run_agent.py`` and issue-comment history on
-    PR #17680 for the full rationale.
    """
    if _requires_bearer_auth(base_url):
-        _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
-        return [b for b in _COMMON_BETAS if b not in _stripped]
-    if drop_context_1m_beta:
-        return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
+        return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
    return _COMMON_BETAS


-def build_anthropic_client(
-    api_key: str,
-    base_url: str = None,
-    timeout: float = None,
-    *,
-    drop_context_1m_beta: bool = False,
-):
+def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
@@ -508,15 +372,8 @@ def build_anthropic_client(
    Anthropic-compatible providers respect the same knob as OpenAI-wire
    providers.

-    ``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
-    client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
-    path in ``run_agent.py`` when a subscription rejects the beta; leave at
-    its default on fresh clients so 1M-capable subscriptions keep the
-    capability.
-
    Returns an anthropic.Anthropic instance.
    """
-    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
@@ -543,10 +400,7 @@ def build_anthropic_client(
            kwargs["default_query"] = {"api-version": "2025-04-15"}
        else:
            kwargs["base_url"] = normalized_base_url
-    common_betas = _common_betas_for_base_url(
-        normalized_base_url,
-        drop_context_1m_beta=drop_context_1m_beta,
-    )
+    common_betas = _common_betas_for_base_url(normalized_base_url)

    if _is_kimi_coding_endpoint(base_url):
        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
@@ -602,16 +456,8 @@ def build_anthropic_bedrock_client(region: str):
    Claude feature parity: prompt caching, thinking budgets, adaptive
    thinking, fast mode — features not available via the Converse API.

-    Attaches the common Anthropic beta headers as client-level defaults so
-    that Bedrock-hosted Claude models get the same enhanced features as
-    native Anthropic. The ``context-1m-2025-08-07`` beta in particular
-    unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
-    it, Bedrock caps these models at 200K even though the Anthropic API
-    serves them with 1M natively.
-
    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
    """
-    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
@@ -627,7 +473,6 @@ def build_anthropic_bedrock_client(region: str):
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
-        default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
    )


@@ -643,6 +488,9 @@ def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
+    import platform
+    import subprocess
+
    if platform.system() != "Darwin":
        return None

@@ -1187,12 +1035,9 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
        # These must not be converted to hyphens.  See issue #12295.
        if _is_bedrock_model_id(model):
            return model
-        # Only convert dots to hyphens for Anthropic/Claude models.
-        # Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
-        # as part of their canonical names.  See issue #17171.
-        _lower = model.lower()
-        if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
-            model = model.replace(".", "-")
+        # OpenRouter uses dots for version separators (claude-opus-4.6),
+        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
+        model = model.replace(".", "-")
    return model


@@ -1209,33 +1054,6 @@ def _sanitize_tool_id(tool_id: str) -> str:
    return sanitized or "tool_0"


-def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
-    """Normalize tool schemas before sending them to Anthropic.
-
-    Anthropic's tool schema validator rejects nullable unions such as
-    ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
-    commonly emits for optional fields. Tool optionality is represented by
-    the parent ``required`` array, so we delegate to the shared
-    ``strip_nullable_unions`` helper to collapse nullable unions to the
-    non-null branch while preserving metadata like description/default.
-
-    ``keep_nullable_hint=False`` because the Anthropic validator does not
-    recognize the OpenAPI-style ``nullable: true`` extension and strict
-    schema-to-grammar converters may reject unknown keywords.
-    """
-    if not schema:
-        return {"type": "object", "properties": {}}
-
-    from tools.schema_sanitizer import strip_nullable_unions
-
-    normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
-    if not isinstance(normalized, dict):
-        return {"type": "object", "properties": {}}
-    if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
-        normalized = {**normalized, "properties": {}}
-    return normalized
-
-
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
@@ -1246,9 +1064,7 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
        result.append({
            "name": fn.get("name", ""),
            "description": fn.get("description", ""),
-            "input_schema": _normalize_tool_input_schema(
-                fn.get("parameters", {"type": "object", "properties": {}})
-            ),
+            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
        })
    return result

@@ -1379,7 +1195,6 @@ def _convert_content_to_anthropic(content: Any) -> Any:
 def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
-    model: str | None = None,
 ) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

@@ -1391,12 +1206,6 @@ def convert_messages_to_anthropic(
    endpoint, all thinking block signatures are stripped.  Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".
-
-    When *model* is provided and matches the Kimi / Moonshot family (or
-    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
-    synthesised from ``reasoning_content`` are preserved on replayed
-    assistant tool-call messages — Kimi requires the field to exist, even
-    if empty.
    """
    system = None
    result = []
@@ -1625,16 +1434,7 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
-    # Anthropic Messages protocol upstream but require that thinking blocks
-    # synthesised from reasoning_content round-trip on subsequent turns when
-    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
-    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
-    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
-    _preserve_unsigned_thinking = (
-        _is_kimi_family_endpoint(base_url, model)
-        or _is_deepseek_anthropic_endpoint(base_url)
-    )
+    _is_kimi = _is_kimi_coding_endpoint(base_url)

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1646,22 +1446,22 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _preserve_unsigned_thinking:
-            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
-            # thinking server-side and require unsigned thinking blocks on
-            # replayed assistant tool-call messages.  Strip signed Anthropic
-            # blocks (neither upstream can validate Anthropic signatures) but
-            # preserve the unsigned ones we synthesised from reasoning_content.
+        if _is_kimi:
+            # Kimi's /coding endpoint enables thinking server-side and
+            # requires unsigned thinking blocks on replayed assistant
+            # tool-call messages.  Strip signed Anthropic blocks (Kimi
+            # can't validate signatures) but preserve the unsigned ones
+            # we synthesised from reasoning_content above.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — upstream can't validate, strip
+                    # Anthropic-signed block — Kimi can't validate, strip
                    continue
                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: the upstream needs it for message-history validation.
+                # keep it: Kimi needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
@@ -1718,7 +1518,6 @@ def build_anthropic_kwargs(
    context_length: Optional[int] = None,
    base_url: str | None = None,
    fast_mode: bool = False,
-    drop_context_1m_beta: bool = False,
 ) -> Dict[str, Any]:
    """Build kwargs for anthropic.messages.create().

@@ -1758,9 +1557,7 @@ def build_anthropic_kwargs(
    Currently only supported on native Anthropic endpoints (not third-party
    compatible ones).
    """
-    system, anthropic_messages = convert_messages_to_anthropic(
-        messages, base_url=base_url, model=model
-    )
+    system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
@@ -1866,7 +1663,7 @@ def build_anthropic_kwargs(
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    _is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
+    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
@@ -1907,10 +1704,7 @@ def build_anthropic_kwargs(
        kwargs.setdefault("extra_body", {})["speed"] = "fast"
        # Build extra_headers with ALL applicable betas (the per-request
        # extra_headers override the client-level anthropic-beta header).
-        betas = list(_common_betas_for_base_url(
-            base_url,
-            drop_context_1m_beta=drop_context_1m_beta,
-        ))
+        betas = list(_common_betas_for_base_url(base_url))
        if is_oauth:
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
@@ -5,11 +5,11 @@ session search, web extraction, vision analysis, browser vision) picks up
 the best available backend without duplicating fallback logic.

 Resolution order for text tasks (auto mode):
-  1. User's main provider + main model (used regardless of provider type —
-     aggregators, direct API-key providers, native Anthropic, Codex, etc.)
-  2. OpenRouter  (OPENROUTER_API_KEY)
-  3. Nous Portal (~/.hermes/auth.json active provider)
-  4. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
+  1. OpenRouter  (OPENROUTER_API_KEY)
+  2. Nous Portal (~/.hermes/auth.json active provider)
+  3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
+  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.2-codex,
+     wrapped to look like a chat.completions client)
  5. Native Anthropic
  6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
  7. None
@@ -18,16 +18,10 @@ Resolution order for vision/multimodal tasks (auto mode):
  1. Selected main provider, if it is one of the supported vision backends below
  2. OpenRouter
  3. Nous Portal
-  4. Native Anthropic
-  5. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
-  6. None
-
-Codex OAuth (ChatGPT-account auth) is intentionally NOT in either
-fallback chain: OpenAI gates this endpoint behind an undocumented,
-shifting model allow-list, so "just try Codex with a hardcoded model"
-rots on its own.  Codex is used only when the user's main provider *is*
-openai-codex (Step 1 above) or when a caller explicitly requests it with
-a model (auxiliary.<task>.provider + auxiliary.<task>.model).
+  4. Codex OAuth (gpt-5.2-codex supports vision via Responses API)
+  5. Native Anthropic
+  6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
+  7. None

 Per-task overrides are configured in config.yaml under the ``auxiliary:`` section
 (e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
@@ -47,57 +41,10 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Tuple
 from urllib.parse import urlparse, parse_qs, urlunparse

-# NOTE: `from openai import OpenAI` is deliberately NOT at module top — the
-# openai SDK pulls a large type tree (~240 ms cold, including responses/*,
-# graders/*). We expose `OpenAI` here as a thin proxy that imports the SDK on
-# first call and forwards, so:
-#   (a) the 15+ in-module `OpenAI(...)` construction sites work unchanged
-#       (Python's function-scope name lookup resolves `OpenAI` to the proxy
-#       object bound in module globals here, without triggering any import);
-#   (b) external code can still do `auxiliary_client.OpenAI` or
-#       `patch("agent.auxiliary_client.OpenAI", ...)` — tests see the proxy,
-#       and patch replaces the module attribute as usual;
-#   (c) `OpenAI` as a type annotation resolves at runtime to the proxy class
-#       (which is harmless — annotations aren't type-checked at runtime).
-# See tests/agent/test_auxiliary_client.py for patch patterns this supports.
-if TYPE_CHECKING:
-    from openai import OpenAI  # noqa: F401 — type hints only
-
-_OPENAI_CLS_CACHE: Optional[type] = None
-
-
-def _load_openai_cls() -> type:
-    """Import and cache ``openai.OpenAI``."""
-    global _OPENAI_CLS_CACHE
-    if _OPENAI_CLS_CACHE is None:
-        from openai import OpenAI as _cls
-        _OPENAI_CLS_CACHE = _cls
-    return _OPENAI_CLS_CACHE
-
-
-class _OpenAIProxy:
-    """Module-level proxy that looks like the ``openai.OpenAI`` class.
-
-    Forwards ``OpenAI(...)`` calls and ``isinstance(x, OpenAI)`` checks to the
-    real SDK class, importing the SDK lazily on first use.
-    """
-
-    __slots__ = ()
-
-    def __call__(self, *args, **kwargs):
-        return _load_openai_cls()(*args, **kwargs)
-
-    def __instancecheck__(self, obj):
-        return isinstance(obj, _load_openai_cls())
-
-    def __repr__(self):
-        return "<lazy openai.OpenAI proxy>"
-
-
-OpenAI = _OpenAIProxy()  # module-level name, resolves lazily on call/isinstance
+from openai import OpenAI

 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
@@ -107,14 +54,6 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
 logger = logging.getLogger(__name__)


-def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
-    """Return False instead of raising when a patched symbol is not a type."""
-    try:
-        return isinstance(obj, maybe_type)
-    except TypeError:
-        return False
-
-
 def _extract_url_query_params(url: str):
    """Extract query params from URL, return (clean_url, default_query dict or None)."""
    parsed = urlparse(url)
@@ -155,10 +94,6 @@ _PROVIDER_ALIASES = {
    "github-models": "copilot",
    "github-copilot-acp": "copilot-acp",
    "copilot-acp-agent": "copilot-acp",
-    "tencent": "tencent-tokenhub",
-    "tokenhub": "tencent-tokenhub",
-    "tencent-cloud": "tencent-tokenhub",
-    "tencentmaas": "tencent-tokenhub",
 }


@@ -216,25 +151,31 @@ def _fixed_temperature_for_model(
    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
-_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
-    "gemini": "gemini-3-flash-preview",
-    "zai": "glm-4.5-flash",
-    "kimi-coding": "kimi-k2-turbo-preview",
-    "stepfun": "step-3.5-flash",
-    "kimi-coding-cn": "kimi-k2-turbo-preview",
-    "gmi": "google/gemini-3.1-flash-lite-preview",
-    "minimax": "MiniMax-M2.7",
-    "minimax-oauth": "MiniMax-M2.7-highspeed",
-    "minimax-cn": "MiniMax-M2.7",
+def _get_aux_model_for_provider(provider_id: str) -> str:
+    """Return the cheap auxiliary model for *provider_id* ("" when unknown).
+
+    Prefers ProviderProfile.default_aux_model; falls back to the legacy
+    hardcoded dict for providers that predate the profiles system.
+    """
+    try:
+        from providers import get_provider_profile
+        _p = get_provider_profile(provider_id)
+        if _p and _p.default_aux_model:
+            return _p.default_aux_model
+    except Exception:  # best-effort: a broken profile must not break aux routing
+        logger.debug("Provider profile lookup failed for %r", provider_id, exc_info=True)
+    return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
+
+
+# Fallback for providers not yet migrated to ProviderProfile.default_aux_model.
+# New providers should set default_aux_model on their profile instead.
+_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
    "anthropic": "claude-haiku-4-5-20251001",
-    "ai-gateway": "google/gemini-3-flash",
-    "opencode-zen": "gemini-3-flash",
-    "opencode-go": "glm-5",
-    "kilocode": "google/gemini-3-flash-preview",
-    "ollama-cloud": "nemotron-3-nano:30b",
-    "tencent-tokenhub": "hy3-preview",
 }

+# Legacy alias (fallback entries only) — prefer _get_aux_model_for_provider().
+_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
+
 # Vision-specific model overrides for direct providers.
 # When the user's main provider has a dedicated vision/multimodal model that
 # differs from their main chat model, map it here.  The vision auto-detect
@@ -244,21 +185,6 @@ _PROVIDER_VISION_MODELS: Dict[str, str] = {
    "zai": "glm-5v-turbo",
 }

-# Providers whose endpoint does not accept image input, even though the
-# provider's broader ecosystem has vision models available elsewhere.  When
-# `auxiliary.vision.provider: auto` sees one of these as the main provider,
-# it must skip straight to the aggregator chain instead of returning a client
-# that will 404 on every vision request.
-#
-# kimi-coding / kimi-coding-cn: the Kimi Coding Plan routes through
-# api.kimi.com/coding (Anthropic Messages wire) which Kimi's own docs
-# describe as having no image_in capability. Vision lives on the separate
-# Kimi Platform (api.moonshot.ai, OpenAI-wire, pay-as-you-go).  See #17076.
-_PROVIDERS_WITHOUT_VISION: frozenset = frozenset({
-    "kimi-coding",
-    "kimi-coding-cn",
-})
-
 # OpenRouter app attribution headers
 _OR_HEADERS = {
    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
@@ -291,14 +217,12 @@ _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"

-# Codex OAuth endpoint used when a caller explicitly requests
-# provider="openai-codex".  There is deliberately no hardcoded default
-# model: the set of models OpenAI accepts on this endpoint for
-# ChatGPT-account auth is an undocumented, shifting allow-list, and
-# pinning one here has drifted silently twice (gpt-5.3-codex → gpt-5.2-codex
-# → gpt-5.4 over 6 weeks in early 2026).  Callers must pass the model
-# they want explicitly (from config.yaml model.model, auxiliary.<task>.model,
-# or the user's active Codex model selection).
+# Codex fallback: uses the Responses API (the only endpoint the Codex
+# OAuth token can access) with a fast model for auxiliary tasks.
+# ChatGPT-backed Codex accounts currently reject gpt-5.3-codex for these
+# auxiliary flows, while gpt-5.2-codex remains broadly available and supports
+# vision via Responses.
+_CODEX_AUX_MODEL = "gpt-5.2-codex"
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


@@ -355,13 +279,6 @@ def _to_openai_base_url(base_url: str) -> str:
        rewritten = url[: -len("/anthropic")] + "/v1"
        logger.debug("Auxiliary client: rewrote base URL %s → %s", url, rewritten)
        return rewritten
-    if "api.kimi.com" in url and url.endswith("/coding"):
-        # Kimi Code uses /coding/v1/messages for Anthropic SDK (appends /v1/messages)
-        # but /coding/v1/chat/completions for OpenAI SDK (appends /chat/completions)
-        # Without /v1 here, OpenAI SDK hits /coding/chat/completions — a 404.
-        rewritten = url + "/v1"
-        logger.debug("Auxiliary client: rewrote Kimi base URL %s → %s", url, rewritten)
-        return rewritten
    return url


@@ -496,33 +413,6 @@ class _CodexCompletionsAdapter:
        # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT
        # support max_output_tokens or temperature — omit to avoid 400 errors.

-        # Translate extra_body.reasoning (chat.completions shape) into the
-        # Responses API's top-level reasoning + include fields.  Mirrors
-        # agent/transports/codex.py::build_kwargs() so auxiliary callers
-        # that configure reasoning via auxiliary.<task>.extra_body get the
-        # same behavior as the main agent's Codex transport.
-        extra_body = kwargs.get("extra_body") or {}
-        if isinstance(extra_body, dict):
-            reasoning_cfg = extra_body.get("reasoning")
-            if isinstance(reasoning_cfg, dict):
-                if reasoning_cfg.get("enabled") is False:
-                    # Reasoning explicitly disabled — do not set reasoning
-                    # or include.  The Codex backend still thinks by
-                    # default, but we honor the caller's intent where the
-                    # API allows it.
-                    pass
-                else:
-                    effort = reasoning_cfg.get("effort", "medium")
-                    # Codex backend rejects "minimal"; clamp to "low" to
-                    # match the main-agent Codex transport behavior.
-                    if effort == "minimal":
-                        effort = "low"
-                    resp_kwargs["reasoning"] = {
-                        "effort": effort,
-                        "summary": "auto",
-                    }
-                    resp_kwargs["include"] = ["reasoning.encrypted_content"]
-
        # Tools support for auxiliary callers (e.g. skills_hub) that pass function schemas
        tools = kwargs.get("tools")
        if tools:
@@ -832,116 +722,6 @@ class AsyncAnthropicAuxiliaryClient:
        self.base_url = sync_wrapper.base_url


-def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
-    """True if the endpoint at ``base_url`` speaks the Anthropic Messages
-    protocol instead of OpenAI chat.completions.
-
-    Mirrors ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the
-    auxiliary client and the main agent stay in sync on transport selection.
-    Covers:
-
-    - Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
-      Anthropic-compatible gateways).
-    - ``api.kimi.com/coding`` (Kimi Coding Plan — the /coding route only
-      speaks Claude-Code's native Anthropic shape; ``chat.completions``
-      returns 404 on Anthropic-only model aliases like ``kimi-for-coding``).
-    - ``api.anthropic.com`` (native Anthropic).
-    """
-    normalized = (base_url or "").strip().lower().rstrip("/")
-    if not normalized:
-        return False
-    if normalized.endswith("/anthropic"):
-        return True
-    hostname = base_url_hostname(normalized)
-    if hostname == "api.anthropic.com":
-        return True
-    if hostname == "api.kimi.com" and "/coding" in normalized:
-        return True
-    return False
-
-
-def _maybe_wrap_anthropic(
-    client_obj: Any,
-    model: str,
-    api_key: str,
-    base_url: str,
-    api_mode: Optional[str] = None,
-) -> Any:
-    """Rewrap a plain OpenAI client in ``AnthropicAuxiliaryClient`` when
-    the endpoint actually speaks Anthropic Messages.
-
-    This is the single chokepoint for aux-client transport correction.
-    Runs at the end of every ``resolve_provider_client`` branch so that
-    api_key providers (Kimi Coding Plan), the ``custom`` endpoint, and
-    future /anthropic gateways all land on the right wire format
-    regardless of which branch built the client.
-
-    Returns ``client_obj`` unchanged when:
-
-    - It's already an Anthropic/Codex/Gemini/CopilotACP wrapper.
-    - The endpoint is an OpenAI-wire endpoint.
-    - ``api_mode`` is explicitly set to a non-Anthropic transport.
-    - The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
-    """
-    # Already wrapped — don't double-wrap.
-    if _safe_isinstance(client_obj, AnthropicAuxiliaryClient):
-        return client_obj
-    # Other specialized adapters we should never re-dispatch.
-    if _safe_isinstance(client_obj, CodexAuxiliaryClient):
-        return client_obj
-    try:
-        from agent.gemini_native_adapter import GeminiNativeClient
-        if _safe_isinstance(client_obj, GeminiNativeClient):
-            return client_obj
-    except ImportError:
-        pass
-    try:
-        from agent.copilot_acp_client import CopilotACPClient
-        if _safe_isinstance(client_obj, CopilotACPClient):
-            return client_obj
-    except ImportError:
-        pass
-
-    # Explicit non-anthropic api_mode wins over URL heuristics.
-    if api_mode and api_mode != "anthropic_messages":
-        return client_obj
-
-    should_wrap = (
-        api_mode == "anthropic_messages"
-        or _endpoint_speaks_anthropic_messages(base_url)
-    )
-    if not should_wrap:
-        return client_obj
-
-    try:
-        from agent.anthropic_adapter import build_anthropic_client
-    except ImportError:
-        logger.warning(
-            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
-            "not installed — falling back to OpenAI-wire (will likely 404).",
-            base_url,
-        )
-        return client_obj
-
-    try:
-        real_client = build_anthropic_client(api_key, base_url)
-    except Exception as exc:
-        logger.warning(
-            "Failed to build Anthropic client for %s (%s) — falling back to "
-            "OpenAI-wire client.", base_url, exc,
-        )
-        return client_obj
-
-    logger.debug(
-        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
-        "(model=%s, base_url=%s, api_mode=%s)",
-        model, base_url[:60] if base_url else "", api_mode or "auto-detected",
-    )
-    return AnthropicAuxiliaryClient(
-        real_client, model, api_key, base_url, is_oauth=False,
-    )
-
-
 def _read_nous_auth() -> Optional[dict]:
    """Read and validate ~/.hermes/auth.json for an active Nous provider.

@@ -1093,9 +873,10 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            if not api_key:
                continue

-            raw_base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
-            base_url = _to_openai_base_url(raw_base_url)
-            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+            base_url = _to_openai_base_url(
+                _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
+            )
+            model = _get_aux_model_for_provider(provider_id) or None
            if model is None:
                continue  # skip provider if we don't know a valid aux model
            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
@@ -1104,25 +885,32 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

                if is_native_gemini_base_url(base_url):
                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
-            extra = {}
-            if base_url_host_matches(base_url, "api.kimi.com"):
-                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
-                from hermes_cli.models import copilot_default_headers
+            extra = {}  # still inside the pool branch: api_key/base_url/model set above
+            if base_url_host_matches(base_url, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+                from hermes_cli.models import copilot_default_headers

-                extra["default_headers"] = copilot_default_headers()
-            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
-            _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
-            return _client, model
+                extra["default_headers"] = copilot_default_headers()
+            else:
+                try:
+                    from providers import get_provider_profile as _gpf_aux
+                    _ph_aux = _gpf_aux(provider_id)
+                    if _ph_aux and _ph_aux.default_headers:
+                        extra["default_headers"] = dict(_ph_aux.default_headers)
+                except Exception as exc:  # profile headers are optional
+                    logger.debug("Aux client: profile headers unavailable for %s: %s", provider_id, exc)
+            return OpenAI(api_key=api_key, base_url=base_url, **extra), model

        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
        if not api_key:
            continue

-        raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
-        base_url = _to_openai_base_url(raw_base_url)
-        model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+        base_url = _to_openai_base_url(
+            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
+        )
+        model = _get_aux_model_for_provider(provider_id) or None
        if model is None:
            continue  # skip provider if we don't know a valid aux model
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
@@ -1138,9 +926,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
-        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
-        _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
-        return _client, model
+        else:
+            try:
+                from providers import get_provider_profile as _gpf_aux2
+                _ph_aux2 = _gpf_aux2(provider_id)
+                if _ph_aux2 and _ph_aux2.default_headers:
+                    extra["default_headers"] = dict(_ph_aux2.default_headers)
+            except Exception as exc:  # profile headers are optional
+                logger.debug("Aux client: profile headers unavailable for %s: %s", provider_id, exc)
+        return OpenAI(api_key=api_key, base_url=base_url, **extra), model

    return None, None

@@ -1424,32 +1218,10 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
            model,
        )
-    # URL-based anthropic detection for custom endpoints that didn't set
-    # api_mode explicitly (e.g. kimi.com/coding reached via custom config).
-    _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
-    _fallback_client = _maybe_wrap_anthropic(
-        _fallback_client, model, custom_key, custom_base, custom_mode,
-    )
-    return _fallback_client, model
+    return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model


-def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
-    """Build a CodexAuxiliaryClient for an explicitly-requested model.
-
-    There is no auto-selection of the Codex model: the ChatGPT-account
-    Codex endpoint's accepted model list is an undocumented, drifting
-    allow-list, so any hardcoded default we pick goes stale.  The caller
-    is responsible for passing the model (e.g. from the user's own
-    ``model.model`` or ``auxiliary.<task>.model`` config).
-
-    Returns (None, None) when no Codex OAuth token is available.
-    """
-    if not model:
-        logger.warning(
-            "Auxiliary client: openai-codex requested without a model; "
-            "pass model explicitly (auxiliary.<task>.model in config.yaml)."
-        )
-        return None, None
+def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
    pool_present, entry = _select_pool_entry("openai-codex")
    if pool_present:
        codex_token = _pool_runtime_api_key(entry)
@@ -1465,13 +1237,13 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
        if not codex_token:
            return None, None
        base_url = _CODEX_AUX_BASE_URL
-    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", model)
+    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
    real_client = OpenAI(
        api_key=codex_token,
        base_url=base_url,
        default_headers=_codex_cloudflare_headers(codex_token),
    )
-    return CodexAuxiliaryClient(real_client, model), model
+    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL


 def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
@@ -1510,7 +1282,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:

    from agent.anthropic_adapter import _is_oauth_token
    is_oauth = _is_oauth_token(token)
-    model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
+    model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
    logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
    try:
        real_client = build_anthropic_client(token, base_url)
@@ -1526,6 +1298,7 @@ _AUTO_PROVIDER_LABELS = {
    "_try_openrouter": "openrouter",
    "_try_nous": "nous",
    "_try_custom_endpoint": "local/custom",
+    "_try_codex": "openai-codex",
    "_resolve_api_key_provider": "api-key",
 }

@@ -1552,18 +1325,12 @@ def _get_provider_chain() -> List[tuple]:

    Built at call time (not module level) so that test patches
    on the ``_try_*`` functions are picked up correctly.
-
-    NOTE: ``openai-codex`` is deliberately NOT in this chain.  The
-    ChatGPT-account Codex endpoint only accepts a shifting, undocumented
-    allow-list of model IDs, so falling back to it with a guessed model
-    fails more often than not.  Codex is used only when the user's main
-    provider *is* openai-codex (see Step 1 of ``_resolve_auto``) or when
-    a caller explicitly requests it with a model.
    """
    return [
        ("openrouter", _try_openrouter),
        ("nous", _try_nous),
        ("local/custom", _try_custom_endpoint),
+        ("openai-codex", _try_codex),
        ("api-key", _resolve_api_key_provider),
    ]

@@ -1899,7 +1666,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
    except ImportError:
        pass
    try:
-        from agent.copilot_acp_client import CopilotACPClient
+        from acp_adapter.copilot_client import CopilotACPClient
        if isinstance(sync_client, CopilotACPClient):
            return sync_client, model
    except ImportError:
@@ -1977,12 +1744,6 @@ def resolve_provider_client(
        (client, resolved_model) or (None, None) if auth is unavailable.
    """
    _validate_proxy_env_urls()
-    # Preserve the original provider name before alias normalization so a
-    # user-declared ``custom_providers`` entry whose name coincidentally
-    # matches a built-in alias (e.g. user names their custom provider "kimi"
-    # which aliases to "kimi-coding") is still reachable via the named-custom
-    # branch below.
-    original_provider = (provider or "").strip().lower()
    # Normalise aliases
    provider = _normalize_aux_provider(provider)

@@ -2008,20 +1769,8 @@ def resolve_provider_client(
                return True
        return False

-    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = "",
-                        api_key_str: str = ""):
-        """Wrap a plain OpenAI client in the correct transport adapter.
-
-        Handles two cases:
-        - ``CodexAuxiliaryClient`` when the endpoint needs the Responses API
-          (explicit ``api_mode=codex_responses`` or api.openai.com + codex
-          model name).
-        - ``AnthropicAuxiliaryClient`` when the endpoint speaks Anthropic
-          Messages (explicit ``api_mode=anthropic_messages``, any ``/anthropic``
-          suffix, ``api.kimi.com/coding``, or ``api.anthropic.com``).
-
-        Clients that are already specialized wrappers pass through unchanged.
-        """
+    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
+        """Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
        if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
            logger.debug(
                "resolve_provider_client: wrapping client in CodexAuxiliaryClient "
@@ -2029,11 +1778,7 @@ def resolve_provider_client(
                api_mode or "auto-detected", final_model_str,
                base_url_str[:60] if base_url_str else "")
            return CodexAuxiliaryClient(client_obj, final_model_str)
-        # Anthropic-wire endpoints: rewrap plain OpenAI clients so
-        # chat.completions.create() is translated to /v1/messages.
-        return _maybe_wrap_anthropic(
-            client_obj, final_model_str, api_key_str, base_url_str, api_mode,
-        )
+        return client_obj

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
@@ -2085,13 +1830,6 @@ def resolve_provider_client(

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
    if provider == "openai-codex":
-        if not model:
-            logger.warning(
-                "resolve_provider_client: openai-codex requested without a "
-                "model; pass model explicitly (e.g. model.model in config.yaml "
-                "or auxiliary.<task>.model for per-task aux routing)."
-            )
-            return None, None
        if raw_codex:
            # Return the raw OpenAI client for callers that need direct
            # access to responses.stream() (e.g., the main agent loop).
@@ -2100,7 +1838,7 @@ def resolve_provider_client(
                logger.warning("resolve_provider_client: openai-codex requested "
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
-            final_model = _normalize_resolved_model(model, provider)
+            final_model = _normalize_resolved_model(model or _CODEX_AUX_MODEL, provider)
            raw_client = OpenAI(
                api_key=codex_token,
                base_url=_CODEX_AUX_BASE_URL,
@@ -2108,7 +1846,7 @@ def resolve_provider_client(
            )
            return (raw_client, final_model)
        # Standard path: wrap in CodexAuxiliaryClient adapter
-        client, default = _build_codex_client(model)
+        client, default = _try_codex()
        if client is None:
            logger.warning("resolve_provider_client: openai-codex requested "
                           "but no Codex OAuth token found (run: hermes model)")
@@ -2120,7 +1858,7 @@ def resolve_provider_client(
    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        if explicit_base_url:
-            custom_base = _to_openai_base_url(explicit_base_url).strip()
+            custom_base = explicit_base_url.strip()
            custom_key = (
                (explicit_api_key or "").strip()
                or os.getenv("OPENAI_API_KEY", "").strip()
@@ -2133,7 +1871,7 @@ def resolve_provider_client(
                )
                return None, None
            final_model = _normalize_resolved_model(
-                model or (main_runtime.get("model") if main_runtime else None) or "gpt-4o-mini",
+                model or _read_main_model() or "gpt-4o-mini",
                provider,
            )
            extra = {}
@@ -2148,18 +1886,17 @@ def resolve_provider_client(
                    is_agent_turn=True, is_vision=is_vision
                )
            client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
-            client = _wrap_if_needed(client, final_model, custom_base, custom_key)
+            client = _wrap_if_needed(client, final_model, custom_base)
            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                    else (client, final_model))
-        # Try custom first, then API-key providers (Codex excluded here:
-        # falling through to Codex with no model is a stale-constant trap).
-        for try_fn in (_try_custom_endpoint, _resolve_api_key_provider):
+        # Try custom first, then codex, then API-key providers
+        for try_fn in (_try_custom_endpoint, _try_codex,
+                       _resolve_api_key_provider):
            client, default = try_fn()
            if client is not None:
                final_model = _normalize_resolved_model(model or default, provider)
                _cbase = str(getattr(client, "base_url", "") or "")
-                _ckey = str(getattr(client, "api_key", "") or "")
-                client = _wrap_if_needed(client, final_model, _cbase, _ckey)
+                client = _wrap_if_needed(client, final_model, _cbase)
                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
        logger.warning("resolve_provider_client: custom/main requested "
@@ -2169,18 +1906,7 @@ def resolve_provider_client(
    # ── Named custom providers (config.yaml providers dict / custom_providers list) ───
    try:
        from hermes_cli.runtime_provider import _get_named_custom_provider
-        # When the raw requested name is an alias (``kimi`` → ``kimi-coding``)
-        # and the user defined a ``custom_providers`` entry under that alias
-        # name, the custom entry is the intended target — the built-in alias
-        # rewriting would otherwise hijack the request.  Only preferred when
-        # the raw name is an alias (not a canonical provider name) so custom
-        # entries that coincidentally match a canonical provider (e.g. ``nous``)
-        # still defer to the built-in per `_get_named_custom_provider`'s guard.
-        custom_entry = None
-        if original_provider and original_provider != provider:
-            custom_entry = _get_named_custom_provider(original_provider)
-        if custom_entry is None:
-            custom_entry = _get_named_custom_provider(provider)
+        custom_entry = _get_named_custom_provider(provider)
        if custom_entry:
            custom_base = custom_entry.get("base_url", "").strip()
            custom_key = custom_entry.get("api_key", "").strip()
@@ -2193,24 +1919,10 @@ def resolve_provider_client(
            entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
            if custom_base:
                final_model = _normalize_resolved_model(
-                    model
-                    or custom_entry.get("model")
-                    or (main_runtime.get("model") if main_runtime else None)
-                    or _read_main_model()
-                    or "gpt-4o-mini",
+                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
                    provider,
                )
-                # anthropic_messages talks to the /anthropic surface directly;
-                # OpenAI-wire paths (chat_completions / codex_responses) need the
-                # /v1 equivalent.  Rewrite only on the OpenAI-wire path so the
-                # Anthropic fallback SDK still sees the original URL.
-                if entry_api_mode == "anthropic_messages":
-                    openai_base = custom_base
-                    raw_base_for_wrap = custom_base
-                else:
-                    openai_base = _to_openai_base_url(custom_base)
-                    raw_base_for_wrap = custom_base
-                _clean_base2, _dq2 = _extract_url_query_params(openai_base)
+                _clean_base2, _dq2 = _extract_url_query_params(custom_base)
                _extra2 = {"default_query": _dq2} if _dq2 else {}
                logger.debug(
                    "resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
@@ -2229,12 +1941,7 @@ def resolve_provider_client(
                            "installed — falling back to OpenAI-wire.",
                            provider,
                        )
-                        # Fallback went OpenAI-wire after all — redo the query
-                        # extraction against the rewritten /v1 URL.
-                        _fallback_base = _to_openai_base_url(custom_base)
-                        _fb_clean, _fb_dq = _extract_url_query_params(_fallback_base)
-                        _fb_extra = {"default_query": _fb_dq} if _fb_dq else {}
-                        client = OpenAI(api_key=custom_key, base_url=_fb_clean, **_fb_extra)
+                        client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
                        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                                else (client, final_model))
                    sync_anthropic = AnthropicAuxiliaryClient(
@@ -2253,7 +1960,7 @@ def resolve_provider_client(
                ):
                    client = CodexAuxiliaryClient(client, final_model)
                else:
-                    client = _wrap_if_needed(client, final_model, raw_base_for_wrap, custom_key)
+                    client = _wrap_if_needed(client, final_model, custom_base)
                return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
                        else (client, final_model))
            logger.warning(
@@ -2290,12 +1997,6 @@ def resolve_provider_client(

        creds = resolve_api_key_provider_credentials(provider)
        api_key = str(creds.get("api_key", "")).strip()
-        # Honour an explicit api_key override (e.g. from a fallback_model entry
-        # or a custom_providers entry) so callers that pass an explicit
-        # credential can authenticate against endpoints where no built-in
-        # credential is registered for this provider alias.
-        if explicit_api_key:
-            api_key = explicit_api_key.strip() or api_key
        if not api_key:
            tried_sources = list(pconfig.api_key_env_vars)
            if provider == "copilot":
@@ -2305,15 +2006,11 @@ def resolve_provider_client(
                         provider, ", ".join(tried_sources))
            return None, None

-        raw_base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
-        base_url = _to_openai_base_url(raw_base_url)
-        # Honour an explicit base_url override from the caller — used when a
-        # fallback_model entry (or custom_providers lookup) routes through a
-        # built-in provider name but targets a user-specified endpoint.
-        if explicit_base_url:
-            base_url = _to_openai_base_url(explicit_base_url.strip().rstrip("/"))
+        base_url = _to_openai_base_url(
+            str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
+        )

-        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
+        default_model = _get_aux_model_for_provider(provider)
        final_model = _normalize_resolved_model(model or default_model, provider)

        if provider == "gemini":
@@ -2356,11 +2053,8 @@ def resolve_provider_client(

        # Honor api_mode for any API-key provider (e.g. direct OpenAI with
        # codex-family models).  The copilot-specific wrapping above handles
-        # copilot; this covers the general case (#6800).  Also rewraps
-        # Anthropic-wire endpoints (Kimi Coding Plan api.kimi.com/coding,
-        # /anthropic-suffixed gateways) so named providers like kimi-coding
-        # land on the right transport without needing per-provider branches.
-        client = _wrap_if_needed(client, final_model, raw_base_url, api_key)
+        # copilot; this covers the general case (#6800).
+        client = _wrap_if_needed(client, final_model, base_url)

        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
@@ -2368,12 +2062,7 @@ def resolve_provider_client(

    if pconfig.auth_type == "external_process":
        creds = resolve_external_process_provider_credentials(provider)
-        final_model = _normalize_resolved_model(
-            model
-            or (main_runtime.get("model") if main_runtime else None)
-            or _read_main_model(),
-            provider,
-        )
+        final_model = _normalize_resolved_model(model or _read_main_model(), provider)
        if provider == "copilot-acp":
            api_key = str(creds.get("api_key", "")).strip()
            base_url = str(creds.get("base_url", "")).strip()
@@ -2391,7 +2080,7 @@ def resolve_provider_client(
                    "process credentials are incomplete"
                )
                return None, None
-            from agent.copilot_acp_client import CopilotACPClient
+            from acp_adapter.copilot_client import CopilotACPClient

            client = CopilotACPClient(
                api_key=api_key,
@@ -2523,10 +2212,7 @@ def _resolve_strict_vision_backend(
    if provider == "nous":
        return _try_nous(vision=True)
    if provider == "openai-codex":
-        # Route through resolve_provider_client so the caller's explicit
-        # model is used.  There is no safe default Codex model (shifting
-        # allow-list); callers must specify via auxiliary.<task>.model.
-        return resolve_provider_client("openai-codex", model, is_vision=True)
+        return _try_codex()
    if provider == "anthropic":
        return _try_anthropic()
    if provider == "custom":
@@ -2631,19 +2317,6 @@ def resolve_vision_provider_client(
                        main_provider, default_model or resolved_model or main_model,
                    )
                    return _finalize(main_provider, sync_client, default_model)
-            elif main_provider in _PROVIDERS_WITHOUT_VISION:
-                # Kimi Coding Plan's /coding endpoint (Anthropic Messages wire)
-                # does not accept image input — Kimi's own docs say "Current
-                # model does not support image input, switch to a model with
-                # image_in capability" and vision lives on the separate Kimi
-                # Platform (api.moonshot.ai). Skip the main provider and fall
-                # through to the aggregator chain instead of returning a
-                # client that will 404 on every vision request (#17076).
-                logger.debug(
-                    "Vision auto-detect: skipping main provider %s (no "
-                    "vision support) — falling through to aggregator chain",
-                    main_provider,
-                )
            else:
                rpc_client, rpc_model = resolve_provider_client(
                    main_provider, vision_model,
@@ -3125,7 +2798,7 @@ def _get_task_extra_body(task: str) -> Dict[str, Any]:

 # Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
 # Their image content blocks must use Anthropic format, not OpenAI format.
-_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-oauth", "minimax-cn"})
+_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})


 def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
@@ -291,52 +291,14 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
 def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
    """Resolve the AWS region for Bedrock API calls.

-    Priority:
-      1. AWS_REGION env var
-      2. AWS_DEFAULT_REGION env var
-      3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
-      4. us-east-1 (hard fallback)
-
-    The boto3 fallback is critical for EU/AP users who configure their region
-    in ~/.aws/config via a named profile rather than env vars — without it,
-    live model discovery would always return us.* profile IDs regardless of
-    the user's actual region.
+    Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
    """
    env = env if env is not None else os.environ
-    explicit = (
+    return (
        env.get("AWS_REGION", "").strip()
        or env.get("AWS_DEFAULT_REGION", "").strip()
+        or "us-east-1"
    )
-    if explicit:
-        return explicit
-    try:
-        import botocore.session
-        region = botocore.session.get_session().get_config_variable("region")
-        if region:
-            return region
-    except Exception:
-        pass
-    return "us-east-1"
-
-
-def bedrock_model_ids_or_none() -> Optional[List[str]]:
-    """Live-discover Bedrock model IDs for the active region.
-
-    Returns a list of model ID strings if discovery succeeds and yields
-    at least one model, or ``None`` on failure / empty result.  Callers
-    should fall back to the static curated list when ``None`` is returned.
-
-    This helper consolidates the discover → extract-ids → fallback
-    pattern that was previously duplicated across ``provider_model_ids``,
-    ``list_authenticated_providers`` section 2, and section 3.
-    """
-    try:
-        discovered = discover_bedrock_models(resolve_bedrock_region())
-        if discovered:
-            return [m["id"] for m in discovered]
-    except Exception:
-        pass
-    return None


 # ---------------------------------------------------------------------------
@@ -538,7 +538,7 @@ class ContextCompressor(ContextEngine):
            # Token-budget approach: walk backward accumulating tokens
            accumulated = 0
            boundary = len(result)
-            min_protect = min(protect_tail_count, len(result))
+            min_protect = min(protect_tail_count, len(result) - 1)
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
                raw_content = msg.get("content") or ""
@@ -992,8 +992,8 @@ The user has requested that this compaction PRIORITISE preserving all informatio
    def _get_tool_call_id(tc) -> str:
        """Extract the call ID from a tool_call entry (dict or SimpleNamespace)."""
        if isinstance(tc, dict):
-            return tc.get("call_id", "") or tc.get("id", "") or ""
-        return getattr(tc, "call_id", "") or getattr(tc, "id", "") or ""
+            return tc.get("id", "") or ""
+        return getattr(tc, "id", "") or ""

    def _sanitize_tool_pairs(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Fix orphaned tool_call / tool_result pairs after compression.
@@ -1,646 +1,8 @@
-"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
+"""Backward-compatibility shim.

-This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
-backend. Each request starts a short-lived ACP session, sends the formatted
-conversation as a single prompt, collects text chunks, and converts the result
-back into the minimal shape Hermes expects from an OpenAI client.
+CopilotACPClient has moved to acp_adapter/copilot_client.py.
+This module re-exports it so existing callers continue to work.
 """
+from acp_adapter.copilot_client import CopilotACPClient  # noqa: F401

-from __future__ import annotations
-
-import json
-import os
-import queue
-import re
-import shlex
-import subprocess
-import threading
-import time
-from collections import deque
-from pathlib import Path
-from types import SimpleNamespace
-from typing import Any
-
-from agent.file_safety import get_read_block_error, is_write_denied
-from agent.redact import redact_sensitive_text
-
-ACP_MARKER_BASE_URL = "acp://copilot"
-_DEFAULT_TIMEOUT_SECONDS = 900.0
-
-_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
-_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
-
-
-def _resolve_command() -> str:
-    return (
-        os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
-        or os.getenv("COPILOT_CLI_PATH", "").strip()
-        or "copilot"
-    )
-
-
-def _resolve_args() -> list[str]:
-    raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
-    if not raw:
-        return ["--acp", "--stdio"]
-    return shlex.split(raw)
-
-
-def _resolve_home_dir() -> str:
-    """Return a stable HOME for child ACP processes."""
-
-    try:
-        from hermes_constants import get_subprocess_home
-
-        profile_home = get_subprocess_home()
-        if profile_home:
-            return profile_home
-    except Exception:
-        pass
-
-    home = os.environ.get("HOME", "").strip()
-    if home:
-        return home
-
-    expanded = os.path.expanduser("~")
-    if expanded and expanded != "~":
-        return expanded
-
-    try:
-        import pwd
-
-        resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
-        if resolved:
-            return resolved
-    except Exception:
-        pass
-
-    # Last resort: /tmp (writable on any POSIX system). Avoids crashing the
-    # subprocess with no HOME; callers can set HERMES_HOME explicitly if they
-    # need a different writable dir.
-    return "/tmp"
-
-
-def _build_subprocess_env() -> dict[str, str]:
-    env = os.environ.copy()
-    env["HOME"] = _resolve_home_dir()
-    return env
-
-
-def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "error": {
-            "code": code,
-            "message": message,
-        },
-    }
-
-
-def _permission_denied(message_id: Any) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "result": {
-            "outcome": {
-                "outcome": "cancelled",
-            }
-        },
-    }
-
-
-def _format_messages_as_prompt(
-    messages: list[dict[str, Any]],
-    model: str | None = None,
-    tools: list[dict[str, Any]] | None = None,
-    tool_choice: Any = None,
-) -> str:
-    sections: list[str] = [
-        "You are being used as the active ACP agent backend for Hermes.",
-        "Use ACP capabilities to complete tasks.",
-        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
-        "If no tool is needed, answer normally.",
-    ]
-    if model:
-        sections.append(f"Hermes requested model hint: {model}")
-
-    if isinstance(tools, list) and tools:
-        tool_specs: list[dict[str, Any]] = []
-        for t in tools:
-            if not isinstance(t, dict):
-                continue
-            fn = t.get("function") or {}
-            if not isinstance(fn, dict):
-                continue
-            name = fn.get("name")
-            if not isinstance(name, str) or not name.strip():
-                continue
-            tool_specs.append(
-                {
-                    "name": name.strip(),
-                    "description": fn.get("description", ""),
-                    "parameters": fn.get("parameters", {}),
-                }
-            )
-        if tool_specs:
-            sections.append(
-                "Available tools (OpenAI function schema). "
-                "When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
-                "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
-                + json.dumps(tool_specs, ensure_ascii=False)
-            )
-
-    if tool_choice is not None:
-        sections.append(f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}")
-
-    transcript: list[str] = []
-    for message in messages:
-        if not isinstance(message, dict):
-            continue
-        role = str(message.get("role") or "unknown").strip().lower()
-        if role == "tool":
-            role = "tool"
-        elif role not in {"system", "user", "assistant"}:
-            role = "context"
-
-        content = message.get("content")
-        rendered = _render_message_content(content)
-        if not rendered:
-            continue
-
-        label = {
-            "system": "System",
-            "user": "User",
-            "assistant": "Assistant",
-            "tool": "Tool",
-            "context": "Context",
-        }.get(role, role.title())
-        transcript.append(f"{label}:\n{rendered}")
-
-    if transcript:
-        sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
-
-    sections.append("Continue the conversation from the latest user request.")
-    return "\n\n".join(section.strip() for section in sections if section and section.strip())
-
-
-def _render_message_content(content: Any) -> str:
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content.strip()
-    if isinstance(content, dict):
-        if "text" in content:
-            return str(content.get("text") or "").strip()
-        if "content" in content and isinstance(content.get("content"), str):
-            return str(content.get("content") or "").strip()
-        return json.dumps(content, ensure_ascii=True)
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-            elif isinstance(item, dict):
-                text = item.get("text")
-                if isinstance(text, str) and text.strip():
-                    parts.append(text.strip())
-        return "\n".join(parts).strip()
-    return str(content).strip()
-
-
-def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
-    if not isinstance(text, str) or not text.strip():
-        return [], ""
-
-    extracted: list[SimpleNamespace] = []
-    consumed_spans: list[tuple[int, int]] = []
-
-    def _try_add_tool_call(raw_json: str) -> None:
-        try:
-            obj = json.loads(raw_json)
-        except Exception:
-            return
-        if not isinstance(obj, dict):
-            return
-        fn = obj.get("function")
-        if not isinstance(fn, dict):
-            return
-        fn_name = fn.get("name")
-        if not isinstance(fn_name, str) or not fn_name.strip():
-            return
-        fn_args = fn.get("arguments", "{}")
-        if not isinstance(fn_args, str):
-            fn_args = json.dumps(fn_args, ensure_ascii=False)
-        call_id = obj.get("id")
-        if not isinstance(call_id, str) or not call_id.strip():
-            call_id = f"acp_call_{len(extracted)+1}"
-
-        extracted.append(
-            SimpleNamespace(
-                id=call_id,
-                call_id=call_id,
-                response_item_id=None,
-                type="function",
-                function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
-            )
-        )
-
-    for m in _TOOL_CALL_BLOCK_RE.finditer(text):
-        raw = m.group(1)
-        _try_add_tool_call(raw)
-        consumed_spans.append((m.start(), m.end()))
-
-    # Only try bare-JSON fallback when no XML blocks were found.
-    if not extracted:
-        for m in _TOOL_CALL_JSON_RE.finditer(text):
-            raw = m.group(0)
-            _try_add_tool_call(raw)
-            consumed_spans.append((m.start(), m.end()))
-
-    if not consumed_spans:
-        return extracted, text.strip()
-
-    consumed_spans.sort()
-    merged: list[tuple[int, int]] = []
-    for start, end in consumed_spans:
-        if not merged or start > merged[-1][1]:
-            merged.append((start, end))
-        else:
-            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
-
-    parts: list[str] = []
-    cursor = 0
-    for start, end in merged:
-        if cursor < start:
-            parts.append(text[cursor:start])
-        cursor = max(cursor, end)
-    if cursor < len(text):
-        parts.append(text[cursor:])
-
-    cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
-    return extracted, cleaned
-
-
-
-def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
-    candidate = Path(path_text)
-    if not candidate.is_absolute():
-        raise PermissionError("ACP file-system paths must be absolute.")
-    resolved = candidate.resolve()
-    root = Path(cwd).resolve()
-    try:
-        resolved.relative_to(root)
-    except ValueError as exc:
-        raise PermissionError(f"Path '{resolved}' is outside the session cwd '{root}'.") from exc
-    return resolved
-
-
-class _ACPChatCompletions:
-    def __init__(self, client: "CopilotACPClient"):
-        self._client = client
-
-    def create(self, **kwargs: Any) -> Any:
-        return self._client._create_chat_completion(**kwargs)
-
-
-class _ACPChatNamespace:
-    def __init__(self, client: "CopilotACPClient"):
-        self.completions = _ACPChatCompletions(client)
-
-
-class CopilotACPClient:
-    """Minimal OpenAI-client-compatible facade for Copilot ACP."""
-
-    def __init__(
-        self,
-        *,
-        api_key: str | None = None,
-        base_url: str | None = None,
-        default_headers: dict[str, str] | None = None,
-        acp_command: str | None = None,
-        acp_args: list[str] | None = None,
-        acp_cwd: str | None = None,
-        command: str | None = None,
-        args: list[str] | None = None,
-        **_: Any,
-    ):
-        self.api_key = api_key or "copilot-acp"
-        self.base_url = base_url or ACP_MARKER_BASE_URL
-        self._default_headers = dict(default_headers or {})
-        self._acp_command = acp_command or command or _resolve_command()
-        self._acp_args = list(acp_args or args or _resolve_args())
-        self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
-        self.chat = _ACPChatNamespace(self)
-        self.is_closed = False
-        self._active_process: subprocess.Popen[str] | None = None
-        self._active_process_lock = threading.Lock()
-
-    def close(self) -> None:
-        proc: subprocess.Popen[str] | None
-        with self._active_process_lock:
-            proc = self._active_process
-            self._active_process = None
-        self.is_closed = True
-        if proc is None:
-            return
-        try:
-            proc.terminate()
-            proc.wait(timeout=2)
-        except Exception:
-            try:
-                proc.kill()
-            except Exception:
-                pass
-
-    def _create_chat_completion(
-        self,
-        *,
-        model: str | None = None,
-        messages: list[dict[str, Any]] | None = None,
-        timeout: float | None = None,
-        tools: list[dict[str, Any]] | None = None,
-        tool_choice: Any = None,
-        **_: Any,
-    ) -> Any:
-        prompt_text = _format_messages_as_prompt(
-            messages or [],
-            model=model,
-            tools=tools,
-            tool_choice=tool_choice,
-        )
-        # Normalise timeout: run_agent.py may pass an httpx.Timeout object
-        # (used natively by the OpenAI SDK) rather than a plain float.
-        if timeout is None:
-            _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
-        elif isinstance(timeout, (int, float)):
-            _effective_timeout = float(timeout)
-        else:
-            # httpx.Timeout or similar — pick the largest component so the
-            # subprocess has enough wall-clock time for the full response.
-            _candidates = [
-                getattr(timeout, attr, None)
-                for attr in ("read", "write", "connect", "pool", "timeout")
-            ]
-            _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
-            _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
-
-        response_text, reasoning_text = self._run_prompt(
-            prompt_text,
-            timeout_seconds=_effective_timeout,
-        )
-
-        tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
-
-        usage = SimpleNamespace(
-            prompt_tokens=0,
-            completion_tokens=0,
-            total_tokens=0,
-            prompt_tokens_details=SimpleNamespace(cached_tokens=0),
-        )
-        assistant_message = SimpleNamespace(
-            content=cleaned_text,
-            tool_calls=tool_calls,
-            reasoning=reasoning_text or None,
-            reasoning_content=reasoning_text or None,
-            reasoning_details=None,
-        )
-        finish_reason = "tool_calls" if tool_calls else "stop"
-        choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
-        return SimpleNamespace(
-            choices=[choice],
-            usage=usage,
-            model=model or "copilot-acp",
-        )
-
-    def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
-        try:
-            proc = subprocess.Popen(
-                [self._acp_command] + self._acp_args,
-                stdin=subprocess.PIPE,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                text=True,
-                bufsize=1,
-                cwd=self._acp_cwd,
-                env=_build_subprocess_env(),
-            )
-        except FileNotFoundError as exc:
-            raise RuntimeError(
-                f"Could not start Copilot ACP command '{self._acp_command}'. "
-                "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
-            ) from exc
-
-        if proc.stdin is None or proc.stdout is None:
-            proc.kill()
-            raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
-
-        self.is_closed = False
-        with self._active_process_lock:
-            self._active_process = proc
-
-        inbox: queue.Queue[dict[str, Any]] = queue.Queue()
-        stderr_tail: deque[str] = deque(maxlen=40)
-
-        def _stdout_reader() -> None:
-            if proc.stdout is None:
-                return
-            for line in proc.stdout:
-                try:
-                    inbox.put(json.loads(line))
-                except Exception:
-                    inbox.put({"raw": line.rstrip("\n")})
-
-        def _stderr_reader() -> None:
-            if proc.stderr is None:
-                return
-            for line in proc.stderr:
-                stderr_tail.append(line.rstrip("\n"))
-
-        out_thread = threading.Thread(target=_stdout_reader, daemon=True)
-        err_thread = threading.Thread(target=_stderr_reader, daemon=True)
-        out_thread.start()
-        err_thread.start()
-
-        next_id = 0
-
-        def _request(method: str, params: dict[str, Any], *, text_parts: list[str] | None = None, reasoning_parts: list[str] | None = None) -> Any:
-            nonlocal next_id
-            next_id += 1
-            request_id = next_id
-            payload = {
-                "jsonrpc": "2.0",
-                "id": request_id,
-                "method": method,
-                "params": params,
-            }
-            proc.stdin.write(json.dumps(payload) + "\n")
-            proc.stdin.flush()
-
-            deadline = time.time() + timeout_seconds
-            while time.time() < deadline:
-                if proc.poll() is not None:
-                    break
-                try:
-                    msg = inbox.get(timeout=0.1)
-                except queue.Empty:
-                    continue
-
-                if self._handle_server_message(
-                    msg,
-                    process=proc,
-                    cwd=self._acp_cwd,
-                    text_parts=text_parts,
-                    reasoning_parts=reasoning_parts,
-                ):
-                    continue
-
-                if msg.get("id") != request_id:
-                    continue
-                if "error" in msg:
-                    err = msg.get("error") or {}
-                    raise RuntimeError(
-                        f"Copilot ACP {method} failed: {err.get('message') or err}"
-                    )
-                return msg.get("result")
-
-            stderr_text = "\n".join(stderr_tail).strip()
-            if proc.poll() is not None and stderr_text:
-                raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
-            raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
-
-        try:
-            _request(
-                "initialize",
-                {
-                    "protocolVersion": 1,
-                    "clientCapabilities": {
-                        "fs": {
-                            "readTextFile": True,
-                            "writeTextFile": True,
-                        }
-                    },
-                    "clientInfo": {
-                        "name": "hermes-agent",
-                        "title": "Hermes Agent",
-                        "version": "0.0.0",
-                    },
-                },
-            )
-            session = _request(
-                "session/new",
-                {
-                    "cwd": self._acp_cwd,
-                    "mcpServers": [],
-                },
-            ) or {}
-            session_id = str(session.get("sessionId") or "").strip()
-            if not session_id:
-                raise RuntimeError("Copilot ACP did not return a sessionId.")
-
-            text_parts: list[str] = []
-            reasoning_parts: list[str] = []
-            _request(
-                "session/prompt",
-                {
-                    "sessionId": session_id,
-                    "prompt": [
-                        {
-                            "type": "text",
-                            "text": prompt_text,
-                        }
-                    ],
-                },
-                text_parts=text_parts,
-                reasoning_parts=reasoning_parts,
-            )
-            return "".join(text_parts), "".join(reasoning_parts)
-        finally:
-            self.close()
-
-    def _handle_server_message(
-        self,
-        msg: dict[str, Any],
-        *,
-        process: subprocess.Popen[str],
-        cwd: str,
-        text_parts: list[str] | None,
-        reasoning_parts: list[str] | None,
-    ) -> bool:
-        method = msg.get("method")
-        if not isinstance(method, str):
-            return False
-
-        if method == "session/update":
-            params = msg.get("params") or {}
-            update = params.get("update") or {}
-            kind = str(update.get("sessionUpdate") or "").strip()
-            content = update.get("content") or {}
-            chunk_text = ""
-            if isinstance(content, dict):
-                chunk_text = str(content.get("text") or "")
-            if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
-                text_parts.append(chunk_text)
-            elif kind == "agent_thought_chunk" and chunk_text and reasoning_parts is not None:
-                reasoning_parts.append(chunk_text)
-            return True
-
-        if process.stdin is None:
-            return True
-
-        message_id = msg.get("id")
-        params = msg.get("params") or {}
-
-        if method == "session/request_permission":
-            response = _permission_denied(message_id)
-        elif method == "fs/read_text_file":
-            try:
-                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                block_error = get_read_block_error(str(path))
-                if block_error:
-                    raise PermissionError(block_error)
-                content = path.read_text() if path.exists() else ""
-                line = params.get("line")
-                limit = params.get("limit")
-                if isinstance(line, int) and line > 1:
-                    lines = content.splitlines(keepends=True)
-                    start = line - 1
-                    end = start + limit if isinstance(limit, int) and limit > 0 else None
-                    content = "".join(lines[start:end])
-                if content:
-                    content = redact_sensitive_text(content, force=True)
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": message_id,
-                    "result": {
-                        "content": content,
-                    },
-                }
-            except Exception as exc:
-                response = _jsonrpc_error(message_id, -32602, str(exc))
-        elif method == "fs/write_text_file":
-            try:
-                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                if is_write_denied(str(path)):
-                    raise PermissionError(
-                        f"Write denied: '{path}' is a protected system/credential file."
-                    )
-                path.parent.mkdir(parents=True, exist_ok=True)
-                path.write_text(str(params.get("content") or ""))
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": message_id,
-                    "result": None,
-                }
-            except Exception as exc:
-                response = _jsonrpc_error(message_id, -32602, str(exc))
-        else:
-            response = _jsonrpc_error(
-                message_id,
-                -32601,
-                f"ACP client method '{method}' is not supported by Hermes yet.",
-            )
-
-        process.stdin.write(json.dumps(response) + "\n")
-        process.stdin.flush()
-        return True
+__all__ = ["CopilotACPClient"]
@@ -7,6 +7,7 @@ import random
 import threading
 import time
 import uuid
+import os
 import re
 from dataclasses import dataclass, fields, replace
 from datetime import datetime
@@ -455,70 +456,6 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

-    def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
-        """Sync a Codex device_code pool entry from auth.json if tokens differ.
-
-        When a Codex OAuth access token expires (or the ChatGPT account hits
-        its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
-        with a ``last_error_reset_at`` that can be many hours in the future.
-        Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
-        performs a fresh device-code login and writes new tokens to
-        ``auth.json`` under ``_auth_store_lock``.  Without this sync the pool
-        entry stays frozen until ``last_error_reset_at`` elapses — even
-        though fresh credentials are sitting on disk — and every request
-        fails with "no available entries (all exhausted or empty)".
-
-        Mirrors the Nous/Anthropic resync paths above.  Only applies to
-        device_code-sourced entries; env/API-key-sourced entries have no
-        auth.json shadow to sync from.
-        """
-        if self.provider != "openai-codex" or entry.source != "device_code":
-            return entry
-        try:
-            with _auth_store_lock():
-                auth_store = _load_auth_store()
-                state = _load_provider_state(auth_store, "openai-codex")
-            if not isinstance(state, dict):
-                return entry
-            tokens = state.get("tokens")
-            if not isinstance(tokens, dict):
-                return entry
-            store_access = tokens.get("access_token", "")
-            store_refresh = tokens.get("refresh_token", "")
-            # Adopt auth.json tokens when either side differs.  Codex refresh
-            # tokens are single-use too, so a fresh refresh_token from
-            # another process means our entry's pair is consumed/stale.
-            entry_access = entry.access_token or ""
-            entry_refresh = entry.refresh_token or ""
-            if store_access and (
-                store_access != entry_access
-                or (store_refresh and store_refresh != entry_refresh)
-            ):
-                logger.debug(
-                    "Pool entry %s: syncing Codex tokens from auth.json "
-                    "(refreshed by another process)",
-                    entry.id,
-                )
-                field_updates: Dict[str, Any] = {
-                    "access_token": store_access,
-                    "refresh_token": store_refresh or entry.refresh_token,
-                    "last_status": None,
-                    "last_status_at": None,
-                    "last_error_code": None,
-                    "last_error_reason": None,
-                    "last_error_message": None,
-                    "last_error_reset_at": None,
-                }
-                if state.get("last_refresh"):
-                    field_updates["last_refresh"] = state["last_refresh"]
-                updated = replace(entry, **field_updates)
-                self._replace_entry(entry, updated)
-                self._persist()
-                return updated
-        except Exception as exc:
-            logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
-        return entry
-
    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
        """Sync a Nous pool entry from auth.json if tokens differ.

@@ -851,18 +788,6 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
-            # For openai-codex entries, same pattern: the user may have
-            # re-authed via `hermes model` / `hermes auth` after a 429/401,
-            # leaving fresh tokens on disk while the pool entry is still
-            # frozen behind last_error_reset_at (can be hours in the
-            # future for ChatGPT weekly windows).
-            if (self.provider == "openai-codex"
-                    and entry.source == "device_code"
-                    and entry.last_status == STATUS_EXHAUSTED):
-                synced = self._sync_codex_entry_from_auth_store(entry)
-                if synced is not entry:
-                    entry = synced
-                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -1299,48 +1224,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

-    elif provider == "minimax-oauth":
-        # MiniMax OAuth tokens live in ~/.hermes/auth.json providers.minimax-oauth.
-        # Seed the pool so `/auth list` reflects the logged-in state and the
-        # standard `hermes auth remove minimax-oauth <N>` flow works.
-        # Use refresh_if_expiring=False equivalent: resolve_minimax_oauth_runtime_credentials
-        # always refreshes on expiry, so instead read raw state here to avoid
-        # surprise network calls during provider discovery.
-        try:
-            from hermes_cli.auth import get_provider_auth_state
-            state = get_provider_auth_state("minimax-oauth")
-            if state and state.get("access_token"):
-                source_name = "oauth"
-                if not _is_suppressed(provider, source_name):
-                    active_sources.add(source_name)
-                    expires_at_ms = None
-                    try:
-                        from datetime import datetime as _dt
-                        raw = state.get("expires_at", "")
-                        if raw:
-                            expires_at_ms = int(_dt.fromisoformat(raw).timestamp() * 1000)
-                    except Exception:
-                        expires_at_ms = None
-                    base_url = str(state.get("inference_base_url", "") or "").rstrip("/")
-                    changed |= _upsert_entry(
-                        entries,
-                        provider,
-                        source_name,
-                        {
-                            "source": source_name,
-                            "auth_type": AUTH_TYPE_OAUTH,
-                            "access_token": state["access_token"],
-                            "refresh_token": state.get("refresh_token"),
-                            "expires_at_ms": expires_at_ms,
-                            "base_url": base_url,
-                            "label": state.get("label", "") or label_from_token(
-                                state.get("access_token", ""), source_name
-                            ),
-                        },
-                    )
-        except Exception as exc:
-            logger.debug("MiniMax OAuth token seed failed: %s", exc)
-
    elif provider == "openai-codex":
        # Respect user suppression — `hermes auth remove openai-codex` marks
        # the device_code source as suppressed so it won't be re-seeded from
@@ -47,6 +47,7 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Callable, List, Optional


@@ -252,19 +253,6 @@ def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
    return result


-def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
-    """MiniMax OAuth lives in auth.json providers.minimax-oauth — clear it.
-
-    Same pattern as Nous: single-source OAuth state with refresh tokens.
-    Suppression of the `oauth` source ensures the pool reseed path
-    (_seed_from_singletons) doesn't instantly undo the removal.
-    """
-    result = RemovalResult()
-    if _clear_auth_store_provider(provider):
-        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
-    return result
-
-
 def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.

@@ -402,11 +390,6 @@ def _register_all_sources() -> None:
        remove_fn=_remove_qwen_cli,
        description="~/.qwen/oauth_creds.json",
    ))
-    register(RemovalStep(
-        provider="minimax-oauth", source_id="oauth",
-        remove_fn=_remove_minimax_oauth,
-        description="auth.json providers.minimax-oauth",
-    ))
    register(RemovalStep(
        provider="*", source_id="config:",
        match_fn=lambda src: src.startswith("config:") or src == "model_config",
@@ -1,440 +0,0 @@
-"""Curator snapshot + rollback.
-
-A pre-run snapshot of ``~/.hermes/skills/`` (excluding ``.curator_backups/``
-itself) is taken before any mutating curator pass. Snapshots are tar.gz
-files under ``~/.hermes/skills/.curator_backups/<utc-iso>/`` with a
-companion ``manifest.json`` describing the snapshot (reason, time, size,
-counted skill files). Rollback picks a snapshot, moves the current
-``skills/`` tree aside into another snapshot so even the rollback itself
-is undoable, then extracts the chosen snapshot into place.
-
-The snapshot does NOT include:
-  - ``.curator_backups/`` (would recurse)
-  - ``.hub/`` (hub-installed skills — managed by the hub, not us)
-
-It DOES include:
-  - all SKILL.md files + their directories (``scripts/``, ``references/``,
-    ``templates/``, ``assets/``)
-  - ``.usage.json`` (usage telemetry — needed to rehydrate state cleanly)
-  - ``.archive/`` (so rollback restores previously-archived skills too)
-  - ``.curator_state`` (so rolling back also restores the last-run-at
-    pointer — otherwise the curator would immediately re-fire on the next
-    tick)
-  - ``.bundled_manifest`` (so protection markers stay consistent)
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import os
-import re
-import shutil
-import tarfile
-import tempfile
-import time
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
-
-from hermes_constants import get_hermes_home
-
-logger = logging.getLogger(__name__)
-
-
-DEFAULT_KEEP = 5
-
-# Entries under skills/ that should NEVER be rolled up into a snapshot.
-# .hub/ is managed by the skills hub; rolling it back would break lockfile
-# invariants. .curator_backups is the backup dir itself — recursion bomb.
-_EXCLUDE_TOP_LEVEL = {".curator_backups", ".hub"}
-
-# Snapshot id regex: UTC ISO with colons replaced by dashes so the filename
-# is portable (Windows-safe). An optional ``-NN`` suffix handles two
-# snapshots landing in the same wallclock second.
-_ID_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}Z(-\d{2})?$")
-
-
-def _backups_dir() -> Path:
-    return get_hermes_home() / "skills" / ".curator_backups"
-
-
-def _skills_dir() -> Path:
-    return get_hermes_home() / "skills"
-
-
-def _utc_id(now: Optional[datetime] = None) -> str:
-    """UTC ISO-ish filesystem-safe timestamp: ``2026-05-01T13-05-42Z``."""
-    if now is None:
-        now = datetime.now(timezone.utc)
-    # isoformat → "2026-05-01T13:05:42.123456+00:00"; strip subseconds and tz.
-    s = now.replace(microsecond=0).isoformat()
-    if s.endswith("+00:00"):
-        s = s[:-6]
-    return s.replace(":", "-") + "Z"
-
-
-def _load_config() -> Dict[str, Any]:
-    try:
-        from hermes_cli.config import load_config
-        cfg = load_config()
-    except Exception as e:
-        logger.debug("Failed to load config for curator backup: %s", e)
-        return {}
-    if not isinstance(cfg, dict):
-        return {}
-    cur = cfg.get("curator") or {}
-    if not isinstance(cur, dict):
-        return {}
-    bk = cur.get("backup") or {}
-    return bk if isinstance(bk, dict) else {}
-
-
-def is_enabled() -> bool:
-    """Default ON — the whole point of the backup is safety by default."""
-    return bool(_load_config().get("enabled", True))
-
-
-def get_keep() -> int:
-    cfg = _load_config()
-    try:
-        n = int(cfg.get("keep", DEFAULT_KEEP))
-    except (TypeError, ValueError):
-        n = DEFAULT_KEEP
-    return max(1, n)
-
-
-# ---------------------------------------------------------------------------
-# Snapshot
-# ---------------------------------------------------------------------------
-
-def _count_skill_files(base: Path) -> int:
-    try:
-        return sum(1 for _ in base.rglob("SKILL.md"))
-    except OSError:
-        return 0
-
-
-def _write_manifest(dest: Path, reason: str, archive_path: Path,
-                    skills_counted: int) -> None:
-    manifest = {
-        "id": dest.name,
-        "reason": reason,
-        "created_at": datetime.now(timezone.utc).isoformat(),
-        "archive": archive_path.name,
-        "archive_bytes": archive_path.stat().st_size,
-        "skill_files": skills_counted,
-    }
-    (dest / "manifest.json").write_text(
-        json.dumps(manifest, indent=2, sort_keys=True), encoding="utf-8"
-    )
-
-
-def snapshot_skills(reason: str = "manual") -> Optional[Path]:
-    """Create a tar.gz snapshot of ``~/.hermes/skills/`` and prune old ones.
-
-    Returns the snapshot directory path, or ``None`` if the snapshot was
-    skipped (backup disabled, skills dir missing, or an IO error occurred —
-    in which case we log at debug and return None so the curator never
-    aborts a pass because of a backup failure).
-    """
-    if not is_enabled():
-        logger.debug("Curator backup disabled by config; skipping snapshot")
-        return None
-
-    skills = _skills_dir()
-    if not skills.exists():
-        logger.debug("No ~/.hermes/skills/ directory — nothing to back up")
-        return None
-
-    backups = _backups_dir()
-    try:
-        backups.mkdir(parents=True, exist_ok=True)
-    except OSError as e:
-        logger.debug("Failed to create backups dir %s: %s", backups, e)
-        return None
-
-    # Uniquify: if a snapshot with the same second already exists (can
-    # happen if two curator runs fire in the same second), append a short
-    # counter. Avoids clobbering and avoids timestamp collisions.
-    base_id = _utc_id()
-    snap_id = base_id
-    counter = 1
-    while (backups / snap_id).exists():
-        snap_id = f"{base_id}-{counter:02d}"
-        counter += 1
-
-    dest = backups / snap_id
-    try:
-        dest.mkdir(parents=True, exist_ok=False)
-    except OSError as e:
-        logger.debug("Failed to create snapshot dir %s: %s", dest, e)
-        return None
-
-    archive = dest / "skills.tar.gz"
-    try:
-        # Stream into the tarball — no tempdir copy needed.
-        with tarfile.open(archive, "w:gz", compresslevel=6) as tf:
-            for entry in sorted(skills.iterdir()):
-                if entry.name in _EXCLUDE_TOP_LEVEL:
-                    continue
-                # arcname: store paths relative to skills/ so extraction
-                # drops cleanly back into the skills dir.
-                tf.add(str(entry), arcname=entry.name, recursive=True)
-        _write_manifest(dest, reason, archive, _count_skill_files(skills))
-    except (OSError, tarfile.TarError) as e:
-        logger.debug("Curator snapshot failed: %s", e, exc_info=True)
-        # Clean up partial snapshot
-        try:
-            shutil.rmtree(dest, ignore_errors=True)
-        except OSError:
-            pass
-        return None
-
-    _prune_old(keep=get_keep())
-    logger.info("Curator snapshot created: %s (%s)", snap_id, reason)
-    return dest
-
-
-def _prune_old(keep: int) -> List[str]:
-    """Delete regular snapshots beyond the newest *keep*. Returns deleted
-    ids. Staging dirs (``.rollback-staging-*``) are implementation detail
-    and pruned independently on every call."""
-    backups = _backups_dir()
-    if not backups.exists():
-        return []
-    entries: List[Tuple[str, Path]] = []
-    stale_staging: List[Path] = []
-    for child in backups.iterdir():
-        if not child.is_dir():
-            continue
-        if child.name.startswith(".rollback-staging-"):
-            # Staging dirs are only supposed to exist briefly during a
-            # rollback. If we find one here (e.g. from a crashed rollback),
-            # clean it up opportunistically.
-            stale_staging.append(child)
-            continue
-        if _ID_RE.match(child.name):
-            entries.append((child.name, child))
-    # Newest first (lexicographic works because the id is UTC ISO).
-    entries.sort(key=lambda t: t[0], reverse=True)
-    deleted: List[str] = []
-    for _, path in entries[keep:]:
-        try:
-            shutil.rmtree(path)
-            deleted.append(path.name)
-        except OSError as e:
-            logger.debug("Failed to prune %s: %s", path, e)
-    for path in stale_staging:
-        try:
-            shutil.rmtree(path)
-        except OSError as e:
-            logger.debug("Failed to clean stale staging dir %s: %s", path, e)
-    return deleted
-
-
-# ---------------------------------------------------------------------------
-# List + rollback
-# ---------------------------------------------------------------------------
-
-def _read_manifest(snap_dir: Path) -> Dict[str, Any]:
-    mf = snap_dir / "manifest.json"
-    if not mf.exists():
-        return {}
-    try:
-        return json.loads(mf.read_text(encoding="utf-8"))
-    except (OSError, json.JSONDecodeError):
-        return {}
-
-
-def list_backups() -> List[Dict[str, Any]]:
-    """Return all restorable snapshots, newest first. Only entries with a
-    real ``skills.tar.gz`` tarball are listed — transient
-    ``.rollback-staging-*`` directories created mid-rollback are
-    implementation detail and not shown."""
-    backups = _backups_dir()
-    if not backups.exists():
-        return []
-    out: List[Dict[str, Any]] = []
-    for child in sorted(backups.iterdir(), reverse=True):
-        if not child.is_dir():
-            continue
-        if not _ID_RE.match(child.name):
-            continue
-        if not (child / "skills.tar.gz").exists():
-            continue
-        mf = _read_manifest(child)
-        mf.setdefault("id", child.name)
-        mf.setdefault("path", str(child))
-        if "archive_bytes" not in mf:
-            arc = child / "skills.tar.gz"
-            try:
-                mf["archive_bytes"] = arc.stat().st_size
-            except OSError:
-                mf["archive_bytes"] = 0
-        out.append(mf)
-    return out
-
-
-def _resolve_backup(backup_id: Optional[str]) -> Optional[Path]:
-    """Return the path of the requested backup, or the newest one if
-    *backup_id* is None. Returns None if no match."""
-    backups = _backups_dir()
-    if not backups.exists():
-        return None
-    if backup_id:
-        target = backups / backup_id
-        if (
-            target.is_dir()
-            and _ID_RE.match(backup_id)
-            and (target / "skills.tar.gz").exists()
-        ):
-            return target
-        return None
-    candidates = [
-        c for c in sorted(backups.iterdir(), reverse=True)
-        if c.is_dir() and _ID_RE.match(c.name) and (c / "skills.tar.gz").exists()
-    ]
-    return candidates[0] if candidates else None
-
-
-def rollback(backup_id: Optional[str] = None) -> Tuple[bool, str, Optional[Path]]:
-    """Restore ``~/.hermes/skills/`` from a snapshot.
-
-    Strategy:
-      1. Resolve the target snapshot (explicit id or newest regular).
-      2. Take a safety snapshot of the CURRENT skills tree under
-         ``.curator_backups/pre-rollback-<ts>/`` so the rollback itself is
-         undoable.
-      3. Move all current top-level entries (except ``.curator_backups``
-         and ``.hub``) into a tempdir.
-      4. Extract the chosen snapshot into ``~/.hermes/skills/``.
-      5. On failure during 4, move the tempdir contents back (best-effort)
-         and return failure.
-
-    Returns ``(ok, message, snapshot_path)``.
-    """
-    target = _resolve_backup(backup_id)
-    if target is None:
-        return (
-            False,
-            f"no matching backup found"
-            + (f" for id '{backup_id}'" if backup_id else "")
-            + " (use `hermes curator rollback --list` to see available snapshots)",
-            None,
-        )
-    archive = target / "skills.tar.gz"
-    if not archive.exists():
-        return (False, f"snapshot {target.name} has no skills.tar.gz — corrupted?", None)
-
-    skills = _skills_dir()
-    skills.mkdir(parents=True, exist_ok=True)
-    backups = _backups_dir()
-    backups.mkdir(parents=True, exist_ok=True)
-
-    # Step 2: safety snapshot of current state FIRST. If this fails we bail
-    # out before touching anything — otherwise a failed extract could leave
-    # the user with no skills.
-    try:
-        snapshot_skills(reason=f"pre-rollback to {target.name}")
-    except Exception as e:
-        return (False, f"pre-rollback safety snapshot failed: {e}", None)
-
-    # Additionally move current entries into an internal staging dir so
-    # the extract happens into an empty skills tree (predictable result).
-    # This dir is implementation detail — not listed as a restorable
-    # backup. The safety snapshot above is the user-facing undo handle.
-    staged = backups / f".rollback-staging-{_utc_id()}"
-    try:
-        staged.mkdir(parents=True, exist_ok=False)
-    except OSError as e:
-        return (False, f"failed to create staging dir: {e}", None)
-
-    moved: List[Tuple[Path, Path]] = []
-    try:
-        for entry in list(skills.iterdir()):
-            if entry.name in _EXCLUDE_TOP_LEVEL:
-                continue
-            dest = staged / entry.name
-            shutil.move(str(entry), str(dest))
-            moved.append((entry, dest))
-    except OSError as e:
-        # Best-effort rollback of the move
-        for orig, dest in moved:
-            try:
-                shutil.move(str(dest), str(orig))
-            except OSError:
-                pass
-        try:
-            shutil.rmtree(staged, ignore_errors=True)
-        except OSError:
-            pass
-        return (False, f"failed to stage current skills: {e}", None)
-
-    # Step 4: extract the snapshot into skills/
-    try:
-        with tarfile.open(archive, "r:gz") as tf:
-            # Python 3.12+ supports filter='data' for safer extraction.
-            # Fall back to the unfiltered call for older interpreters but
-            # still reject absolute paths and .. components defensively.
-            for member in tf.getmembers():
-                name = member.name
-                if name.startswith("/") or ".." in Path(name).parts:
-                    raise tarfile.TarError(
-                        f"refusing to extract unsafe path: {name!r}"
-                    )
-            try:
-                tf.extractall(str(skills), filter="data")  # type: ignore[call-arg]
-            except TypeError:
-                # Python < 3.12 — no filter kwarg
-                tf.extractall(str(skills))
-    except (OSError, tarfile.TarError) as e:
-        # Best-effort recover: move staged contents back
-        for orig, dest in moved:
-            try:
-                shutil.move(str(dest), str(orig))
-            except OSError:
-                pass
-        try:
-            shutil.rmtree(staged, ignore_errors=True)
-        except OSError:
-            pass
-        return (False, f"snapshot extract failed (state restored): {e}", None)
-
-    # Extract succeeded — the staging dir has served its purpose. The
-    # user's undo handle is the safety snapshot tarball we took earlier.
-    try:
-        shutil.rmtree(staged, ignore_errors=True)
-    except OSError:
-        pass
-
-    logger.info("Curator rollback: restored from %s", target.name)
-    return (True, f"restored from snapshot {target.name}", target)
-
-
-# ---------------------------------------------------------------------------
-# Human-readable summary for CLI
-# ---------------------------------------------------------------------------
-
-def format_size(n: int) -> str:
-    for unit in ("B", "KB", "MB", "GB"):
-        if n < 1024 or unit == "GB":
-            return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
-        n /= 1024
-    return f"{n:.1f} GB"
-
-
-def summarize_backups() -> str:
-    rows = list_backups()
-    if not rows:
-        return "No curator snapshots yet."
-    lines = [f"{'id':<24}  {'reason':<40}  {'skills':>6}  {'size':>8}"]
-    lines.append("─" * len(lines[0]))
-    for r in rows:
-        lines.append(
-            f"{r.get('id','?'):<24}  "
-            f"{(r.get('reason','?') or '?')[:40]:<40}  "
-            f"{r.get('skill_files', 0):>6}  "
-            f"{format_size(int(r.get('archive_bytes', 0))):>8}"
-        )
-    return "\n".join(lines)
@@ -54,7 +54,6 @@ class FailoverReason(enum.Enum):
    # Provider-specific
    thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
    long_context_tier = "long_context_tier"    # Anthropic "extra usage" tier gate
-    oauth_long_context_beta_forbidden = "oauth_long_context_beta_forbidden"  # Anthropic OAuth subscription rejects 1M context beta — disable beta and retry

    # Catch-all
    unknown = "unknown"                  # Unclassifiable — retry with backoff
@@ -92,7 +91,6 @@ class ClassifiedError:
 _BILLING_PATTERNS = [
    "insufficient credits",
    "insufficient_quota",
-    "insufficient balance",
    "credit balance",
    "credits have been exhausted",
    "top up your credits",
@@ -451,25 +449,6 @@ def classify_api_error(
            should_compress=True,
        )

-    # Anthropic OAuth subscription rejects the 1M-context beta header.
-    # Observed error body: "The long context beta is not yet available for
-    # this subscription." Returned as HTTP 400 from native Anthropic when
-    # the subscription doesn't include 1M context, even though the request
-    # carries ``anthropic-beta: context-1m-2025-08-07``. The recovery path
-    # in run_agent.py rebuilds the Anthropic client with the beta stripped
-    # and retries once. Pattern is narrow enough that it won't collide with
-    # the 429 tier-gate pattern above (different status, different phrase).
-    if (
-        status_code == 400
-        and "long context beta" in error_msg
-        and "not yet available" in error_msg
-    ):
-        return _result(
-            FailoverReason.oauth_long_context_beta_forbidden,
-            retryable=True,
-            should_compress=False,
-        )
-
    # ── 2. HTTP status code classification ──────────────────────────

    if status_code is not None:
@@ -30,6 +30,7 @@ from __future__ import annotations

 import json
 import logging
+import os
 import time
 import uuid
 from types import SimpleNamespace
@@ -41,6 +42,7 @@ from agent import google_oauth
 from agent.gemini_schema import sanitize_gemini_tool_parameters
 from agent.google_code_assist import (
    CODE_ASSIST_ENDPOINT,
+    FREE_TIER_ID,
    CodeAssistError,
    ProjectContext,
    resolve_project_context,
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import Any, Dict
+from typing import Any, Dict, List

 # Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
 # object, which is only a subset of OpenAPI 3.0 / JSON Schema.  Strip fields
@@ -29,6 +29,7 @@ from __future__ import annotations

 import json
 import logging
+import os
 import time
 import urllib.error
 import urllib.parse
@@ -49,13 +49,14 @@ import json
 import logging
 import os
 import secrets
+import socket
 import stat
 import threading
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple

@@ -97,7 +98,6 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"

 # Regex patterns for fallback scraping from an installed gemini-cli.
 import re as _re
-from utils import atomic_replace
 _CLIENT_ID_PATTERN = _re.compile(
    r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
 )
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
                fh.flush()
                os.fsync(fh.fileno())
            os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
-            atomic_replace(tmp_path, path)
+            os.replace(tmp_path, path)
        finally:
            try:
                if tmp_path.exists():
@@ -1,48 +0,0 @@
-"""LM Studio reasoning-effort resolution shared by the chat-completions
-transport and run_agent's iteration-limit summary path.
-
-LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
-``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
-graduated models). We map the user's ``reasoning_config`` onto LM Studio's
-OpenAI-compatible vocabulary, then clamp against the model's allowed set so
-the server doesn't 400 on an unsupported effort.
-"""
-
-from __future__ import annotations
-
-from typing import List, Optional
-
-# LM Studio accepts these top-level reasoning_effort values via its
-# OpenAI-compatible chat.completions endpoint.
-_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
-
-# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
-# Map them onto the OpenAI-compatible request vocabulary.
-_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
-
-
-def resolve_lmstudio_effort(
-    reasoning_config: Optional[dict],
-    allowed_options: Optional[List[str]],
-) -> Optional[str]:
-    """Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
-
-    ``None`` means "omit the field": the user picked a level the model can't
-    honor, so let LM Studio fall back to the model's declared default rather
-    than silently substituting a different effort. When ``allowed_options`` is
-    falsy (probe failed), skip clamping and send the resolved effort anyway.
-    """
-    effort = "medium"
-    if reasoning_config and isinstance(reasoning_config, dict):
-        if reasoning_config.get("enabled") is False:
-            effort = "none"
-        else:
-            raw = (reasoning_config.get("effort") or "").strip().lower()
-            raw = _LM_EFFORT_ALIASES.get(raw, raw)
-            if raw in _LM_VALID_EFFORTS:
-                effort = raw
-    if allowed_options:
-        allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
-        if effort not in allowed:
-            return None
-    return effort
@@ -20,25 +20,25 @@ def summarize_manual_compression(
        headline = f"No changes from compression: {before_count} messages"
        if after_tokens == before_tokens:
            token_line = (
-                f"Approx request size: ~{before_tokens:,} tokens (unchanged)"
+                f"Rough transcript estimate: ~{before_tokens:,} tokens (unchanged)"
            )
        else:
            token_line = (
-                f"Approx request size: ~{before_tokens:,} → "
+                f"Rough transcript estimate: ~{before_tokens:,} → "
                f"~{after_tokens:,} tokens"
            )
    else:
        headline = f"Compressed: {before_count} → {after_count} messages"
        token_line = (
-            f"Approx request size: ~{before_tokens:,} → "
+            f"Rough transcript estimate: ~{before_tokens:,} → "
            f"~{after_tokens:,} tokens"
        )

    note = None
    if not noop and after_count < before_count and after_tokens > before_tokens:
        note = (
-            "Note: fewer messages can still raise this estimate when "
-            "compression rewrites the transcript into denser summaries."
+            "Note: fewer messages can still raise this rough transcript estimate "
+            "when compression rewrites the transcript into denser summaries."
        )

    return {
@@ -28,6 +28,7 @@ Usage in run_agent.py:

 from __future__ import annotations

+import json
 import logging
 import re
 import inspect
@@ -402,41 +403,6 @@ class MemoryManager:
                    provider.name, e,
                )

-    def on_session_switch(
-        self,
-        new_session_id: str,
-        *,
-        parent_session_id: str = "",
-        reset: bool = False,
-        **kwargs,
-    ) -> None:
-        """Notify all providers that the agent's session_id has rotated.
-
-        Fires on ``/resume``, ``/branch``, ``/reset``, ``/new``, and
-        context compression — any path that reassigns
-        ``AIAgent.session_id`` without tearing the provider down.
-
-        Providers keep running; they only need to refresh cached
-        per-session state so subsequent writes land in the correct
-        session's record. See ``MemoryProvider.on_session_switch`` for
-        the full contract.
-        """
-        if not new_session_id:
-            return
-        for provider in self._providers:
-            try:
-                provider.on_session_switch(
-                    new_session_id,
-                    parent_session_id=parent_session_id,
-                    reset=reset,
-                    **kwargs,
-                )
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' on_session_switch failed: %s",
-                    provider.name, e,
-                )
-
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Notify all providers before context compression.

@@ -25,7 +25,6 @@ Lifecycle (called by MemoryManager, wired in run_agent.py):
 Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
-  on_session_switch(new_session_id, **kwargs) — mid-process session_id rotation
  on_pre_compress(messages) -> str       — extract before context compression
  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
@@ -161,45 +160,6 @@ class MemoryProvider(ABC):
        (CLI exit, /reset, gateway session expiry).
        """

-    def on_session_switch(
-        self,
-        new_session_id: str,
-        *,
-        parent_session_id: str = "",
-        reset: bool = False,
-        **kwargs,
-    ) -> None:
-        """Called when the agent switches session_id mid-process.
-
-        Fires on ``/resume``, ``/branch``, ``/reset``, ``/new`` (CLI), the
-        gateway equivalents, and context compression — any path that
-        reassigns ``AIAgent.session_id`` without tearing the provider down.
-
-        Providers that cache per-session state in ``initialize()``
-        (``_session_id``, ``_document_id``, accumulated turn buffers,
-        counters) should update or reset that state here so subsequent
-        writes land in the correct session's record.
-
-        Parameters
-        ----------
-        new_session_id:
-            The session_id the agent just switched to.
-        parent_session_id:
-            The previous session_id, if meaningful — set for ``/branch``
-            (fork lineage), context compression (continuation lineage),
-            and ``/resume`` (the session we're leaving). Empty string
-            when no lineage applies.
-        reset:
-            ``True`` when this is a genuinely new conversation, not a
-            resumption of an existing one. Fired by ``/reset`` / ``/new``.
-            Providers should flush accumulated per-session buffers
-            (``_session_turns``, ``_turn_counter``, etc.) when this is
-            set. ``False`` for ``/resume`` / ``/branch`` / compression
-            where the logical conversation continues under the new id.
-
-        Default is no-op for backward compatibility.
-        """
-
    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
        """Called before context compression discards old messages.

@@ -46,13 +46,12 @@ def _resolve_requests_verify() -> bool | str:
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-oauth", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
    "arcee",
    "gmi",
-    "tencent-tokenhub",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -61,7 +60,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "ollama",
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
-    "tencent", "tokenhub", "tencent-cloud", "tencentmaas",
    "arcee-ai", "arceeai",
    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
@@ -210,8 +208,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    "grok": 131072,             # catch-all (grok-beta, unknown grok-*)
    # Kimi
    "kimi": 262144,
-    # Tencent — Hy3 Preview (Hunyuan) with 256K context window
-    "hy3-preview": 256000,
    # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
    "nemotron": 131072,
    # Arcee
@@ -314,10 +310,20 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    "api.gmi-serving.com": "gmi",
-    "tokenhub.tencentmaas.com": "tencent-tokenhub",
    "ollama.com": "ollama-cloud",
 }

+# Auto-extend with hostnames derived from provider profiles: any provider whose
+# hostname is not already in the map is added (best-effort; errors are ignored).
+try:
+    from providers import list_providers as _list_providers
+    for _pp in _list_providers():
+        _host = _pp.get_hostname()
+        if _host and _host not in _URL_TO_PROVIDER:
+            _URL_TO_PROVIDER[_host] = _pp.name
+except Exception:
+    pass
+

 def _infer_provider_from_url(base_url: str) -> Optional[str]:
    """Infer the models.dev provider name from a base URL.
@@ -625,6 +631,8 @@ def fetch_endpoint_model_metadata(
                        if isinstance(ctx, int) and ctx > 0:
                            context_length = ctx
                            break
+                    if context_length is None:
+                        context_length = _extract_context_length(model)
                    if context_length is not None:
                        entry["context_length"] = context_length

@@ -1014,7 +1022,10 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
                                ctx = cfg.get("context_length")
                                if ctx and isinstance(ctx, (int, float)):
                                    return int(ctx)
-                            break
+                            # Fall back to max_context_length (theoretical model max), then context_length
+                            ctx = m.get("max_context_length") or m.get("context_length")
+                            if ctx and isinstance(ctx, (int, float)):
+                                return int(ctx)

            # LM Studio / vLLM / llama.cpp: try /v1/models/{model}
            resp = client.get(f"{server_url}/v1/models/{model}")
@@ -1247,7 +1258,7 @@ def get_model_context_length(
    6. Nous suffix-match via OpenRouter cache
    7. models.dev registry lookup (provider-aware)
    8. Thin hardcoded defaults (broad family patterns)
-    9. Default fallback (256K)
+    9. Default fallback (128K)
    """
    # 0. Explicit config override — user knows best
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
@@ -1276,10 +1287,7 @@ def get_model_context_length(
    model = _strip_provider_prefix(model)

    # 1. Check persistent cache (model+provider)
-    # LM Studio is excluded — its loaded context length is transient (the
-    # user can reload the model with a different context_length at any time
-    # via /api/v1/models/load), so a stale cached value would mask reloads.
-    if base_url and provider != "lmstudio":
+    if base_url:
        cached = get_cached_context_length(model, base_url)
        if cached is not None:
            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
@@ -1332,8 +1340,7 @@ def get_model_context_length(
            if is_local_endpoint(base_url):
                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
-                    if provider != "lmstudio":
-                        save_context_length(model, base_url, local_ctx)
+                    save_context_length(model, base_url, local_ctx)
                    return local_ctx
            logger.info(
                "Could not detect context length for model %r at %s — "
@@ -1423,11 +1430,10 @@ def get_model_context_length(
    if base_url and is_local_endpoint(base_url):
        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
-            if provider != "lmstudio":
-                save_context_length(model, base_url, local_ctx)
+            save_context_length(model, base_url, local_ctx)
            return local_ctx

-    # 10. Default fallback — 256K
+    # 10. Default fallback — 128K
    return DEFAULT_FALLBACK_CONTEXT


@@ -149,7 +149,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
-    "minimax-oauth": "minimax",
    "minimax-cn": "minimax-cn",
    "deepseek": "deepseek",
    "alibaba": "alibaba",
@@ -81,56 +81,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
        return repaired

    # Rule 2: when anyOf is present, type belongs only on the children.
-    # Additionally, Moonshot rejects null-type branches inside anyOf
-    # (enum value (<nil>) does not match any type in [string]).
-    # Collapse the anyOf to the first non-null branch and infer its type.
    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
        repaired.pop("type", None)
-        non_null = [b for b in repaired["anyOf"]
-                    if isinstance(b, dict) and b.get("type") != "null"]
-        if non_null and len(non_null) < len(repaired["anyOf"]):
-            # Drop the anyOf wrapper — keep only the non-null branch.
-            # If there's a single non-null branch, promote it and fall
-            # through to Rules 1/3 so nullable/enum cleanup still applies
-            # to the merged node.
-            if len(non_null) == 1:
-                merge = {k: v for k, v in repaired.items() if k != "anyOf"}
-                merge.update(non_null[0])
-                repaired = merge
-            else:
-                repaired["anyOf"] = non_null
-                return repaired
-        else:
-            # Nothing to collapse — parent type stripped, children already
-            # repaired by the recursive walk above.
-            return repaired
-
-    # Moonshot also rejects non-standard keywords like ``nullable`` on
-    # parameter schemas — strip it.
-    repaired.pop("nullable", None)
+        return repaired

    # Rule 1: property schemas without type need one.  $ref nodes are exempt
    # — their type comes from the referenced definition.
-    # Fill missing type BEFORE Rule 3 so enum cleanup can check the type.
-    if "$ref" not in repaired:
-        repaired = _fill_missing_type(repaired)
-
-    # Rule 3: Moonshot rejects null/empty-string values inside enum arrays
-    # when the parent type is a scalar (string, integer, etc.).  The error:
-    #   "enum value (<nil>) does not match any type in [string]"
-    # Strip null and empty-string from enum values, and if the enum becomes
-    # empty, drop it entirely.
-    if "enum" in repaired and isinstance(repaired["enum"], list):
-        node_type = repaired.get("type")
-        if node_type in ("string", "integer", "number", "boolean"):
-            cleaned = [v for v in repaired["enum"]
-                       if v is not None and v != ""]
-            if cleaned:
-                repaired["enum"] = cleaned
-            else:
-                repaired.pop("enum")
-
-    return repaired
+    if "$ref" in repaired:
+        return repaired
+    return _fill_missing_type(repaired)


 def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
@@ -18,7 +18,6 @@ import os
 import tempfile
 import time
 from typing import Any, Mapping, Optional
-from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -119,7 +118,7 @@ def record_nous_rate_limit(
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(state, f)
-            atomic_replace(tmp_path, path)
+            os.replace(tmp_path, path)
        except Exception:
            # Clean up temp file on failure
            try:
@@ -98,19 +98,17 @@ def tool_progress_hint_cli() -> str:
 def openclaw_residue_hint_cli() -> str:
    """Banner shown the first time Hermes starts and finds ``~/.openclaw/``.

-    Points users at ``hermes claw migrate`` (non-destructive port of config,
-    memory, and skills) first. ``hermes claw cleanup`` is mentioned as the
-    follow-up step for users who have already migrated and want to archive
-    the old directory — with a warning that archiving breaks OpenClaw.
+    If left in place, OpenClaw-era config, memory, and skill paths in
+    ``~/.openclaw/`` attract the agent (memory entries like ``~/.openclaw/config.yaml``
+    get carried forward and the agent dutifully reads them). ``hermes claw
+    cleanup`` renames the directory so the agent stops finding it.
    """
    return (
-        "A legacy OpenClaw directory was detected at ~/.openclaw/.\n"
-        "To port your config, memory, and skills over to Hermes, run "
-        "`hermes claw migrate`.\n"
-        "If you've already migrated and want to archive the old directory, "
-        "run `hermes claw cleanup` (renames it to ~/.openclaw.pre-migration — "
-        "OpenClaw will stop working after this).\n"
-        "This tip only shows once."
+        "Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
+        "After migrating, the agent can still get confused and read that "
+        "directory's config/memory instead of Hermes's.\n"
+        "Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
+        "This tip only shows once; rerun it any time with `hermes claw cleanup`."
    )


@@ -182,64 +182,6 @@ SKILLS_GUIDANCE = (
    "Skills that aren't maintained become liabilities."
 )

-KANBAN_GUIDANCE = (
-    "# You are a Kanban worker\n"
-    "You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
-    "the shared board at `~/.hermes/kanban.db`. Your task id is in "
-    "`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
-    "The `kanban_*` tools in your schema are your primary coordination surface — "
-    "they write directly to the shared SQLite DB and work regardless of terminal "
-    "backend (local/docker/modal/ssh).\n"
-    "\n"
-    "## Lifecycle\n"
-    "\n"
-    "1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
-    "task). The response includes title, body, parent-task handoffs (summary + "
-    "metadata), any prior attempts on this task if you're a retry, the full "
-    "comment thread, and a pre-formatted `worker_context` you can treat as "
-    "ground truth.\n"
-    "2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
-    "any file operations. The workspace is yours for this run. Don't modify "
-    "files outside it unless the task explicitly asks.\n"
-    "3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
-    "every few minutes during long subprocesses (training, encoding, crawling). "
-    "Skip heartbeats for short tasks.\n"
-    "4. **Block on genuine ambiguity.** If you need a human decision you cannot "
-    "infer (missing credentials, UX choice, paywalled source, peer output you "
-    "need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
-    "The user will unblock with context and the dispatcher will respawn you.\n"
-    "5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
-    "metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
-    "artifacts. `metadata` is machine-readable facts "
-    "(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
-    "workers read both via their own `kanban_show`. Never put secrets / "
-    "tokens / raw PII in either field — run rows are durable forever.\n"
-    "6. **If follow-up work appears, create it; don't do it.** Use "
-    "`kanban_create(title=..., assignee=<right-profile>, parents=[your-task-id])` "
-    "to spawn a child task for the appropriate specialist profile instead of "
-    "scope-creeping into the next thing.\n"
-    "\n"
-    "## Orchestrator mode\n"
-    "\n"
-    "If your task is itself a decomposition task (e.g. a planner profile given "
-    "a high-level goal), use `kanban_create` to fan out into child tasks — one "
-    "per specialist, each with an explicit `assignee` and `parents=[...]` to "
-    "express dependencies. Then `kanban_complete` your own task with a summary "
-    "of the decomposition. Do NOT execute the work yourself; your job is "
-    "routing, not implementation.\n"
-    "\n"
-    "## Do NOT\n"
-    "\n"
-    "- Do not shell out to `hermes kanban <verb>` for board operations. Use "
-    "the `kanban_*` tools — they work across all terminal backends.\n"
-    "- Do not complete a task you didn't actually finish. Block it.\n"
-    "- Do not assign follow-up work to yourself. Assign it to the right "
-    "specialist profile.\n"
-    "- Do not call `delegate_task` as a board substitute. `delegate_task` is "
-    "for short reasoning subtasks inside your own run; board tasks are for "
-    "cross-agent handoffs that outlive one API loop."
-)
-
 TOOL_USE_ENFORCEMENT_GUIDANCE = (
    "# Tool-use enforcement\n"
    "You MUST use your tools to take action — do not describe what you would do "
@@ -368,10 +310,6 @@ PLATFORM_HINTS = {
        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
-        "Telegram has NO table syntax — prefer bullet lists or labeled "
-        "key: value pairs over pipe tables (any tables you do emit are "
-        "auto-rewritten into row-group bullets, which you can produce "
-        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -56,12 +56,8 @@ _SENSITIVE_BODY_KEYS = frozenset({
 })

 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
-# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
-# mid-session.  OFF by default — user must opt in via
-# `security.redact_secrets: true` in config.yaml (bridged to this env var
-# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
-# in ~/.hermes/.env.
-_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")
+# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")

 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
@@ -184,59 +180,11 @@ _PREFIX_RE = re.compile(
 )


-def mask_secret(
-    value: str,
-    *,
-    head: int = 4,
-    tail: int = 4,
-    floor: int = 12,
-    placeholder: str = "***",
-    empty: str = "",
-) -> str:
-    """Mask a secret for display, preserving ``head`` and ``tail`` characters.
-
-    Canonical helper for display-time redaction across Hermes — used by
-    ``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
-    a secret needs to be shown truncated for debuggability while still
-    keeping the bulk hidden.
-
-    Args:
-        value:       The secret to mask. ``None``/empty returns ``empty``.
-        head:        Leading characters to preserve. Default 4.
-        tail:        Trailing characters to preserve. Default 4.
-        floor:       Values shorter than ``head + tail + floor_margin`` are
-                     fully masked (returns ``placeholder``). Default 12 —
-                     matches the existing config/status/dump convention.
-        placeholder: Value returned for too-short inputs. Default ``"***"``.
-        empty:       Value returned when ``value`` is falsy (None, ""). The
-                     caller can override this to e.g. ``color("(not set)",
-                     Colors.DIM)`` for user-facing display.
-
-    Examples:
-        >>> mask_secret("sk-proj-abcdef1234567890")
-        'sk-p...7890'
-        >>> mask_secret("short")                         # fully masked
-        '***'
-        >>> mask_secret("")                              # empty default
-        ''
-        >>> mask_secret("", empty="(not set)")           # empty override
-        '(not set)'
-        >>> mask_secret("long-token", head=6, tail=4, floor=18)
-        '***'
-    """
-    if not value:
-        return empty
-    if len(value) < floor:
-        return placeholder
-    return f"{value[:head]}...{value[-tail:]}"
-
-
 def _mask_token(token: str) -> str:
-    """Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
-    # Empty input: historically this returned "***" rather than "". Preserve.
-    if not token:
+    """Mask a token: keep the first 6 and last 4 chars; tokens under 18 chars become ``***``."""
+    if len(token) < 18:
        return "***"
-    return mask_secret(token, head=6, tail=4, floor=18)
+    return f"{token[:6]}...{token[-4:]}"


 def _redact_query_string(query: str) -> str:
@@ -305,13 +253,11 @@ def _redact_form_body(text: str) -> str:
    return _redact_query_string(text.strip())


-def redact_sensitive_text(text: str, *, force: bool = False) -> str:
+def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
-    Disabled by default — enable via security.redact_secrets: true in config.yaml.
-    Set force=True for safety boundaries that must never return raw secrets
-    regardless of the user's global logging redaction preference.
+    Disabled when security.redact_secrets is false in config.yaml.
    """
    if text is None:
        return None
@@ -319,7 +265,7 @@ def redact_sensitive_text(text: str, *, force: bool = False) -> str:
        text = str(text)
    if not text:
        return text
-    if not (force or _REDACT_ENABLED):
+    if not _REDACT_ENABLED:
        return text

    # Known prefixes (sk-, ghp_, etc.)
@@ -76,7 +76,6 @@ except ImportError:  # pragma: no cover
    fcntl = None  # type: ignore[assignment]

 from hermes_constants import get_hermes_home
-from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -569,7 +568,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
        try:
            with os.fdopen(fd, "w") as fh:
                fh.write(json.dumps(data, indent=2, sort_keys=True))
-            atomic_replace(tmp_path, p)
+            os.replace(tmp_path, p)
        except Exception:
            try:
                os.unlink(tmp_path)
@@ -234,7 +234,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:

        for scan_dir in dirs_to_scan:
            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
-                if any(part in ('.git', '.github', '.hub', '.archive') for part in skill_md.parts):
+                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                    continue
                try:
                    content = skill_md.read_text(encoding='utf-8')
@@ -284,71 +284,6 @@ def get_skill_commands() -> Dict[str, Dict[str, Any]]:
    return _skill_commands


-def reload_skills() -> Dict[str, Any]:
-    """Re-scan the skills directory and return a diff of what changed.
-
-    Rescans ``~/.hermes/skills/`` and any ``skills.external_dirs`` so the
-    slash-command map (``agent.skill_commands._skill_commands``) reflects
-    skills added or removed on disk.
-
-    This does NOT invalidate the skills system-prompt cache. Skills are
-    called by name via ``/skill-name``, ``skills_list``, or ``skill_view``
-    — they don't need to be in the system prompt for the model to use them.
-    Keeping the prompt cache intact preserves prefix caching across the
-    reload, so a user invoking ``/reload-skills`` pays no cache-reset cost.
-
-    Returns:
-        Dict with keys::
-
-            {
-              "added":      [{"name": str, "description": str}, ...],
-              "removed":    [{"name": str, "description": str}, ...],
-              "unchanged":  [skill names present before and after],
-              "total":      total skill count after rescan,
-              "commands":   total /slash-skill count after rescan,
-            }
-
-        ``description`` is the skill's full SKILL.md frontmatter
-        ``description:`` field — the same string the system prompt renders
-        as ``    - name: description`` for pre-existing skills.
-    """
-    # Snapshot pre-reload state (name -> description) from the current
-    # slash-command cache. Using dicts lets the post-rescan diff carry
-    # descriptions for newly-visible or just-removed skills without a
-    # second disk walk.
-    def _snapshot(cmds: Dict[str, Dict[str, Any]]) -> Dict[str, str]:
-        out: Dict[str, str] = {}
-        for slash_key, info in cmds.items():
-            bare = slash_key.lstrip("/")
-            out[bare] = (info or {}).get("description") or ""
-        return out
-
-    before = _snapshot(_skill_commands)
-
-    # Rescan the skills dir. ``scan_skill_commands`` resets
-    # ``_skill_commands = {}`` internally and repopulates it.
-    new_commands = scan_skill_commands()
-
-    after = _snapshot(new_commands)
-
-    added_names = sorted(set(after) - set(before))
-    removed_names = sorted(set(before) - set(after))
-    unchanged = sorted(set(after) & set(before))
-
-    added = [{"name": n, "description": after[n]} for n in added_names]
-    # For removed skills, use the description we had cached pre-rescan
-    # (the skill file is gone so we can't re-read it).
-    removed = [{"name": n, "description": before[n]} for n in removed_names]
-
-    return {
-        "added": added,
-        "removed": removed,
-        "unchanged": unchanged,
-        "total": len(after),
-        "commands": len(new_commands),
-    }
-
-
 def resolve_skill_command_key(command: str) -> Optional[str]:
    """Resolve a user-typed /command to its canonical skill_cmds key.

@@ -393,14 +328,6 @@ def build_skill_invocation_message(
        return f"[Failed to load skill: {skill_info['name']}]"

    loaded_skill, skill_dir, skill_name = loaded
-
-    # Track active usage for Curator lifecycle management (#17782)
-    try:
-        from tools.skill_usage import bump_use
-        bump_use(skill_name)
-    except Exception:
-        pass  # Non-critical — skill invocation proceeds regardless
-
    activation_note = (
        f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
        "you to follow its instructions. The full skill content is loaded below.]"
@@ -440,14 +367,6 @@ def build_preloaded_skills_prompt(
            continue

        loaded_skill, skill_dir, skill_name = loaded
-
-        # Track active usage for Curator lifecycle management (#17782)
-        try:
-            from tools.skill_usage import bump_use
-            bump_use(skill_name)
-        except Exception:
-            pass  # Non-critical
-
        activation_note = (
            f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
            "preloaded. Treat its instructions as active guidance for the duration of this "
@@ -24,7 +24,7 @@ PLATFORM_MAP = {
    "windows": "win32",
 }

-EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
+EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub"))

 # ── Lazy YAML loader ─────────────────────────────────────────────────────

@@ -200,9 +200,6 @@ def get_external_skills_dirs() -> List[Path]:
    if not isinstance(raw_dirs, list):
        return []

-    from hermes_constants import get_hermes_home
-
-    hermes_home = get_hermes_home()
    local_skills = get_skills_dir().resolve()
    seen: Set[Path] = set()
    result: List[Path] = []
@@ -213,12 +210,7 @@ def get_external_skills_dirs() -> List[Path]:
            continue
        # Expand ~ and environment variables
        expanded = os.path.expanduser(os.path.expandvars(entry))
-        p = Path(expanded)
-        # Resolve relative paths against HERMES_HOME, not cwd
-        if not p.is_absolute():
-            p = (hermes_home / p).resolve()
-        else:
-            p = p.resolve()
+        p = Path(expanded).resolve()
        if p == local_skills:
            continue
        if p in seen:
@@ -440,7 +432,7 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
 def iter_skill_index_files(skills_dir: Path, filename: str):
    """Walk skills_dir yielding sorted paths matching *filename*.

-    Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
+    Excludes ``.git``, ``.github``, ``.hub`` directories.
    """
    matches = []
    for root, dirs, files in os.walk(skills_dir, followlinks=True):
@@ -30,12 +30,10 @@ def generate_title(
    assistant_response: str,
    timeout: float = 30.0,
    failure_callback: Optional[FailureCallback] = None,
-    main_runtime: dict = None,
 ) -> Optional[str]:
    """Generate a session title from the first exchange.

-    Uses the main runtime's model when available, falling back to the
-    auxiliary LLM client (cheapest/fastest available model).
+    Uses the auxiliary LLM client (cheapest/fastest available model).
    Returns the title string or None on failure.

    ``failure_callback`` is invoked with ``(task, exception)`` when the
@@ -59,7 +57,6 @@ def generate_title(
            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
-            main_runtime=main_runtime,
        )
        title = (response.choices[0].message.content or "").strip()
        # Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
@@ -89,7 +86,6 @@ def auto_title_session(
    user_message: str,
    assistant_response: str,
    failure_callback: Optional[FailureCallback] = None,
-    main_runtime: dict = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -111,7 +107,7 @@ def auto_title_session(
        return

    title = generate_title(
-        user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
+        user_message, assistant_response, failure_callback=failure_callback
    )
    if not title:
        return
@@ -130,7 +126,6 @@ def maybe_auto_title(
    assistant_response: str,
    conversation_history: list,
    failure_callback: Optional[FailureCallback] = None,
-    main_runtime: dict = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -152,7 +147,7 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
-        kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
+        kwargs={"failure_callback": failure_callback},
        daemon=True,
        name="auto-title",
    )
@@ -1,455 +0,0 @@
-"""Pure tool-call loop guardrail primitives.
-
-The controller in this module is intentionally side-effect free: it tracks
-per-turn tool-call observations and returns decisions. Runtime code owns whether
-those decisions become warning guidance, synthetic tool results, or controlled
-turn halts.
-"""
-
-from __future__ import annotations
-
-import hashlib
-import json
-from dataclasses import dataclass, field
-from typing import Any, Mapping
-
-from utils import safe_json_loads
-
-
-IDEMPOTENT_TOOL_NAMES = frozenset(
-    {
-        "read_file",
-        "search_files",
-        "web_search",
-        "web_extract",
-        "session_search",
-        "browser_snapshot",
-        "browser_console",
-        "browser_get_images",
-        "mcp_filesystem_read_file",
-        "mcp_filesystem_read_text_file",
-        "mcp_filesystem_read_multiple_files",
-        "mcp_filesystem_list_directory",
-        "mcp_filesystem_list_directory_with_sizes",
-        "mcp_filesystem_directory_tree",
-        "mcp_filesystem_get_file_info",
-        "mcp_filesystem_search_files",
-    }
-)
-
-MUTATING_TOOL_NAMES = frozenset(
-    {
-        "terminal",
-        "execute_code",
-        "write_file",
-        "patch",
-        "todo",
-        "memory",
-        "skill_manage",
-        "browser_click",
-        "browser_type",
-        "browser_press",
-        "browser_scroll",
-        "browser_navigate",
-        "send_message",
-        "cronjob",
-        "delegate_task",
-        "process",
-    }
-)
-
-
-@dataclass(frozen=True)
-class ToolCallGuardrailConfig:
-    """Thresholds for per-turn tool-call loop detection.
-
-    Warnings are enabled by default and never prevent tool execution. Hard stops
-    are explicit opt-in so interactive CLI/TUI sessions get a gentle nudge unless
-    the user enables circuit-breaker behavior in config.yaml.
-    """
-
-    warnings_enabled: bool = True
-    hard_stop_enabled: bool = False
-    exact_failure_warn_after: int = 2
-    exact_failure_block_after: int = 5
-    same_tool_failure_warn_after: int = 3
-    same_tool_failure_halt_after: int = 8
-    no_progress_warn_after: int = 2
-    no_progress_block_after: int = 5
-    idempotent_tools: frozenset[str] = field(default_factory=lambda: IDEMPOTENT_TOOL_NAMES)
-    mutating_tools: frozenset[str] = field(default_factory=lambda: MUTATING_TOOL_NAMES)
-
-    @classmethod
-    def from_mapping(cls, data: Mapping[str, Any] | None) -> "ToolCallGuardrailConfig":
-        """Build config from the `tool_loop_guardrails` config.yaml section."""
-        if not isinstance(data, Mapping):
-            return cls()
-
-        warn_after = data.get("warn_after")
-        if not isinstance(warn_after, Mapping):
-            warn_after = {}
-        hard_stop_after = data.get("hard_stop_after")
-        if not isinstance(hard_stop_after, Mapping):
-            hard_stop_after = {}
-
-        defaults = cls()
-        return cls(
-            warnings_enabled=_as_bool(data.get("warnings_enabled"), defaults.warnings_enabled),
-            hard_stop_enabled=_as_bool(data.get("hard_stop_enabled"), defaults.hard_stop_enabled),
-            exact_failure_warn_after=_positive_int(
-                warn_after.get("exact_failure", data.get("exact_failure_warn_after")),
-                defaults.exact_failure_warn_after,
-            ),
-            same_tool_failure_warn_after=_positive_int(
-                warn_after.get("same_tool_failure", data.get("same_tool_failure_warn_after")),
-                defaults.same_tool_failure_warn_after,
-            ),
-            no_progress_warn_after=_positive_int(
-                warn_after.get("idempotent_no_progress", data.get("no_progress_warn_after")),
-                defaults.no_progress_warn_after,
-            ),
-            exact_failure_block_after=_positive_int(
-                hard_stop_after.get("exact_failure", data.get("exact_failure_block_after")),
-                defaults.exact_failure_block_after,
-            ),
-            same_tool_failure_halt_after=_positive_int(
-                hard_stop_after.get("same_tool_failure", data.get("same_tool_failure_halt_after")),
-                defaults.same_tool_failure_halt_after,
-            ),
-            no_progress_block_after=_positive_int(
-                hard_stop_after.get("idempotent_no_progress", data.get("no_progress_block_after")),
-                defaults.no_progress_block_after,
-            ),
-        )
-
-
-@dataclass(frozen=True)
-class ToolCallSignature:
-    """Stable, non-reversible identity for a tool name plus canonical args."""
-
-    tool_name: str
-    args_hash: str
-
-    @classmethod
-    def from_call(cls, tool_name: str, args: Mapping[str, Any] | None) -> "ToolCallSignature":
-        canonical = canonical_tool_args(args or {})
-        return cls(tool_name=tool_name, args_hash=_sha256(canonical))
-
-    def to_metadata(self) -> dict[str, str]:
-        """Return public metadata without raw argument values."""
-        return {"tool_name": self.tool_name, "args_hash": self.args_hash}
-
-
-@dataclass(frozen=True)
-class ToolGuardrailDecision:
-    """Decision returned by the tool-call guardrail controller."""
-
-    action: str = "allow"  # allow | warn | block | halt
-    code: str = "allow"
-    message: str = ""
-    tool_name: str = ""
-    count: int = 0
-    signature: ToolCallSignature | None = None
-
-    @property
-    def allows_execution(self) -> bool:
-        return self.action in {"allow", "warn"}
-
-    @property
-    def should_halt(self) -> bool:
-        return self.action in {"block", "halt"}
-
-    def to_metadata(self) -> dict[str, Any]:
-        data: dict[str, Any] = {
-            "action": self.action,
-            "code": self.code,
-            "message": self.message,
-            "tool_name": self.tool_name,
-            "count": self.count,
-        }
-        if self.signature is not None:
-            data["signature"] = self.signature.to_metadata()
-        return data
-
-
-def canonical_tool_args(args: Mapping[str, Any]) -> str:
-    """Return sorted compact JSON for parsed tool arguments."""
-    if not isinstance(args, Mapping):
-        raise TypeError(f"tool args must be a mapping, got {type(args).__name__}")
-    return json.dumps(
-        args,
-        ensure_ascii=False,
-        sort_keys=True,
-        separators=(",", ":"),
-        default=str,
-    )
-
-
-def classify_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]:
-    """Safety-fallback classifier used only when callers don't pass ``failed``.
-
-    Mirrors ``agent.display._detect_tool_failure`` exactly so the guardrail
-    never disagrees with the CLI's user-visible ``[error]`` tag. Production
-    callers in ``run_agent.py`` always pass an explicit ``failed=`` derived
-    from ``_detect_tool_failure``; this function exists so standalone callers
-    (tests, tooling) still get consistent behavior.
-    """
-    if result is None:
-        return False, ""
-
-    if tool_name == "terminal":
-        data = safe_json_loads(result)
-        if isinstance(data, dict):
-            exit_code = data.get("exit_code")
-            if exit_code is not None and exit_code != 0:
-                return True, f" [exit {exit_code}]"
-        return False, ""
-
-    if tool_name == "memory":
-        data = safe_json_loads(result)
-        if isinstance(data, dict):
-            if data.get("success") is False and "exceed the limit" in data.get("error", ""):
-                return True, " [full]"
-
-    lower = result[:500].lower()
-    if '"error"' in lower or '"failed"' in lower or result.startswith("Error"):
-        return True, " [error]"
-
-    return False, ""
-
-
-class ToolCallGuardrailController:
-    """Per-turn controller for repeated failed/non-progressing tool calls."""
-
-    def __init__(self, config: ToolCallGuardrailConfig | None = None):
-        self.config = config or ToolCallGuardrailConfig()
-        self.reset_for_turn()
-
-    def reset_for_turn(self) -> None:
-        self._exact_failure_counts: dict[ToolCallSignature, int] = {}
-        self._same_tool_failure_counts: dict[str, int] = {}
-        self._no_progress: dict[ToolCallSignature, tuple[str, int]] = {}
-        self._halt_decision: ToolGuardrailDecision | None = None
-
-    @property
-    def halt_decision(self) -> ToolGuardrailDecision | None:
-        return self._halt_decision
-
-    def before_call(self, tool_name: str, args: Mapping[str, Any] | None) -> ToolGuardrailDecision:
-        signature = ToolCallSignature.from_call(tool_name, _coerce_args(args))
-        if not self.config.hard_stop_enabled:
-            return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
-
-        exact_count = self._exact_failure_counts.get(signature, 0)
-        if exact_count >= self.config.exact_failure_block_after:
-            decision = ToolGuardrailDecision(
-                action="block",
-                code="repeated_exact_failure_block",
-                message=(
-                    f"Blocked {tool_name}: the same tool call failed {exact_count} "
-                    "times with identical arguments. Stop retrying it unchanged; "
-                    "change strategy or explain the blocker."
-                ),
-                tool_name=tool_name,
-                count=exact_count,
-                signature=signature,
-            )
-            self._halt_decision = decision
-            return decision
-
-        if self._is_idempotent(tool_name):
-            record = self._no_progress.get(signature)
-            if record is not None:
-                _result_hash, repeat_count = record
-                if repeat_count >= self.config.no_progress_block_after:
-                    decision = ToolGuardrailDecision(
-                        action="block",
-                        code="idempotent_no_progress_block",
-                        message=(
-                            f"Blocked {tool_name}: this read-only call returned the same "
-                            f"result {repeat_count} times. Stop repeating it unchanged; "
-                            "use the result already provided or try a different query."
-                        ),
-                        tool_name=tool_name,
-                        count=repeat_count,
-                        signature=signature,
-                    )
-                    self._halt_decision = decision
-                    return decision
-
-        return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
-
-    def after_call(
-        self,
-        tool_name: str,
-        args: Mapping[str, Any] | None,
-        result: str | None,
-        *,
-        failed: bool | None = None,
-    ) -> ToolGuardrailDecision:
-        args = _coerce_args(args)
-        signature = ToolCallSignature.from_call(tool_name, args)
-        if failed is None:
-            failed, _ = classify_tool_failure(tool_name, result)
-
-        if failed:
-            exact_count = self._exact_failure_counts.get(signature, 0) + 1
-            self._exact_failure_counts[signature] = exact_count
-            self._no_progress.pop(signature, None)
-
-            same_count = self._same_tool_failure_counts.get(tool_name, 0) + 1
-            self._same_tool_failure_counts[tool_name] = same_count
-
-            if self.config.hard_stop_enabled and same_count >= self.config.same_tool_failure_halt_after:
-                decision = ToolGuardrailDecision(
-                    action="halt",
-                    code="same_tool_failure_halt",
-                    message=(
-                        f"Stopped {tool_name}: it failed {same_count} times this turn. "
-                        "Stop retrying the same failing tool path and choose a different approach."
-                    ),
-                    tool_name=tool_name,
-                    count=same_count,
-                    signature=signature,
-                )
-                self._halt_decision = decision
-                return decision
-
-            if self.config.warnings_enabled and exact_count >= self.config.exact_failure_warn_after:
-                return ToolGuardrailDecision(
-                    action="warn",
-                    code="repeated_exact_failure_warning",
-                    message=(
-                        f"{tool_name} has failed {exact_count} times with identical arguments. "
-                        "This looks like a loop; inspect the error and change strategy "
-                        "instead of retrying it unchanged."
-                    ),
-                    tool_name=tool_name,
-                    count=exact_count,
-                    signature=signature,
-                )
-
-            if self.config.warnings_enabled and same_count >= self.config.same_tool_failure_warn_after:
-                return ToolGuardrailDecision(
-                    action="warn",
-                    code="same_tool_failure_warning",
-                    message=(
-                        f"{tool_name} has failed {same_count} times this turn. "
-                        "This looks like a loop; change approach before retrying."
-                    ),
-                    tool_name=tool_name,
-                    count=same_count,
-                    signature=signature,
-                )
-
-            return ToolGuardrailDecision(tool_name=tool_name, count=exact_count, signature=signature)
-
-        self._exact_failure_counts.pop(signature, None)
-        self._same_tool_failure_counts.pop(tool_name, None)
-
-        if not self._is_idempotent(tool_name):
-            self._no_progress.pop(signature, None)
-            return ToolGuardrailDecision(tool_name=tool_name, signature=signature)
-
-        result_hash = _result_hash(result)
-        previous = self._no_progress.get(signature)
-        repeat_count = 1
-        if previous is not None and previous[0] == result_hash:
-            repeat_count = previous[1] + 1
-        self._no_progress[signature] = (result_hash, repeat_count)
-
-        if self.config.warnings_enabled and repeat_count >= self.config.no_progress_warn_after:
-            return ToolGuardrailDecision(
-                action="warn",
-                code="idempotent_no_progress_warning",
-                message=(
-                    f"{tool_name} returned the same result {repeat_count} times. "
-                    "Use the result already provided or change the query instead of "
-                    "repeating it unchanged."
-                ),
-                tool_name=tool_name,
-                count=repeat_count,
-                signature=signature,
-            )
-
-        return ToolGuardrailDecision(tool_name=tool_name, count=repeat_count, signature=signature)
-
-    def _is_idempotent(self, tool_name: str) -> bool:
-        if tool_name in self.config.mutating_tools:
-            return False
-        return tool_name in self.config.idempotent_tools
-
-
-def toolguard_synthetic_result(decision: ToolGuardrailDecision) -> str:
-    """Build a synthetic role=tool content string for a blocked tool call."""
-    return json.dumps(
-        {
-            "error": decision.message,
-            "guardrail": decision.to_metadata(),
-        },
-        ensure_ascii=False,
-    )
-
-
-def append_toolguard_guidance(result: str, decision: ToolGuardrailDecision) -> str:
-    """Append runtime guidance to the current tool result content."""
-    if decision.action not in {"warn", "halt"} or not decision.message:
-        return result
-    label = "Tool loop hard stop" if decision.action == "halt" else "Tool loop warning"
-    suffix = (
-        f"\n\n[{label}: "
-        f"{decision.code}; count={decision.count}; {decision.message}]"
-    )
-    return (result or "") + suffix
-
-
-def _coerce_args(args: Mapping[str, Any] | None) -> Mapping[str, Any]:
-    return args if isinstance(args, Mapping) else {}
-
-
-def _result_hash(result: str | None) -> str:
-    parsed = safe_json_loads(result or "")
-    if parsed is not None:
-        try:
-            canonical = json.dumps(
-                parsed,
-                ensure_ascii=False,
-                sort_keys=True,
-                separators=(",", ":"),
-                default=str,
-            )
-        except TypeError:
-            canonical = str(parsed)
-    else:
-        canonical = result or ""
-    return _sha256(canonical)
-
-
-def _as_bool(value: Any, default: bool) -> bool:
-    if value is None:
-        return default
-    if isinstance(value, bool):
-        return value
-    if isinstance(value, (int, float)):
-        return bool(value)
-    if isinstance(value, str):
-        lowered = value.strip().lower()
-        if lowered in {"1", "true", "yes", "on", "enabled"}:
-            return True
-        if lowered in {"0", "false", "no", "off", "disabled"}:
-            return False
-    return default
-
-
-def _positive_int(value: Any, default: int) -> int:
-    if value is None:
-        return default
-    try:
-        parsed = int(value)
-    except (TypeError, ValueError):
-        return default
-    return parsed if parsed >= 1 else default
-
-
-def _sha256(value: str) -> str:
-    return hashlib.sha256(value.encode("utf-8")).hexdigest()
@@ -6,9 +6,16 @@ Usage:
    result = transport.normalize_response(raw_response)
 """

-from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401
+from agent.transports.types import (
+    NormalizedResponse,
+    ToolCall,
+    Usage,
+    build_tool_call,
+    map_finish_reason,
+)  # noqa: F401

 _REGISTRY: dict = {}
+_discovered: bool = False


 def register_transport(api_mode: str, transport_cls: type) -> None:
@@ -23,6 +30,9 @@ def get_transport(api_mode: str):
    This allows gradual migration — call sites can check for None
    and fall back to the legacy code path.
    """
+    global _discovered
+    if not _discovered:
+        _discover_transports()
    cls = _REGISTRY.get(api_mode)
    if cls is None:
        # The registry can be partially populated when a specific transport
@@ -38,6 +48,8 @@ def get_transport(api_mode: str):

 def _discover_transports() -> None:
    """Import all transport modules to trigger auto-registration."""
+    global _discovered
+    _discovered = True
    try:
        import agent.transports.anthropic  # noqa: F401
    except ImportError:
@@ -58,7 +58,6 @@ class AnthropicTransport(ProviderTransport):
            context_length: int | None
            base_url: str | None
            fast_mode: bool
-            drop_context_1m_beta: bool
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

@@ -74,7 +73,6 @@ class AnthropicTransport(ProviderTransport):
            context_length=params.get("context_length"),
            base_url=params.get("base_url"),
            fast_mode=params.get("fast_mode", False),
-            drop_context_1m_beta=params.get("drop_context_1m_beta", False),
        )

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
@@ -10,95 +10,14 @@ reasoning configuration, temperature handling, and extra_body assembly.
 """

 import copy
-from typing import Any, Dict, List, Optional
+from typing import Any

-from agent.lmstudio_reasoning import resolve_lmstudio_effort
 from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage


-def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
-    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig."""
-    if reasoning_config is None or not isinstance(reasoning_config, dict):
-        return None
-
-    normalized_model = (model or "").strip().lower()
-    if normalized_model.startswith("google/"):
-        normalized_model = normalized_model.split("/", 1)[1]
-
-    # ``thinking_config`` is a Gemini-only request parameter. The same
-    # ``gemini`` provider also serves Gemma (and historically PaLM/Bard);
-    # those reject the field with HTTP 400 "Unknown name 'thinking_config':
-    # Cannot find field" — including the polite ``{"includeThoughts": False}``
-    # form. Omit the field entirely on non-Gemini models. (#17426)
-    if not normalized_model.startswith("gemini"):
-        return None
-
-    if reasoning_config.get("enabled") is False:
-        # Gemini can hide thought parts even when internal thinking still
-        # happens; omit thinkingLevel to avoid model-specific validation quirks.
-        return {"includeThoughts": False}
-
-    effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
-    if effort == "none":
-        return {"includeThoughts": False}
-
-    thinking_config: Dict[str, Any] = {"includeThoughts": True}
-
-    # Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
-    # coarse effort levels. ``includeThoughts`` alone is enough to surface
-    # thought parts without risking request validation errors.
-    if normalized_model.startswith("gemini-2.5-"):
-        return thinking_config
-
-    if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
-        effort = "medium"
-
-    # Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
-    # is stricter (low/high). Clamp Hermes' wider effort set to what each
-    # family accepts so we never forward an undocumented level verbatim.
-    if normalized_model.startswith(("gemini-3", "gemini-3.1")):
-        if "flash" in normalized_model:
-            if effort in {"minimal", "low"}:
-                thinking_config["thinkingLevel"] = "low"
-            elif effort in {"high", "xhigh"}:
-                thinking_config["thinkingLevel"] = "high"
-            else:
-                thinking_config["thinkingLevel"] = "medium"
-        elif "pro" in normalized_model:
-            thinking_config["thinkingLevel"] = (
-                "high" if effort in {"high", "xhigh"} else "low"
-            )
-
-    return thinking_config
-
-
-def _snake_case_gemini_thinking_config(config: dict | None) -> dict | None:
-    """Convert Gemini thinking config keys to the OpenAI-compat field names."""
-    if not isinstance(config, dict) or not config:
-        return None
-
-    translated: Dict[str, Any] = {}
-    if isinstance(config.get("includeThoughts"), bool):
-        translated["include_thoughts"] = config["includeThoughts"]
-    if isinstance(config.get("thinkingLevel"), str) and config["thinkingLevel"].strip():
-        translated["thinking_level"] = config["thinkingLevel"].strip().lower()
-    if isinstance(config.get("thinkingBudget"), (int, float)):
-        translated["thinking_budget"] = int(config["thinkingBudget"])
-    return translated or None
-
-
-def _is_gemini_openai_compat_base_url(base_url: Any) -> bool:
-    normalized = str(base_url or "").strip().rstrip("/").lower()
-    if not normalized:
-        return False
-    if "generativelanguage.googleapis.com" not in normalized:
-        return False
-    return normalized.endswith("/openai")
-
-
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@@ -109,7 +28,9 @@ class ChatCompletionsTransport(ProviderTransport):
    def api_mode(self) -> str:
        return "chat_completions"

-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
+    def convert_messages(
+        self, messages: list[dict[str, Any]], **kwargs
+    ) -> list[dict[str, Any]]:
        """Messages are already in OpenAI format — sanitize Codex leaks only.

        Strips Codex Responses API fields (``codex_reasoning_items`` /
@@ -126,7 +47,9 @@ class ChatCompletionsTransport(ProviderTransport):
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
-                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
+                    if isinstance(tc, dict) and (
+                        "call_id" in tc or "response_item_id" in tc
+                    ):
                        needs_sanitize = True
                        break
                if needs_sanitize:
@@ -149,78 +72,52 @@ class ChatCompletionsTransport(ProviderTransport):
                        tc.pop("response_item_id", None)
        return sanitized

-    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Tools are already in OpenAI format — identity."""
        return tools

    def build_kwargs(
        self,
        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]] | None = None,
        **params,
-    ) -> Dict[str, Any]:
+    ) -> dict[str, Any]:
        """Build chat.completions.create() kwargs.

-        This is the most complex transport method — it handles ~16 providers
-        via params rather than subclasses.
-
-        params:
+        params (all optional):
            timeout: float — API call timeout
            max_tokens: int | None — user-configured max tokens
-            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
+            ephemeral_max_output_tokens: int | None — one-shot override
            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
            reasoning_config: dict | None
            request_overrides: dict | None
            session_id: str | None
-            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
            model_lower: str — lowercase model name for pattern matching
-            # Provider detection flags (all optional, default False)
-            is_openrouter: bool
-            is_nous: bool
-            is_qwen_portal: bool
-            is_github_models: bool
-            is_nvidia_nim: bool
-            is_kimi: bool
-            is_lmstudio: bool
-            is_custom_provider: bool
-            ollama_num_ctx: int | None
-            # Provider routing
-            provider_preferences: dict | None
-            # Qwen-specific
-            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
-            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
-            # Temperature
-            fixed_temperature: Any — from _fixed_temperature_for_model()
-            omit_temperature: bool
-            # Reasoning
+            # Provider profile path (all per-provider quirks live in providers/)
+            provider_profile: ProviderProfile | None — when present, delegates to
+                _build_kwargs_from_profile(); all flag params below are bypassed.
+            # Remaining flags — only used by the legacy fallback for unregistered
+            # providers (i.e. get_provider_profile() returned None).  Known
+            # providers all go through provider_profile.
+            qwen_session_metadata: dict | None
            supports_reasoning: bool
-            github_reasoning_extra: dict | None
-            lmstudio_reasoning_options: list[str] | None  # raw allowed_options from /api/v1/models
-            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
-            # Extra
-            extra_body_additions: dict | None — pre-built extra_body entries
+            extra_body_additions: dict | None
        """
        # Codex sanitization: drop reasoning_items / call_id / response_item_id
        sanitized = self.convert_messages(messages)

-        # Qwen portal prep AFTER codex sanitization.  If sanitize already
-        # deepcopied, reuse that copy via the in-place variant to avoid a
-        # second deepcopy.
-        is_qwen = params.get("is_qwen_portal", False)
-        if is_qwen:
-            qwen_prep = params.get("qwen_prepare_fn")
-            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
-            if sanitized is messages:
-                if qwen_prep is not None:
-                    sanitized = qwen_prep(sanitized)
-            else:
-                # Already deepcopied — transform in place
-                if qwen_prep_inplace is not None:
-                    qwen_prep_inplace(sanitized)
-                elif qwen_prep is not None:
-                    sanitized = qwen_prep(sanitized)
+        # ── Provider profile: single-path when present ──────────────────
+        _profile = params.get("provider_profile")
+        if _profile:
+            return self._build_kwargs_from_profile(
+                _profile, model, sanitized, tools, params
+            )
+
+        # ── Legacy fallback (unregistered / unknown provider) ───────────
+        # Reached only when get_provider_profile() returned None.
+        # Known providers always go through the profile path above.

        # Developer role swap for GPT-5/Codex models
        model_lower = params.get("model_lower", (model or "").lower())
@@ -233,7 +130,7 @@ class ChatCompletionsTransport(ProviderTransport):
            sanitized = list(sanitized)
            sanitized[0] = {**sanitized[0], "role": "developer"}

-        api_kwargs: Dict[str, Any] = {
+        api_kwargs: dict[str, Any] = {
            "model": model,
            "messages": sanitized,
        }
@@ -242,19 +139,6 @@ class ChatCompletionsTransport(ProviderTransport):
        if timeout is not None:
            api_kwargs["timeout"] = timeout

-        # Temperature
-        fixed_temp = params.get("fixed_temperature")
-        omit_temp = params.get("omit_temperature", False)
-        if omit_temp:
-            api_kwargs.pop("temperature", None)
-        elif fixed_temp is not None:
-            api_kwargs["temperature"] = fixed_temp
-
-        # Qwen metadata (caller precomputes {sessionId, promptId})
-        qwen_meta = params.get("qwen_session_metadata")
-        if qwen_meta and is_qwen:
-            api_kwargs["metadata"] = qwen_meta
-
        # Tools
        if tools:
            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
@@ -269,144 +153,24 @@ class ChatCompletionsTransport(ProviderTransport):
        ephemeral = params.get("ephemeral_max_output_tokens")
        max_tokens = params.get("max_tokens")
        anthropic_max_out = params.get("anthropic_max_output")
-        is_nvidia_nim = params.get("is_nvidia_nim", False)
-        is_kimi = params.get("is_kimi", False)
-        is_tokenhub = params.get("is_tokenhub", False)
-        reasoning_config = params.get("reasoning_config")

        if ephemeral is not None and max_tokens_fn:
            api_kwargs.update(max_tokens_fn(ephemeral))
        elif max_tokens is not None and max_tokens_fn:
            api_kwargs.update(max_tokens_fn(max_tokens))
-        elif is_nvidia_nim and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(16384))
-        elif is_qwen and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(65536))
-        elif is_kimi and max_tokens_fn:
-            # Kimi/Moonshot: 32000 matches Kimi CLI's default
-            api_kwargs.update(max_tokens_fn(32000))
        elif anthropic_max_out is not None:
            api_kwargs["max_tokens"] = anthropic_max_out

-        # Kimi: top-level reasoning_effort (unless thinking disabled)
-        if is_kimi:
-            _kimi_thinking_off = bool(
-                reasoning_config
-                and isinstance(reasoning_config, dict)
-                and reasoning_config.get("enabled") is False
-            )
-            if not _kimi_thinking_off:
-                _kimi_effort = "medium"
-                if reasoning_config and isinstance(reasoning_config, dict):
-                    _e = (reasoning_config.get("effort") or "").strip().lower()
-                    if _e in ("low", "medium", "high"):
-                        _kimi_effort = _e
-                api_kwargs["reasoning_effort"] = _kimi_effort
-
-        # Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
-        if is_tokenhub:
-            _tokenhub_thinking_off = bool(
-                reasoning_config
-                and isinstance(reasoning_config, dict)
-                and reasoning_config.get("enabled") is False
-            )
-            if not _tokenhub_thinking_off:
-                _tokenhub_effort = "high"
-                if reasoning_config and isinstance(reasoning_config, dict):
-                    _e = (reasoning_config.get("effort") or "").strip().lower()
-                    if _e in ("low", "medium", "high"):
-                        _tokenhub_effort = _e
-                api_kwargs["reasoning_effort"] = _tokenhub_effort
-
-        # LM Studio: top-level reasoning_effort. Only emit when the model
-        # declares reasoning support via /api/v1/models capabilities (gated
-        # upstream by params["supports_reasoning"]). resolve_lmstudio_effort
-        # is shared with run_agent's summary path so both stay in sync.
-        if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
-            _lm_effort = resolve_lmstudio_effort(
-                reasoning_config,
-                params.get("lmstudio_reasoning_options"),
-            )
-            if _lm_effort is not None:
-                api_kwargs["reasoning_effort"] = _lm_effort
-
        # extra_body assembly
-        extra_body: Dict[str, Any] = {}
+        extra_body: dict[str, Any] = {}

-        is_openrouter = params.get("is_openrouter", False)
-        is_nous = params.get("is_nous", False)
-        is_github_models = params.get("is_github_models", False)
-        provider_name = str(params.get("provider_name") or "").strip().lower()
-        base_url = params.get("base_url")
-
-        provider_prefs = params.get("provider_preferences")
-        if provider_prefs and is_openrouter:
-            extra_body["provider"] = provider_prefs
-
-        # Kimi extra_body.thinking
-        if is_kimi:
-            _kimi_thinking_enabled = True
-            if reasoning_config and isinstance(reasoning_config, dict):
-                if reasoning_config.get("enabled") is False:
-                    _kimi_thinking_enabled = False
-            extra_body["thinking"] = {
-                "type": "enabled" if _kimi_thinking_enabled else "disabled",
-            }
-
-        # Reasoning. LM Studio is handled above via top-level reasoning_effort,
-        # so skip emitting extra_body.reasoning for it.
-        if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
-            if is_github_models:
-                gh_reasoning = params.get("github_reasoning_extra")
-                if gh_reasoning is not None:
-                    extra_body["reasoning"] = gh_reasoning
+        # Generic reasoning passthrough for unknown providers
+        if params.get("supports_reasoning", False):
+            reasoning_config = params.get("reasoning_config")
+            if reasoning_config is not None:
+                extra_body["reasoning"] = dict(reasoning_config)
            else:
-                if reasoning_config is not None:
-                    rc = dict(reasoning_config)
-                    if is_nous and rc.get("enabled") is False:
-                        pass  # omit for Nous when disabled
-                    else:
-                        extra_body["reasoning"] = rc
-                else:
-                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-
-        if is_nous:
-            extra_body["tags"] = ["product=hermes-agent"]
-
-        # Ollama num_ctx
-        ollama_ctx = params.get("ollama_num_ctx")
-        if ollama_ctx:
-            options = extra_body.get("options", {})
-            options["num_ctx"] = ollama_ctx
-            extra_body["options"] = options
-
-        # Ollama/custom think=false
-        if params.get("is_custom_provider", False):
-            if reasoning_config and isinstance(reasoning_config, dict):
-                _effort = (reasoning_config.get("effort") or "").strip().lower()
-                _enabled = reasoning_config.get("enabled", True)
-                if _effort == "none" or _enabled is False:
-                    extra_body["think"] = False
-
-        if is_qwen:
-            extra_body["vl_high_resolution_images"] = True
-
-        if provider_name == "gemini":
-            raw_thinking_config = _build_gemini_thinking_config(model, reasoning_config)
-            if _is_gemini_openai_compat_base_url(base_url):
-                thinking_config = _snake_case_gemini_thinking_config(raw_thinking_config)
-                if thinking_config:
-                    openai_compat_extra = extra_body.get("extra_body", {})
-                    google_extra = openai_compat_extra.get("google", {})
-                    google_extra["thinking_config"] = thinking_config
-                    openai_compat_extra["google"] = google_extra
-                    extra_body["extra_body"] = openai_compat_extra
-            elif raw_thinking_config:
-                extra_body["thinking_config"] = raw_thinking_config
-        elif provider_name == "google-gemini-cli":
-            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
-            if thinking_config:
-                extra_body["thinking_config"] = thinking_config
+                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}

        # Merge any pre-built extra_body additions
        additions = params.get("extra_body_additions")
@@ -423,6 +187,117 @@ class ChatCompletionsTransport(ProviderTransport):

        return api_kwargs

+    def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
+        """Build and return API kwargs using a ProviderProfile — single path, no legacy flags.
+
+        Replaces the flag-based kwargs assembly when a provider_profile is passed. Provider quirks
+        come from the profile; the role swap and Moonshot tool fix below still key on the model name.
+        """
+        from providers.base import OMIT_TEMPERATURE
+
+        # Message preprocessing — delegated to the profile's prepare_messages hook
+        sanitized = profile.prepare_messages(sanitized)
+
+        # Developer role swap — model-name-based, applies to all providers
+        _model_lower = (model or "").lower()
+        if (
+            sanitized
+            and isinstance(sanitized[0], dict)
+            and sanitized[0].get("role") == "system"
+            and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            sanitized = list(sanitized)  # shallow copy so the swap does not mutate the list in place
+            sanitized[0] = {**sanitized[0], "role": "developer"}
+
+        api_kwargs: dict[str, Any] = {
+            "model": model,
+            "messages": sanitized,
+        }
+
+        # Temperature — the profile's setting wins; the caller's value is a fallback only
+        if profile.fixed_temperature is OMIT_TEMPERATURE:
+            pass  # Don't include temperature at all
+        elif profile.fixed_temperature is not None:
+            api_kwargs["temperature"] = profile.fixed_temperature
+        else:
+            # Use caller's temperature if provided
+            temp = params.get("temperature")
+            if temp is not None:
+                api_kwargs["temperature"] = temp
+
+        # Timeout
+        timeout = params.get("timeout")
+        if timeout is not None:
+            api_kwargs["timeout"] = timeout
+
+        # Tools — apply Moonshot/Kimi schema sanitization regardless of path
+        if tools:
+            if is_moonshot_model(model):
+                tools = sanitize_moonshot_tools(tools)
+            api_kwargs["tools"] = tools
+
+        # max_tokens priority: ephemeral > user > profile default (all via max_tokens_fn) > anthropic_max
+        max_tokens_fn = params.get("max_tokens_param_fn")
+        ephemeral = params.get("ephemeral_max_output_tokens")
+        user_max = params.get("max_tokens")
+        anthropic_max = params.get("anthropic_max_output")
+
+        if ephemeral is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(ephemeral))
+        elif user_max is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(user_max))
+        elif profile.default_max_tokens and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
+        elif anthropic_max is not None:
+            api_kwargs["max_tokens"] = anthropic_max
+
+        # Provider-specific extras (reasoning_effort, metadata, etc.) — an (extra_body, top-level) pair
+        reasoning_config = params.get("reasoning_config")
+        extra_body_from_profile, top_level_from_profile = (
+            profile.build_api_kwargs_extras(
+                reasoning_config=reasoning_config,
+                supports_reasoning=params.get("supports_reasoning", False),
+                qwen_session_metadata=params.get("qwen_session_metadata"),
+                model=model,
+                ollama_num_ctx=params.get("ollama_num_ctx"),
+            )
+        )
+        api_kwargs.update(top_level_from_profile)
+
+        # extra_body assembly — later update() calls win on key collisions
+        extra_body: dict[str, Any] = {}
+
+        # Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
+        profile_body = profile.build_extra_body(
+            session_id=params.get("session_id"),
+            provider_preferences=params.get("provider_preferences"),
+        )
+        if profile_body:
+            extra_body.update(profile_body)
+
+        # Profile's reasoning/thinking extra_body entries
+        if extra_body_from_profile:
+            extra_body.update(extra_body_from_profile)
+
+        # Merge any pre-built extra_body additions from the caller
+        additions = params.get("extra_body_additions")
+        if additions:
+            extra_body.update(additions)
+
+        # Request overrides (user config) — a dict "extra_body" is merged; other keys set top-level kwargs
+        overrides = params.get("request_overrides")
+        if overrides:
+            for k, v in overrides.items():
+                if k == "extra_body" and isinstance(v, dict):
+                    extra_body.update(v)
+                else:
+                    api_kwargs[k] = v
+
+        if extra_body:
+            api_kwargs["extra_body"] = extra_body
+
+        return api_kwargs
+
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize OpenAI ChatCompletion to NormalizedResponse.

@@ -444,7 +319,7 @@ class ChatCompletionsTransport(ProviderTransport):
                # Gemini 3 thinking models attach extra_content with
                # thought_signature — without replay on the next turn the API
                # rejects the request with 400.
-                tc_provider_data: Dict[str, Any] = {}
+                tc_provider_data: dict[str, Any] = {}
                extra = getattr(tc, "extra_content", None)
                if extra is None and hasattr(tc, "model_extra"):
                    extra = (tc.model_extra or {}).get("extra_content")
@@ -455,12 +330,14 @@ class ChatCompletionsTransport(ProviderTransport):
                        except Exception:
                            pass
                    tc_provider_data["extra_content"] = extra
-                tool_calls.append(ToolCall(
-                    id=tc.id,
-                    name=tc.function.name,
-                    arguments=tc.function.arguments,
-                    provider_data=tc_provider_data or None,
-                ))
+                tool_calls.append(
+                    ToolCall(
+                        id=tc.id,
+                        name=tc.function.name,
+                        arguments=tc.function.arguments,
+                        provider_data=tc_provider_data or None,
+                    )
+                )

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -477,13 +354,9 @@ class ChatCompletionsTransport(ProviderTransport):
        # so keep them apart in provider_data rather than merging.
        reasoning = getattr(msg, "reasoning", None)
        reasoning_content = getattr(msg, "reasoning_content", None)
-        if reasoning_content is None and hasattr(msg, "model_extra"):
-            model_extra = getattr(msg, "model_extra", None) or {}
-            if isinstance(model_extra, dict) and "reasoning_content" in model_extra:
-                reasoning_content = model_extra["reasoning_content"]

-        provider_data: Dict[str, Any] = {}
-        if reasoning_content is not None:
+        provider_data: dict[str, Any] = {}
+        if reasoning_content:
            provider_data["reasoning_content"] = reasoning_content
        rd = getattr(msg, "reasoning_details", None)
        if rd:
@@ -508,7 +381,7 @@ class ChatCompletionsTransport(ProviderTransport):
            return False
        return True

-    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+    def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
        usage = getattr(response, "usage", None)
        if usage is None:
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
 from typing import Any, Dict, List, Optional

 from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall
+from agent.transports.types import NormalizedResponse, ToolCall, Usage


 class ResponsesApiTransport(ProviderTransport):
@@ -151,6 +151,8 @@ class ResponsesApiTransport(ProviderTransport):
        """Normalize Codex Responses API response to NormalizedResponse."""
        from agent.codex_responses_adapter import (
            _normalize_codex_response,
+            _extract_responses_message_text,
+            _extract_responses_reasoning_text,
        )

        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
@@ -12,7 +12,7 @@ from __future__ import annotations

 import json
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
+from typing import Any


@dataclass
@@ -32,10 +32,10 @@ class ToolCall:
    * Others: ``None``
    """

-    id: Optional[str]
+    id: str | None
    name: str
    arguments: str  # JSON string
-    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+    provider_data: dict[str, Any] | None = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # The agent loop reads tc.function.name / tc.function.arguments
@@ -47,17 +47,17 @@ class ToolCall:
        return "function"

    @property
-    def function(self) -> "ToolCall":
+    def function(self) -> ToolCall:
        """Return self so tc.function.name / tc.function.arguments work."""
        return self

    @property
-    def call_id(self) -> Optional[str]:
+    def call_id(self) -> str | None:
        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
        return (self.provider_data or {}).get("call_id")

    @property
-    def response_item_id(self) -> Optional[str]:
+    def response_item_id(self) -> str | None:
        """Codex response_item_id from provider_data."""
        return (self.provider_data or {}).get("response_item_id")

@@ -101,18 +101,18 @@ class NormalizedResponse:
    * Others: ``None``
    """

-    content: Optional[str]
-    tool_calls: Optional[List[ToolCall]]
+    content: str | None
+    tool_calls: list[ToolCall] | None
    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
-    reasoning: Optional[str] = None
-    usage: Optional[Usage] = None
-    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+    reasoning: str | None = None
+    usage: Usage | None = None
+    provider_data: dict[str, Any] | None = field(default=None, repr=False)

    # ── Backward compatibility ──────────────────────────────────
    # The shim _nr_to_assistant_message() mapped these from provider_data.
    # These properties let NormalizedResponse pass through directly.
    @property
-    def reasoning_content(self) -> Optional[str]:
+    def reasoning_content(self) -> str | None:
        pd = self.provider_data or {}
        return pd.get("reasoning_content")

@@ -136,8 +136,9 @@ class NormalizedResponse:
 # Factory helpers
 # ---------------------------------------------------------------------------

+
 def build_tool_call(
-    id: Optional[str],
+    id: str | None,
    name: str,
    arguments: Any,
    **provider_fields: Any,
@@ -151,7 +152,7 @@ def build_tool_call(
    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)


-def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
+def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
    """Translate a provider-specific stop reason to the normalised set.

    Falls back to ``"stop"`` for unknown or ``None`` reasons.
@@ -359,25 +359,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://aws.amazon.com/bedrock/pricing/",
        pricing_version="bedrock-pricing-2026-04",
    ),
-    # MiniMax
-    (
-        "minimax",
-        "minimax-m2.7",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("0.30"),
-        output_cost_per_million=Decimal("1.20"),
-        source="official_docs_snapshot",
-        pricing_version="minimax-pricing-2026-04",
-    ),
-    (
-        "minimax-cn",
-        "minimax-m2.7",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("0.30"),
-        output_cost_per_million=Decimal("1.20"),
-        source="official_docs_snapshot",
-        pricing_version="minimax-pricing-2026-04",
-    ),
 }


@@ -419,8 +400,6 @@ def resolve_billing_route(
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name == "openai":
        return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
-    if provider_name in {"minimax", "minimax-cn"}:
-        return BillingRoute(provider=provider_name, model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
    if provider_name in {"custom", "local"} or (base and "localhost" in base):
        return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
    return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
@@ -30,13 +30,14 @@ model:
  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
-  #   "lmstudio"     - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
  #
  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
-  #   "custom"       - Any other OpenAI-compatible endpoint. Set base_url below.
-  #   Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
-  #   LM Studio is first-class and uses provider: "lmstudio".
-  #   It works with both no-auth and auth-enabled server modes.
+  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
+  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
+  #   Example for LM Studio:
+  #     provider: "lmstudio"
+  #     base_url: "http://localhost:1234/v1"
+  #   No API key needed — local servers typically ignore auth.
  #
  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  provider: "auto"
@@ -180,11 +181,6 @@ terminal:
 #   lifetime_seconds: 300
 #   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 #   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
-#   # Optional: run the container as your host user's uid:gid so files written
-#   # into bind-mounted dirs are owned by you, not root. Drops SETUID/SETGID
-#   # caps too since no gosu privilege drop is needed. Leave off if your
-#   # chosen docker_image expects to start as root.
-#   docker_run_as_host_user: true
 #   # Optional: explicitly forward selected env vars into Docker.
 #   # These values come from your current shell first, then ~/.hermes/.env.
 #   # Warning: anything forwarded here is visible to commands run in the container.
@@ -289,25 +285,6 @@ browser:
  # after this period of no activity between agent loops (default: 120 = 2 minutes)
  inactivity_timeout: 120

-# =============================================================================
-# Tool Loop Guardrails
-# =============================================================================
-# Soft warnings are enabled by default. They append guidance to repeated failed
-# or non-progressing tool results but still let the tool execute. Hard stops are
-# opt-in circuit breakers for autonomous/cron sessions where stopping a loop is
-# preferable to spending the full iteration budget.
-tool_loop_guardrails:
-  warnings_enabled: true
-  hard_stop_enabled: false
-  warn_after:
-    exact_failure: 2
-    same_tool_failure: 3
-    idempotent_no_progress: 2
-  hard_stop_after:
-    exact_failure: 5
-    same_tool_failure: 8
-    idempotent_no_progress: 5
-
 # =============================================================================
 # Context Compression (Auto-shrinks long conversations)
 # =============================================================================
@@ -589,7 +566,7 @@ agent:
 #   - A preset like "hermes-cli" or "hermes-telegram" (curated tool set)
 #   - A list of individual toolsets to compose your own (see list below)
 #
-# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot, teams
+# Supported platform keys: cli, telegram, discord, whatsapp, slack, qqbot
 #
 # Examples:
 #
@@ -619,7 +596,6 @@ agent:
 #   signal:        hermes-signal         (same as telegram)
 #   homeassistant: hermes-homeassistant  (same as telegram)
 #   qqbot:            hermes-qqbot            (same as telegram)
-#   teams:            hermes-teams            (same as telegram)
 #
 platform_toolsets:
  cli: [hermes-cli]
@@ -631,7 +607,6 @@ platform_toolsets:
  homeassistant: [hermes-homeassistant]
  qqbot: [hermes-qqbot]
  yuanbao: [hermes-yuanbao]
-  teams: [hermes-teams]

 # =============================================================================
 # Gateway Platform Settings
@@ -953,7 +928,7 @@ display:
  #     agent_name: "My Agent"               # Banner title and branding
  #     welcome: "Welcome message"           # Shown at CLI startup
  #     response_label: " ⚔ Agent "         # Response box header label
-  #     prompt_symbol: "⚔"                  # Prompt symbol (bare token; renderers add trailing space)
+  #     prompt_symbol: "⚔ ❯ "              # Prompt symbol
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default
@@ -21,7 +21,6 @@ from typing import Optional, Dict, List, Any, Union
 logger = logging.getLogger(__name__)

 from hermes_time import now as _hermes_now
-from utils import atomic_replace

 try:
    from croniter import croniter
@@ -313,21 +312,13 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None
    elif schedule["kind"] == "cron":
        if not HAS_CRONITER:
            logger.warning(
-                "Cannot compute next run for cron schedule %r: 'croniter' is "
-                "not installed. croniter is a core dependency as of v0.9.x; "
-                "reinstall hermes-agent or run 'pip install croniter' in your "
-                "runtime env.",
+                "Cannot compute next run for cron schedule %r: 'croniter' "
+                "is not installed. Install the 'cron' extra (pip install "
+                "'hermes-agent[cron]') to re-enable recurring cron jobs.",
                schedule.get("expr"),
            )
            return None
-        # Use last_run_at as the croniter base when available, consistent
-        # with interval jobs.  This ensures that after a crash/restart,
-        # the next run is anchored to the actual last execution time
-        # rather than to an arbitrary restart time.
-        base_time = now
-        if last_run_at:
-            base_time = _ensure_aware(datetime.fromisoformat(last_run_at))
-        cron = croniter(schedule["expr"], base_time)
+        cron = croniter(schedule["expr"], now)
        next_run = cron.get_next(datetime)
        return next_run.isoformat()

@@ -376,7 +367,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
            json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
            f.flush()
            os.fsync(f.fileno())
-        atomic_replace(tmp_path, JOBS_FILE)
+        os.replace(tmp_path, JOBS_FILE)
        _secure_file(JOBS_FILE)
    except BaseException:
        try:
@@ -872,7 +863,7 @@ def save_job_output(job_id: str, output: str):
            f.write(output)
            f.flush()
            os.fsync(f.fileno())
-        atomic_replace(tmp_path, output_file)
+        os.replace(tmp_path, output_file)
        _secure_file(output_file)
    except BaseException:
        try:
@@ -882,121 +873,3 @@ def save_job_output(job_id: str, output: str):
        raise
    
    return output_file
-
-
-# =============================================================================
-# Skill reference rewriting (curator integration)
-# =============================================================================
-
-def rewrite_skill_refs(
-    consolidated: Optional[Dict[str, str]] = None,
-    pruned: Optional[List[str]] = None,
-) -> Dict[str, Any]:
-    """Rewrite cron job skill references after a curator consolidation pass.
-
-    When the curator consolidates a skill X into umbrella Y (or archives X
-    as pruned), any cron job that lists ``X`` in its ``skills`` field will
-    fail to load ``X`` at run time — the scheduler logs a warning and
-    skips the skill, so the job runs without the instructions it was
-    scheduled to follow. See cron/scheduler.py where ``skill_view`` is
-    called per skill name.
-
-    This function repairs cron jobs in-place:
-
-    - A skill listed in ``consolidated`` is replaced with its umbrella
-      target (the ``into`` value). If the umbrella is already in the
-      job's skill list, the stale name is dropped without duplication.
-    - A skill listed in ``pruned`` is dropped outright — there is no
-      forwarding target.
-    - Ordering and other skills in the list are preserved.
-    - The legacy ``skill`` field is realigned via ``_apply_skill_fields``.
-
-    Args:
-        consolidated: mapping of ``old_skill_name -> umbrella_skill_name``.
-        pruned: list of skill names that were archived with no forwarding
-            target.
-
-    Returns a report dict::
-
-        {
-            "rewrites": [
-                {
-                    "job_id": ...,
-                    "job_name": ...,
-                    "before": [...],
-                    "after": [...],
-                    "mapped": {"old": "new", ...},
-                    "dropped": ["old", ...],
-                },
-                ...
-            ],
-            "jobs_updated": N,
-            "jobs_scanned": M,
-        }
-
-    Best-effort: exceptions from loading/saving propagate to the caller so
-    tests can assert behaviour; the curator invocation site wraps this
-    call in a try/except so a failure here never breaks the curator.
-    """
-    consolidated = dict(consolidated or {})
-    pruned_set = set(pruned or [])
-    # A skill listed in both wins as "consolidated" — it has a target,
-    # which is the more useful of the two outcomes.
-    pruned_set -= set(consolidated.keys())
-
-    if not consolidated and not pruned_set:
-        return {"rewrites": [], "jobs_updated": 0, "jobs_scanned": 0}
-
-    with _jobs_file_lock:
-        jobs = load_jobs()
-        rewrites: List[Dict[str, Any]] = []
-        changed = False
-
-        for job in jobs:
-            skills_before = _normalize_skill_list(job.get("skill"), job.get("skills"))
-            if not skills_before:
-                continue
-
-            mapped: Dict[str, str] = {}
-            dropped: List[str] = []
-            new_skills: List[str] = []
-
-            for name in skills_before:
-                if name in consolidated:
-                    target = consolidated[name]
-                    mapped[name] = target
-                    if target and target not in new_skills:
-                        new_skills.append(target)
-                elif name in pruned_set:
-                    dropped.append(name)
-                else:
-                    if name not in new_skills:
-                        new_skills.append(name)
-
-            if not mapped and not dropped:
-                continue
-
-            job["skills"] = new_skills
-            job["skill"] = new_skills[0] if new_skills else None
-            changed = True
-
-            rewrites.append({
-                "job_id": job.get("id"),
-                "job_name": job.get("name") or job.get("id"),
-                "before": list(skills_before),
-                "after": list(new_skills),
-                "mapped": mapped,
-                "dropped": dropped,
-            })
-
-        if changed:
-            save_jobs(jobs)
-            logger.info(
-                "Curator rewrote skill references in %d cron job(s)", len(rewrites)
-            )
-
-        return {
-            "rewrites": rewrites,
-            "jobs_updated": len(rewrites),
-            "jobs_scanned": len(jobs),
-        }
@@ -198,9 +198,7 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
            if resolved:
                parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
                if resolved_is_explicit:
-                    chat_id = parsed_chat_id
-                    if parsed_thread_id is not None:
-                        thread_id = parsed_thread_id
+                    chat_id, thread_id = parsed_chat_id, parsed_thread_id
                else:
                    chat_id = resolved
        except Exception:
@@ -233,32 +231,12 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
    }


-def _normalize_deliver_value(deliver) -> str:
-    """Normalize a stored/submitted ``deliver`` value to its canonical string form.
-
-    The contract is that ``deliver`` is a string (``"local"``, ``"origin"``,
-    ``"telegram"``, ``"telegram:-1001:17"``, or comma-separated combinations).
-    Historically some callers — MCP clients passing an array, direct edits of
-    ``jobs.json``, or stale code paths — have stored a list/tuple like
-    ``["telegram"]``.  ``str(["telegram"])`` would serialize to the literal
-    string ``"['telegram']"``, which is not a known platform and fails
-    resolution silently.  Flatten lists/tuples into a comma-separated string
-    so both forms work.  Returns ``"local"`` for anything falsy.
-    """
-    if deliver is None or deliver == "":
-        return "local"
-    if isinstance(deliver, (list, tuple)):
-        parts = [str(p).strip() for p in deliver if str(p).strip()]
-        return ",".join(parts) if parts else "local"
-    return str(deliver)
-
-
 def _resolve_delivery_targets(job: dict) -> List[dict]:
    """Resolve all concrete auto-delivery targets for a cron job (supports comma-separated deliver)."""
-    deliver = _normalize_deliver_value(job.get("deliver", "local"))
+    deliver = job.get("deliver", "local")
    if deliver == "local":
        return []
-    parts = [p.strip() for p in deliver.split(",") if p.strip()]
+    parts = [p.strip() for p in str(deliver).split(",") if p.strip()]
    seen = set()
    targets = []
    for part in parts:
@@ -277,21 +255,13 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]:
    return targets[0] if targets else None


-# Media extension sets — audio routing is centralized in gateway.platforms.base
-# via should_send_media_as_audio() so Telegram-specific rules stay in one place.
+# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background
+_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'})
 _VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'})
 _IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'})


-def _send_media_via_adapter(
-    adapter,
-    chat_id: str,
-    media_files: list,
-    metadata: dict | None,
-    loop,
-    job: dict,
-    platform=None,
-) -> None:
+def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None:
    """Send extracted MEDIA files as native platform attachments via a live adapter.

    Routes each file to the appropriate adapter method (send_voice, send_image_file,
@@ -300,13 +270,10 @@ def _send_media_via_adapter(
    """
    from pathlib import Path

-    from gateway.platforms.base import should_send_media_as_audio
-
    for media_path, _is_voice in media_files:
        try:
            ext = Path(media_path).suffix.lower()
-            route_platform = platform if platform is not None else getattr(adapter, "platform", None)
-            if should_send_media_as_audio(route_platform, ext, is_voice=_is_voice):
+            if ext in _AUDIO_EXTS:
                coro = adapter.send_voice(chat_id=chat_id, audio_path=media_path, metadata=metadata)
            elif ext in _VIDEO_EXTS:
                coro = adapter.send_video(chat_id=chat_id, video_path=media_path, metadata=metadata)
@@ -352,6 +319,27 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
    from tools.send_message_tool import _send_to_platform
    from gateway.config import load_gateway_config, Platform

+    platform_map = {
+        "telegram": Platform.TELEGRAM,
+        "discord": Platform.DISCORD,
+        "slack": Platform.SLACK,
+        "whatsapp": Platform.WHATSAPP,
+        "signal": Platform.SIGNAL,
+        "matrix": Platform.MATRIX,
+        "mattermost": Platform.MATTERMOST,
+        "homeassistant": Platform.HOMEASSISTANT,
+        "dingtalk": Platform.DINGTALK,
+        "feishu": Platform.FEISHU,
+        "wecom": Platform.WECOM,
+        "wecom_callback": Platform.WECOM_CALLBACK,
+        "weixin": Platform.WEIXIN,
+        "email": Platform.EMAIL,
+        "sms": Platform.SMS,
+        "bluebubbles": Platform.BLUEBUBBLES,
+        "qqbot": Platform.QQBOT,
+        "yuanbao": Platform.YUANBAO,
+    }
+
    # Optionally wrap the content with a header/footer so the user knows this
    # is a cron delivery.  Wrapping is on by default; set cron.wrap_response: false
    # in config.yaml for clean output.
@@ -408,23 +396,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                job["id"], platform_name, chat_id, thread_id,
            )

-        # Built-in names resolve to their enum member; plugin platform names
-        # create dynamic members via Platform._missing_().
-        try:
-            platform = Platform(platform_name.lower())
-        except (ValueError, KeyError):
+        platform = platform_map.get(platform_name.lower())
+        if not platform:
            msg = f"unknown platform '{platform_name}'"
            logger.warning("Job '%s': %s", job["id"], msg)
            delivery_errors.append(msg)
            continue

-        pconfig = config.platforms.get(platform)
-        if not pconfig or not pconfig.enabled:
-            msg = f"platform '{platform_name}' not configured/enabled"
-            logger.warning("Job '%s': %s", job["id"], msg)
-            delivery_errors.append(msg)
-            continue
-
        # Prefer the live adapter when the gateway is running — this supports E2EE
        # rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt.
        runtime_adapter = (adapters or {}).get(platform)
@@ -455,15 +433,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option

                # Send extracted media files as native attachments via the live adapter
                if adapter_ok and media_files:
-                    _send_media_via_adapter(
-                        runtime_adapter,
-                        chat_id,
-                        media_files,
-                        send_metadata,
-                        loop,
-                        job,
-                        platform=platform,
-                    )
+                    _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job)

                if adapter_ok:
                    logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id)
@@ -475,6 +445,13 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                )

        if not delivered:
+            pconfig = config.platforms.get(platform)
+            if not pconfig or not pconfig.enabled:
+                msg = f"platform '{platform_name}' not configured/enabled"
+                logger.warning("Job '%s': %s", job["id"], msg)
+                delivery_errors.append(msg)
+                continue
+
            # Standalone path: run the async send in a fresh event loop (safe from any thread)
            coro = _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)
            try:
@@ -861,13 +838,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        chat_id=str(origin["chat_id"]) if origin else "",
        chat_name=origin.get("chat_name", "") if origin else "",
    )
-    _cron_delivery_vars = (
-        "HERMES_CRON_AUTO_DELIVER_PLATFORM",
-        "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
-        "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
-    )
-    for _var_name in _cron_delivery_vars:
-        _VAR_MAP[_var_name].set("")

    # Per-job working directory.  When set (and validated at create/update
    # time), we point TERMINAL_CWD at it so:
@@ -906,11 +876,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        if delivery_target:
            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
-            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(
-                ""
-                if delivery_target.get("thread_id") is None
-                else str(delivery_target["thread_id"])
-            )
+            if delivery_target.get("thread_id") is not None:
+                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -1044,12 +1011,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            # Cron jobs should always inherit the user's SOUL.md identity from
-            # HERMES_HOME. When a workdir is configured, also inject project
-            # context files (AGENTS.md / CLAUDE.md / .cursorrules) from there.
-            # Without a workdir, keep cwd context discovery disabled.
+            # When a workdir is configured, inject AGENTS.md / CLAUDE.md /
+            # .cursorrules from that directory; otherwise preserve the old
+            # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
            skip_context_files=not bool(_job_workdir),
-            load_soul_identity=True,
            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
@@ -1064,18 +1029,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        #
        # Uses the agent's built-in activity tracker (updated by
        # _touch_activity() on every tool call, API call, and stream delta).
-        _raw_cron_timeout = os.getenv("HERMES_CRON_TIMEOUT", "").strip()
-        if _raw_cron_timeout:
-            try:
-                _cron_timeout = float(_raw_cron_timeout)
-            except (ValueError, TypeError):
-                logger.warning(
-                    "Invalid HERMES_CRON_TIMEOUT=%r; using default 600s",
-                    _raw_cron_timeout,
-                )
-                _cron_timeout = 600.0
-        else:
-            _cron_timeout = 600.0
+        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
        _cron_inactivity_limit = _cron_timeout if _cron_timeout > 0 else None
        _POLL_INTERVAL = 5.0
        _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
@@ -1150,21 +1104,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
            )

-        # If the agent itself reported failure (e.g. all retries exhausted on
-        # API errors, model abort, mid-run interrupt), do not silently mark the
-        # job as successful. run_agent populates `failed=True`/`completed=False`
-        # on these paths and may put the error into `final_response`, which
-        # would otherwise be delivered as if it were the agent's reply and the
-        # job's `last_status` set to "ok". Raise so the except handler below
-        # builds the proper failure tuple. (issue #17855)
-        if result.get("failed") is True or result.get("completed") is False:
-            _err_text = (
-                result.get("error")
-                or (result.get("final_response") or "").strip()
-                or "agent reported failure"
-            )
-            raise RuntimeError(_err_text)
-
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -1224,8 +1163,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
        # Clean up ContextVar session/delivery state for this job.
        clear_session_vars(_ctx_tokens)
-        for _var_name in _cron_delivery_vars:
-            _VAR_MAP[_var_name].set("")
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -34,13 +34,6 @@ services:
      # uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
      # - API_SERVER_HOST=0.0.0.0
      # - API_SERVER_KEY=${API_SERVER_KEY}
-      # Microsoft Teams — uncomment and fill in to enable Teams gateway.
-      # Register your bot at https://dev.botframework.com/ to get these values.
-      # - TEAMS_CLIENT_ID=${TEAMS_CLIENT_ID}
-      # - TEAMS_CLIENT_SECRET=${TEAMS_CLIENT_SECRET}
-      # - TEAMS_TENANT_ID=${TEAMS_TENANT_ID}
-      # - TEAMS_ALLOWED_USERS=${TEAMS_ALLOWED_USERS}
-      # - TEAMS_PORT=${TEAMS_PORT:-3978}
    command: ["gateway", "run"]

  dashboard:
@@ -0,0 +1,112 @@
+"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
+
+This hook is always registered. It silently skips if no BOOT.md exists.
+To activate, create ``~/.hermes/BOOT.md`` with instructions for the
+agent to execute on every gateway restart.
+
+Example BOOT.md::
+
+    # Startup Checklist
+
+    1. Check if any cron jobs failed overnight
+    2. Send a status update to Discord #general
+    3. If there are errors in /opt/app/deploy.log, summarize them
+
+The agent runs in a background thread so it doesn't block gateway
+startup. If nothing needs attention, it replies with [SILENT] to
+suppress delivery.
+"""
+
+import logging
+import threading
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger("hooks.boot-md")
+
+# Resolved once at import time; BOOT.md sits directly under the Hermes home.
+HERMES_HOME = get_hermes_home()
+BOOT_FILE = HERMES_HOME / "BOOT.md"
+
+
+def _build_boot_prompt(content: str) -> str:
+    """Wrap BOOT.md content in the instruction prompt given to the agent.
+
+    The content is placed between ``---`` fences, followed by a reminder to
+    use the send_message tool and to answer ``[SILENT]`` when idle.
+    """
+    return (
+        "You are running a startup boot checklist. Follow the BOOT.md "
+        "instructions below exactly.\n\n"
+        "---\n"
+        f"{content}\n"
+        "---\n\n"
+        "Execute each instruction. If you need to send a message to a "
+        "platform, use the send_message tool.\n"
+        "If nothing needs attention and there is nothing to report, "
+        "reply with ONLY: [SILENT]"
+    )
+
+
+def _run_boot_agent(content: str) -> None:
+    """Spawn a one-shot agent session to execute the boot instructions.
+
+    Runs synchronously in the calling thread.  Every failure, including a
+    failed import of the agent, is logged instead of raised.
+    """
+    try:
+        # Imported inside the guarded block so an import failure is logged
+        # like any other boot failure.
+        from run_agent import AIAgent
+
+        prompt = _build_boot_prompt(content)
+        agent = AIAgent(
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+            max_iterations=20,
+        )
+        result = agent.run_conversation(prompt)
+        response = result.get("final_response", "")
+        if response and "[SILENT]" not in response:
+            logger.info("boot-md completed: %s", response[:200])
+        else:
+            logger.info("boot-md completed (nothing to report)")
+    except Exception as e:
+        # Boundary of the boot run: log with the traceback, never raise.
+        logger.exception("boot-md agent failed: %s", e)
+
+
+async def handle(event_type: str, context: dict) -> None:
+    """Gateway startup handler — run BOOT.md if it exists.
+
+    Args:
+        event_type: Name of the hook event; not inspected here.
+        context: Event payload; not inspected here.
+
+    A missing or empty BOOT.md is skipped silently; one that cannot be
+    read is logged and skipped.  The agent itself runs on a daemon thread.
+    """
+    # EAFP: read directly instead of exists()-then-read, so a file removed
+    # in between, a directory named BOOT.md, a permission error, or invalid
+    # UTF-8 cannot raise out of the startup handler.
+    try:
+        content = BOOT_FILE.read_text(encoding="utf-8").strip()
+    except (FileNotFoundError, NotADirectoryError):
+        return
+    except (OSError, UnicodeDecodeError) as e:
+        logger.warning("boot-md: cannot read %s: %s", BOOT_FILE, e)
+        return
+    if not content:
+        return
+
+    logger.info("Running BOOT.md (%d chars)", len(content))
+
+    # Run in a background thread so we don't block gateway startup.
+    thread = threading.Thread(
+        target=_run_boot_agent,
+        args=(content,),
+        name="boot-md",
+        daemon=True,
+    )
+    thread.start()
@@ -86,16 +86,6 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
            continue
        platforms[plat_name] = _build_from_sessions(plat_name)

-    # Include plugin-registered platforms (dynamic enum members aren't in
-    # Platform.__members__, so the loop above misses them).
-    try:
-        from gateway.platform_registry import platform_registry
-        for entry in platform_registry.plugin_entries():
-            if entry.name not in _SKIP_SESSION_DISCOVERY and entry.name not in platforms:
-                platforms[entry.name] = _build_from_sessions(entry.name)
-    except Exception:
-        pass
-
    directory = {
        "updated_at": datetime.now().isoformat(),
        "platforms": platforms,
@@ -13,7 +13,7 @@ import os
 import json
 from pathlib import Path
 from dataclasses import dataclass, field
-from typing import Dict, List, Optional, Any, Callable
+from typing import Dict, List, Optional, Any
 from enum import Enum

 from hermes_cli.config import get_hermes_home
@@ -36,26 +36,6 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
    return is_truthy_value(value, default=default)


-def _coerce_float(value: Any, default: float) -> float:
-    """Coerce numeric config values, falling back on malformed input."""
-    if value is None:
-        return default
-    try:
-        return float(value)
-    except (TypeError, ValueError):
-        return default
-
-
-def _coerce_int(value: Any, default: int) -> int:
-    """Coerce integer config values, falling back on malformed input."""
-    if value is None:
-        return default
-    try:
-        return int(value)
-    except (TypeError, ValueError):
-        return default
-
-
 def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
    """Normalize unauthorized DM behavior to a supported value."""
    if isinstance(value, str):
@@ -65,28 +45,8 @@ def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> st
    return default


-def _normalize_notice_delivery(value: Any, default: str = "public") -> str:
-    """Normalize notice delivery mode to a supported value."""
-    if isinstance(value, str):
-        normalized = value.strip().lower()
-        if normalized in {"public", "private"}:
-            return normalized
-    return default
-
-
-# Module-level cache for bundled platform plugin names (lives outside the
-# enum so it doesn't become an accidental enum member).
-_Platform__bundled_plugin_names: Optional[set] = None
-
-
 class Platform(Enum):
-    """Supported messaging platforms.
-
-    Built-in platforms have explicit members.  Plugin platforms use dynamic
-    members created on-demand by ``_missing_()`` so that
-    ``Platform("irc")`` works without modifying this enum.  Dynamic members
-    are cached in ``_value2member_map_`` for identity-stable comparisons.
-    """
+    """Supported messaging platforms."""
    LOCAL = "local"
    TELEGRAM = "telegram"
    DISCORD = "discord"
@@ -108,76 +68,6 @@ class Platform(Enum):
    BLUEBUBBLES = "bluebubbles"
    QQBOT = "qqbot"
    YUANBAO = "yuanbao"
-    @classmethod
-    def _missing_(cls, value):
-        """Accept unknown platform names only for known plugin adapters.
-
-        Creates a pseudo-member cached in ``_value2member_map_`` so that
-        ``Platform("irc") is Platform("irc")`` holds True (identity-stable).
-        Arbitrary strings are rejected to prevent enum pollution.
-        """
-        if not isinstance(value, str) or not value.strip():
-            return None
-        # Normalise to lowercase to avoid case mismatches in config
-        value = value.strip().lower()
-        # Check cache first (another call may have created it already)
-        if value in cls._value2member_map_:
-            return cls._value2member_map_[value]
-
-        # Only create pseudo-members for bundled plugin platforms (discovered
-        # via filesystem scan) or runtime-registered plugin platforms.
-        global _Platform__bundled_plugin_names
-        if _Platform__bundled_plugin_names is None:
-            _Platform__bundled_plugin_names = cls._scan_bundled_plugin_platforms()
-        if value in _Platform__bundled_plugin_names:
-            pseudo = object.__new__(cls)
-            pseudo._value_ = value
-            pseudo._name_ = value.upper().replace("-", "_").replace(" ", "_")
-            cls._value2member_map_[value] = pseudo
-            cls._member_map_[pseudo._name_] = pseudo
-            return pseudo
-
-        # Runtime-registered plugins (e.g. user-installed, discovered after
-        # the enum was defined).
-        try:
-            from gateway.platform_registry import platform_registry
-            if platform_registry.is_registered(value):
-                pseudo = object.__new__(cls)
-                pseudo._value_ = value
-                pseudo._name_ = value.upper().replace("-", "_").replace(" ", "_")
-                cls._value2member_map_[value] = pseudo
-                cls._member_map_[pseudo._name_] = pseudo
-                return pseudo
-        except Exception:
-            pass
-
-        return None
-
-    @classmethod
-    def _scan_bundled_plugin_platforms(cls) -> set:
-        """Return names of bundled platform plugins under ``plugins/platforms/``."""
-        names: set = set()
-        try:
-            platforms_dir = Path(__file__).parent.parent / "plugins" / "platforms"
-            if platforms_dir.is_dir():
-                for child in platforms_dir.iterdir():
-                    if (
-                        child.is_dir()
-                        and (child / "__init__.py").exists()
-                        and (
-                            (child / "plugin.yaml").exists()
-                            or (child / "plugin.yml").exists()
-                        )
-                    ):
-                        names.add(child.name.lower())
-        except Exception:
-            pass
-        return names
-
-
-# Snapshot of built-in platform values before any dynamic _missing_ lookups.
-# Used to distinguish real platforms from arbitrary strings.
-_BUILTIN_PLATFORM_VALUES = frozenset(m.value for m in Platform.__members__.values())


@dataclass
@@ -330,55 +220,17 @@ class StreamingConfig:
        if not data:
            return cls()
        return cls(
-            enabled=_coerce_bool(data.get("enabled"), False),
+            enabled=data.get("enabled", False),
            transport=data.get("transport", "edit"),
-            edit_interval=_coerce_float(data.get("edit_interval"), 1.0),
-            buffer_threshold=_coerce_int(data.get("buffer_threshold"), 40),
+            edit_interval=float(data.get("edit_interval", 1.0)),
+            buffer_threshold=int(data.get("buffer_threshold", 40)),
            cursor=data.get("cursor", " ▉"),
-            fresh_final_after_seconds=_coerce_float(
-                data.get("fresh_final_after_seconds"), 60.0
+            fresh_final_after_seconds=float(
+                data.get("fresh_final_after_seconds", 60.0)
            ),
        )


-# -----------------------------------------------------------------------------
-# Built-in platform connection checkers
-# -----------------------------------------------------------------------------
-# Each callable receives a ``PlatformConfig`` and returns ``True`` when the
-# platform is sufficiently configured to be considered "connected".  Platforms
-# that rely on the generic ``token or api_key`` check (Telegram, Discord,
-# Slack, Matrix, Mattermost, HomeAssistant) do not need an entry here.
-_PLATFORM_CONNECTED_CHECKERS: dict[Platform, Callable[[PlatformConfig], bool]] = {
-    Platform.WEIXIN: lambda cfg: bool(
-        cfg.extra.get("account_id") and (cfg.token or cfg.extra.get("token"))
-    ),
-    Platform.WHATSAPP: lambda cfg: True,  # bridge handles auth
-    Platform.SIGNAL: lambda cfg: bool(cfg.extra.get("http_url")),
-    Platform.EMAIL: lambda cfg: bool(cfg.extra.get("address")),
-    Platform.SMS: lambda cfg: bool(os.getenv("TWILIO_ACCOUNT_SID")),
-    Platform.API_SERVER: lambda cfg: True,
-    Platform.WEBHOOK: lambda cfg: True,
-    Platform.FEISHU: lambda cfg: bool(cfg.extra.get("app_id")),
-    Platform.WECOM: lambda cfg: bool(cfg.extra.get("bot_id")),
-    Platform.WECOM_CALLBACK: lambda cfg: bool(
-        cfg.extra.get("corp_id") or cfg.extra.get("apps")
-    ),
-    Platform.BLUEBUBBLES: lambda cfg: bool(
-        cfg.extra.get("server_url") and cfg.extra.get("password")
-    ),
-    Platform.QQBOT: lambda cfg: bool(
-        cfg.extra.get("app_id") and cfg.extra.get("client_secret")
-    ),
-    Platform.YUANBAO: lambda cfg: bool(
-        cfg.extra.get("app_id") and cfg.extra.get("app_secret")
-    ),
-    Platform.DINGTALK: lambda cfg: bool(
-        (cfg.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID"))
-        and (cfg.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET"))
-    ),
-}
-
-
@dataclass
 class GatewayConfig:
    """
@@ -432,43 +284,61 @@ class GatewayConfig:
        for platform, config in self.platforms.items():
            if not config.enabled:
                continue
-            if self._is_platform_connected(platform, config):
+            # Weixin requires both a token and an account_id
+            if platform == Platform.WEIXIN:
+                if config.extra.get("account_id") and (config.token or config.extra.get("token")):
+                    connected.append(platform)
+                continue
+            # Platforms that use token/api_key auth
+            if config.token or config.api_key:
                connected.append(platform)
+            # WhatsApp uses enabled flag only (bridge handles auth)
+            elif platform == Platform.WHATSAPP:
+                connected.append(platform)
+            # Signal uses extra dict for config (http_url + account)
+            elif platform == Platform.SIGNAL and config.extra.get("http_url"):
+                connected.append(platform)
+            # Email uses extra dict for config (address + imap_host + smtp_host)
+            elif platform == Platform.EMAIL and config.extra.get("address"):
+                connected.append(platform)
+            # SMS uses api_key (Twilio auth token) — SID checked via env
+            elif platform == Platform.SMS and os.getenv("TWILIO_ACCOUNT_SID"):
+                connected.append(platform)
+            # API Server uses enabled flag only (no token needed)
+            elif platform == Platform.API_SERVER:
+                connected.append(platform)
+            # Webhook uses enabled flag only (secrets are per-route)
+            elif platform == Platform.WEBHOOK:
+                connected.append(platform)
+            # Feishu uses extra dict for app credentials
+            elif platform == Platform.FEISHU and config.extra.get("app_id"):
+                connected.append(platform)
+            # WeCom bot mode uses extra dict for bot credentials
+            elif platform == Platform.WECOM and config.extra.get("bot_id"):
+                connected.append(platform)
+            # WeCom callback mode uses corp_id or apps list
+            elif platform == Platform.WECOM_CALLBACK and (
+                config.extra.get("corp_id") or config.extra.get("apps")
+            ):
+                connected.append(platform)
+            # BlueBubbles uses extra dict for local server config
+            elif platform == Platform.BLUEBUBBLES and config.extra.get("server_url") and config.extra.get("password"):
+                connected.append(platform)
+            # QQBot uses extra dict for app credentials
+            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
+                connected.append(platform)
+            # Yuanbao uses extra dict for app credentials
+            elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
+                connected.append(platform)
+            # DingTalk uses client_id/client_secret from config.extra or env vars
+            elif platform == Platform.DINGTALK and (
+                config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
+            ) and (
+                config.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")
+            ):
+                connected.append(platform)
+        
        return connected
-
-    def _is_platform_connected(self, platform: Platform, config: PlatformConfig) -> bool:
-        """Check whether a single platform is sufficiently configured."""
-        # Weixin requires both a token and an account_id (checked first so
-        # the generic token branch doesn't let it through without account_id).
-        if platform == Platform.WEIXIN:
-            return bool(
-                config.extra.get("account_id")
-                and (config.token or config.extra.get("token"))
-            )
-
-        # Generic token/api_key auth covers Telegram, Discord, Slack, etc.
-        if config.token or config.api_key:
-            return True
-
-        # Platform-specific check
-        checker = _PLATFORM_CONNECTED_CHECKERS.get(platform)
-        if checker is not None:
-            return checker(config)
-
-        # Plugin-registered platforms
-        try:
-            from gateway.platform_registry import platform_registry
-            entry = platform_registry.get(platform.value)
-            if entry:
-                if entry.is_connected is not None:
-                    return entry.is_connected(config)
-                if entry.validate_config is not None:
-                    return entry.validate_config(config)
-                return True
-        except Exception:
-            pass  # Registry not yet initialised during early import
-
-        return False
    
    def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
        """Get the home channel for a platform."""
@@ -601,17 +471,6 @@ class GatewayConfig:
                )
        return self.unauthorized_dm_behavior

-    def get_notice_delivery(self, platform: Optional[Platform] = None) -> str:
-        """Return the effective notice-delivery mode for a platform."""
-        if platform:
-            platform_cfg = self.platforms.get(platform)
-            if platform_cfg and "notice_delivery" in platform_cfg.extra:
-                return _normalize_notice_delivery(
-                    platform_cfg.extra.get("notice_delivery"),
-                    "public",
-                )
-        return "public"
-

 def load_gateway_config() -> GatewayConfig:
    """
@@ -727,11 +586,6 @@ def load_gateway_config() -> GatewayConfig:
                        platform_cfg.get("unauthorized_dm_behavior"),
                        gw_data.get("unauthorized_dm_behavior", "pair"),
                    )
-                if "notice_delivery" in platform_cfg:
-                    bridged["notice_delivery"] = _normalize_notice_delivery(
-                        platform_cfg.get("notice_delivery"),
-                        "public",
-                    )
                if "reply_prefix" in platform_cfg:
                    bridged["reply_prefix"] = platform_cfg["reply_prefix"]
                if "reply_in_thread" in platform_cfg:
@@ -860,21 +714,11 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
-                allowed_users = telegram_cfg.get("allow_from")
-                if allowed_users is not None and not os.getenv("TELEGRAM_ALLOWED_USERS"):
-                    if isinstance(allowed_users, list):
-                        allowed_users = ",".join(str(v) for v in allowed_users)
-                    os.environ["TELEGRAM_ALLOWED_USERS"] = str(allowed_users)
-                group_allowed_users = telegram_cfg.get("group_allow_from")
-                if group_allowed_users is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
-                    if isinstance(group_allowed_users, list):
-                        group_allowed_users = ",".join(str(v) for v in group_allowed_users)
-                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(group_allowed_users)
-                group_allowed_chats = telegram_cfg.get("group_allowed_chats")
-                if group_allowed_chats is not None and not os.getenv("TELEGRAM_GROUP_ALLOWED_CHATS"):
-                    if isinstance(group_allowed_chats, list):
-                        group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
-                    os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
+                if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
+                    gac = telegram_cfg["group_allowed_chats"]
+                    if isinstance(gac, list):
+                        gac = ",".join(str(v) for v in gac)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
                if "disable_link_previews" in telegram_cfg:
                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                    if not isinstance(plat_data, dict):
@@ -945,12 +789,6 @@ def load_gateway_config() -> GatewayConfig:
                if "dm_mention_threads" in matrix_cfg and not os.getenv("MATRIX_DM_MENTION_THREADS"):
                    os.environ["MATRIX_DM_MENTION_THREADS"] = str(matrix_cfg["dm_mention_threads"]).lower()

-            # Feishu settings → env vars (env vars take precedence)
-            feishu_cfg = yaml_cfg.get("feishu", {})
-            if isinstance(feishu_cfg, dict):
-                if "allow_bots" in feishu_cfg and not os.getenv("FEISHU_ALLOW_BOTS"):
-                    os.environ["FEISHU_ALLOW_BOTS"] = str(feishu_cfg["allow_bots"]).lower()
-
    except Exception as e:
        logger.warning(
            "Failed to process config.yaml — falling back to .env / gateway.json values. "
@@ -1102,14 +940,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
        if Platform.WHATSAPP not in config.platforms:
            config.platforms[Platform.WHATSAPP] = PlatformConfig()
        config.platforms[Platform.WHATSAPP].enabled = True
-    whatsapp_home = os.getenv("WHATSAPP_HOME_CHANNEL")
-    if whatsapp_home and Platform.WHATSAPP in config.platforms:
-        config.platforms[Platform.WHATSAPP].home_channel = HomeChannel(
-            platform=Platform.WHATSAPP,
-            chat_id=whatsapp_home,
-            name=os.getenv("WHATSAPP_HOME_CHANNEL_NAME", "Home"),
-        )
-
+    
    # Slack
    slack_token = os.getenv("SLACK_BOT_TOKEN")
    if slack_token:
@@ -1540,25 +1371,3 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            config.default_reset_policy.at_hour = int(reset_hour)
        except ValueError:
            pass
-
-    # Registry-driven enable for plugin platforms.  Built-ins have explicit
-    # blocks above; plugins expose check_fn() which is the single source of
-    # truth for "are my env vars set?".  When it returns True, ensure the
-    # platform is enabled so start() will create its adapter.
-    try:
-        from hermes_cli.plugins import discover_plugins
-        discover_plugins()  # idempotent
-        from gateway.platform_registry import platform_registry
-        for entry in platform_registry.plugin_entries():
-            try:
-                if not entry.check_fn():
-                    continue
-            except Exception as e:
-                logger.debug("check_fn for %s raised: %s", entry.name, e)
-                continue
-            platform = Platform(entry.name)
-            if platform not in config.platforms:
-                config.platforms[platform] = PlatformConfig()
-            config.platforms[platform].enabled = True
-    except Exception as e:
-        logger.debug("Plugin platform enable pass failed: %s", e)
@@ -53,10 +53,9 @@ class DeliveryTarget:
        - "telegram" → Telegram home channel
        - "telegram:123456" → specific Telegram chat
        """
-        target_stripped = target.strip()
-        target_lower = target_stripped.lower()
+        target = target.strip().lower()
        
-        if target_lower == "origin":
+        if target == "origin":
            if origin:
                return cls(
                    platform=origin.platform,
@@ -68,14 +67,13 @@ class DeliveryTarget:
                # Fallback to local if no origin
                return cls(platform=Platform.LOCAL, is_origin=True)
        
-        if target_lower == "local":
+        if target == "local":
            return cls(platform=Platform.LOCAL)
        
        # Check for platform:chat_id or platform:chat_id:thread_id format
-        # Use the original case for chat_id/thread_id to preserve case-sensitive IDs
-        if ":" in target_stripped:
-            parts = target_stripped.split(":", 2)
-            platform_str = parts[0].lower()  # Platform names are case-insensitive
+        if ":" in target:
+            parts = target.split(":", 2)
+            platform_str = parts[0]
            chat_id = parts[1] if len(parts) > 1 else None
            thread_id = parts[2] if len(parts) > 2 else None
            try:
@@ -87,7 +85,7 @@ class DeliveryTarget:
        
        # Just a platform name (use home channel)
        try:
-            platform = Platform(target_lower)
+            platform = Platform(target)
            return cls(platform=platform)
        except ValueError:
            # Unknown platform, treat as local
@@ -21,7 +21,6 @@ Errors in hooks are caught and logged but never block the main pipeline.

 import asyncio
 import importlib.util
-import sys
 from typing import Any, Callable, Dict, List, Optional

 import yaml
@@ -53,13 +52,19 @@ class HookRegistry:
        return list(self._loaded_hooks)

    def _register_builtin_hooks(self) -> None:
-        """Register built-in hooks that are always active.
+        """Register built-in hooks that are always active."""
+        try:
+            from gateway.builtin_hooks.boot_md import handle as boot_md_handle

-        Currently empty — no shipped built-in hooks. Kept as the extension
-        point for future always-on gateway hooks so they drop in without
-        re-plumbing discover_and_load().
-        """
-        return
+            self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
+            self._loaded_hooks.append({
+                "name": "boot-md",
+                "description": "Run ~/.hermes/BOOT.md on gateway startup",
+                "events": ["gateway:startup"],
+                "path": "(builtin)",
+            })
+        except Exception as e:
+            print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)

    def discover_and_load(self) -> None:
        """
@@ -98,28 +103,16 @@ class HookRegistry:
                    print(f"[hooks] Skipping {hook_name}: no events declared", flush=True)
                    continue

-                # Dynamically load the handler module.
-                # Register in sys.modules BEFORE exec_module so Pydantic /
-                # dataclasses / typing introspection can resolve forward
-                # references (triggered by `from __future__ import annotations`
-                # in the handler). Without this, a handler that declares a
-                # Pydantic BaseModel for webhook/event payloads fails at first
-                # dispatch with "TypeAdapter ... is not fully defined".
-                module_name = f"hermes_hook_{hook_name}"
+                # Dynamically load the handler module
                spec = importlib.util.spec_from_file_location(
-                    module_name, handler_path
+                    f"hermes_hook_{hook_name}", handler_path
                )
                if spec is None or spec.loader is None:
                    print(f"[hooks] Skipping {hook_name}: could not load handler.py", flush=True)
                    continue

                module = importlib.util.module_from_spec(spec)
-                sys.modules[module_name] = module
-                try:
-                    spec.loader.exec_module(module)
-                except Exception:
-                    sys.modules.pop(module_name, None)
-                    raise
+                spec.loader.exec_module(module)

                handle_fn = getattr(module, "handle", None)
                if handle_fn is None:
@@ -28,7 +28,6 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_dir
-from utils import atomic_replace


 # Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
@@ -60,7 +59,7 @@ def _secure_write(path: Path, data: str) -> None:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())
-        atomic_replace(tmp_path, path)
+        os.replace(tmp_path, str(path))
        try:
            os.chmod(path, 0o600)
        except OSError:
@@ -1,212 +0,0 @@
-"""
-Platform Adapter Registry
-
-Allows platform adapters (built-in and plugin) to self-register so the gateway
-can discover and instantiate them without hardcoded if/elif chains.
-
-Built-in adapters continue to use the existing if/elif in _create_adapter()
-for now.  Plugin adapters register here via PluginContext.register_platform()
-and are looked up first -- if nothing is found the gateway falls through to
-the legacy code path.
-
-Usage (plugin side):
-
-    from gateway.platform_registry import platform_registry, PlatformEntry
-
-    platform_registry.register(PlatformEntry(
-        name="irc",
-        label="IRC",
-        adapter_factory=lambda cfg: IRCAdapter(cfg),
-        check_fn=check_requirements,
-        validate_config=lambda cfg: bool(cfg.extra.get("server")),
-        required_env=["IRC_SERVER"],
-        install_hint="pip install irc",
-    ))
-
-Usage (gateway side):
-
-    adapter = platform_registry.create_adapter("irc", platform_config)
-"""
-
-import logging
-from dataclasses import dataclass, field
-from typing import Any, Callable, Optional
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class PlatformEntry:
-    """Metadata and factory for a single platform adapter."""
-
-    # Identifier used in config.yaml (e.g. "irc", "viber").
-    name: str
-
-    # Human-readable label (e.g. "IRC", "Viber").
-    label: str
-
-    # Factory callable: receives a PlatformConfig, returns an adapter instance.
-    # Using a factory instead of a bare class lets plugins do custom init
-    # (e.g. passing extra kwargs, wrapping in try/except).
-    adapter_factory: Callable[[Any], Any]
-
-    # Returns True when the platform's dependencies are available.
-    check_fn: Callable[[], bool]
-
-    # Optional: given a PlatformConfig, is it properly configured?
-    # If None, the registry skips config validation and lets the adapter
-    # fail at connect() time with a descriptive error.
-    validate_config: Optional[Callable[[Any], bool]] = None
-
-    # Optional: given a PlatformConfig, is the platform connected/enabled?
-    # Used by ``GatewayConfig.get_connected_platforms()`` and setup UI status.
-    # If None, falls back to ``validate_config`` or ``check_fn``.
-    is_connected: Optional[Callable[[Any], bool]] = None
-
-    # Env vars this platform needs (for ``hermes setup`` display).
-    required_env: list = field(default_factory=list)
-
-    # Hint shown when check_fn returns False.
-    install_hint: str = ""
-
-    # Optional setup function for interactive configuration.
-    # Signature: () -> None (prompts user, saves env vars).
-    # If None, falls back to _setup_standard_platform (needs token_var + vars)
-    # or a generic "set these env vars" display.
-    setup_fn: Optional[Callable[[], None]] = None
-
-    # "builtin" or "plugin"
-    source: str = "plugin"
-
-    # Name of the plugin manifest that registered this entry (empty for
-    # built-ins).  Used by ``hermes gateway setup`` to auto-enable the
-    # owning plugin when the user configures its platform.
-    plugin_name: str = ""
-
-    # ── Auth env var names (for _is_user_authorized integration) ──
-    # E.g. "IRC_ALLOWED_USERS" — checked for comma-separated user IDs.
-    allowed_users_env: str = ""
-    # E.g. "IRC_ALLOW_ALL_USERS" — if truthy, all users authorized.
-    allow_all_env: str = ""
-
-    # ── Message limits ──
-    # Max message length for smart-chunking.  0 = no limit.
-    max_message_length: int = 0
-
-    # ── Privacy ──
-    # If True, session descriptions redact PII (phone numbers, etc.)
-    pii_safe: bool = False
-
-    # ── Display ──
-    # Emoji for CLI/gateway display (e.g. "💬")
-    emoji: str = "🔌"
-
-    # Whether this platform should appear in _UPDATE_ALLOWED_PLATFORMS
-    # (allows /update command from this platform).
-    allow_update_command: bool = True
-
-    # ── LLM guidance ──
-    # Platform hint injected into the system prompt (e.g. "You are on IRC.
-    # Do not use markdown.").  Empty string = no hint.
-    platform_hint: str = ""
-
-
-class PlatformRegistry:
-    """Central registry of platform adapters.
-
-    Thread-safe for reads (dict lookups are atomic under GIL).
-    Writes happen at startup during sequential discovery.
-    """
-
-    def __init__(self) -> None:
-        self._entries: dict[str, PlatformEntry] = {}
-
-    def register(self, entry: PlatformEntry) -> None:
-        """Register a platform adapter entry.
-
-        If an entry with the same name exists, it is replaced (last writer
-        wins -- this lets plugins override built-in adapters if desired).
-        """
-        if entry.name in self._entries:
-            prev = self._entries[entry.name]
-            logger.info(
-                "Platform '%s' re-registered (was %s, now %s)",
-                entry.name,
-                prev.source,
-                entry.source,
-            )
-        self._entries[entry.name] = entry
-        logger.debug("Registered platform adapter: %s (%s)", entry.name, entry.source)
-
-    def unregister(self, name: str) -> bool:
-        """Remove a platform entry.  Returns True if it existed."""
-        return self._entries.pop(name, None) is not None
-
-    def get(self, name: str) -> Optional[PlatformEntry]:
-        """Look up a platform entry by name."""
-        return self._entries.get(name)
-
-    def all_entries(self) -> list[PlatformEntry]:
-        """Return all registered platform entries."""
-        return list(self._entries.values())
-
-    def plugin_entries(self) -> list[PlatformEntry]:
-        """Return only plugin-registered platform entries."""
-        return [e for e in self._entries.values() if e.source == "plugin"]
-
-    def is_registered(self, name: str) -> bool:
-        return name in self._entries
-
-    def create_adapter(self, name: str, config: Any) -> Optional[Any]:
-        """Create an adapter instance for the given platform name.
-
-        Returns None if:
-        - No entry registered for *name*
-        - check_fn() returns False (missing deps)
-        - validate_config() returns False (misconfigured)
-        - The factory raises an exception
-        """
-        entry = self._entries.get(name)
-        if entry is None:
-            return None
-
-        if not entry.check_fn():
-            hint = f" ({entry.install_hint})" if entry.install_hint else ""
-            logger.warning(
-                "Platform '%s' requirements not met%s",
-                entry.label,
-                hint,
-            )
-            return None
-
-        if entry.validate_config is not None:
-            try:
-                if not entry.validate_config(config):
-                    logger.warning(
-                        "Platform '%s' config validation failed",
-                        entry.label,
-                    )
-                    return None
-            except Exception as e:
-                logger.warning(
-                    "Platform '%s' config validation error: %s",
-                    entry.label,
-                    e,
-                )
-                return None
-
-        try:
-            adapter = entry.adapter_factory(config)
-            return adapter
-        except Exception as e:
-            logger.error(
-                "Failed to create adapter for platform '%s': %s",
-                entry.label,
-                e,
-                exc_info=True,
-            )
-            return None
-
-
-# Module-level singleton
-platform_registry = PlatformRegistry()
@@ -1,30 +1,9 @@
 # Adding a New Messaging Platform

-There are two ways to add a platform to the Hermes gateway:
-
-## Plugin Path (Recommended for Community/Third-Party)
-
-Create a plugin directory in `~/.hermes/plugins/` with a `PLUGIN.yaml` and
-`adapter.py`.  The adapter inherits from `BasePlatformAdapter` and registers
-via `ctx.register_platform()` in the `register(ctx)` entry point.  This
-requires **zero changes to core Hermes code**.
-
-The plugin system automatically handles: adapter creation, config parsing,
-user authorization, cron delivery, send_message routing, system prompt hints,
-status display, gateway setup, and more.
-
-See `plugins/platforms/irc/` for a complete reference implementation, and
-`website/docs/developer-guide/adding-platform-adapters.md` for the full
-plugin guide with code examples.
-
---
-
-## Built-in Path (Core Contributors Only)
-
-Checklist for integrating a platform directly into the Hermes core.
-Use this as a reference when building a built-in adapter — every item here
-is a real integration point. Missing any of them will cause broken
-functionality, missing features, or inconsistent behavior.
+Checklist for integrating a new messaging platform into the Hermes gateway.
+Use this as a reference when building a new adapter — every item here is a
+real integration point that exists in the codebase. Missing any of them will
+cause broken functionality, missing features, or inconsistent behavior.

 ---

@@ -7,9 +7,7 @@ Exposes an HTTP server with endpoints:
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
 - GET  /v1/models                  — lists hermes-agent as an available model
- GET  /v1/capabilities            — machine-readable API capabilities for external UIs
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
- GET  /v1/runs/{run_id}           — retrieve current run status
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
 - POST /v1/runs/{run_id}/stop    — interrupt a running agent
 - GET  /health                     — health check
@@ -592,8 +590,6 @@ class APIServerAdapter(BasePlatformAdapter):
        # Active run agent/task references for stop support
        self._active_run_agents: Dict[str, Any] = {}
        self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
-        # Pollable run status for dashboards and external control-plane UIs.
-        self._run_statuses: Dict[str, Dict[str, Any]] = {}
        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
@@ -812,51 +808,6 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

-    async def _handle_capabilities(self, request: "web.Request") -> "web.Response":
-        """GET /v1/capabilities — advertise the stable API surface.
-
-        External UIs and orchestrators use this endpoint to discover the API
-        server's plugin-safe contract without scraping docs or assuming that
-        every Hermes version exposes the same endpoints.
-        """
-        auth_err = self._check_auth(request)
-        if auth_err:
-            return auth_err
-
-        return web.json_response({
-            "object": "hermes.api_server.capabilities",
-            "platform": "hermes-agent",
-            "model": self._model_name,
-            "auth": {
-                "type": "bearer",
-                "required": bool(self._api_key),
-            },
-            "features": {
-                "chat_completions": True,
-                "chat_completions_streaming": True,
-                "responses_api": True,
-                "responses_streaming": True,
-                "run_submission": True,
-                "run_status": True,
-                "run_events_sse": True,
-                "run_stop": True,
-                "tool_progress_events": True,
-                "session_continuity_header": "X-Hermes-Session-Id",
-                "cors": bool(self._cors_origins),
-            },
-            "endpoints": {
-                "health": {"method": "GET", "path": "/health"},
-                "health_detailed": {"method": "GET", "path": "/health/detailed"},
-                "models": {"method": "GET", "path": "/v1/models"},
-                "chat_completions": {"method": "POST", "path": "/v1/chat/completions"},
-                "responses": {"method": "POST", "path": "/v1/responses"},
-                "runs": {"method": "POST", "path": "/v1/runs"},
-                "run_status": {"method": "GET", "path": "/v1/runs/{run_id}"},
-                "run_events": {"method": "GET", "path": "/v1/runs/{run_id}/events"},
-                "run_stop": {"method": "POST", "path": "/v1/runs/{run_id}/stop"},
-            },
-        })
-
    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
@@ -981,62 +932,39 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is not None:
                    _stream_q.put(delta)

-            # Track which tool_call_ids we've emitted a "running" lifecycle
-            # event for, so a "completed" event without a matching "running"
-            # (e.g. internal/filtered tools) is silently dropped instead of
-            # producing an orphaned event clients can't correlate.
-            _started_tool_call_ids: set[str] = set()
+            def _on_tool_progress(event_type, name, preview, args, **kwargs):
+                """Send tool progress as a separate SSE event.

-            def _on_tool_start(tool_call_id, function_name, function_args):
-                """Emit ``hermes.tool.progress`` with ``status: running``.
+                Previously, progress markers like ``⏰ list`` were injected
+                directly into ``delta.content``.  OpenAI-compatible frontends
+                (Open WebUI, LobeChat, …) store ``delta.content`` verbatim as
+                the assistant message and send it back on subsequent requests.
+                After enough turns the model learns to *emit* the markers as
+                plain text instead of issuing real tool calls — silently
+                hallucinating tool results.  See #6972.

-                Replaces the old ``tool_progress_callback("tool.started",
-                ...)`` emit so SSE consumers receive a single event per
-                tool start, carrying both the legacy ``tool``/``emoji``/
-                ``label`` payload (for #6972 frontends) and the new
-                ``toolCallId``/``status`` correlation fields (#16588).
-
-                Skips tools whose names start with ``_`` so internal
-                events (``_thinking``, …) stay off the wire — matching
-                the prior ``_on_tool_progress`` filter exactly.
+                The fix: push a tagged tuple ``("__tool_progress__", payload)``
+                onto the stream queue.  The SSE writer emits it as a custom
+                ``event: hermes.tool.progress`` line that compliant frontends
+                can render for UX but will *not* persist into conversation
+                history.  Clients that don't understand the custom event type
+                silently ignore it per the SSE specification.
                """
-                if not tool_call_id or function_name.startswith("_"):
+                if event_type != "tool.started":
                    return
-                _started_tool_call_ids.add(tool_call_id)
-                from agent.display import build_tool_preview, get_tool_emoji
-                label = build_tool_preview(function_name, function_args) or function_name
+                if name.startswith("_"):
+                    return
+                from agent.display import get_tool_emoji
+                emoji = get_tool_emoji(name)
+                label = preview or name
                _stream_q.put(("__tool_progress__", {
-                    "tool": function_name,
-                    "emoji": get_tool_emoji(function_name),
+                    "tool": name,
+                    "emoji": emoji,
                    "label": label,
-                    "toolCallId": tool_call_id,
-                    "status": "running",
-                }))
-
-            def _on_tool_complete(tool_call_id, function_name, function_args, function_result):
-                """Emit the matching ``status: completed`` event.
-
-                Dropped if the start was filtered (internal tool, missing
-                id, or never seen) so clients never get an orphaned
-                ``completed`` they can't correlate to a prior ``running``.
-                """
-                if not tool_call_id or tool_call_id not in _started_tool_call_ids:
-                    return
-                _started_tool_call_ids.discard(tool_call_id)
-                _stream_q.put(("__tool_progress__", {
-                    "tool": function_name,
-                    "toolCallId": tool_call_id,
-                    "status": "completed",
                }))

            # Start agent in background.  agent_ref is a mutable container
            # so the SSE writer can interrupt the agent on client disconnect.
-            #
-            # ``tool_progress_callback`` is intentionally not wired here:
-            # it would duplicate every emit because ``run_agent`` fires it
-            # side-by-side with ``tool_start_callback``/``tool_complete_callback``.
-            # The structured callbacks are strictly richer (they carry the
-            # tool_call id), so they own the chat-completions SSE channel.
            agent_ref = [None]
            agent_task = asyncio.ensure_future(self._run_agent(
                user_message=user_message,
@@ -1044,8 +972,7 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
                stream_delta_callback=_on_delta,
-                tool_start_callback=_on_tool_start,
-                tool_complete_callback=_on_tool_complete,
+                tool_progress_callback=_on_tool_progress,
                agent_ref=agent_ref,
            ))

@@ -1160,8 +1087,7 @@ class APIServerAdapter(BasePlatformAdapter):
                Tagged tuples ``("__tool_progress__", payload)`` are sent
                as a custom ``event: hermes.tool.progress`` SSE event so
                frontends can display them without storing the markers in
-                conversation history.  See #6972 for the original event,
-                #16588 for the ``toolCallId``/``status`` lifecycle fields.
+                conversation history.  See #6972.
                """
                if isinstance(item, tuple) and len(item) == 2 and item[0] == "__tool_progress__":
                    event_data = json.dumps(item[1])
@@ -2351,11 +2277,10 @@ class APIServerAdapter(BasePlatformAdapter):
            )
            if agent_ref is not None:
                agent_ref[0] = agent
-            effective_task_id = session_id or str(uuid.uuid4())
            result = agent.run_conversation(
                user_message=user_message,
                conversation_history=conversation_history,
-                task_id=effective_task_id,
+                task_id="default",
            )
            usage = {
                "input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
@@ -2372,31 +2297,10 @@ class APIServerAdapter(BasePlatformAdapter):

    _MAX_CONCURRENT_RUNS = 10  # Prevent unbounded resource allocation
    _RUN_STREAM_TTL = 300  # seconds before orphaned runs are swept
-    _RUN_STATUS_TTL = 3600  # seconds to retain terminal run status for polling
-
-    def _set_run_status(self, run_id: str, status: str, **fields: Any) -> Dict[str, Any]:
-        """Update pollable run status without exposing private agent objects."""
-        now = time.time()
-        current = self._run_statuses.get(run_id, {})
-        current.update({
-            "object": "hermes.run",
-            "run_id": run_id,
-            "status": status,
-            "updated_at": now,
-        })
-        current.setdefault("created_at", fields.pop("created_at", now))
-        current.update(fields)
-        self._run_statuses[run_id] = current
-        return current

    def _make_run_event_callback(self, run_id: str, loop: "asyncio.AbstractEventLoop"):
        """Return a tool_progress_callback that pushes structured events to the run's SSE queue."""
        def _push(event: Dict[str, Any]) -> None:
-            self._set_run_status(
-                run_id,
-                self._run_statuses.get(run_id, {}).get("status", "running"),
-                last_event=event.get("event"),
-            )
            q = self._run_streams.get(run_id)
            if q is None:
                return
@@ -2461,6 +2365,28 @@ class APIServerAdapter(BasePlatformAdapter):
        if not user_message:
            return web.json_response(_openai_error("No user message found in input"), status=400)

+        run_id = f"run_{uuid.uuid4().hex}"
+        loop = asyncio.get_running_loop()
+        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
+        self._run_streams[run_id] = q
+        self._run_streams_created[run_id] = time.time()
+
+        event_cb = self._make_run_event_callback(run_id, loop)
+
+        # Also wire stream_delta_callback so message.delta events flow through
+        def _text_cb(delta: Optional[str]) -> None:
+            if delta is None:
+                return
+            try:
+                loop.call_soon_threadsafe(q.put_nowait, {
+                    "event": "message.delta",
+                    "run_id": run_id,
+                    "timestamp": time.time(),
+                    "delta": delta,
+                })
+            except Exception:
+                pass
+
        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")

@@ -2508,42 +2434,11 @@ class APIServerAdapter(BasePlatformAdapter):
                        )
                    conversation_history.append({"role": msg["role"], "content": str(content)})

-        run_id = f"run_{uuid.uuid4().hex}"
        session_id = body.get("session_id") or stored_session_id or run_id
        ephemeral_system_prompt = instructions
-        loop = asyncio.get_running_loop()
-        q: "asyncio.Queue[Optional[Dict]]" = asyncio.Queue()
-        created_at = time.time()
-        self._run_streams[run_id] = q
-        self._run_streams_created[run_id] = created_at
-
-        event_cb = self._make_run_event_callback(run_id, loop)
-
-        # Also wire stream_delta_callback so message.delta events flow through.
-        def _text_cb(delta: Optional[str]) -> None:
-            if delta is None:
-                return
-            try:
-                loop.call_soon_threadsafe(q.put_nowait, {
-                    "event": "message.delta",
-                    "run_id": run_id,
-                    "timestamp": time.time(),
-                    "delta": delta,
-                })
-            except Exception:
-                pass
-
-        self._set_run_status(
-            run_id,
-            "queued",
-            created_at=created_at,
-            session_id=session_id,
-            model=body.get("model", self._model_name),
-        )

        async def _run_and_close():
            try:
-                self._set_run_status(run_id, "running")
                agent = self._create_agent(
                    ephemeral_system_prompt=ephemeral_system_prompt,
                    session_id=session_id,
@@ -2552,11 +2447,10 @@ class APIServerAdapter(BasePlatformAdapter):
                )
                self._active_run_agents[run_id] = agent
                def _run_sync():
-                    effective_task_id = session_id or run_id
                    r = agent.run_conversation(
                        user_message=user_message,
                        conversation_history=conversation_history,
-                        task_id=effective_task_id,
+                        task_id="default",
                    )
                    u = {
                        "input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
@@ -2574,36 +2468,8 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output": final_response,
                    "usage": usage,
                })
-                self._set_run_status(
-                    run_id,
-                    "completed",
-                    output=final_response,
-                    usage=usage,
-                    last_event="run.completed",
-                )
-            except asyncio.CancelledError:
-                self._set_run_status(
-                    run_id,
-                    "cancelled",
-                    last_event="run.cancelled",
-                )
-                try:
-                    q.put_nowait({
-                        "event": "run.cancelled",
-                        "run_id": run_id,
-                        "timestamp": time.time(),
-                    })
-                except Exception:
-                    pass
-                raise
            except Exception as exc:
                logger.exception("[api_server] run %s failed", run_id)
-                self._set_run_status(
-                    run_id,
-                    "failed",
-                    error=str(exc),
-                    last_event="run.failed",
-                )
                try:
                    q.put_nowait({
                        "event": "run.failed",
@@ -2633,21 +2499,6 @@ class APIServerAdapter(BasePlatformAdapter):

        return web.json_response({"run_id": run_id, "status": "started"}, status=202)

-    async def _handle_get_run(self, request: "web.Request") -> "web.Response":
-        """GET /v1/runs/{run_id} — return pollable run status for external UIs."""
-        auth_err = self._check_auth(request)
-        if auth_err:
-            return auth_err
-
-        run_id = request.match_info["run_id"]
-        status = self._run_statuses.get(run_id)
-        if status is None:
-            return web.json_response(
-                _openai_error(f"Run not found: {run_id}", code="run_not_found"),
-                status=404,
-            )
-        return web.json_response(status)
-
    async def _handle_run_events(self, request: "web.Request") -> "web.StreamResponse":
        """GET /v1/runs/{run_id}/events — SSE stream of structured agent lifecycle events."""
        auth_err = self._check_auth(request)
@@ -2710,8 +2561,6 @@ class APIServerAdapter(BasePlatformAdapter):
        if agent is None and task is None:
            return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)

-        self._set_run_status(run_id, "stopping", last_event="run.stopping")
-
        if agent is not None:
            try:
                agent.interrupt("Stop requested via API")
@@ -2754,15 +2603,6 @@ class APIServerAdapter(BasePlatformAdapter):
                self._active_run_agents.pop(run_id, None)
                self._active_run_tasks.pop(run_id, None)

-            stale_statuses = [
-                run_id
-                for run_id, status in list(self._run_statuses.items())
-                if status.get("status") in {"completed", "failed", "cancelled"}
-                and now - float(status.get("updated_at", 0) or 0) > self._RUN_STATUS_TTL
-            ]
-            for run_id in stale_statuses:
-                self._run_statuses.pop(run_id, None)
-
    # ------------------------------------------------------------------
    # BasePlatformAdapter interface
    # ------------------------------------------------------------------
@@ -2781,7 +2621,6 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_get("/health/detailed", self._handle_health_detailed)
            self._app.router.add_get("/v1/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
-            self._app.router.add_get("/v1/capabilities", self._handle_capabilities)
            self._app.router.add_post("/v1/chat/completions", self._handle_chat_completions)
            self._app.router.add_post("/v1/responses", self._handle_responses)
            self._app.router.add_get("/v1/responses/{response_id}", self._handle_get_response)
@@ -2797,7 +2636,6 @@ class APIServerAdapter(BasePlatformAdapter):
            self._app.router.add_post("/api/jobs/{job_id}/run", self._handle_run_job)
            # Structured event streaming
            self._app.router.add_post("/v1/runs", self._handle_runs)
-            self._app.router.add_get("/v1/runs/{run_id}", self._handle_get_run)
            self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
            self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
            # Start background sweep to clean up orphaned (unconsumed) run streams
@@ -23,45 +23,6 @@ from utils import normalize_proxy_url

 logger = logging.getLogger(__name__)

-# Audio file extensions Hermes recognizes for native audio delivery.
-# Kept in sync with tools/send_message_tool.py and cron/scheduler.py via
-# should_send_media_as_audio() below.
-_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a', '.flac'})
-# Telegram's Bot API sendAudio only accepts MP3 / M4A. Other audio
-# formats either need to go through sendVoice (Opus/OGG) or must be
-# delivered as a regular document.
-_TELEGRAM_AUDIO_ATTACHMENT_EXTS = frozenset({'.mp3', '.m4a'})
-_TELEGRAM_VOICE_EXTS = frozenset({'.ogg', '.opus'})
-
-
-def _platform_name(platform) -> str:
-    """Normalize a Platform enum / raw string into a lowercase name."""
-    value = getattr(platform, "value", platform)
-    return str(value or "").lower()
-
-
-def should_send_media_as_audio(platform, ext: str, is_voice: bool = False) -> bool:
-    """Return True when a media file should use the platform's audio sender.
-
-    Other platforms: every recognized audio extension routes through the
-    audio sender.
-
-    Telegram: the Bot API only accepts MP3/M4A for sendAudio and
-    Opus/OGG for sendVoice. Opus/OGG is only routed as audio when the
-    caller flagged ``is_voice=True`` (so we don't turn a regular audio
-    attachment into a voice bubble just because the file happens to be
-    Opus). Everything else falls through to document delivery by
-    returning ``False``.
-    """
-    normalized_ext = (ext or "").lower()
-    if normalized_ext not in _AUDIO_EXTS:
-        return False
-    if _platform_name(platform) == "telegram":
-        if normalized_ext in _TELEGRAM_VOICE_EXTS:
-            return is_voice
-        return normalized_ext in _TELEGRAM_AUDIO_ATTACHMENT_EXTS
-    return True
-

 def utf16_len(s: str) -> int:
    """Count UTF-16 code units in *s*.
@@ -346,14 +307,9 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.

    Returns ``(session_kwargs, request_kwargs)`` where:
-      - With aiohttp-socks → ``({"connector": ProxyConnector(...)}, {})``
-        for *all* proxy schemes (SOCKS **and** HTTP/HTTPS).
-      - HTTP without aiohttp-socks → ``({}, {"proxy": url})``.
-      - None → ``({}, {})``.
-
-    Prefer the connector path: it works transparently with libraries
-    (like mautrix) that call ``session.request()`` without forwarding
-    per-request ``proxy=`` kwargs.
+      - SOCKS → ``({"connector": ProxyConnector(...)}, {})``
+      - HTTP  → ``({}, {"proxy": url})``
+      - None  → ``({}, {})``

    Usage::

@@ -364,20 +320,20 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """
    if not proxy_url:
        return {}, {}
-    try:
-        from aiohttp_socks import ProxyConnector
+    if proxy_url.lower().startswith("socks"):
+        try:
+            from aiohttp_socks import ProxyConnector

-        connector = ProxyConnector.from_url(proxy_url, rdns=True)
-        return {"connector": connector}, {}
-    except ImportError:
-        if proxy_url.lower().startswith("socks"):
+            connector = ProxyConnector.from_url(proxy_url, rdns=True)
+            return {"connector": connector}, {}
+        except ImportError:
            logger.warning(
                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
                "Run: pip install aiohttp-socks",
                proxy_url,
            )
            return {}, {}
-        return {}, {"proxy": proxy_url}
+    return {}, {"proxy": proxy_url}


 def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
@@ -416,7 +372,7 @@ def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = Non
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
-from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple, Union
+from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
 from enum import Enum

 from pathlib import Path as _Path
@@ -946,42 +902,7 @@ class MessageEvent:
        return args


-_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
-    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$", re.IGNORECASE),
-    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$", re.IGNORECASE),
-    re.compile(r"^(?:please\s+)?restart\s+hermes[.!?\s]*$", re.IGNORECASE),
-)
-
-
-def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
-    """Rewrite a tiny set of DM plaintext admin phrases into slash commands.
-
-    This keeps high-impact operational phrases like ``restart gateway`` out of
-    the LLM/tool path, where they can trigger a self-restart from inside the
-    currently running agent and leave the gateway stuck in ``draining`` while it
-    waits for that same agent to finish.
-
-    Scope is intentionally narrow: DM text messages only, exact restart-style
-    phrases only. Group chats keep natural-language semantics.
-    """
-    try:
-        if event is None or event.message_type != MessageType.TEXT:
-            return
-        text = (event.text or "").strip()
-        if not text or text.startswith("/"):
-            return
-        source = getattr(event, "source", None)
-        if getattr(source, "chat_type", None) != "dm":
-            return
-        for pattern in _PLAINTEXT_GATEWAY_RESTART_PATTERNS:
-            if pattern.match(text):
-                event.text = "/restart"
-                return
-    except Exception:
-        return
-
-
-@dataclass
+@dataclass 
 class SendResult:
    """Result of sending a message."""
    success: bool
@@ -991,45 +912,6 @@ class SendResult:
    retryable: bool = False  # True for transient connection errors — base will retry automatically


-class EphemeralReply(str):
-    """System-notice reply that auto-deletes after a TTL.
-
-    Slash-command handlers in ``gateway/run.py`` can return this wrapper
-    instead of a plain string to request that the reply message be deleted
-    after ``ttl_seconds`` on platforms that support ``delete_message``.
-
-    Subclassing ``str`` keeps the wrapper transparent to anything that
-    treats handler return values as text (existing tests use ``in`` /
-    ``startswith`` / equality; the ``_process_message_background`` pipeline
-    extracts attachments from the string content).  ``isinstance(r,
-    EphemeralReply)`` still distinguishes ephemeral replies from plain
-    strings so the send path can schedule deletion.
-
-    Platforms that don't override :meth:`BasePlatformAdapter.delete_message`
-    silently ignore the TTL — the message is sent normally and left in
-    place.  When ``ttl_seconds`` is ``None``, the pipeline uses the
-    configured ``display.ephemeral_system_ttl`` default.  A default of ``0``
-    disables auto-deletion globally, preserving prior behavior.
-    """
-
-    ttl_seconds: Optional[int]
-
-    def __new__(cls, text: str, ttl_seconds: Optional[int] = None):
-        instance = super().__new__(cls, text)
-        instance.ttl_seconds = ttl_seconds
-        return instance
-
-    @property
-    def text(self) -> str:
-        """Return the underlying text.
-
-        Provided for call sites that want an explicit string conversion,
-        though ``str(reply)`` and using ``reply`` directly where a string
-        is expected both work identically.
-        """
-        return str.__str__(self)
-
-
 def merge_pending_message_event(
    pending_messages: Dict[str, MessageEvent],
    session_key: str,
@@ -1073,11 +955,6 @@ def merge_pending_message_event(
                    existing.text = event.text
            if existing_is_photo or incoming_is_photo:
                existing.message_type = MessageType.PHOTO
-            elif (
-                getattr(existing, "message_type", None) == MessageType.TEXT
-                and event.message_type != MessageType.TEXT
-            ):
-                existing.message_type = event.message_type
            return

        if (
@@ -1112,10 +989,8 @@ _RETRYABLE_ERROR_PATTERNS = (
 )


-# Type for message handlers.  Handlers may return a plain string (normal
-# reply), an ``EphemeralReply`` to opt the reply into auto-deletion, or
-# ``None`` when the response was already delivered (e.g. via streaming).
-MessageHandler = Callable[[MessageEvent], Awaitable[Optional[Union[str, "EphemeralReply"]]]]
+# Type for message handlers
+MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]


 def resolve_channel_prompt(
@@ -1500,119 +1375,6 @@ class BasePlatformAdapter(ABC):
        """
        return False

-    def _get_ephemeral_system_ttl_default(self) -> int:
-        """Read ``display.ephemeral_system_ttl`` from config.
-
-        Returns the TTL in seconds to use when an :class:`EphemeralReply`
-        does not specify one explicitly.  ``0`` (the default) disables
-        auto-deletion.  Non-fatal if config is unreadable.
-        """
-        try:
-            from hermes_cli.config import load_config as _load_config
-        except Exception:
-            return 0
-        try:
-            cfg = _load_config()
-        except Exception:
-            return 0
-        display = cfg.get("display", {}) if isinstance(cfg, dict) else {}
-        if not isinstance(display, dict):
-            return 0
-        raw = display.get("ephemeral_system_ttl", 0)
-        try:
-            return int(raw)
-        except (TypeError, ValueError):
-            return 0
-
-    def _schedule_ephemeral_delete(
-        self,
-        chat_id: str,
-        message_id: str,
-        ttl_seconds: int,
-    ) -> None:
-        """Spawn a detached task that deletes ``message_id`` after ``ttl_seconds``.
-
-        Best-effort — failures (gateway restart, permission denied, message
-        too old for Telegram's 48h window) are swallowed at debug level.
-        Does not block the caller.
-        """
-
-        async def _run_delete() -> None:
-            try:
-                await asyncio.sleep(max(1, int(ttl_seconds)))
-                await self.delete_message(chat_id=chat_id, message_id=message_id)
-            except asyncio.CancelledError:
-                raise
-            except Exception as e:
-                logger.debug(
-                    "[%s] Ephemeral delete failed for %s/%s: %s",
-                    self.name, chat_id, message_id, e,
-                )
-
-        coro = _run_delete()
-        try:
-            asyncio.create_task(coro)
-        except RuntimeError:
-            # No running loop (e.g. unit tests that never reach the async
-            # path).  Close the coroutine cleanly so Python doesn't warn
-            # about it never being awaited, then drop silently.
-            coro.close()
-
-    async def send_slash_confirm(
-        self,
-        chat_id: str,
-        title: str,
-        message: str,
-        session_key: str,
-        confirm_id: str,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        """Send a three-option slash-command confirmation prompt.
-
-        Used by the gateway's generic slash-confirm primitive (see
-        ``GatewayRunner._request_slash_confirm``) for commands that have a
-        non-destructive but expensive side effect the user should explicitly
-        acknowledge — the current caller is ``/reload-mcp``, which
-        invalidates the provider prompt cache.
-
-        Platforms with inline-button support (Telegram, Discord, Slack,
-        Matrix, Feishu) should override this to render three buttons:
-        Approve Once / Always Approve / Cancel.  Button callbacks MUST be
-        routed back through the gateway by calling
-        ``GatewayRunner._resolve_slash_confirm(confirm_id, choice)`` where
-        ``choice`` is ``"once"`` / ``"always"`` / ``"cancel"``.
-
-        Platforms without button UIs leave this as the default and fall
-        through to the gateway's text fallback (which sends ``message`` as
-        plain text and intercepts the next ``/approve`` / ``/always`` /
-        ``/cancel`` reply).
-
-        ``confirm_id`` is a short string generated by the gateway; the
-        adapter stores it alongside any platform-specific state needed to
-        route the callback (e.g. Telegram's ``_approval_state`` dict).
-        """
-        return SendResult(success=False, error="Not supported")
-
-    async def send_private_notice(
-        self,
-        chat_id: str,
-        user_id: Optional[str],
-        content: str,
-        reply_to: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        """Send a notice privately when the platform supports it.
-
-        The default implementation falls back to a normal send so callers can
-        use one code path across platforms.
-        """
-        return await self.send(
-            chat_id=chat_id,
-            content=content,
-            reply_to=reply_to,
-            metadata=metadata,
-        )
-
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
@@ -1629,64 +1391,7 @@ class BasePlatformAdapter(ABC):
        Default is a no-op for platforms with one-shot typing indicators.
        """
        pass
-
-    async def send_multiple_images(
-        self,
-        chat_id: str,
-        images: List[Tuple[str, str]],
-        metadata: Optional[Dict[str, Any]] = None,
-        human_delay: float = 0.0,
-    ) -> None:
-        """Send a batch of images.
-
-        Accepts ``http(s)://``, ``file://`` URIs in the first tuple
-        element.
-
-        Default implementation sends each item individually,
-        routing animated GIFs through ``send_animation`` and local
-        files through ``send_image_file``.
-
-        Override in subclasses to bundle into a single native API call
-        (e.g. Signal's multi-attachment RPC)
-        """
-        from urllib.parse import unquote as _unquote
-
-        for image_url, alt_text in images:
-            if human_delay > 0:
-                await asyncio.sleep(human_delay)
-            try:
-                logger.info(
-                    "[%s] Sending image: %s (alt=%s)",
-                    self.name,
-                    safe_url_for_log(image_url),
-                    alt_text[:30] if alt_text else "",
-                )
-                if image_url.startswith("file://"):
-                    img_result = await self.send_image_file(
-                        chat_id=chat_id,
-                        image_path=_unquote(image_url[7:]),
-                        caption=alt_text if alt_text else None,
-                        metadata=metadata,
-                    )
-                elif self._is_animation_url(image_url):
-                    img_result = await self.send_animation(
-                        chat_id=chat_id,
-                        animation_url=image_url,
-                        caption=alt_text if alt_text else None,
-                        metadata=metadata,
-                    )
-                else:
-                    img_result = await self.send_image(
-                        chat_id=chat_id,
-                        image_url=image_url,
-                        caption=alt_text if alt_text else None,
-                        metadata=metadata,
-                    )
-                if not img_result.success:
-                    logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
-            except Exception as img_err:
-                logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
-
+    
    async def send_image(
        self,
        chat_id: str,
@@ -1895,7 +1600,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -2035,19 +1740,11 @@ class BasePlatformAdapter(ABC):
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
-                loop = asyncio.get_running_loop()
-                deadline = loop.time() + interval
-                while not stop_event.is_set():
-                    remaining = deadline - loop.time()
-                    if remaining <= 0:
-                        break
-                    # Poll instead of wait_for(stop_event.wait()).  Cancelling
-                    # wait_for while it owns the inner Event.wait task can leave
-                    # shutdown paths stuck awaiting the typing task on Python
-                    # 3.11/pytest-asyncio; sleep cancellation is immediate.
-                    await asyncio.sleep(min(0.25, remaining))
-                if stop_event.is_set():
-                    return
+                try:
+                    await asyncio.wait_for(stop_event.wait(), timeout=interval)
+                except asyncio.TimeoutError:
+                    continue
+                return
        except asyncio.CancelledError:
            pass  # Normal cancellation when handler completes
        finally:
@@ -2167,28 +1864,6 @@ class BasePlatformAdapter(ABC):
        lowered = error.lower()
        return "timed out" in lowered or "readtimeout" in lowered or "writetimeout" in lowered

-    def _unwrap_ephemeral(self, response: Any) -> Tuple[Optional[str], int]:
-        """Unwrap a handler response into (text, ttl_seconds).
-
-        Accepts a plain string, ``None``, or an :class:`EphemeralReply`.
-        Returns ``(text, ttl)`` where ``ttl > 0`` means the caller should
-        schedule a deletion via :meth:`_schedule_ephemeral_delete` after
-        the send succeeds.  ``ttl`` is forced to 0 when the adapter
-        doesn't override :meth:`delete_message` so non-supporting
-        platforms silently degrade to normal sends.
-        """
-        if isinstance(response, EphemeralReply):
-            ttl = response.ttl_seconds
-            if ttl is None:
-                try:
-                    ttl = int(self._get_ephemeral_system_ttl_default())
-                except Exception:
-                    ttl = 0
-            if ttl and ttl > 0 and type(self).delete_message is BasePlatformAdapter.delete_message:
-                ttl = 0
-            return response.text, int(ttl or 0)
-        return response, 0
-
    async def _send_with_retry(
        self,
        chat_id: str,
@@ -2402,12 +2077,6 @@ class BasePlatformAdapter(ABC):
        ``release_guard=False`` keeps the adapter-level session guard in place
        so reset-like commands can finish atomically before follow-up messages
        are allowed to start a fresh background task.
-
-        Bounded by a 5s timeout so a wedged finally block in the cancelled
-        task (typing-task cleanup, on_processing_complete hook, etc.) can't
-        stall the calling dispatch coroutine — particularly under pytest-
-        asyncio where the event loop's cancellation-propagation semantics
-        differ subtly from a bare ``asyncio.run`` harness.
        """
        task = self._session_tasks.pop(session_key, None)
        if task is not None and not task.done():
@@ -2419,15 +2088,9 @@ class BasePlatformAdapter(ABC):
            self._expected_cancelled_tasks.add(task)
            task.cancel()
            try:
-                await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
+                await task
            except asyncio.CancelledError:
                pass
-            except asyncio.TimeoutError:
-                logger.warning(
-                    "[%s] Cancelled task for %s did not exit within 5s; "
-                    "unblocking dispatch and letting the task unwind in the background",
-                    self.name, session_key,
-                )
            except Exception:
                logger.debug(
                    "[%s] Session cancellation raised while unwinding %s",
@@ -2496,20 +2159,13 @@ class BasePlatformAdapter(ABC):
                release_guard=False,
                discard_pending=False,
            )
-            _text, _eph_ttl = self._unwrap_ephemeral(response)
-            if _text:
-                _r = await self._send_with_retry(
+            if response:
+                await self._send_with_retry(
                    chat_id=event.source.chat_id,
-                    content=_text,
+                    content=response,
                    reply_to=event.message_id,
                    metadata=thread_meta,
                )
-                if _eph_ttl > 0 and _r.success and _r.message_id:
-                    self._schedule_ephemeral_delete(
-                        chat_id=event.source.chat_id,
-                        message_id=_r.message_id,
-                        ttl_seconds=_eph_ttl,
-                    )
        except Exception:
            # On failure, restore the original guard if one still exists so
            # we don't leave the session in a half-reset state.
@@ -2532,8 +2188,6 @@ class BasePlatformAdapter(ABC):
        """
        if not self._message_handler:
            return
-
-        coerce_plaintext_gateway_command(event)
        
        session_key = build_session_key(
            event.source,
@@ -2589,20 +2243,13 @@ class BasePlatformAdapter(ABC):
                try:
                    _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
                    response = await self._message_handler(event)
-                    _text, _eph_ttl = self._unwrap_ephemeral(response)
-                    if _text:
-                        _r = await self._send_with_retry(
+                    if response:
+                        await self._send_with_retry(
                            chat_id=event.source.chat_id,
-                            content=_text,
+                            content=response,
                            reply_to=event.message_id,
                            metadata=_thread_meta,
                        )
-                        if _eph_ttl > 0 and _r.success and _r.message_id:
-                            self._schedule_ephemeral_delete(
-                                chat_id=event.source.chat_id,
-                                message_id=_r.message_id,
-                                ttl_seconds=_eph_ttl,
-                            )
                except Exception as e:
                    logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True)
                return
@@ -2676,6 +2323,7 @@ class BasePlatformAdapter(ABC):
        # Fall back to a new Event only if the entry was removed externally.
        interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
        self._active_sessions[session_key] = interrupt_event
+        callback_generation = getattr(interrupt_event, "_hermes_run_generation", None)
        
        # Start continuous typing indicator (refreshes every 2 seconds)
        _thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
@@ -2692,32 +2340,13 @@ class BasePlatformAdapter(ABC):
                **_keep_typing_kwargs,
            )
        )
-
-        async def _stop_typing_task() -> None:
-            typing_task.cancel()
-            try:
-                await asyncio.wait_for(asyncio.shield(typing_task), timeout=0.5)
-            except (asyncio.CancelledError, asyncio.TimeoutError):
-                # Cancellation cleanup must not block adapter shutdown.  The
-                # typing task is already cancelled; if the parent task is also
-                # cancelling, let this message-processing task unwind now.
-                pass
        
        try:
            await self._run_processing_hook("on_processing_start", event)

            # Call the handler (this can take a while with tool calls)
            response = await self._message_handler(event)
-
-            # Slash-command handlers may return an EphemeralReply sentinel to
-            # request that their reply message auto-delete after a TTL (used
-            # for system notices like "✨ New session started!" that the user
-            # doesn't need to keep in the thread).  Unwrap here so all the
-            # downstream extract_media / text-processing logic sees a plain
-            # string, and remember the TTL + platform capability so the
-            # post-send block can schedule the deletion.
-            response, _ephemeral_ttl = self._unwrap_ephemeral(response)
-
+            
            # Send response if any.  A None/empty response is normal when
            # streaming already delivered the text (already_sent=True) or
            # when the message was queued behind an active agent.  Log at
@@ -2806,78 +2435,53 @@ class BasePlatformAdapter(ABC):
                    )
                    _record_delivery(result)

-                    # Schedule auto-deletion of system-notice replies.
-                    # Detached so the handler returns immediately; errors
-                    # (permission denied, message too old) are swallowed.
-                    if (
-                        _ephemeral_ttl
-                        and _ephemeral_ttl > 0
-                        and result.success
-                        and result.message_id
-                    ):
-                        self._schedule_ephemeral_delete(
-                            chat_id=event.source.chat_id,
-                            message_id=result.message_id,
-                            ttl_seconds=_ephemeral_ttl,
-                        )
-
                # Human-like pacing delay between text and media
                human_delay = self._get_human_delay()

                # Send extracted images as native attachments
                if images:
                    logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
+                for image_url, alt_text in images:
+                    if human_delay > 0:
+                        await asyncio.sleep(human_delay)
                    try:
-                        await self.send_multiple_images(
-                            chat_id=event.source.chat_id,
-                            images=images,
-                            metadata=_thread_metadata,
-                            human_delay=human_delay,
+                        logger.info(
+                            "[%s] Sending image: %s (alt=%s)",
+                            self.name,
+                            safe_url_for_log(image_url),
+                            alt_text[:30] if alt_text else "",
                        )
-                    except Exception as batch_err:
-                        logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
-
+                        # Route animated GIFs through send_animation for proper playback
+                        if self._is_animation_url(image_url):
+                            img_result = await self.send_animation(
+                                chat_id=event.source.chat_id,
+                                animation_url=image_url,
+                                caption=alt_text if alt_text else None,
+                                metadata=_thread_metadata,
+                            )
+                        else:
+                            img_result = await self.send_image(
+                                chat_id=event.source.chat_id,
+                                image_url=image_url,
+                                caption=alt_text if alt_text else None,
+                                metadata=_thread_metadata,
+                            )
+                        if not img_result.success:
+                            logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
+                    except Exception as img_err:
+                        logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)

                # Send extracted media files — route by file type
+                _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
                _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
                _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

-                # Partition images out of media_files + local_files so they
-                # can be sent as a single batch (Signal RPC)
-                from urllib.parse import quote as _quote
-                _image_paths: list = []
-                _non_image_media: list = []
                for media_path, is_voice in media_files:
-                    _ext = Path(media_path).suffix.lower()
-                    if _ext in _IMAGE_EXTS and not is_voice:
-                        _image_paths.append(media_path)
-                    else:
-                        _non_image_media.append((media_path, is_voice))
-                _non_image_local: list = []
-                for file_path in local_files:
-                    if Path(file_path).suffix.lower() in _IMAGE_EXTS:
-                        _image_paths.append(file_path)
-                    else:
-                        _non_image_local.append(file_path)
-
-                if _image_paths:
-                    try:
-                        _batch = [(f"file://{_quote(p)}", "") for p in _image_paths]
-                        await self.send_multiple_images(
-                            chat_id=event.source.chat_id,
-                            images=_batch,
-                            metadata=_thread_metadata,
-                            human_delay=human_delay,
-                        )
-                    except Exception as batch_err:
-                        logger.warning("[%s] Error batching images: %s", self.name, batch_err, exc_info=True)
-
-                for media_path, is_voice in _non_image_media:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
                        ext = Path(media_path).suffix.lower()
-                        if should_send_media_as_audio(self.platform, ext, is_voice=is_voice):
+                        if ext in _AUDIO_EXTS:
                            media_result = await self.send_voice(
                                chat_id=event.source.chat_id,
                                audio_path=media_path,
@@ -2889,6 +2493,12 @@ class BasePlatformAdapter(ABC):
                                video_path=media_path,
                                metadata=_thread_metadata,
                            )
+                        elif ext in _IMAGE_EXTS:
+                            media_result = await self.send_image_file(
+                                chat_id=event.source.chat_id,
+                                image_path=media_path,
+                                metadata=_thread_metadata,
+                            )
                        else:
                            media_result = await self.send_document(
                                chat_id=event.source.chat_id,
@@ -2901,13 +2511,19 @@ class BasePlatformAdapter(ABC):
                    except Exception as media_err:
                        logger.warning("[%s] Error sending media: %s", self.name, media_err)

-                # Send auto-detected local non-image files as native attachments
-                for file_path in _non_image_local:
+                # Send auto-detected local files as native attachments
+                for file_path in local_files:
                    if human_delay > 0:
                        await asyncio.sleep(human_delay)
                    try:
                        ext = Path(file_path).suffix.lower()
-                        if ext in _VIDEO_EXTS:
+                        if ext in _IMAGE_EXTS:
+                            await self.send_image_file(
+                                chat_id=event.source.chat_id,
+                                image_path=file_path,
+                                metadata=_thread_metadata,
+                            )
+                        elif ext in _VIDEO_EXTS:
                            await self.send_video(
                                chat_id=event.source.chat_id,
                                video_path=file_path,
@@ -2946,28 +2562,14 @@ class BasePlatformAdapter(ABC):
                _active = self._active_sessions.get(session_key)
                if _active is not None:
                    _active.clear()
-                await _stop_typing_task()
-                # Spawn a fresh task for the pending message instead of
-                # recursing.  Issue #17758: `await
-                # self._process_message_background(...)` here grew the
-                # call stack one frame per chained follow-up, and under
-                # sustained pending-queue activity the C stack would
-                # exhaust at ~2000 frames and SIGSEGV the process.
-                # Mirror the late-arrival drain pattern below: hand off
-                # to a new task and return so this frame can unwind.
-                drain_task = asyncio.create_task(
-                    self._process_message_background(pending_event, session_key)
-                )
-                # Hand ownership of the session to the drain task so
-                # stale-lock detection keeps working while it runs.
-                self._session_tasks[session_key] = drain_task
+                typing_task.cancel()
                try:
-                    self._background_tasks.add(drain_task)
-                    drain_task.add_done_callback(self._background_tasks.discard)
-                except TypeError:
-                    # Tests stub create_task() with non-hashable sentinels; tolerate.
+                    await typing_task
+                except asyncio.CancelledError:
                    pass
-                return  # Drain task owns the session now.
+                # Process pending message in new background task
+                await self._process_message_background(pending_event, session_key)
+                return  # Already cleaned up
                
        except asyncio.CancelledError:
            current_task = asyncio.current_task()
@@ -2998,20 +2600,7 @@ class BasePlatformAdapter(ABC):
        finally:
            # Fire any one-shot post-delivery callback registered for this
            # session (e.g. deferred background-review notifications).
-            #
-            # Snapshot the callback generation HERE (after the agent has run),
-            # not at the top of this task.  _hermes_run_generation is set on
-            # the interrupt event by GatewayRunner._bind_adapter_run_generation
-            # during _handle_message_with_agent — which happens DURING the
-            # self._message_handler(event) await above.  Snapshotting earlier
-            # always captured None, which bypassed the generation-ownership
-            # check in pop_post_delivery_callback and let stale runs fire a
-            # fresher run's callbacks.
-            _callback_generation = getattr(
-                interrupt_event,
-                "_hermes_run_generation",
-                None,
-            )
+            _callback_generation = callback_generation
            if hasattr(self, "pop_post_delivery_callback"):
                _post_cb = self.pop_post_delivery_callback(
                    session_key,
@@ -3025,7 +2614,11 @@ class BasePlatformAdapter(ABC):
                except Exception:
                    pass
            # Stop typing indicator
-            await _stop_typing_task()
+            typing_task.cancel()
+            try:
+                await typing_task
+            except asyncio.CancelledError:
+                pass
            # Also cancel any platform-level persistent typing tasks (e.g. Discord)
            # that may have been recreated by _keep_typing after the last stop_typing()
            try:
@@ -3042,41 +2635,25 @@ class BasePlatformAdapter(ABC):
            # dropped (user never gets a reply).
            late_pending = self._pending_messages.pop(session_key, None)
            if late_pending is not None:
-                current_task = asyncio.current_task()
-                existing_task = self._session_tasks.get(session_key)
-                if (
-                    existing_task is not None
-                    and existing_task is not current_task
-                ):
-                    # The in-band drain (or an earlier late-arrival drain)
-                    # already spawned a follow-up task that owns this
-                    # session.  Re-queue the late-arrival event so that
-                    # task picks it up — avoids spawning two concurrent
-                    # _process_message_background tasks for the same key
-                    # (#17758 follow-up: prevents the create_task path
-                    # from racing with itself across the in-band/finally
-                    # boundary).
-                    self._pending_messages[session_key] = late_pending
-                else:
-                    logger.debug(
-                        "[%s] Late-arrival pending message during cleanup — spawning drain task",
-                        self.name,
-                    )
-                    _active = self._active_sessions.get(session_key)
-                    if _active is not None:
-                        _active.clear()
-                    drain_task = asyncio.create_task(
-                        self._process_message_background(late_pending, session_key)
-                    )
-                    # Hand ownership of the session to the drain task so stale-lock
-                    # detection keeps working while it runs.
-                    self._session_tasks[session_key] = drain_task
-                    try:
-                        self._background_tasks.add(drain_task)
-                        drain_task.add_done_callback(self._background_tasks.discard)
-                    except TypeError:
-                        # Tests stub create_task() with non-hashable sentinels; tolerate.
-                        pass
+                logger.debug(
+                    "[%s] Late-arrival pending message during cleanup — spawning drain task",
+                    self.name,
+                )
+                _active = self._active_sessions.get(session_key)
+                if _active is not None:
+                    _active.clear()
+                drain_task = asyncio.create_task(
+                    self._process_message_background(late_pending, session_key)
+                )
+                # Hand ownership of the session to the drain task so stale-lock
+                # detection keeps working while it runs.
+                self._session_tasks[session_key] = drain_task
+                try:
+                    self._background_tasks.add(drain_task)
+                    drain_task.add_done_callback(self._background_tasks.discard)
+                except TypeError:
+                    # Tests stub create_task() with non-hashable sentinels; tolerate.
+                    pass
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
@@ -3084,34 +2661,16 @@ class BasePlatformAdapter(ABC):
                # reset-like command that already swapped in its own
                # command_guard (and cancelled us) can't be accidentally
                # cleared by our unwind.  The command owns the session now.
-                #
-                # The owner-check also covers the in-band drain handoff
-                # above: when we spawned a drain_task and transferred
-                # ownership via ``_session_tasks[session_key] = drain_task``,
-                # ``_session_tasks.get(session_key) is current_task`` is
-                # False, so we leave _active_sessions populated.  Without
-                # this guard, the drain task picks up the same
-                # interrupt_event in its own _process_message_background
-                # entry, _release_session_guard's guard-match succeeds,
-                # and we'd delete the entry while the drain task is still
-                # running — letting a concurrent inbound message pass
-                # the Level-1 guard and spawn a second handler for the
-                # same session.
                current_task = asyncio.current_task()
                if current_task is not None and self._session_tasks.get(session_key) is current_task:
                    del self._session_tasks[session_key]
-                    self._release_session_guard(session_key, guard=interrupt_event)
+                self._release_session_guard(session_key, guard=interrupt_event)
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.

        Used during gateway shutdown/replacement so active sessions from the old
        process do not keep running after adapters are being torn down.
-
-        Each cancelled task is awaited with a 5s bound so a wedged finally
-        (typing-task cleanup, on_processing_complete hook) can't stall the
-        whole shutdown path.  Stragglers are released from our tracking and
-        allowed to finish unwinding on their own.
        """
        # Loop until no new tasks appear.  Without this, a message
        # arriving during the `await asyncio.gather` below would spawn
@@ -3130,21 +2689,7 @@ class BasePlatformAdapter(ABC):
            for task in tasks:
                self._expected_cancelled_tasks.add(task)
                task.cancel()
-            try:
-                await asyncio.wait_for(
-                    asyncio.gather(
-                        *(asyncio.shield(t) for t in tasks),
-                        return_exceptions=True,
-                    ),
-                    timeout=5.0,
-                )
-            except asyncio.TimeoutError:
-                logger.warning(
-                    "[%s] %d background task(s) did not exit within 5s; "
-                    "releasing tracking and letting them unwind in the background",
-                    self.name, len([t for t in tasks if not t.done()]),
-                )
-                break
+            await asyncio.gather(*tasks, return_exceptions=True)
            # Loop: late-arrival tasks spawned during the gather above
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
@@ -18,7 +18,7 @@ import tempfile
 import threading
 import time
 from collections import defaultdict
-from typing import Callable, Dict, List, Optional, Any, Tuple
+from typing import Callable, Dict, Optional, Any

 logger = logging.getLogger(__name__)

@@ -305,7 +305,7 @@ class VoiceReceiver:
        encrypted = bytes(payload_with_nonce[:-4])

        try:
-            import nacl.secret  # noqa: E402 — delayed import, only in voice path
+            import nacl.secret  # noqa: delayed import – only in voice path
            box = nacl.secret.Aead(self._secret_key)
            decrypted = box.decrypt(encrypted, header, bytes(nonce))
        except Exception as e:
@@ -813,14 +813,7 @@ class DiscordAdapter(BasePlatformAdapter):
                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
                return

-            # Discord's per-app command-management bucket is ~5 writes / 20 s,
-            # so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
-            # desired = 107 writes) takes several minutes of forced waits.
-            # A flat 30 s budget blew up reliably under bucket pressure and
-            # left slash commands broken for ~60 min until the bucket fully
-            # recovered. Use a wide ceiling; the cap still guards against a
-            # true hang. (#16713)
-            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
            logger.info(
                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
                self.name,
@@ -832,11 +825,7 @@ class DiscordAdapter(BasePlatformAdapter):
                summary["deleted"],
            )
        except asyncio.TimeoutError:
-            logger.warning(
-                "[%s] Slash command sync timed out — Discord rate-limit bucket "
-                "may be saturated; will retry on next reconnect",
-                self.name,
-            )
+            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
        except asyncio.CancelledError:
            raise
        except Exception as e:  # pragma: no cover - defensive logging
@@ -1343,134 +1332,6 @@ class DiscordAdapter(BasePlatformAdapter):
            msg = await channel.send(content=caption if caption else None, file=file)
        return SendResult(success=True, message_id=str(msg.id))

-    async def send_multiple_images(
-        self,
-        chat_id: str,
-        images: List[Tuple[str, str]],
-        metadata: Optional[Dict[str, Any]] = None,
-        human_delay: float = 0.0,
-    ) -> None:
-        """Send a batch of images as a single Discord message with multiple attachments.
-
-        Discord permits up to 10 file attachments per message. Batches are
-        chunked accordingly. URL images are downloaded into memory and
-        uploaded as inline attachments (same pattern as ``send_image`` so
-        they render inline, not as bare links). Local files are opened
-        directly. On per-chunk failure the remaining images in that chunk
-        fall back to the base per-image loop.
-        """
-        if not self._client:
-            return
-        if not images:
-            return
-
-        try:
-            import discord as _discord_mod
-            import io as _io
-            from urllib.parse import unquote as _unquote
-        except Exception:  # pragma: no cover
-            await super().send_multiple_images(chat_id, images, metadata, human_delay)
-            return
-
-        try:
-            channel = self._client.get_channel(int(chat_id))
-            if not channel:
-                channel = await self._client.fetch_channel(int(chat_id))
-            if not channel:
-                logger.warning("[%s] Channel %s not found for multi-image send", self.name, chat_id)
-                return
-        except Exception as e:
-            logger.warning("[%s] Failed to resolve channel for multi-image send: %s", self.name, e)
-            await super().send_multiple_images(chat_id, images, metadata, human_delay)
-            return
-
-        CHUNK = 10
-        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
-
-        for chunk_idx, chunk in enumerate(chunks):
-            if human_delay > 0 and chunk_idx > 0:
-                await asyncio.sleep(human_delay)
-
-            files: List[Any] = []
-            captions: List[str] = []
-            aiohttp_session = None
-            try:
-                for image_url, alt_text in chunk:
-                    if alt_text:
-                        captions.append(alt_text)
-                    if image_url.startswith("file://"):
-                        local_path = _unquote(image_url[7:])
-                        if not os.path.exists(local_path):
-                            logger.warning("[%s] Skipping missing image: %s", self.name, local_path)
-                            continue
-                        files.append(_discord_mod.File(local_path, filename=os.path.basename(local_path)))
-                    else:
-                        if not is_safe_url(image_url):
-                            logger.warning("[%s] Blocked unsafe image URL in batch", self.name)
-                            continue
-                        # Download to BytesIO so it renders inline
-                        try:
-                            import aiohttp as _aiohttp
-                            from gateway.platforms.base import resolve_proxy_url, proxy_kwargs_for_aiohttp
-                            _proxy = resolve_proxy_url(platform_env_var="DISCORD_PROXY")
-                            _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(_proxy)
-                            if aiohttp_session is None:
-                                aiohttp_session = _aiohttp.ClientSession(**_sess_kw)
-                            async with aiohttp_session.get(
-                                image_url, timeout=_aiohttp.ClientTimeout(total=30), **_req_kw,
-                            ) as resp:
-                                if resp.status != 200:
-                                    logger.warning(
-                                        "[%s] Failed to download image (HTTP %d) in batch: %s",
-                                        self.name, resp.status, image_url[:80],
-                                    )
-                                    continue
-                                data = await resp.read()
-                                ct = resp.headers.get("content-type", "image/png")
-                                ext = "png"
-                                if "jpeg" in ct or "jpg" in ct:
-                                    ext = "jpg"
-                                elif "gif" in ct:
-                                    ext = "gif"
-                                elif "webp" in ct:
-                                    ext = "webp"
-                                files.append(_discord_mod.File(_io.BytesIO(data), filename=f"image_{len(files)}.{ext}"))
-                        except Exception as dl_err:
-                            logger.warning("[%s] Download failed for %s: %s", self.name, image_url[:80], dl_err)
-                            continue
-
-                if not files:
-                    continue
-
-                # Use the first caption if any (Discord only has one message body for the group)
-                content = captions[0] if captions else None
-                logger.info(
-                    "[%s] Sending %d image(s) as single Discord message (chunk %d/%d)",
-                    self.name, len(files), chunk_idx + 1, len(chunks),
-                )
-
-                if self._is_forum_parent(channel):
-                    await self._forum_post_file(
-                        channel,
-                        content=(content or "").strip(),
-                        files=files,
-                    )
-                else:
-                    await channel.send(content=content, files=files)
-            except Exception as e:
-                logger.warning(
-                    "[%s] Multi-image Discord send failed (chunk %d/%d), falling back to per-image: %s",
-                    self.name, chunk_idx + 1, len(chunks), e,
-                    exc_info=True,
-                )
-                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
-            finally:
-                if aiohttp_session is not None:
-                    try:
-                        await aiohttp_session.close()
-                    except Exception:
-                        pass
-
    async def play_tts(
        self,
        chat_id: str,
@@ -2398,10 +2259,6 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_reload_mcp(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/reload-mcp")

-        @tree.command(name="reload-skills", description="Re-scan ~/.hermes/skills/ for new or removed skills")
-        async def slash_reload_skills(interaction: discord.Interaction):
-            await self._run_simple_slash(interaction, "/reload-skills")
-
        @tree.command(name="voice", description="Toggle voice reply mode")
        @discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
        @discord.app_commands.choices(mode=[
@@ -2851,15 +2708,8 @@ class DiscordAdapter(BasePlatformAdapter):
            raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
        if isinstance(raw, list):
            return {str(part).strip() for part in raw if str(part).strip()}
-        # Coerce non-list scalars (str/int/float) to str before splitting.
-        # YAML parses a bare numeric value such as
-        # `free_response_channels: 1491973769726791812` as int, which was
-        # previously falling through the isinstance(str) branch and silently
-        # returning an empty set.  str() here accepts whatever scalar the YAML
-        # loader hands us without changing existing string/CSV semantics.
-        s = str(raw).strip() if raw is not None else ""
-        if s:
-            return {part.strip() for part in s.split(",") if part.strip()}
+        if isinstance(raw, str) and raw.strip():
+            return {part.strip() for part in raw.split(",") if part.strip()}
        return set()

    def _thread_parent_channel(self, channel: Any) -> Any:
@@ -3045,47 +2895,9 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            return SendResult(success=False, error=str(e))

-    async def send_slash_confirm(
-        self, chat_id: str, title: str, message: str, session_key: str,
-        confirm_id: str, metadata: Optional[dict] = None,
-    ) -> SendResult:
-        """Send a three-button slash-command confirmation prompt."""
-        if not self._client or not DISCORD_AVAILABLE:
-            return SendResult(success=False, error="Not connected")
-
-        try:
-            target_id = chat_id
-            if metadata and metadata.get("thread_id"):
-                target_id = metadata["thread_id"]
-
-            channel = self._client.get_channel(int(target_id))
-            if not channel:
-                channel = await self._client.fetch_channel(int(target_id))
-
-            # Embed description limit is 4096; message usually fits easily.
-            max_desc = 4088
-            body = message if len(message) <= max_desc else message[: max_desc - 3] + "..."
-            embed = discord.Embed(
-                title=title or "Confirm",
-                description=body,
-                color=discord.Color.orange(),
-            )
-
-            view = SlashConfirmView(
-                session_key=session_key,
-                confirm_id=confirm_id,
-                allowed_user_ids=self._allowed_user_ids,
-            )
-
-            msg = await channel.send(embed=embed, view=view)
-            return SendResult(success=True, message_id=str(msg.id))
-        except Exception as e:
-            return SendResult(success=False, error=str(e))
-
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
-        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an interactive button-based update prompt (Yes / No).

@@ -3095,10 +2907,9 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client or not DISCORD_AVAILABLE:
            return SendResult(success=False, error="Not connected")
        try:
-            target_id = metadata.get("thread_id") if metadata and metadata.get("thread_id") else chat_id
-            channel = self._client.get_channel(int(target_id))
+            channel = self._client.get_channel(int(chat_id))
            if not channel:
-                channel = await self._client.fetch_channel(int(target_id))
+                channel = await self._client.fetch_channel(int(chat_id))

            default_hint = f" (default: {default})" if default else ""
            embed = discord.Embed(
@@ -3817,103 +3628,6 @@ if DISCORD_AVAILABLE:
            for child in self.children:
                child.disabled = True

-    class SlashConfirmView(discord.ui.View):
-        """Three-button view for generic slash-command confirmations.
-
-        Used by ``/reload-mcp`` and any future slash command routed through
-        ``GatewayRunner._request_slash_confirm``.  Buttons map to the
-        gateway's three choices:
-
-          * "Approve Once"   → ``choice="once"``
-          * "Always Approve" → ``choice="always"``
-          * "Cancel"         → ``choice="cancel"``
-
-        Clicking calls the module-level
-        ``tools.slash_confirm.resolve(session_key, confirm_id, choice)``
-        which runs the handler the runner stored for this ``session_key``.
-        Only users in the adapter's allowlist can click.  Times out after
-        5 minutes (matches the gateway primitive's timeout).
-        """
-
-        def __init__(self, session_key: str, confirm_id: str, allowed_user_ids: set):
-            super().__init__(timeout=300)
-            self.session_key = session_key
-            self.confirm_id = confirm_id
-            self.allowed_user_ids = allowed_user_ids
-            self.resolved = False
-
-        def _check_auth(self, interaction: discord.Interaction) -> bool:
-            if not self.allowed_user_ids:
-                return True
-            return str(interaction.user.id) in self.allowed_user_ids
-
-        async def _resolve(
-            self, interaction: discord.Interaction, choice: str,
-            color: discord.Color, label: str,
-        ):
-            if self.resolved:
-                await interaction.response.send_message(
-                    "This prompt has already been resolved~", ephemeral=True,
-                )
-                return
-            if not self._check_auth(interaction):
-                await interaction.response.send_message(
-                    "You're not authorized to answer this prompt~", ephemeral=True,
-                )
-                return
-
-            self.resolved = True
-
-            embed = interaction.message.embeds[0] if interaction.message.embeds else None
-            if embed:
-                embed.color = color
-                embed.set_footer(text=f"{label} by {interaction.user.display_name}")
-
-            for child in self.children:
-                child.disabled = True
-
-            await interaction.response.edit_message(embed=embed, view=self)
-
-            # Resolve via the module-level primitive.  If the handler
-            # returns a follow-up message, post it in the same channel.
-            try:
-                from tools import slash_confirm as _slash_confirm_mod
-                result_text = await _slash_confirm_mod.resolve(
-                    self.session_key, self.confirm_id, choice,
-                )
-                if result_text:
-                    await interaction.followup.send(result_text)
-                logger.info(
-                    "Discord button resolved slash-confirm for session %s "
-                    "(choice=%s, user=%s)",
-                    self.session_key, choice, interaction.user.display_name,
-                )
-            except Exception as exc:
-                logger.error("Discord slash-confirm resolve failed: %s", exc, exc_info=True)
-
-        @discord.ui.button(label="Approve Once", style=discord.ButtonStyle.green)
-        async def approve_once(
-            self, interaction: discord.Interaction, button: discord.ui.Button,
-        ):
-            await self._resolve(interaction, "once", discord.Color.green(), "Approved once")
-
-        @discord.ui.button(label="Always Approve", style=discord.ButtonStyle.blurple)
-        async def approve_always(
-            self, interaction: discord.Interaction, button: discord.ui.Button,
-        ):
-            await self._resolve(interaction, "always", discord.Color.purple(), "Always approved")
-
-        @discord.ui.button(label="Cancel", style=discord.ButtonStyle.red)
-        async def cancel(
-            self, interaction: discord.Interaction, button: discord.ui.Button,
-        ):
-            await self._resolve(interaction, "cancel", discord.Color.greyple(), "Cancelled")
-
-        async def on_timeout(self):
-            self.resolved = True
-            for child in self.children:
-                child.disabled = True
-
    class UpdatePromptView(discord.ui.View):
        """Interactive Yes/No buttons for ``hermes update`` prompts.

@@ -31,7 +31,7 @@ from email.mime.base import MIMEBase
 from email.utils import formatdate
 from email import encoders
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional

 from gateway.platforms.base import (
    BasePlatformAdapter,
@@ -540,113 +540,6 @@ class EmailAdapter(BasePlatformAdapter):
        text += f"\n\nImage: {image_url}"
        return await self.send(chat_id, text.strip(), reply_to)

-    async def send_multiple_images(
-        self,
-        chat_id: str,
-        images: List[Tuple[str, str]],
-        metadata: Optional[Dict[str, Any]] = None,
-        human_delay: float = 0.0,
-    ) -> None:
-        """Send a batch of images as a single email with multiple MIME attachments.
-
-        Local files are attached directly. URL images have their URL
-        appended to the body (email adapter does not download remote
-        images). No hard cap — email clients handle dozens of
-        attachments fine, subject to SMTP message size limits.
-        """
-        if not images:
-            return
-
-        from urllib.parse import unquote as _unquote
-
-        body_parts: List[str] = []
-        local_paths: List[str] = []
-        for image_url, alt_text in images:
-            if alt_text:
-                body_parts.append(alt_text)
-            if image_url.startswith("file://"):
-                local_path = _unquote(image_url[7:])
-                if Path(local_path).exists():
-                    local_paths.append(local_path)
-                else:
-                    logger.warning("[Email] Skipping missing image: %s", local_path)
-            else:
-                # Remote URLs just get linked in the body (parity with send_image)
-                body_parts.append(f"Image: {image_url}")
-
-        if not local_paths and not body_parts:
-            return
-
-        body = "\n\n".join(body_parts)
-
-        try:
-            loop = asyncio.get_running_loop()
-            await loop.run_in_executor(
-                None,
-                self._send_email_with_attachments,
-                chat_id,
-                body,
-                local_paths,
-            )
-        except Exception as e:
-            logger.error("[Email] Multi-image send failed, falling back: %s", e, exc_info=True)
-            await super().send_multiple_images(chat_id, images, metadata, human_delay)
-
-    def _send_email_with_attachments(
-        self,
-        to_addr: str,
-        body: str,
-        file_paths: List[str],
-    ) -> str:
-        """Send an email with multiple file attachments via SMTP."""
-        msg = MIMEMultipart()
-        msg["From"] = self._address
-        msg["To"] = to_addr
-
-        ctx = self._thread_context.get(to_addr, {})
-        subject = ctx.get("subject", "Hermes Agent")
-        if not subject.startswith("Re:"):
-            subject = f"Re: {subject}"
-        msg["Subject"] = subject
-
-        original_msg_id = ctx.get("message_id")
-        if original_msg_id:
-            msg["In-Reply-To"] = original_msg_id
-            msg["References"] = original_msg_id
-
-        msg["Date"] = formatdate(localtime=True)
-        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
-        msg["Message-ID"] = msg_id
-
-        if body:
-            msg.attach(MIMEText(body, "plain", "utf-8"))
-
-        for file_path in file_paths:
-            p = Path(file_path)
-            try:
-                with open(p, "rb") as f:
-                    part = MIMEBase("application", "octet-stream")
-                    part.set_payload(f.read())
-                    encoders.encode_base64(part)
-                    part.add_header("Content-Disposition", f"attachment; filename={p.name}")
-                    msg.attach(part)
-            except Exception as e:
-                logger.warning("[Email] Failed to attach %s: %s", file_path, e)
-
-        smtp = smtplib.SMTP(self._smtp_host, self._smtp_port, timeout=30)
-        try:
-            smtp.starttls(context=ssl.create_default_context())
-            smtp.login(self._address, self._password)
-            smtp.send_message(msg)
-        finally:
-            try:
-                smtp.quit()
-            except Exception:
-                smtp.close()
-
-        logger.info("[Email] Sent multi-attachment email to %s (%d files)", to_addr, len(file_paths))
-        return msg_id
-
    async def send_document(
        self,
        chat_id: str,
@@ -64,7 +64,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Literal, Optional, Sequence
+from typing import Any, Dict, List, Optional, Sequence
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -141,7 +141,6 @@ from gateway.platforms.base import (
 )
 from gateway.status import acquire_scoped_lock, release_scoped_lock
 from hermes_constants import get_hermes_home
-from utils import atomic_json_write

 logger = logging.getLogger(__name__)

@@ -388,8 +387,6 @@ class FeishuAdapterSettings:
    admins: frozenset[str] = frozenset()
    default_group_policy: str = ""
    group_rules: Dict[str, FeishuGroupRule] = field(default_factory=dict)
-    allow_bots: str = "none"  # "none" | "mentions" | "all"
-    require_mention: bool = True


@dataclass
@@ -399,7 +396,6 @@ class FeishuGroupRule:
    policy: str  # "open" | "allowlist" | "blacklist" | "admin_only" | "disabled"
    allowlist: set[str] = field(default_factory=set)
    blacklist: set[str] = field(default_factory=set)
-    require_mention: Optional[bool] = None  # None = inherit global


@dataclass
@@ -409,40 +405,6 @@ class FeishuBatchState:
    counts: Dict[str, int] = field(default_factory=dict)


-# ---------------------------------------------------------------------------
-# Admission: policy types
-# ---------------------------------------------------------------------------
-
-
-RejectReason = Literal[
-    "self_echo",
-    "self_ids_unknown",
-    "bots_disabled",
-    "bot_not_mentioned",
-    "group_policy_rejected",
-]
-
-
-def _is_bot_sender(sender: Any) -> bool:
-    # receive_v1 docs say {user, bot}; accept "app" defensively.
-    return getattr(sender, "sender_type", "") in ("bot", "app")
-
-
-def _sender_identity(sender: Any) -> frozenset:
-    # Take any non-empty id variant — tenant sender_id_type decides which are populated.
-    sid = getattr(sender, "sender_id", None)
-    if sid is None:
-        return frozenset()
-    return frozenset(
-        v for v in (
-            getattr(sid, "open_id", None),
-            getattr(sid, "user_id", None),
-            getattr(sid, "union_id", None),
-        )
-        if v
-    )
-
-
 # ---------------------------------------------------------------------------
 # Markdown rendering helpers
 # ---------------------------------------------------------------------------
@@ -1415,16 +1377,10 @@ class FeishuAdapter(BasePlatformAdapter):
            for chat_id, rule_cfg in raw_group_rules.items():
                if not isinstance(rule_cfg, dict):
                    continue
-                # Only override when the key is explicitly set — missing vs false
-                # must not collapse.
-                per_chat_require_mention: Optional[bool] = None
-                if "require_mention" in rule_cfg:
-                    per_chat_require_mention = _to_boolean(rule_cfg.get("require_mention"))
                group_rules[str(chat_id)] = FeishuGroupRule(
                    policy=str(rule_cfg.get("policy", "open")).strip().lower(),
                    allowlist=set(str(u).strip() for u in rule_cfg.get("allowlist", []) if str(u).strip()),
                    blacklist=set(str(u).strip() for u in rule_cfg.get("blacklist", []) if str(u).strip()),
-                    require_mention=per_chat_require_mention,
                )

        # Bot-level admins
@@ -1434,16 +1390,6 @@ class FeishuAdapter(BasePlatformAdapter):
        # Default group policy (for groups not in group_rules)
        default_group_policy = str(extra.get("default_group_policy", "")).strip().lower()

-        # Env-only so adapter and gateway auth bypass share one source; yaml
-        # feishu.allow_bots is bridged to this env var at config load.
-        allow_bots = os.getenv("FEISHU_ALLOW_BOTS", "none").strip().lower()
-        if allow_bots not in ("none", "mentions", "all"):
-            logger.warning(
-                "[Feishu] Unknown allow_bots=%r, falling back to 'none'. Valid: none, mentions, all.",
-                allow_bots,
-            )
-            allow_bots = "none"
-
        return FeishuAdapterSettings(
            app_id=str(extra.get("app_id") or os.getenv("FEISHU_APP_ID", "")).strip(),
            app_secret=str(extra.get("app_secret") or os.getenv("FEISHU_APP_SECRET", "")).strip(),
@@ -1500,10 +1446,6 @@ class FeishuAdapter(BasePlatformAdapter):
            admins=admins,
            default_group_policy=default_group_policy,
            group_rules=group_rules,
-            allow_bots=allow_bots,
-            require_mention=_to_boolean(
-                extra.get("require_mention", os.getenv("FEISHU_REQUIRE_MENTION", "true"))
-            ),
        )

    def _apply_settings(self, settings: FeishuAdapterSettings) -> None:
@@ -1534,8 +1476,6 @@ class FeishuAdapter(BasePlatformAdapter):
        self._ws_reconnect_interval = settings.ws_reconnect_interval
        self._ws_ping_interval = settings.ws_ping_interval
        self._ws_ping_timeout = settings.ws_ping_timeout
-        self._allow_bots = settings.allow_bots
-        self._require_mention = settings.require_mention

    def _build_event_handler(self) -> Any:
        if EventDispatcherHandler is None:
@@ -2249,28 +2189,30 @@ class FeishuAdapter(BasePlatformAdapter):
        event = getattr(data, "event", None)
        message = getattr(event, "message", None)
        sender = getattr(event, "sender", None)
-        if not message or not sender or not getattr(sender, "sender_id", None):
-            logger.debug("[Feishu] Dropping malformed inbound event: missing message/sender")
+        sender_id = getattr(sender, "sender_id", None)
+        if not message or not sender_id:
+            logger.debug("[Feishu] Dropping malformed inbound event: missing message or sender_id")
            return

        message_id = getattr(message, "message_id", None)
        if not message_id or self._is_duplicate(message_id):
            logger.debug("[Feishu] Dropping duplicate/missing message_id: %s", message_id)
            return
-
-        reason = self._admit(sender, message)
-        if reason is not None:
-            logger.debug("[Feishu] dropping inbound event: %s", reason)
+        if self._is_self_sent_bot_message(event):
+            logger.debug("[Feishu] Dropping self-sent bot event: %s", message_id)
            return

        chat_type = getattr(message, "chat_type", "p2p")
+        chat_id = getattr(message, "chat_id", "") or ""
+        if chat_type != "p2p" and not self._should_accept_group_message(message, sender_id, chat_id):
+            logger.debug("[Feishu] Dropping group message that failed mention/policy gate: %s", message_id)
+            return
        await self._process_inbound_message(
            data=data,
            message=message,
-            sender_id=getattr(sender, "sender_id", None),
+            sender_id=sender_id,
            chat_type=chat_type,
            message_id=message_id,
-            is_bot=_is_bot_sender(sender),
        )

    def _on_message_read_event(self, data: P2ImMessageMessageReadV1) -> None:
@@ -2447,11 +2389,10 @@ class FeishuAdapter(BasePlatformAdapter):
            msg = items[0] if items else None
            if not msg:
                return
-            # GET im/v1/messages returns sender.id=app_id for bot messages —
-            # peer bots and us share sender_type="app" but differ on app_id.
            sender = getattr(msg, "sender", None)
-            if str(getattr(sender, "id", "") or "") != self._app_id:
-                return  # only route reactions on this bot's own messages
+            sender_type = str(getattr(sender, "sender_type", "") or "").lower()
+            if sender_type != "app":
+                return  # only route reactions on our own bot messages
            chat_id = str(getattr(msg, "chat_id", "") or "")
            chat_type_raw = str(getattr(msg, "chat_type", "p2p") or "p2p")
            if not chat_id:
@@ -2738,7 +2679,6 @@ class FeishuAdapter(BasePlatformAdapter):
        sender_id: Any,
        chat_type: str,
        message_id: str,
-        is_bot: bool = False,
    ) -> None:
        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)

@@ -2764,27 +2704,19 @@ class FeishuAdapter(BasePlatformAdapter):
        )
        reply_to_text = await self._fetch_message_text(reply_to_message_id) if reply_to_message_id else None

-        sender_primary = (
-            getattr(sender_id, "open_id", None)
-            or getattr(sender_id, "user_id", None)
-            or getattr(sender_id, "union_id", None)
-            or "<unknown>"
-        )
        logger.info(
-            "[Feishu] Inbound %s message received: id=%s type=%s chat_id=%s sender=%s:%s text=%r media=%d",
+            "[Feishu] Inbound %s message received: id=%s type=%s chat_id=%s text=%r media=%d",
            "dm" if chat_type == "p2p" else "group",
            message_id,
            inbound_type.value,
            getattr(message, "chat_id", "") or "",
-            "bot" if is_bot else "user",
-            sender_primary,
            text[:120],
            len(media_urls),
        )

        chat_id = getattr(message, "chat_id", "") or ""
        chat_info = await self.get_chat_info(chat_id)
-        sender_profile = await self._resolve_sender_profile(sender_id, is_bot=is_bot)
+        sender_profile = await self._resolve_sender_profile(sender_id)
        source = self.build_source(
            chat_id=chat_id,
            chat_name=chat_info.get("name") or chat_id or "Feishu Chat",
@@ -2793,7 +2725,6 @@ class FeishuAdapter(BasePlatformAdapter):
            user_name=sender_profile["user_name"],
            thread_id=getattr(message, "thread_id", None) or None,
            user_id_alt=sender_profile["user_id_alt"],
-            is_bot=is_bot,
        )
        normalized = MessageEvent(
            text=text,
@@ -3516,12 +3447,7 @@ class FeishuAdapter(BasePlatformAdapter):
            return "dm"
        return "group"

-    async def _resolve_sender_profile(
-        self,
-        sender_id: Any,
-        *,
-        is_bot: bool = False,
-    ) -> Dict[str, Optional[str]]:
+    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.

        Preference order for the primary ``user_id`` field:
@@ -3538,11 +3464,7 @@ class FeishuAdapter(BasePlatformAdapter):
        union_id = getattr(sender_id, "union_id", None) or None
        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
        primary_id = user_id or open_id
-        # bot/v3/bots/basic_batch only accepts open_id.
-        name_lookup_id = open_id if is_bot else (primary_id or union_id)
-        display_name = await self._resolve_sender_name_from_api(
-            name_lookup_id, is_bot=is_bot,
-        )
+        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
        return {
            "user_id": primary_id,
            "user_name": display_name,
@@ -3562,14 +3484,11 @@ class FeishuAdapter(BasePlatformAdapter):
        self._sender_name_cache.pop(sender_id, None)
        return None

-    async def _resolve_sender_name_from_api(
-        self,
-        sender_id: Optional[str],
-        *,
-        is_bot: bool = False,
-    ) -> Optional[str]:
-        """Bots divert to bot/basic_batch — contact API doesn't return bot names.
-        Failures are silent so the pipeline never blocks on name resolution.
+    async def _resolve_sender_name_from_api(self, sender_id: Optional[str]) -> Optional[str]:
+        """Fetch the sender's display name from the Feishu contact API with a 10-minute cache.
+
+        ID-type detection mirrors openclaw: ou_ → open_id, on_ → union_id, else user_id.
+        Failures are silently suppressed; the message pipeline must not block on name resolution.
        """
        if not sender_id or not self._client:
            return None
@@ -3579,16 +3498,7 @@ class FeishuAdapter(BasePlatformAdapter):
        now = time.time()
        cached_name = self._get_cached_sender_name(trimmed)
        if cached_name is not None:
-            return cached_name or None  # "" cached means "known nameless"
-        if is_bot:
-            names = await self._fetch_bot_names([trimmed])
-            if names is None:
-                return None
-            expire_at = now + _FEISHU_SENDER_NAME_TTL_SECONDS
-            for oid, name in names.items():
-                self._sender_name_cache[oid] = (name, expire_at)
-            hit = self._sender_name_cache.get(trimmed)
-            return (hit[0] or None) if hit else None
+            return cached_name
        try:
            from lark_oapi.api.contact.v3 import GetUserRequest  # lazy import
            if trimmed.startswith("ou_"):
@@ -3617,35 +3527,6 @@ class FeishuAdapter(BasePlatformAdapter):
            logger.debug("[Feishu] Failed to resolve sender name for %s", sender_id, exc_info=True)
        return None

-    async def _fetch_bot_names(self, bot_ids: List[str]) -> Optional[Dict[str, str]]:
-        if not self._client or not bot_ids:
-            return None
-        try:
-            req = (
-                BaseRequest.builder()
-                .http_method(HttpMethod.GET)
-                .uri("/open-apis/bot/v3/bots/basic_batch")
-                .queries([("bot_ids", oid) for oid in bot_ids])
-                .token_types({AccessTokenType.TENANT})
-                .build()
-            )
-            resp = await asyncio.to_thread(self._client.request, req)
-            content = getattr(getattr(resp, "raw", None), "content", None)
-            if not content:
-                return None
-            payload = json.loads(content)
-            if payload.get("code") != 0:
-                return None
-            bots = (payload.get("data") or {}).get("bots") or {}
-            return {
-                oid: str(info.get("name") or "").strip()
-                for oid, info in bots.items()
-                if oid
-            }
-        except Exception:
-            logger.debug("[Feishu] Failed to fetch bot names for %s", bot_ids, exc_info=True)
-            return None
-
    async def _fetch_message_text(self, message_id: str) -> Optional[str]:
        if not self._client or not message_id:
            return None
@@ -3709,60 +3590,10 @@ class FeishuAdapter(BasePlatformAdapter):
            logger.exception("[Feishu] Background inbound processing failed")

    # =========================================================================
-    # Inbound admission
+    # Group policy and mention gating
    # =========================================================================

-    def _admit(self, sender: Any, message: Any) -> Optional[RejectReason]:
-        sender_ids = _sender_identity(sender)
-        self_ids = frozenset(v for v in (self._bot_open_id, self._bot_user_id) if v)
-        is_bot = _is_bot_sender(sender)
-        is_group = getattr(message, "chat_type", "p2p") != "p2p"
-        chat_id = getattr(message, "chat_id", "") or ""
-        require_mention = is_group and self._require_mention_for(chat_id)
-
-        # Defensive only — Feishu doesn't echo our outbound back as inbound,
-        # and open_id is always populated on both sides.
-        if self_ids and sender_ids & self_ids:
-            return "self_echo"
-
-        if is_bot:
-            mode = self._allow_bots
-            if mode != "mentions" and mode != "all":
-                return "bots_disabled"
-            # Defensive: pre-hydration or malformed payloads.
-            if not self_ids or not sender_ids:
-                return "self_ids_unknown"
-            # Step 4 covers mention enforcement for groups when require_mention
-            # is on; check here only on paths step 4 won't reach.
-            if mode == "mentions" and not require_mention and not self._mentions_self(message):
-                return "bot_not_mentioned"
-
-        if not is_group:
-            return None
-
-        if not self._allow_group_message(
-            getattr(sender, "sender_id", None), chat_id, is_bot=is_bot,
-        ):
-            return "group_policy_rejected"
-        if require_mention and not self._mentions_self(message):
-            return "group_policy_rejected"
-        return None
-
-    def _require_mention_for(self, chat_id: str) -> bool:
-        rule = self._group_rules.get(chat_id) if chat_id else None
-        if rule and rule.require_mention is not None:
-            return rule.require_mention
-        return self._require_mention
-
-    # --- Group policy ---------------------------------------------------------
-
-    def _allow_group_message(
-        self,
-        sender_id: Any,
-        chat_id: str = "",
-        *,
-        is_bot: bool = False,
-    ) -> bool:
+    def _allow_group_message(self, sender_id: Any, chat_id: str = "") -> bool:
        """Per-group policy gate for non-DM traffic."""
        sender_open_id = getattr(sender_id, "open_id", None)
        sender_user_id = getattr(sender_id, "user_id", None)
@@ -3781,17 +3612,12 @@ class FeishuAdapter(BasePlatformAdapter):
            allowlist = self._allowed_group_users
            blacklist = set()

-        # Channel locks apply to everyone; allowlist/blacklist only gate humans
-        # (bots were already cleared upstream by FEISHU_ALLOW_BOTS).
        if policy == "disabled":
            return False
        if policy == "open":
            return True
        if policy == "admin_only":
            return False
-        if is_bot:
-            return True
-
        if policy == "allowlist":
            return bool(sender_ids and (sender_ids & allowlist))
        if policy == "blacklist":
@@ -3799,16 +3625,17 @@ class FeishuAdapter(BasePlatformAdapter):

        return bool(sender_ids and (sender_ids & self._allowed_group_users))

-    # --- Mention detection ----------------------------------------------------
-
-    def _mentions_self(self, message: Any) -> bool:
-        # @_all is Feishu's @everyone placeholder.
+    def _should_accept_group_message(self, message: Any, sender_id: Any, chat_id: str = "") -> bool:
+        """Require an explicit @mention before group messages enter the agent."""
+        if not self._allow_group_message(sender_id, chat_id):
+            return False
+        # @_all is Feishu's @everyone placeholder — always route to the bot.
        raw_content = getattr(message, "content", "") or ""
        if "@_all" in raw_content:
            return True
        mentions = getattr(message, "mentions", None) or []
-        if mentions and self._message_mentions_bot(mentions):
-            return True
+        if mentions:
+            return self._message_mentions_bot(mentions)
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
@@ -3817,6 +3644,23 @@ class FeishuAdapter(BasePlatformAdapter):
        )
        return self._post_mentions_bot(normalized.mentions)

+    def _is_self_sent_bot_message(self, event: Any) -> bool:
+        """Return True only for Feishu events emitted by this Hermes bot."""
+        sender = getattr(event, "sender", None)  # missing sender -> None; the getattr defaults below absorb it
+        sender_type = str(getattr(sender, "sender_type", "") or "").strip().lower()  # tolerate None, padding, case
+        if sender_type not in {"bot", "app"}:  # any other sender type (e.g. "user") is never treated as self
+            return False
+
+        sender_id = getattr(sender, "sender_id", None)
+        sender_open_id = str(getattr(sender_id, "open_id", "") or "").strip()  # "" when the id is absent
+        sender_user_id = str(getattr(sender_id, "user_id", "") or "").strip()  # "" when the id is absent
+
+        if self._bot_open_id and sender_open_id == self._bot_open_id:  # skipped while our own open_id is unset
+            return True
+        if self._bot_user_id and sender_user_id == self._bot_user_id:  # same guard for the user_id form
+            return True
+        return False  # bot/app sender that could not be matched to either of our own ids
+
    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
        # IDs trump names: when both sides have open_id (or both user_id),
        # match requires equal IDs. Name fallback only when either side
@@ -3960,7 +3804,7 @@ class FeishuAdapter(BasePlatformAdapter):
            recent = self._seen_message_order[-self._dedup_cache_size:]
            # Save as {msg_id: timestamp} so TTL filtering works across restarts.
            payload = {"message_ids": {k: self._seen_message_ids[k] for k in recent if k in self._seen_message_ids}}
-            atomic_json_write(self._dedup_state_path, payload, indent=None)
+            self._dedup_state_path.write_text(json.dumps(payload, ensure_ascii=False), encoding="utf-8")
        except OSError:
            logger.warning("[Feishu] Failed to persist dedup state to %s", self._dedup_state_path, exc_info=True)

@@ -974,6 +974,7 @@ def build_whole_comment_prompt(

 def _resolve_model_and_runtime() -> Tuple[str, dict]:
    """Resolve model and provider credentials, same as gateway message handling."""
+    import os
    from gateway.run import _load_gateway_config, _resolve_gateway_model

    user_config = _load_gateway_config()
@@ -11,12 +11,10 @@ import logging
 import re
 import time
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict
-
-from utils import atomic_json_write
+from typing import TYPE_CHECKING, Dict, Optional

 if TYPE_CHECKING:
-    from gateway.platforms.base import MessageEvent
+    from gateway.platforms.base import BasePlatformAdapter, MessageEvent

 logger = logging.getLogger(__name__)

@@ -239,11 +237,12 @@ class ThreadParticipationTracker:

    def _save(self) -> None:
        path = self._state_path()
+        path.parent.mkdir(parents=True, exist_ok=True)
        thread_list = list(self._threads)
        if len(thread_list) > self._max_tracked:
            thread_list = thread_list[-self._max_tracked:]
            self._threads = set(thread_list)
-        atomic_json_write(path, thread_list, indent=None)
+        path.write_text(json.dumps(thread_list), encoding="utf-8")

    def mark(self, thread_id: str) -> None:
        """Mark *thread_id* as participated and persist."""
@@ -11,7 +11,6 @@ Environment variables:
    MATRIX_PASSWORD             Password (alternative to access token)
    MATRIX_ENCRYPTION           Set "true" to enable E2EE
    MATRIX_DEVICE_ID            Stable device ID for E2EE persistence across restarts
-    MATRIX_PROXY                HTTP(S) or SOCKS proxy URL for Matrix traffic
    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
    MATRIX_REACTIONS        Set "false" to disable processing lifecycle reactions
@@ -19,7 +18,6 @@ Environment variables:
    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
-    MATRIX_DM_AUTO_THREAD       Auto-create threads for DM messages (default: false)
    MATRIX_RECOVERY_KEY         Recovery key for cross-signing verification after device key rotation
    MATRIX_DM_MENTION_THREADS   Create a thread when bot is @mentioned in a DM (default: false)
 """
@@ -32,8 +30,6 @@ import mimetypes
 import os
 import re
 import time
-from dataclasses import dataclass
-
 from html import escape as _html_escape
 from pathlib import Path
 from typing import Any, Dict, Optional, Set
@@ -99,25 +95,11 @@ from gateway.platforms.base import (
    MessageType,
    ProcessingOutcome,
    SendResult,
-    resolve_proxy_url,
-    proxy_kwargs_for_aiohttp,
 )
 from gateway.platforms.helpers import ThreadParticipationTracker

 logger = logging.getLogger(__name__)

-
-@dataclass
-class _MatrixApprovalPrompt:
-    """Tracks a pending Matrix reaction-based exec approval prompt."""
-
-    def __init__(self, session_key: str, chat_id: str, message_id: str, resolved: bool = False):
-        self.session_key = session_key
-        self.chat_id = chat_id
-        self.message_id = message_id
-        self.resolved = resolved
-        self.bot_reaction_events: dict[str, str] = {}  # emoji -> event_id
-
 # Matrix message size limit (4000 chars practical, spec has no hard limit
 # but clients render poorly above this).
 MAX_MESSAGE_LENGTH = 4000
@@ -132,85 +114,11 @@ _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5

-_OUTBOUND_MENTION_RE = re.compile(
-    r"(?<![\w/])(@[0-9A-Za-z._=/-]+:[0-9A-Za-z.-]+(?::\d+)?)"
-)

 _E2EE_INSTALL_HINT = (
    "Install with: pip install 'mautrix[encryption]'  (requires libolm C library)"
 )

-_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
-    ".jpg",
-    ".jpeg",
-    ".png",
-    ".gif",
-    ".webp",
-    ".bmp",
-    ".svg",
-    ".heic",
-    ".heif",
-    ".avif",
-})
-
-
-def _looks_like_matrix_image_filename(text: str) -> bool:
-    """Return True when Matrix image body text is probably just a transport filename.
-
-    Matrix ``m.image`` events commonly populate ``content.body`` with the uploaded
-    filename when the user did not add a caption. Treating that raw filename as
-    user-authored text confuses downstream vision enrichment.
-    """
-    candidate = str(text or "").strip()
-    if not candidate or "\n" in candidate or candidate.endswith("/"):
-        return False
-
-    name = Path(candidate).name
-    if not name or name != candidate:
-        return False
-
-    suffix = Path(name).suffix.lower()
-    if not suffix:
-        return False
-
-    guessed_type, _ = mimetypes.guess_type(name)
-    if guessed_type and guessed_type.startswith("image/"):
-        return True
-    return suffix in _MATRIX_IMAGE_FILENAME_EXTS
-
-
-def _create_matrix_session(proxy_url: str | None):
-    """Create an ``aiohttp.ClientSession`` whose proxy applies to *all* requests.
-
-    mautrix's ``HTTPAPI._send()`` calls ``session.request()`` without forwarding
-    per-request ``proxy=`` kwargs.  For HTTP(S) proxies we use aiohttp's native
-    ``proxy=`` session parameter which sets a default for every request.  For SOCKS
-    we use ``aiohttp_socks.ProxyConnector`` (connector-level).
-    When no proxy is configured we enable ``trust_env`` so standard env vars
-    (``HTTP_PROXY`` / ``HTTPS_PROXY``) are honoured automatically.
-    """
-    import aiohttp
-
-    if not proxy_url:
-        return aiohttp.ClientSession(trust_env=True)
-
-    if proxy_url.split("://")[0].lower().startswith("socks"):
-        try:
-            from aiohttp_socks import ProxyConnector
-
-            return aiohttp.ClientSession(
-                connector=ProxyConnector.from_url(proxy_url, rdns=True),
-            )
-        except ImportError:
-            logger.warning(
-                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
-                "Run: pip install aiohttp-socks",
-                proxy_url,
-            )
-            return aiohttp.ClientSession(trust_env=True)
-
-    return aiohttp.ClientSession(proxy=proxy_url)
-

 def _check_e2ee_deps() -> bool:
    """Return True if mautrix E2EE dependencies (python-olm) are available."""
@@ -352,9 +260,6 @@ class MatrixAdapter(BasePlatformAdapter):
            "1",
            "yes",
        )
-        self._dm_auto_thread: bool = os.getenv(
-            "MATRIX_DM_AUTO_THREAD", "false"
-        ).lower() in ("true", "1", "yes")
        self._dm_mention_threads: bool = os.getenv(
            "MATRIX_DM_MENTION_THREADS", "false"
        ).lower() in ("true", "1", "yes")
@@ -365,11 +270,6 @@ class MatrixAdapter(BasePlatformAdapter):
        ).lower() not in ("false", "0", "no")
        self._pending_reactions: dict[tuple[str, str], str] = {}

-        # Proxy support — resolve once at init, reuse for all HTTP traffic.
-        self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY")
-        if self._proxy_url:
-            logger.info("Matrix: proxy configured — %s", self._proxy_url)
-
        # Text batching: merge rapid successive messages (Telegram-style).
        # Matrix clients split long messages around 4000 chars.
        self._text_batch_delay_seconds = float(
@@ -381,18 +281,6 @@ class MatrixAdapter(BasePlatformAdapter):
        self._pending_text_batches: Dict[str, MessageEvent] = {}
        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}

-        # Matrix reaction-based dangerous command approvals.
-        self._approval_reaction_map = {
-            "✅": "once",
-            "❎": "deny",
-        }
-        self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {}
-        self._approval_prompt_by_session: Dict[str, str] = {}
-        allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "")
-        self._allowed_user_ids: Set[str] = {
-            u.strip() for u in allowed_users_raw.split(",") if u.strip()
-        }
-
    def _is_duplicate_event(self, event_id) -> bool:
        """Return True if this event was already processed. Tracks the ID otherwise."""
        if not event_id:
@@ -438,7 +326,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    )
                    return False
        except Exception as exc:
-            logger.error("Matrix: post-upload key verification failed: %s", exc, exc_info=True)
+            logger.error("Matrix: post-upload key verification failed: %s", exc)
            return False
        return True

@@ -454,7 +342,6 @@ class MatrixAdapter(BasePlatformAdapter):
            logger.error(
                "Matrix: cannot verify device keys on server: %s — refusing E2EE",
                exc,
-                exc_info=True,
            )
            return False

@@ -469,7 +356,7 @@ class MatrixAdapter(BasePlatformAdapter):
            try:
                await olm.share_keys()
            except Exception as exc:
-                logger.error("Matrix: failed to re-upload device keys: %s", exc, exc_info=True)
+                logger.error("Matrix: failed to re-upload device keys: %s", exc)
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)

@@ -509,7 +396,6 @@ class MatrixAdapter(BasePlatformAdapter):
                    "Try generating a new access token to get a fresh device.",
                    client.device_id,
                    exc,
-                    exc_info=True,
                )
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)
@@ -534,11 +420,9 @@ class MatrixAdapter(BasePlatformAdapter):
        _STORE_DIR.mkdir(parents=True, exist_ok=True)

        # Create the HTTP API layer.
-        client_session = _create_matrix_session(self._proxy_url)
        api = HTTPAPI(
            base_url=self._homeserver,
            token=self._access_token or "",
-            client_session=client_session,
        )

        # Create the client.
@@ -581,7 +465,6 @@ class MatrixAdapter(BasePlatformAdapter):
                logger.error(
                    "Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER: %s",
                    exc,
-                    exc_info=True,
                )
                await api.session.close()
                return False
@@ -724,44 +607,6 @@ class MatrixAdapter(BasePlatformAdapter):
                        logger.warning(
                            "Matrix: recovery key verification failed: %s", exc
                        )
-                else:
-                    # No recovery key — bootstrap cross-signing if the bot
-                    # has none yet. Without this, Element shows "Encrypted
-                    # by a device not verified by its owner" on every
-                    # message from this bot, indefinitely. mautrix's
-                    # generate_recovery_key does the full flow: generates
-                    # MSK/SSK/USK, uploads private keys to SSSS, publishes
-                    # public keys to the homeserver, and signs the current
-                    # device with the new SSK. Some homeservers require UIA
-                    # for /keys/device_signing/upload — those will need an
-                    # alternate path; Continuwuity and Synapse-with-shared-
-                    # secret accept the unauthenticated upload.
-                    try:
-                        own_xsign = await olm.get_own_cross_signing_public_keys()
-                    except Exception as exc:
-                        own_xsign = None
-                        logger.warning(
-                            "Matrix: cross-signing key lookup failed: %s", exc
-                        )
-                    if own_xsign is None:
-                        try:
-                            new_recovery_key = await olm.generate_recovery_key()
-                            logger.warning(
-                                "Matrix: bootstrapped cross-signing for %s. "
-                                "SAVE THIS RECOVERY KEY — set "
-                                "MATRIX_RECOVERY_KEY for future restarts so "
-                                "the bot can re-sign its device after key "
-                                "rotation: %s",
-                                client.mxid,
-                                new_recovery_key,
-                            )
-                        except Exception as exc:
-                            logger.warning(
-                                "Matrix: cross-signing bootstrap failed "
-                                "(non-fatal — Element will show 'not "
-                                "verified by its owner'): %s",
-                                exc,
-                            )

                client.crypto = olm
                logger.info(
@@ -819,7 +664,6 @@ class MatrixAdapter(BasePlatformAdapter):
                        await asyncio.gather(*tasks)
                except Exception as exc:
                    logger.warning("Matrix: initial sync event dispatch error: %s", exc)
-                await self._join_pending_invites(sync_data)
            else:
                logger.warning(
                    "Matrix: initial sync returned unexpected type %s",
@@ -883,8 +727,17 @@ class MatrixAdapter(BasePlatformAdapter):
        chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)

        last_event_id = None
-        for i, chunk in enumerate(chunks):
-            msg_content = self._build_text_message_content(chunk)
+        for chunk in chunks:
+            msg_content: Dict[str, Any] = {
+                "msgtype": "m.text",
+                "body": chunk,
+            }
+
+            # Convert markdown to HTML for rich rendering.
+            html = self._markdown_to_html(chunk)
+            if html and html != chunk:
+                msg_content["format"] = "org.matrix.custom.html"
+                msg_content["formatted_body"] = html

            # Reply-to support.
            if reply_to:
@@ -991,21 +844,25 @@ class MatrixAdapter(BasePlatformAdapter):
        """Edit an existing message (via m.replace)."""

        formatted = self.format_message(content)
-        new_content = self._build_text_message_content(formatted)
        msg_content: Dict[str, Any] = {
            "msgtype": "m.text",
            "body": f"* {formatted}",
-            "m.new_content": new_content,
+            "m.new_content": {
+                "msgtype": "m.text",
+                "body": formatted,
+            },
+            "m.relates_to": {
+                "rel_type": "m.replace",
+                "event_id": message_id,
+            },
        }
-        if "m.mentions" in new_content:
-            msg_content["m.mentions"] = new_content["m.mentions"]
-        if "formatted_body" in new_content:
+
+        html = self._markdown_to_html(formatted)
+        if html and html != formatted:
+            msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
+            msg_content["m.new_content"]["formatted_body"] = html
            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = f'* {new_content["formatted_body"]}'
-        msg_content["m.relates_to"] = {
-            "rel_type": "m.replace",
-            "event_id": message_id,
-        }
+            msg_content["formatted_body"] = f"* {html}"

        try:
            event_id = await self._client.send_message_event(
@@ -1038,12 +895,10 @@ class MatrixAdapter(BasePlatformAdapter):
            # Try aiohttp first (always available), fall back to httpx
            try:
                import aiohttp as _aiohttp
-                _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url)
-                async with _aiohttp.ClientSession(**_sess_kw) as http:
+
+                async with _aiohttp.ClientSession(trust_env=True) as http:
                    async with http.get(
-                        image_url,
-                        timeout=_aiohttp.ClientTimeout(total=30),
-                        **_req_kw,
+                        image_url, timeout=_aiohttp.ClientTimeout(total=30)
                    ) as resp:
                        resp.raise_for_status()
                        data = await resp.read()
@@ -1053,10 +908,8 @@ class MatrixAdapter(BasePlatformAdapter):
                        )
            except ImportError:
                import httpx
-                _httpx_kw: dict = {}
-                if self._proxy_url:
-                    _httpx_kw["proxy"] = self._proxy_url
-                async with httpx.AsyncClient(**_httpx_kw) as http:
+
+                async with httpx.AsyncClient() as http:
                    resp = await http.get(image_url, follow_redirects=True, timeout=30)
                    resp.raise_for_status()
                    data = resp.content
@@ -1131,56 +984,6 @@ class MatrixAdapter(BasePlatformAdapter):
            chat_id, video_path, "m.video", caption, reply_to, metadata=metadata
        )

-    async def send_exec_approval(
-        self,
-        chat_id: str,
-        command: str,
-        session_key: str,
-        description: str = "dangerous command",
-        metadata: Optional[dict] = None,
-    ) -> SendResult:
-        """Send a reaction-based exec approval prompt for Matrix."""
-        if not self._client:
-            return SendResult(success=False, error="Not connected")
-
-        cmd_preview = command[:2000] + "..." if len(command) > 2000 else command
-        text = (
-            "⚠️ **Dangerous command requires approval**\n"
-            f"```\n{cmd_preview}\n```\n"
-            f"Reason: {description}\n\n"
-            "Reply `/approve` to execute, `/approve session` to approve this pattern for the session, "
-            "`/approve always` to approve permanently, or `/deny` to cancel.\n\n"
-            "You can also click the reaction to approve:\n"
-            "✅ = /approve\n"
-            "❎ = /deny"
-        )
-
-        result = await self.send(chat_id, text, metadata=metadata)
-        if not result.success or not result.message_id:
-            return result
-
-        prompt = _MatrixApprovalPrompt(
-            session_key=session_key,
-            chat_id=chat_id,
-            message_id=result.message_id,
-        )
-        old_event = self._approval_prompt_by_session.get(session_key)
-        if old_event:
-            self._approval_prompts_by_event.pop(old_event, None)
-        self._approval_prompts_by_event[result.message_id] = prompt
-        self._approval_prompt_by_session[session_key] = result.message_id
-
-        for emoji in ("✅", "❎"):
-            try:
-                reaction_result = await self._send_reaction(chat_id, result.message_id, emoji)
-                # Save the bot's reaction event_id for later cleanup
-                if reaction_result:
-                    prompt.bot_reaction_events[emoji] = str(reaction_result)
-            except Exception as exc:
-                logger.debug("Matrix: failed to add approval reaction %s: %s", emoji, exc)
-
-        return result
-
    def format_message(self, content: str) -> str:
        """Pass-through — Matrix supports standard Markdown natively."""
        # Strip image markdown; media is uploaded separately.
@@ -1312,15 +1115,9 @@ class MatrixAdapter(BasePlatformAdapter):
        next_batch = await client.sync_store.get_next_batch()
        while not self._closing:
            try:
-                # Wrap in asyncio.wait_for to guard against TCP-level hangs
-                # that the Matrix long-poll timeout cannot catch. Long-poll
-                # is 30s, so 45s gives 15s slack for network drain.
-                sync_data = await asyncio.wait_for(
-                    client.sync(
-                        since=next_batch,
-                        timeout=30000,
-                    ),
-                    timeout=45.0,
+                sync_data = await client.sync(
+                    since=next_batch,
+                    timeout=30000,
                )

                # nio returns SyncError objects (not exceptions) for auth
@@ -1356,7 +1153,6 @@ class MatrixAdapter(BasePlatformAdapter):
                            await asyncio.gather(*tasks)
                    except Exception as exc:
                        logger.warning("Matrix: sync event dispatch error: %s", exc)
-                    await self._join_pending_invites(sync_data)

            except asyncio.CancelledError:
                return
@@ -1443,15 +1239,6 @@ class MatrixAdapter(BasePlatformAdapter):
        room_id = str(getattr(event, "room_id", ""))
        sender = str(getattr(event, "sender", ""))

-        # Diagnostic: confirm the callback is firing at all when DEBUG is on.
-        # Helps users troubleshoot silent inbound issues like #5819, #7914, #12614.
-        logger.debug(
-            "Matrix: callback fired — event %s from %s in %s",
-            getattr(event, "event_id", "?"),
-            sender,
-            room_id,
-        )
-
        # Ignore own messages (case-insensitive; also drops when our own
        # user_id hasn't been resolved yet — see _is_self_sender docstring
        # and issue #15763).
@@ -1563,12 +1350,6 @@ class MatrixAdapter(BasePlatformAdapter):
            in_bot_thread = bool(thread_id and thread_id in self._threads)
            if self._require_mention and not is_free_room and not in_bot_thread:
                if not is_mentioned:
-                    logger.debug(
-                        "Matrix: ignoring message %s in %s — no @mention "
-                        "(set MATRIX_REQUIRE_MENTION=false to disable)",
-                        event_id,
-                        room_id,
-                    )
                    return None

        # DM mention-thread.
@@ -1581,7 +1362,7 @@ class MatrixAdapter(BasePlatformAdapter):
            body = self._strip_mention(body)

        # Auto-thread.
-        if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)):
+        if not is_dm and not thread_id and self._auto_thread:
            thread_id = event_id
            self._threads.mark(thread_id)

@@ -1823,9 +1604,6 @@ class MatrixAdapter(BasePlatformAdapter):
            return
        body, is_dm, chat_type, thread_id, display_name, source = ctx

-        if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
-            body = ""
-
        allow_http_fallback = bool(http_url) and not is_encrypted_media
        media_urls = (
            [cached_path]
@@ -1855,35 +1633,13 @@ class MatrixAdapter(BasePlatformAdapter):
            "Matrix: invited to %s — joining",
            room_id,
        )
-        await self._join_room_by_id(room_id)
-
-    async def _join_room_by_id(self, room_id: str) -> bool:
-        """Join a room by ID and refresh local caches on success."""
-        if not room_id:
-            return False
-        if room_id in self._joined_rooms:
-            return True
        try:
            await self._client.join_room(RoomID(room_id))
            self._joined_rooms.add(room_id)
            logger.info("Matrix: joined %s", room_id)
            await self._refresh_dm_cache()
-            return True
        except Exception as exc:
            logger.warning("Matrix: error joining %s: %s", room_id, exc)
-            return False
-
-    async def _join_pending_invites(self, sync_data: Dict[str, Any]) -> None:
-        """Join rooms still present in rooms.invite after sync processing."""
-        rooms = sync_data.get("rooms", {}) if isinstance(sync_data, dict) else {}
-        invites = rooms.get("invite", {})
-        if not isinstance(invites, dict):
-            return
-        for room_id in invites:
-            if room_id in self._joined_rooms:
-                continue
-            logger.info("Matrix: reconciling pending invite for %s", room_id)
-            await self._join_room_by_id(str(room_id))

    # ------------------------------------------------------------------
    # Reactions (send, receive, processing lifecycle)
@@ -1998,51 +1754,6 @@ class MatrixAdapter(BasePlatformAdapter):
                room_id,
            )

-            # Check if this reaction resolves a pending approval prompt.
-            prompt = self._approval_prompts_by_event.get(reacts_to)
-            if prompt and not prompt.resolved:
-                if room_id != prompt.chat_id:
-                    return
-                if self._allowed_user_ids and sender not in self._allowed_user_ids:
-                    logger.info(
-                        "Matrix: ignoring approval reaction from unauthorized user %s on %s",
-                        sender, reacts_to,
-                    )
-                    return
-                choice = self._approval_reaction_map.get(key)
-                if not choice:
-                    return
-                try:
-                    from tools.approval import resolve_gateway_approval
-
-                    count = resolve_gateway_approval(prompt.session_key, choice)
-                    if count:
-                        prompt.resolved = True
-                        self._approval_prompts_by_event.pop(reacts_to, None)
-                        self._approval_prompt_by_session.pop(prompt.session_key, None)
-                        logger.info(
-                            "Matrix reaction resolved %d approval(s) for session %s "
-                            "(choice=%s, user=%s)",
-                            count, prompt.session_key, choice, sender,
-                        )
-                        # Redact bot's seed reactions, leaving only the user's
-                        await self._redact_bot_approval_reactions(room_id, prompt)
-                except Exception as exc:
-                    logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc)
-
-    async def _redact_bot_approval_reactions(
-        self,
-        room_id: str,
-        prompt: "_MatrixApprovalPrompt",
-    ) -> None:
-        """Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction."""
-        for emoji, evt_id in prompt.bot_reaction_events.items():
-            try:
-                await self.redact_message(room_id, evt_id, "approval resolved")
-                logger.debug("Matrix: redacted bot reaction %s (%s)", emoji, evt_id)
-            except Exception as exc:
-                logger.debug("Matrix: failed to redact bot reaction %s: %s", emoji, exc)
-
    # ------------------------------------------------------------------
    # Text message aggregation (handles Matrix client-side splits)
    # ------------------------------------------------------------------
@@ -2268,7 +1979,11 @@ class MatrixAdapter(BasePlatformAdapter):
        if not self._client or not text:
            return SendResult(success=False, error="No client or empty text")

-        msg_content = self._build_text_message_content(text, msgtype=msgtype)
+        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
+        html = self._markdown_to_html(text)
+        if html and html != text:
+            msg_content["format"] = "org.matrix.custom.html"
+            msg_content["formatted_body"] = html

        try:
            event_id = await self._client.send_message_event(
@@ -2331,77 +2046,6 @@ class MatrixAdapter(BasePlatformAdapter):
    # Mention detection helpers
    # ------------------------------------------------------------------

-    def _build_text_message_content(self, text: str, msgtype: str = "m.text") -> Dict[str, Any]:
-        """Build Matrix text content with HTML and outbound mention metadata."""
-        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
-        mention_user_ids = self._extract_outbound_mentions(text)
-        if mention_user_ids:
-            msg_content["m.mentions"] = {"user_ids": mention_user_ids}
-
-        html_source = self._inject_outbound_mention_links(text)
-        html = self._markdown_to_html(html_source)
-        if html and html != text:
-            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = html
-
-        return msg_content
-
-    def _extract_outbound_mentions(self, text: str) -> list[str]:
-        """Return unique Matrix user IDs mentioned in outbound text."""
-        protected, _ = self._protect_outbound_mention_regions(text)
-        seen: Set[str] = set()
-        mentions: list[str] = []
-        for match in _OUTBOUND_MENTION_RE.finditer(protected):
-            user_id = match.group(1)
-            if user_id not in seen:
-                seen.add(user_id)
-                mentions.append(user_id)
-        return mentions
-
-    def _inject_outbound_mention_links(self, text: str) -> str:
-        """Wrap outbound Matrix mentions in markdown links outside code spans."""
-        if not text:
-            return text
-
-        protected, placeholders = self._protect_outbound_mention_regions(text)
-
-        linked = _OUTBOUND_MENTION_RE.sub(
-            lambda match: f"[{match.group(1)}](https://matrix.to/#/{match.group(1)})",
-            protected,
-        )
-
-        for idx, original in enumerate(placeholders):
-            linked = linked.replace(f"\x00MENTION_PROTECTED{idx}\x00", original)
-
-        return linked
-
-    def _protect_outbound_mention_regions(self, text: str) -> tuple[str, list[str]]:
-        """Protect markdown regions where outbound mentions should stay literal."""
-        placeholders: list[str] = []
-
-        def _protect(fragment: str) -> str:
-            idx = len(placeholders)
-            placeholders.append(fragment)
-            return f"\x00MENTION_PROTECTED{idx}\x00"
-
-        protected = re.sub(
-            r"```[\s\S]*?```",
-            lambda match: _protect(match.group(0)),
-            text or "",
-        )
-        protected = re.sub(
-            r"`[^`\n]+`",
-            lambda match: _protect(match.group(0)),
-            protected,
-        )
-        protected = re.sub(
-            r"\[[^\]]+\]\([^)]+\)",
-            lambda match: _protect(match.group(0)),
-            protected,
-        )
-
-        return protected, placeholders
-
    def _is_bot_mentioned(
        self,
        body: str,
@@ -2436,33 +2080,13 @@ class MatrixAdapter(BasePlatformAdapter):
        return False

    def _strip_mention(self, body: str) -> str:
-        """Remove explicit bot mentions from message body.
+        """Strip the bot's full MXID (``@user:server``) from *body*.

-        Important: only strip explicit mention tokens (``@user:server`` or
-        ``@localpart``). Do NOT strip bare words matching the bot localpart,
-        otherwise normal phrases like "Hermes Agent" become "Agent".
+        The bare localpart is intentionally *not* stripped — it would
+        mangle file paths like ``/home/hermes/media/file.png``.
        """
-        if not body:
-            return ""
-
-        # Strip explicit full MXID mentions.
        if self._user_id:
            body = body.replace(self._user_id, "")
-
-        # Strip explicit @localpart mentions only (not bare localpart words).
-        if self._user_id and ":" in self._user_id:
-            localpart = self._user_id.split(":")[0].lstrip("@")
-            if localpart:
-                body = re.sub(
-                    r'(?<![\w])@' + re.escape(localpart) + r'\b',
-                    '',
-                    body,
-                    flags=re.IGNORECASE,
-                )
-
-        # Normalize spacing after mention removal.
-        body = re.sub(r'[ \t]{2,}', ' ', body)
-        body = re.sub(r'\s+([,.;:!?])', r'\1', body)
        return body.strip()

    async def _get_display_name(self, room_id: str, user_id: str) -> str:
@@ -19,7 +19,7 @@ import logging
 import os
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.helpers import MessageDeduplicator
@@ -412,6 +412,7 @@ class MattermostAdapter(BasePlatformAdapter):

        import aiohttp

+        last_exc = None
        file_data = None
        ct = "application/octet-stream"
        fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
@@ -496,100 +497,6 @@ class MattermostAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Failed to post with file")
        return SendResult(success=True, message_id=data["id"])

-    async def send_multiple_images(
-        self,
-        chat_id: str,
-        images: List[Tuple[str, str]],
-        metadata: Optional[Dict[str, Any]] = None,
-        human_delay: float = 0.0,
-    ) -> None:
-        """Send a batch of images as a single Mattermost post with multiple attachments.
-
-        Mattermost supports up to 5 ``file_ids`` per post. Each image is
-        uploaded individually (Mattermost's file API is one-at-a-time),
-        then a single post is created referencing all uploaded file_ids
-        at once. Batches larger than 5 are chunked. Falls back to the
-        base per-image loop on total failure.
-        """
-        if not images:
-            return
-
-        import mimetypes
-        import aiohttp
-        from urllib.parse import unquote as _unquote
-
-        CHUNK = 5  # Mattermost post file_ids cap
-        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
-
-        for chunk_idx, chunk in enumerate(chunks):
-            if human_delay > 0 and chunk_idx > 0:
-                await asyncio.sleep(human_delay)
-
-            file_ids: List[str] = []
-            caption_parts: List[str] = []
-            try:
-                for image_url, alt_text in chunk:
-                    if alt_text:
-                        caption_parts.append(alt_text)
-
-                    if image_url.startswith("file://"):
-                        local_path = _unquote(image_url[7:])
-                        p = Path(local_path)
-                        if not p.exists():
-                            logger.warning("Mattermost: skipping missing image %s", local_path)
-                            continue
-                        fname = p.name
-                        ct = mimetypes.guess_type(fname)[0] or "image/png"
-                        file_data = p.read_bytes()
-                    else:
-                        from tools.url_safety import is_safe_url
-                        if not is_safe_url(image_url):
-                            logger.warning("Mattermost: blocked unsafe image URL in batch")
-                            continue
-                        try:
-                            async with self._session.get(
-                                image_url, timeout=aiohttp.ClientTimeout(total=30)
-                            ) as resp:
-                                if resp.status >= 400:
-                                    logger.warning(
-                                        "Mattermost: failed to download image (HTTP %d): %s",
-                                        resp.status, image_url[:80],
-                                    )
-                                    continue
-                                file_data = await resp.read()
-                                ct = resp.content_type or "image/png"
-                        except Exception as dl_err:
-                            logger.warning("Mattermost: download failed for %s: %s", image_url[:80], dl_err)
-                            continue
-                        fname = image_url.rsplit("/", 1)[-1].split("?")[0] or f"image_{len(file_ids)}.png"
-
-                    fid = await self._upload_file(chat_id, file_data, fname, ct)
-                    if fid:
-                        file_ids.append(fid)
-
-                if not file_ids:
-                    continue
-
-                payload: Dict[str, Any] = {
-                    "channel_id": chat_id,
-                    "message": "\n".join(caption_parts),
-                    "file_ids": file_ids,
-                }
-                logger.info(
-                    "Mattermost: sending %d image(s) as single post (chunk %d/%d)",
-                    len(file_ids), chunk_idx + 1, len(chunks),
-                )
-                data = await self._api_post("posts", payload)
-                if not data or "id" not in data:
-                    logger.warning("Mattermost: multi-image post failed, falling back")
-                    await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
-            except Exception as e:
-                logger.warning(
-                    "Mattermost: multi-image send failed (chunk %d/%d), falling back: %s",
-                    chunk_idx + 1, len(chunks), e, exc_info=True,
-                )
-                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
-
    # ------------------------------------------------------------------
    # WebSocket
    # ------------------------------------------------------------------
@@ -976,18 +976,6 @@ class QQAdapter(BasePlatformAdapter):
        if not channel_id:
            return

-        # Apply group_policy ACL — guild channels are group-like contexts.
-        # Without this check any member of any guild the bot is in could
-        # bypass the configured allowlist.
-        guild_id = str(d.get("guild_id", ""))
-        author_id = str(author.get("id", ""))
-        if not self._is_group_allowed(guild_id or channel_id, author_id):
-            logger.debug(
-                "[%s] Guild message blocked by ACL: channel=%s user=%s",
-                self._log_tag, channel_id, author_id,
-            )
-            return
-
        member = d.get("member") if isinstance(d.get("member"), dict) else {}
        nick = str(member.get("nick", "")) or str(author.get("username", ""))

@@ -1044,17 +1032,6 @@ class QQAdapter(BasePlatformAdapter):
        if not guild_id:
            return

-        # Apply dm_policy ACL — guild DMs were previously unauthenticated.
-        # Without this check any member of any guild the bot is in could
-        # bypass the configured allowlist via direct messages.
-        author_id = str(author.get("id", ""))
-        if not self._is_dm_allowed(author_id):
-            logger.debug(
-                "[%s] Guild DM blocked by ACL: guild=%s user=%s",
-                self._log_tag, guild_id, author_id,
-            )
-            return
-
        text = content
        att_result = await self._process_attachments(d.get("attachments"))
        image_urls = att_result["image_urls"]
@@ -1980,7 +1957,7 @@ class QQAdapter(BasePlatformAdapter):
            self, openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a C2C user via REST API."""
-        self._next_msg_seq(reply_to or openid)
+        msg_seq = self._next_msg_seq(reply_to or openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -1993,7 +1970,7 @@ class QQAdapter(BasePlatformAdapter):
            self, group_openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a group via REST API."""
-        self._next_msg_seq(reply_to or group_openid)
+        msg_seq = self._next_msg_seq(reply_to or group_openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -2158,6 +2135,11 @@ class QQAdapter(BasePlatformAdapter):

            # Route
            chat_type = self._guess_chat_type(chat_id)
+            target_path = (
+                f"/v2/users/{chat_id}/files"
+                if chat_type == "c2c"
+                else f"/v2/groups/{chat_id}/files"
+            )

            if chat_type == "guild":
                # Guild channels don't support native media upload in the same way
@@ -21,7 +21,7 @@ import time
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Any
 from urllib.parse import quote, unquote

 import httpx
@@ -31,7 +31,6 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
-    ProcessingOutcome,
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
@@ -39,17 +38,6 @@ from gateway.platforms.base import (
    cache_image_from_url,
 )
 from gateway.platforms.helpers import redact_phone
-from gateway.platforms.signal_rate_limit import (
-    SIGNAL_BATCH_PACING_NOTICE_THRESHOLD,
-    SIGNAL_MAX_ATTACHMENTS_PER_MSG,
-    SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
-    SignalRateLimitError,
-    _extract_retry_after_seconds,
-    _format_wait,
-    _is_signal_rate_limit_error,
-    _signal_send_timeout,
-    get_scheduler,
-)

 logger = logging.getLogger(__name__)

@@ -64,7 +52,6 @@ SSE_RETRY_DELAY_MAX = 60.0
 HEALTH_CHECK_INTERVAL = 30.0  # seconds between health checks
 HEALTH_CHECK_STALE_THRESHOLD = 120.0  # seconds without SSE activity before concern

-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -175,10 +162,6 @@ class SignalAdapter(BasePlatformAdapter):
    """Signal messenger adapter using signal-cli HTTP daemon."""

    platform = Platform.SIGNAL
-    # Signal has no real edit API for already-sent messages. Mark it explicitly
-    # so streaming suppresses the visible cursor instead of leaving a stale tofu
-    # square behind in chat clients when edit attempts fail.
-    SUPPORTS_MESSAGE_EDITING = False

    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.SIGNAL)
@@ -505,11 +488,6 @@ class SignalAdapter(BasePlatformAdapter):
        if text and mentions:
            text = _render_mentions(text, mentions)

-        # Extract quote (reply-to) context from Signal dataMessage
-        quote_data = data_message.get("quote") or {}
-        reply_to_id = str(quote_data.get("id")) if quote_data.get("id") else None
-        reply_to_text = quote_data.get("text")
-
        # Process attachments
        attachments_data = data_message.get("attachments", [])
        media_urls = []
@@ -534,18 +512,6 @@ class SignalAdapter(BasePlatformAdapter):
                except Exception:
                    logger.exception("Signal: failed to fetch attachment %s", att_id)

-        # Skip envelopes with no meaningful content (no text, no attachments).
-        # Catches profile key updates, empty messages, and other metadata-only
-        # envelopes that still carry a dataMessage wrapper but have nothing
-        # worth processing. See issue: signal-cli logs "Profile key update" +
-        # Hermes receives msg='' triggering a full agent turn for nothing.
-        if (not text or not text.strip()) and not media_urls:
-            logger.debug(
-                "Signal: skipping contentless envelope from %s (%d attachments)",
-                redact_phone(sender), len(media_urls) if media_urls else 0,
-            )
-            return
-
        # Build session source
        source = self.build_source(
            chat_id=chat_id,
@@ -575,9 +541,7 @@ class SignalAdapter(BasePlatformAdapter):
        else:
            timestamp = datetime.now(tz=timezone.utc)

-        # Build and dispatch event.
-        # Store raw envelope data in raw_message so on_processing_start/complete
-        # can extract targetAuthor + targetTimestamp for sendReaction.
+        # Build and dispatch event
        event = MessageEvent(
            source=source,
            text=text or "",
@@ -585,9 +549,6 @@ class SignalAdapter(BasePlatformAdapter):
            media_urls=media_urls,
            media_types=media_types,
            timestamp=timestamp,
-            raw_message={"sender": sender, "timestamp_ms": ts_ms},
-            reply_to_message_id=reply_to_id,
-            reply_to_text=reply_to_text,
        )

        logger.debug("Signal: message from %s in %s: %s",
@@ -698,8 +659,6 @@ class SignalAdapter(BasePlatformAdapter):
        rpc_id: str = None,
        *,
        log_failures: bool = True,
-        raise_on_rate_limit: bool = False,
-        timeout: float = 30.0,
    ) -> Any:
        """Send a JSON-RPC 2.0 request to signal-cli daemon.

@@ -708,11 +667,6 @@ class SignalAdapter(BasePlatformAdapter):
        repeated NETWORK_FAILURE spam for unreachable recipients while
        still preserving visibility for the first occurrence and for
        unrelated RPCs.
-
-        When ``raise_on_rate_limit=True``, a Signal ``[429]`` /
-        ``RateLimitException`` response raises ``SignalRateLimitError``
-        instead of being swallowed — lets callers (multi-attachment send)
-        opt into backoff-retry without changing default behaviour.
        """
        if not self.client:
            logger.warning("Signal: RPC called but client not connected")
@@ -732,28 +686,20 @@ class SignalAdapter(BasePlatformAdapter):
            resp = await self.client.post(
                f"{self.http_url}/api/v1/rpc",
                json=payload,
-                timeout=timeout,
+                timeout=30.0,
            )
            resp.raise_for_status()
            data = resp.json()

            if "error" in data:
-                err = data["error"]
-                if raise_on_rate_limit:
-                    if _is_signal_rate_limit_error(err):
-                        err_msg = str(err.get("message", "")) if isinstance(err, dict) else str(err)
-                        retry_after = _extract_retry_after_seconds(err)
-                        raise SignalRateLimitError(err_msg, retry_after=retry_after)
                if log_failures:
-                    logger.warning("Signal RPC error (%s): %s", method, err)
+                    logger.warning("Signal RPC error (%s): %s", method, data["error"])
                else:
-                    logger.debug("Signal RPC error (%s): %s", method, err)
+                    logger.debug("Signal RPC error (%s): %s", method, data["error"])
                return None

            return data.get("result")

-        except SignalRateLimitError:
-            raise
        except Exception as e:
            if log_failures:
                logger.warning("Signal RPC %s failed: %s", method, e)
@@ -761,159 +707,6 @@ class SignalAdapter(BasePlatformAdapter):
                logger.debug("Signal RPC %s failed: %s", method, e)
            return None

-    # ------------------------------------------------------------------
-    # Formatting — markdown → Signal body ranges
-    # ------------------------------------------------------------------
-
-    @staticmethod
-    def _markdown_to_signal(text: str) -> tuple:
-        """Convert markdown to plain text + Signal textStyles list.
-
-        Signal doesn't render markdown.  Instead it uses ``bodyRanges``
-        (exposed by signal-cli as ``textStyle`` / ``textStyles`` params)
-        with the format ``start:length:STYLE``.
-
-        Positions are measured in **UTF-16 code units** (not Python code
-        points) because that's what the Signal protocol uses.
-
-        Supported styles: BOLD, ITALIC, STRIKETHROUGH, MONOSPACE.
-        (Signal's SPOILER style is not currently mapped — no standard
-        markdown syntax for it; would need ``||spoiler||`` parsing.)
-
-        Returns ``(plain_text, styles_list)`` where *styles_list* may be
-        empty if there's nothing to format.
-        """
-        import re
-
-        def _utf16_len(s: str) -> int:
-            """Length of *s* in UTF-16 code units."""
-            return len(s.encode("utf-16-le")) // 2
-
-        # Pre-process: normalize whitespace before any position tracking
-        # so later operations don't invalidate recorded offsets.
-        text = re.sub(r"\n{3,}", "\n\n", text)
-        text = text.strip()
-
-        styles: list = []
-
-        # --- Phase 1: fenced code blocks  ```...``` → MONOSPACE ---
-        _CB = re.compile(r"```[a-zA-Z0-9_+-]*\n?(.*?)```", re.DOTALL)
-        while m := _CB.search(text):
-            inner = m.group(1).rstrip("\n")
-            start = m.start()
-            text = text[: m.start()] + inner + text[m.end() :]
-            styles.append((start, len(inner), "MONOSPACE"))
-
-        # --- Phase 2: heading markers  # Foo → Foo (BOLD) ---
-        _HEADING = re.compile(r"^#{1,6}\s+", re.MULTILINE)
-        new_text = ""
-        last_end = 0
-        for m in _HEADING.finditer(text):
-            new_text += text[last_end : m.start()]
-            last_end = m.end()
-            eol = text.find("\n", m.end())
-            if eol == -1:
-                eol = len(text)
-            heading_text = text[m.end() : eol]
-            start = len(new_text)
-            new_text += heading_text
-            styles.append((start, len(heading_text), "BOLD"))
-            last_end = eol
-        new_text += text[last_end:]
-        text = new_text
-
-        # --- Phase 3: inline patterns (single-pass to avoid offset drift) ---
-        # The old code processed each pattern sequentially, stripping markers
-        # and recording positions per-pass.  Later passes shifted text without
-        # adjusting earlier positions → bold/italic landed mid-word.
-        #
-        # Fix: collect ALL non-overlapping matches first, then strip every
-        # marker in one pass so positions are computed against the final text.
-        _PATTERNS = [
-            (re.compile(r"\*\*(.+?)\*\*", re.DOTALL), "BOLD"),
-            (re.compile(r"__(.+?)__", re.DOTALL), "BOLD"),
-            (re.compile(r"~~(.+?)~~", re.DOTALL), "STRIKETHROUGH"),
-            (re.compile(r"`(.+?)`"), "MONOSPACE"),
-            (re.compile(r"(?<!\*)\*(?!\*| )(.+?)(?<!\*)\*(?!\*)"), "ITALIC"),
-            (re.compile(r"(?<!\w)_(?!_)(.+?)(?<!_)_(?!\w)"), "ITALIC"),
-        ]
-
-        # Collect all non-overlapping matches (earlier patterns win ties).
-        all_matches: list = []  # (start, end, g1_start, g1_end, style)
-        occupied: list = []     # (start, end) intervals already claimed
-        for pat, style in _PATTERNS:
-            for m in pat.finditer(text):
-                ms, me = m.start(), m.end()
-                if not any(ms < oe and me > os for os, oe in occupied):
-                    all_matches.append((ms, me, m.start(1), m.end(1), style))
-                    occupied.append((ms, me))
-        all_matches.sort()
-
-        # Build removal list so we can adjust Phase 1/2 styles.
-        # Each match removes its prefix markers (start..g1_start) and
-        # suffix markers (g1_end..end).
-        removals: list = []  # (position, length) sorted
-        for ms, me, g1s, g1e, _ in all_matches:
-            if g1s > ms:
-                removals.append((ms, g1s - ms))
-            if me > g1e:
-                removals.append((g1e, me - g1e))
-        removals.sort()
-
-        # Adjust Phase 1/2 styles for characters about to be removed.
-        def _adj(pos: int) -> int:
-            shift = 0
-            for rp, rl in removals:
-                if rp < pos:
-                    shift += min(rl, pos - rp)
-                else:
-                    break
-            return pos - shift
-
-        adjusted_prior: list = []
-        for s, l, st in styles:
-            ns = _adj(s)
-            ne = _adj(s + l)
-            if ne > ns:
-                adjusted_prior.append((ns, ne - ns, st))
-
-        # Strip all inline markers in one pass → positions are correct.
-        result = ""
-        last_end = 0
-        inline_styles: list = []
-        for ms, me, g1s, g1e, sty in all_matches:
-            result += text[last_end:ms]
-            pos = len(result)
-            inner = text[g1s:g1e]
-            result += inner
-            inline_styles.append((pos, len(inner), sty))
-            last_end = me
-        result += text[last_end:]
-        text = result
-
-        styles = adjusted_prior + inline_styles
-
-        # Convert code-point offsets → UTF-16 code-unit offsets
-        style_strings = []
-        for cp_start, cp_len, stype in sorted(styles):
-            # Safety: skip any out-of-bounds styles
-            if cp_start < 0 or cp_start + cp_len > len(text):
-                continue
-            u16_start = _utf16_len(text[:cp_start])
-            u16_len = _utf16_len(text[cp_start : cp_start + cp_len])
-            style_strings.append(f"{u16_start}:{u16_len}:{stype}")
-
-        return text, style_strings
-
-    def format_message(self, content: str) -> str:
-        """Strip markdown for plain-text fallback (used by base class).
-
-        The actual rich formatting happens in send() via _markdown_to_signal().
-        """
-        # This is only called if someone uses the base-class send path.
-        # Our send() override bypasses this entirely.
-        return content
-
    # ------------------------------------------------------------------
    # Sending
    # ------------------------------------------------------------------
@@ -925,22 +718,14 @@ class SignalAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        """Send a text message with native Signal formatting."""
+        """Send a text message."""
        await self._stop_typing_indicator(chat_id)

-        plain_text, text_styles = self._markdown_to_signal(content)
-
        params: Dict[str, Any] = {
            "account": self.account,
-            "message": plain_text,
+            "message": content,
        }

-        if text_styles:
-            if len(text_styles) == 1:
-                params["textStyle"] = text_styles[0]
-            else:
-                params["textStyles"] = text_styles
-
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
@@ -950,10 +735,11 @@ class SignalAdapter(BasePlatformAdapter):

        if result is not None:
            self._track_sent_timestamp(result)
-            # Signal has no editable message identifier. Returning None keeps the
-            # stream consumer on the non-edit fallback path instead of pretending
-            # future edits can remove an in-progress cursor from the chat thread.
-            return SendResult(success=True, message_id=None)
+            # Use the timestamp from the RPC result as a pseudo message_id.
+            # Signal doesn't have real message IDs, but the stream consumer
+            # needs a truthy value to follow its edit→fallback path correctly.
+            _msg_id = str(result.get("timestamp", "")) if isinstance(result, dict) else None
+            return SendResult(success=True, message_id=_msg_id or None)
        return SendResult(success=False, error="RPC send failed")

    def _track_sent_timestamp(self, rpc_result) -> None:
@@ -1017,178 +803,6 @@ class SignalAdapter(BasePlatformAdapter):
            self._typing_failures.pop(chat_id, None)
            self._typing_skip_until.pop(chat_id, None)

-    async def send_multiple_images(
-        self,
-        chat_id: str,
-        images: List[Tuple[str, str]],
-        metadata: Optional[Dict[str, Any]] = None,
-        human_delay: float = 0.0,
-    ) -> None:
-        """Send a batch of images via chunked Signal RPC calls.
-
-        Per-image alt texts are dropped — Signal's send RPC only carries
-        one shared message body. Bad images (download failure, missing
-        file, oversize) are skipped with a warning so one bad URL
-        doesn't lose the rest of the batch. ``human_delay`` is ignored:
-        the rate-limit scheduler handles inter-batch pacing.
-        """
-        if not images:
-            return
-
-        scheduler = get_scheduler()
-        logger.info(
-            "Signal send_multiple_images: received %d image(s) for %s — "
-            "scheduler state: %s",
-            len(images), chat_id[:30], scheduler.state(),
-        )
-
-        await self._stop_typing_indicator(chat_id)
-
-        attachments: List[str] = []
-        skipped_download = 0
-        skipped_missing = 0
-        skipped_oversize = 0
-        for image_url, _alt_text in images:
-            if image_url.startswith("file://"):
-                file_path = unquote(image_url[7:])
-            else:
-                try:
-                    file_path = await cache_image_from_url(image_url)
-                except Exception as e:
-                    logger.warning("Signal: failed to download image %s: %s", image_url, e)
-                    skipped_download += 1
-                    continue
-
-            if not file_path or not Path(file_path).exists():
-                logger.warning("Signal: image file not found for %s", image_url)
-                skipped_missing += 1
-                continue
-
-            file_size = Path(file_path).stat().st_size
-            if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
-                logger.warning(
-                    "Signal: image too large (%d bytes), skipping %s", file_size, image_url
-                )
-                skipped_oversize += 1
-                continue
-
-            attachments.append(file_path)
-
-        if not attachments:
-            logger.error(
-                "Signal: no valid images in batch of %d "
-                "(download=%d missing=%d oversize=%d)",
-                len(images), skipped_download, skipped_missing, skipped_oversize,
-            )
-            return
-
-        logger.info(
-            "Signal send_multiple_images: %d/%d images valid, sending in chunks",
-            len(attachments), len(images),
-        )
-
-        base_params: Dict[str, Any] = {
-            "account": self.account,
-            "message": "",
-        }
-        if chat_id.startswith("group:"):
-            base_params["groupId"] = chat_id[6:]
-        else:
-            base_params["recipient"] = [await self._resolve_recipient(chat_id)]
-
-        att_batches = [
-            attachments[i:i + SIGNAL_MAX_ATTACHMENTS_PER_MSG]
-            for i in range(0, len(attachments), SIGNAL_MAX_ATTACHMENTS_PER_MSG)
-        ]
-
-        for idx, att_batch in enumerate(att_batches):
-            n = len(att_batch)
-            estimated = scheduler.estimate_wait(n)
-            logger.debug(
-                "Signal batch %d/%d: %d attachments, estimated wait=%.1fs",
-                idx + 1, len(att_batches), n, estimated,
-            )
-            if estimated >= SIGNAL_BATCH_PACING_NOTICE_THRESHOLD:
-                await self._notify_batch_pacing(
-                    chat_id, idx + 1, len(att_batches), estimated
-                )
-
-            params = dict(base_params, attachments=att_batch)
-            send_timeout = _signal_send_timeout(n)
-
-            for attempt in range(1, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS + 1):
-                await scheduler.acquire(n)
-                try:
-                    _rpc_t0 = time.monotonic()
-                    result = await self._rpc(
-                        "send", params, raise_on_rate_limit=True, timeout=send_timeout,
-                    )
-                    _rpc_duration = time.monotonic() - _rpc_t0
-                    if result is not None:
-                        self._track_sent_timestamp(result)
-                        await scheduler.report_rpc_duration(_rpc_duration, n)
-                        logger.info(
-                            "Signal batch %d/%d: %d attachments sent in %.1fs "
-                            "(attempt %d/%d)",
-                            idx + 1, len(att_batches), n, _rpc_duration,
-                            attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
-                        )
-                    else:
-                        # Assume the server didn't accept the batch, don't deduce tokens
-                        logger.error(
-                            "Signal: RPC send failed for batch %d/%d (%d attachments, "
-                            "attempt %d/%d, rpc_duration=%.1fs)",
-                            idx + 1, len(att_batches), n,
-                            attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
-                            _rpc_duration,
-                        )
-                        # Retry transient (non-rate-limit) failures once
-                        if attempt < SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
-                            backoff = 2.0 ** attempt
-                            logger.info(
-                                "Signal: retrying batch %d/%d after %.1fs backoff",
-                                idx + 1, len(att_batches), backoff,
-                            )
-                            await asyncio.sleep(backoff)
-                            continue
-                    break
-                except SignalRateLimitError as e:
-                    scheduler.feedback(e.retry_after, n)
-                    if attempt >= SIGNAL_RATE_LIMIT_MAX_ATTEMPTS:
-                        logger.error(
-                            "Signal: rate-limit retries exhausted on batch %d/%d "
-                            "(%d attachments lost, server retry_after=%s)",
-                            idx + 1, len(att_batches), n,
-                            f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
-                        )
-                        break
-                    logger.warning(
-                        "Signal: rate-limited on batch %d/%d "
-                        "(attempt %d/%d, server retry_after=%s); "
-                        "scheduler will pace the retry",
-                        idx + 1, len(att_batches),
-                        attempt, SIGNAL_RATE_LIMIT_MAX_ATTEMPTS,
-                        f"{e.retry_after:.0f}s" if e.retry_after else "unknown",
-                    )
-
-    async def _notify_batch_pacing(
-        self,
-        chat_id: str,
-        next_batch_idx: int,
-        total_batches: int,
-        wait_s: float,
-    ) -> None:
-        """Inform the user when an inter-batch pacing wait crosses the
-        notice threshold. Best-effort; logs and continues on failure."""
-        try:
-            await self.send(
-                chat_id,
-                f"(More images coming — pausing ~{_format_wait(wait_s)} "
-                f"for Signal rate limit, batch {next_batch_idx}/{total_batches}.)",
-            )
-        except Exception as e:
-            logger.warning("Signal: failed to send pacing notice: %s", e)
-
    async def send_image(
        self,
        chat_id: str,
@@ -1349,110 +963,6 @@ class SignalAdapter(BasePlatformAdapter):
        _keep_typing finally block to clean up platform-level typing tasks."""
        await self._stop_typing_indicator(chat_id)

-    # ------------------------------------------------------------------
-    # Reactions
-    # ------------------------------------------------------------------
-
-    async def send_reaction(
-        self,
-        chat_id: str,
-        emoji: str,
-        target_author: str,
-        target_timestamp: int,
-    ) -> bool:
-        """Send a reaction emoji to a specific message via signal-cli RPC.
-
-        Args:
-            chat_id: The chat (phone number or "group:<id>")
-            emoji: Reaction emoji string (e.g. "👀", "✅")
-            target_author: Phone number / UUID of the message author
-            target_timestamp: Signal timestamp (ms) of the message to react to
-        """
-        params: Dict[str, Any] = {
-            "account": self.account,
-            "emoji": emoji,
-            "targetAuthor": target_author,
-            "targetTimestamp": target_timestamp,
-        }
-
-        if chat_id.startswith("group:"):
-            params["groupId"] = chat_id[6:]
-        else:
-            params["recipient"] = [chat_id]
-
-        result = await self._rpc("sendReaction", params)
-        if result is not None:
-            return True
-        logger.debug("Signal: sendReaction failed (chat=%s, emoji=%s)", chat_id[:20], emoji)
-        return False
-
-    async def remove_reaction(
-        self,
-        chat_id: str,
-        target_author: str,
-        target_timestamp: int,
-    ) -> bool:
-        """Remove a reaction by sending an empty-string emoji."""
-        params: Dict[str, Any] = {
-            "account": self.account,
-            "emoji": "",
-            "targetAuthor": target_author,
-            "targetTimestamp": target_timestamp,
-            "remove": True,
-        }
-
-        if chat_id.startswith("group:"):
-            params["groupId"] = chat_id[6:]
-        else:
-            params["recipient"] = [chat_id]
-
-        result = await self._rpc("sendReaction", params)
-        return result is not None
-
-    # ------------------------------------------------------------------
-    # Processing Lifecycle Hooks (reactions as progress indicators)
-    # ------------------------------------------------------------------
-
-    def _extract_reaction_target(self, event: MessageEvent) -> Optional[tuple]:
-        """Extract (target_author, target_timestamp) from a MessageEvent.
-
-        Returns None if the event doesn't carry the raw Signal envelope data
-        needed for sendReaction.
-        """
-        raw = event.raw_message
-        if not isinstance(raw, dict):
-            return None
-        author = raw.get("sender")
-        ts = raw.get("timestamp_ms")
-        if not author or not ts:
-            return None
-        return (author, ts)
-
-    async def on_processing_start(self, event: MessageEvent) -> None:
-        """React with 👀 when processing begins."""
-        target = self._extract_reaction_target(event)
-        if target:
-            await self.send_reaction(event.source.chat_id, "👀", *target)
-
-    async def on_processing_complete(self, event: MessageEvent, outcome: "ProcessingOutcome") -> None:
-        """Swap the 👀 reaction for ✅ (success) or ❌ (failure).
-
-        On CANCELLED we leave the 👀 in place — no terminal outcome means
-        the reaction should keep reflecting "in progress" (matches Telegram).
-        """
-        if outcome == ProcessingOutcome.CANCELLED:
-            return
-        target = self._extract_reaction_target(event)
-        if not target:
-            return
-        chat_id = event.source.chat_id
-        # Remove the in-progress reaction, then add the final one
-        await self.remove_reaction(chat_id, *target)
-        if outcome == ProcessingOutcome.SUCCESS:
-            await self.send_reaction(chat_id, "✅", *target)
-        elif outcome == ProcessingOutcome.FAILURE:
-            await self.send_reaction(chat_id, "❌", *target)
-
    # ------------------------------------------------------------------
    # Chat Info
    # ------------------------------------------------------------------
@@ -1,369 +0,0 @@
-"""
-Signal attachment rate-limit scheduler.
-
-Process-wide token-bucket simulator that mirrors the per-account
-attachment rate limit signal-cli/Signal-Server enforce. Producers
-(``SignalAdapter.send_multiple_images`` and the ``send_message`` tool's
-Signal path) call ``acquire(n)`` before an attachment send; on a 429
-they call ``feedback(retry_after, n)`` so the model recalibrates from
-the server's authoritative hint.
-
-The scheduler serializes concurrent calls through an ``asyncio.Lock``,
-giving FIFO fairness across agent sessions sharing one signal-cli
-daemon.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import logging
-import re
-import time
-from typing import Any, Optional
-
-logger = logging.getLogger(__name__)
-
-
-# ---------------------------------------------------------------------------
-# Constants
-# ---------------------------------------------------------------------------
-
-SIGNAL_MAX_ATTACHMENTS_PER_MSG = 32  # per-message attachment cap (source: Signal-{Android,Desktop} source code)
-SIGNAL_RATE_LIMIT_BUCKET_CAPACITY = 50  # server-side token-bucket capacity for attachments rate limiting
-SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER = 4  # fallback token refill interval for signal-cli < v0.14.3
-SIGNAL_RATE_LIMIT_MAX_ATTEMPTS = 2  # initial attempt + 1 retry
-SIGNAL_BATCH_PACING_NOTICE_THRESHOLD = 10.0  # if estimated waiting time > 10s, notify the user about the delay
-SIGNAL_RPC_ERROR_RATELIMIT = -5  # signal-cli (v0.14.3+) JSON-RPC error code for RateLimitException
-
-
-# ---------------------------------------------------------------------------
-# Errors
-# ---------------------------------------------------------------------------
-
-class SignalRateLimitError(Exception):
-    """
-    Raised by ``SignalAdapter._rpc`` for rate-limit responses when the
-    caller has opted in via ``raise_on_rate_limit=True``.
-
-    Carries the server-supplied per-token Retry-After (in seconds) on
-    signal-cli ≥ v0.14.3
-    ``retry_after`` is None when the version doesn't expose it.
-    """
-
-    def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
-        super().__init__(message)
-        self.retry_after = retry_after
-
-
-class SignalSchedulerError(Exception):
-    pass
-
-# ---------------------------------------------------------------------------
-# Detection helpers — used to fish a 429 out of signal-cli's various error
-# shapes (typed code, [429] substring, libsignal-net RetryLaterException
-# leaked through AttachmentInvalidException).
-# ---------------------------------------------------------------------------
-
-# "Retry after 4 seconds" / "retry after 4 second" — libsignal-net's
-# RetryLaterException string form, surfaced when 429s hit during
-# attachment upload (signal-cli wraps these as AttachmentInvalidException
-# rather than RateLimitException, so the typed path doesn't fire).
-_RETRY_AFTER_RE = re.compile(r"Retry after (\d+(?:\.\d+)?)\s*second", re.IGNORECASE)
-
-
-def _extract_retry_after_seconds(err: Any) -> Optional[float]:
-    """Pull the per-token Retry-After window from a signal-cli rate-limit error.
-
-    Tries two sources, in order:
-    1. ``error.data.response.results[*].retryAfterSeconds`` — the
-       structured field signal-cli ≥ v0.14.3 surfaces for plain
-       RateLimitException.
-    2. ``"Retry after N seconds"`` parsed out of the message — covers
-       libsignal-net's RetryLaterException that gets wrapped as
-       AttachmentInvalidException during attachment upload, where the
-       structured field stays null.
-
-    Returns None when neither yields a value.
-    """
-    msg = ""
-    if isinstance(err, dict):
-        data = err.get("data") or {}
-        response = data.get("response") or {}
-        results = response.get("results") or []
-        candidates = [
-            r.get("retryAfterSeconds") for r in results
-            if isinstance(r, dict) and r.get("retryAfterSeconds")
-        ]
-        if candidates:
-            return float(max(candidates))
-        msg = str(err.get("message", ""))
-    else:
-        msg = str(err)
-    match = _RETRY_AFTER_RE.search(msg)
-    return float(match.group(1)) if match else None
-
-
-def _is_signal_rate_limit_error(err: Any) -> bool:
-    """True if a signal-cli RPC error reflects a rate-limit failure.
-
-    Matches three layers:
-    - typed ``RATELIMIT_ERROR`` code (signal-cli ≥ v0.14.3, plain
-      RateLimitException)
-    - legacy ``[429] / RateLimitException`` substrings
-    - libsignal-net's ``RetryLaterException`` / ``Retry after N seconds``
-      surfaced inside ``AttachmentInvalidException`` when the rate
-      limit is hit during attachment upload — signal-cli never re-tags
-      these as RateLimitException, so substring is the only signal.
-    """
-    if isinstance(err, dict) and err.get("code") == SIGNAL_RPC_ERROR_RATELIMIT:
-        return True
-
-    message = (
-        str(err.get("message", ""))
-        if isinstance(err, dict)
-        else str(err)
-    )
-    msg_lower = message.lower()
-    return (
-        "[429]" in message
-        or "ratelimit" in msg_lower
-        or "retrylaterexception" in msg_lower
-        or "retry after" in msg_lower
-    )
-
-
-# ---------------------------------------------------------------------------
-# Misc helpers
-# ---------------------------------------------------------------------------
-
-def _format_wait(seconds: float) -> str:
-    """Human-friendly wait label for user-facing pacing notices."""
-    s = max(0.0, seconds)
-    if s < 90:
-        return f"{int(round(s))}s"
-    return f"{max(1, int(round(s / 60)))} min"
-
-
-def _signal_send_timeout(num_attachments: int) -> float:
-    """HTTP timeout for a Signal ``send`` RPC.
-
-    signal-cli uploads attachments serially during the call, so the
-    server-side time scales with batch size. Default 30s is fine for
-    text-only sends but truncates large attachment batches mid-upload —
-    we then log a phantom failure even though signal-cli completes the
-    send a few seconds later. Scale at 5s/attachment with a 60s floor.
-    """
-    if num_attachments <= 0:
-        return 30.0
-    return max(60.0, 5.0 * num_attachments)
-
-
-# ---------------------------------------------------------------------------
-# Scheduler
-# ---------------------------------------------------------------------------
-
-class SignalAttachmentScheduler:
-    """Process-wide token-bucket simulator for Signal attachment sends.
-
-    The bucket holds up to ``capacity`` tokens (default 50, matching
-    Signal's server-side rate-limit bucket size). Each attachment consumes one
-    token. Tokens refill at ``refill_rate`` tokens/second, calibrated
-    from the per-token Retry-After hint we get from the server when a
-    429 fires. Until we've observed one, we use the documented default
-    (1 token / 4 seconds).
-
-    Concurrent ``acquire(n)`` calls serialize through an
-    ``asyncio.Lock`` — natural FIFO across agent sessions hitting the
-    same daemon.
-    """
-
-    def __init__(
-        self,
-        capacity: float = float(SIGNAL_RATE_LIMIT_BUCKET_CAPACITY),
-        default_retry_after: float = float(SIGNAL_RATE_LIMIT_DEFAULT_RETRY_AFTER),
-    ) -> None:
-        self.capacity = float(capacity)
-        self.tokens = float(capacity)
-        self.refill_rate = 1.0 / float(default_retry_after)
-        self.last_refill = time.monotonic()
-        self._lock = asyncio.Lock()
-
-    # ------------------------------------------------------------------
-    # Internals
-    # ------------------------------------------------------------------
-
-    def _refill(self) -> None:
-        now = time.monotonic()
-        elapsed = now - self.last_refill
-        if elapsed > 0 and self.tokens < self.capacity:
-            self.tokens = min(self.capacity, self.tokens + elapsed * self.refill_rate)
-        self.last_refill = now
-
-    # ------------------------------------------------------------------
-    # Public API
-    # ------------------------------------------------------------------
-
-    def estimate_wait(self, n: int) -> float:
-        """Best-effort estimate of the seconds until ``n`` tokens would
-        be available. Used to decide whether to emit a user-facing
-        pacing notice *before* committing to an ``acquire`` that may
-        block silently. Lock-free; small races vs. concurrent acquires
-        are benign for an informational notice.
-        """
-        now = time.monotonic()
-        elapsed = now - self.last_refill
-        projected = self.tokens
-        if elapsed > 0 and projected < self.capacity:
-            projected = min(self.capacity, projected + elapsed * self.refill_rate)
-        deficit = n - projected
-        if deficit <= 0:
-            return 0.0
-        return deficit / self.refill_rate
-
-    async def acquire(self, n: int) -> float:
-        """Block until at least ``n`` tokens are available, return the
-        seconds slept.
-
-        Does **not** deduct tokens — the bucket is a read-only model of
-        server-side capacity.  Call ``report_rpc_duration()`` after the
-        RPC to synchronise the model with the server timeline.
-
-        Not perfect in case lots of coroutines try to acquire for big
-        uploads (``report_rpc_duration`` will take a long time to get hit)
-        but this is just a simulation. Signal server is ground truth and
-        will raise rate-limit exceptions triggering requeues.
-
-        The lock is released during ``asyncio.sleep`` so other callers
-        can interleave.  A retry loop re-checks after each sleep in
-        case the deadline was pessimistic.
-        """
-        if n <= 0:
-            return 0.0
-        if n > self.capacity:
-            raise SignalSchedulerError(
-                f"Signal scheduler was called requesting {n} tokens "
-                f"(max is {self.capacity})",
-            )
-
-        total_slept = 0.0
-        first_pass = True
-        while True:
-            async with self._lock:
-                self._refill()
-                if self.tokens >= n:
-                    if not first_pass or total_slept > 0:
-                        logger.debug(
-                            "Signal scheduler: tokens sufficient for %d "
-                            "(remaining=%.1f, total_slept=%.1fs)",
-                            n, self.tokens, total_slept,
-                        )
-                    return total_slept
-                deficit = n - self.tokens
-            wait = deficit / self.refill_rate
-            if first_pass:
-                logger.info(
-                    "Signal scheduler: pausing %.1fs for %d tokens "
-                    "(available=%.1f, deficit=%.1f, refill=%.4f/s ≈ %.1fs/token)",
-                    wait, n, self.tokens, deficit,
-                    self.refill_rate, 1.0 / self.refill_rate,
-                )
-                first_pass = False
-            await asyncio.sleep(wait)
-            total_slept += wait
-
-    async def report_rpc_duration(self, rpc_duration: float, n_attachments: int) -> None:
-        """Record an attachment-send RPC that just completed.
-
-        Deducts ``n_attachments`` tokens without crediting refill during
-        the upload window. Signal's server checks the bucket at RPC start
-        and does *not* refill during request processing — refill resumes
-        after the response. Crediting upload-time refill causes cumulative
-        drift that eventually triggers 429s.
-
-        Advances ``last_refill`` so the next ``acquire`` / ``_refill``
-        starts counting from this point.
-        """
-        if n_attachments <= 0:
-            return
-
-        async with self._lock:
-            now = time.monotonic()
-            token_before = self.tokens
-            self.tokens = max(0.0, token_before - float(n_attachments))
-            self.last_refill = now
-        logger.log(
-            logging.INFO if rpc_duration > 10 and n_attachments > 5 else logging.DEBUG,
-            "Signal scheduler: RPC for %d att took %.1fs — "
-            "tokens %.1f → %.1f (deducted=%d, no upload refill credited, refill=%.4fs⁻¹)",
-            n_attachments, rpc_duration,
-            token_before, self.tokens,
-            n_attachments, self.refill_rate,
-        )
-
-    def feedback(self, retry_after: Optional[float], n_attempted: int) -> None:
-        """Apply server feedback after a 429.
-
-        ``retry_after`` is the per-*token* refill window the server
-        reports (None when signal-cli is older than v0.14.3 and didn't
-        surface it).
-
-        When present we calibrate ``refill_rate`` from it:
-        the server is authoritative.
-        """
-        if retry_after and retry_after > 0:
-            new_rate = 1.0 / float(retry_after)
-            if new_rate != self.refill_rate:
-                logger.info(
-                    "Signal scheduler: calibrating refill_rate to %.4f tokens/sec "
-                    "(server retry_after=%.1fs per token)",
-                    new_rate, retry_after,
-                )
-                self.refill_rate = new_rate
-        self.tokens = 0.0
-        self.last_refill = time.monotonic()
-
-    def state(self) -> dict:
-        """Return current scheduler state for diagnostic logging (read-only).
-
-        Does not advance ``last_refill`` — safe to call from logging paths
-        without perturbing the bucket.
-        """
-        now = time.monotonic()
-        elapsed = now - self.last_refill
-        projected = self.tokens
-        if elapsed > 0 and projected < self.capacity:
-            projected = min(self.capacity, projected + elapsed * self.refill_rate)
-        return {
-            "tokens": round(projected, 1),
-            "capacity": int(self.capacity),
-            "refill_rate": round(self.refill_rate, 4),
-            "refill_seconds_per_token": round(1.0 / self.refill_rate, 1) if self.refill_rate > 0 else float("inf"),
-        }
-
-
-# ---------------------------------------------------------------------------
-# Process-wide singleton
-# ---------------------------------------------------------------------------
-
-_scheduler: Optional[SignalAttachmentScheduler] = None
-
-
-def get_scheduler() -> SignalAttachmentScheduler:
-    """Return the process-wide scheduler, creating it on first access."""
-    global _scheduler
-    if _scheduler is None:
-        _scheduler = SignalAttachmentScheduler()
-        logger.info(
-            "Signal scheduler: created (capacity=%d tokens, refill=%.4f/s ≈ %.1fs/token)",
-            int(_scheduler.capacity),
-            _scheduler.refill_rate,
-            1.0 / _scheduler.refill_rate,
-        )
-    return _scheduler
-
-
-def _reset_scheduler() -> None:
-    """Drop the cached scheduler so the next ``get_scheduler`` call
-    builds a fresh one. Test-only — never call from production paths."""
-    global _scheduler
-    _scheduler = None
@@ -9,7 +9,6 @@ Uses slack-bolt (Python) with Socket Mode for:
 """

 import asyncio
-import contextvars
 import json
 import logging
 import os
@@ -22,7 +21,6 @@ try:
    from slack_bolt.async_app import AsyncApp
    from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
    from slack_sdk.web.async_client import AsyncWebClient
-    import aiohttp
    SLACK_AVAILABLE = True
 except ImportError:
    SLACK_AVAILABLE = False
@@ -52,16 +50,6 @@ from gateway.platforms.base import (

 logger = logging.getLogger(__name__)

-# ContextVar carrying the user_id of the slash-command invoker.
-# Set in _handle_slash_command, read in send() to match the correct
-# stashed response_url when multiple users issue commands on the same
-# channel concurrently.  ContextVars propagate to child asyncio.Tasks
-# (Python 3.7+), so the value set in _handle_slash_command's task is
-# visible in _process_message_background's child task.
-_slash_user_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
-    "_slash_user_id", default=None,
-)
-

@dataclass
 class _ThreadContextCache:
@@ -322,11 +310,6 @@ class SlackAdapter(BasePlatformAdapter):
        # Track active assistant thread status indicators so stop_typing can
        # clear them (chat_id → thread_ts).
        self._active_status_threads: Dict[str, str] = {}
-        # Slash-command contexts: stash response_url + user_id so send()
-        # can route the first reply ephemerally.  Keyed by
-        # (channel_id, user_id) to avoid cross-user collisions.
-        # Each value: {"response_url": str, "ts": float}
-        self._slash_command_contexts: Dict[Tuple[str, str], Dict[str, Any]] = {}

    def _describe_slack_api_error(self, response: Any, *, file_obj: Optional[Dict[str, Any]] = None) -> Optional[str]:
        """Convert Slack API auth/permission failures into actionable user-facing text."""
@@ -385,103 +368,6 @@ class SlackAdapter(BasePlatformAdapter):
            )
        return None

-    # ------------------------------------------------------------------
-    # Slash-command ephemeral helpers
-    # ------------------------------------------------------------------
-
-    _SLASH_CTX_TTL = 120.0  # seconds — response_url is valid for 30 min;
-    # we use a much shorter TTL to avoid routing unrelated messages
-    # as ephemeral if the command handler was slow or dropped.
-
-    def _pop_slash_context(
-        self, chat_id: str,
-    ) -> Optional[Dict[str, Any]]:
-        """Return and remove the slash-command context for *chat_id*, if fresh.
-
-        Contexts older than ``_SLASH_CTX_TTL`` seconds are silently discarded.
-
-        Uses the ``_slash_user_id`` ContextVar (set in ``_handle_slash_command``)
-        to match the exact ``(channel_id, user_id)`` key.  This prevents a
-        concurrent slash command from a different user on the same channel from
-        stealing another user's ephemeral context.  Falls back to a
-        channel-only scan when the ContextVar is unset (e.g. send() called
-        from a non-slash code path — should not match anything).
-        """
-        now = time.monotonic()
-        # Clean up stale entries on every lookup — dict is small.
-        stale_keys = [
-            k for k, v in self._slash_command_contexts.items()
-            if now - v["ts"] > self._SLASH_CTX_TTL
-        ]
-        for k in stale_keys:
-            self._slash_command_contexts.pop(k, None)
-
-        # Precise match: (channel_id, user_id) from ContextVar.
-        uid = _slash_user_id.get()
-        if uid:
-            return self._slash_command_contexts.pop((chat_id, uid), None)
-
-        # Fallback: channel-only scan (only reachable when ContextVar is
-        # unset, i.e. send() called outside a slash-command async context).
-        match_key = None
-        for key in list(self._slash_command_contexts):
-            if key[0] == chat_id:
-                match_key = key
-                break
-        if match_key is None:
-            return None
-        return self._slash_command_contexts.pop(match_key)
-
-    async def _send_slash_ephemeral(
-        self,
-        ctx: Dict[str, Any],
-        content: str,
-    ) -> "SendResult":
-        """Replace the initial ephemeral ack via ``response_url``.
-
-        Slack's ``response_url`` accepts a POST with ``replace_original``
-        for up to 30 minutes after the slash command was invoked.  This
-        lets us swap the "Running /cmd…" placeholder with the real reply,
-        and the message stays ephemeral ("Only visible to you").
-
-        Falls back to a simple ``True`` SendResult if the POST fails —
-        the user already saw the initial ack, so a delivery failure here
-        is non-critical.
-        """
-        formatted = self.format_message(content)
-        # Slack's response_url has the same ~40k char limit as chat_postMessage.
-        # Truncate to MAX_MESSAGE_LENGTH and use only the first chunk — the
-        # response_url replaces a single ephemeral ack, so multi-chunk isn't
-        # possible.  Long responses are rare for command replies.
-        chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
-        text = chunks[0] if chunks else formatted
-        payload = {
-            "response_type": "ephemeral",
-            "replace_original": True,
-            "text": text,
-        }
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.post(
-                    ctx["response_url"],
-                    json=payload,
-                    timeout=aiohttp.ClientTimeout(total=10),
-                ) as resp:
-                    if resp.status == 200:
-                        return SendResult(success=True, message_id=None)
-                    body = await resp.text()
-                    logger.warning(
-                        "[Slack] response_url POST returned %s: %s",
-                        resp.status,
-                        body[:200],
-                    )
-        except Exception as e:
-            logger.warning(
-                "[Slack] response_url POST failed: %s", e,
-            )
-        # Non-fatal — the user saw the initial ack already.
-        return SendResult(success=True, message_id=None)
-
    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
        if not SLACK_AVAILABLE:
@@ -560,16 +446,12 @@ class SlackAdapter(BasePlatformAdapter):
            async def handle_message_event(event, say):
                await self._handle_slack_message(event)

-            # Handle app_mention explicitly. In some Slack app configurations,
-            # channel mentions arrive only as app_mention events rather than the
-            # generic message event. Forward them into the normal message
-            # pipeline so @mentions reliably produce replies.
-            # NOTE: when Slack fires BOTH message and app_mention for the same
-            # @mention, they share the same event ts — the dedup in
-            # _handle_slack_message (MessageDeduplicator) suppresses the second.
+            # Acknowledge app_mention events to prevent Bolt 404 errors.
+            # The "message" handler above already processes @mentions in
+            # channels, so this is intentionally a no-op to avoid duplicates.
            @self._app.event("app_mention")
            async def handle_app_mention(event, say):
-                await self._handle_slack_message(event)
+                pass

            # File lifecycle events can arrive around snippet uploads even when
            # the actual user message is what we care about. Ack them so Slack
@@ -620,11 +502,7 @@ class SlackAdapter(BasePlatformAdapter):

            @self._app.command(_slash_pattern)
            async def handle_hermes_command(ack, command):
-                slash = (command.get("command") or "").lstrip("/")
-                await ack(
-                    response_type="ephemeral",
-                    text=f"Running `/{slash}`…",
-                )
+                await ack()
                await self._handle_slash_command(command)

            # Register Block Kit action handlers for approval buttons
@@ -636,15 +514,6 @@ class SlackAdapter(BasePlatformAdapter):
            ):
                self._app.action(_action_id)(self._handle_approval_action)

-            # Register Block Kit action handlers for slash-confirm buttons
-            # (generic three-option prompts; see tools/slash_confirm.py).
-            for _action_id in (
-                "hermes_confirm_once",
-                "hermes_confirm_always",
-                "hermes_confirm_cancel",
-            ):
-                self._app.action(_action_id)(self._handle_slash_confirm_action)
-
            # Start Socket Mode handler in background
            self._handler = AsyncSocketModeHandler(self._app, app_token, proxy=proxy_url)
            _apply_slack_proxy(self._handler.client, proxy_url)
@@ -696,17 +565,6 @@ class SlackAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")

        try:
-            # Check for a pending slash-command context.  When the user ran a
-            # native slash command (e.g. /q, /stop, /model), the initial ack
-            # already showed an ephemeral "Running /cmd…" message.  If we have
-            # a stashed response_url for this channel, replace that ack with
-            # the actual command reply ephemerally instead of posting publicly.
-            slash_ctx = self._pop_slash_context(chat_id)
-            if slash_ctx:
-                return await self._send_slash_ephemeral(
-                    slash_ctx, content,
-                )
-
            # Convert standard markdown → Slack mrkdwn
            formatted = self.format_message(content)

@@ -734,10 +592,6 @@ class SlackAdapter(BasePlatformAdapter):

                last_result = await self._get_client(chat_id).chat_postMessage(**kwargs)

-            # Clear Slack Assistant status as soon as the final message is posted.
-            if thread_ts:
-                await self.stop_typing(chat_id)
-
            # Track the sent message ts so we can auto-respond to thread
            # replies without requiring @mention.
            sent_ts = last_result.get("ts") if last_result else None
@@ -761,42 +615,6 @@ class SlackAdapter(BasePlatformAdapter):
            logger.error("[Slack] Send error: %s", e, exc_info=True)
            return SendResult(success=False, error=str(e))

-    async def send_private_notice(
-        self,
-        chat_id: str,
-        user_id: str,
-        content: str,
-        reply_to: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        """Send a Slack ephemeral message visible only to one user."""
-        if not self._app:
-            return SendResult(success=False, error="Not connected")
-        if not chat_id or not user_id:
-            return SendResult(success=False, error="chat_id and user_id are required")
-
-        try:
-            formatted = self.format_message(content)
-            thread_ts = self._resolve_thread_ts(reply_to, metadata)
-            kwargs = {
-                "channel": chat_id,
-                "user": user_id,
-                "text": formatted,
-                "mrkdwn": True,
-            }
-            if thread_ts:
-                kwargs["thread_ts"] = thread_ts
-
-            result = await self._get_client(chat_id).chat_postEphemeral(**kwargs)
-            return SendResult(
-                success=True,
-                message_id=result.get("message_ts") or result.get("ts"),
-                raw_response=result,
-            )
-        except Exception as e:  # pragma: no cover - defensive logging
-            logger.error("[Slack] Ephemeral send error: %s", e, exc_info=True)
-            return SendResult(success=False, error=str(e))
-
    async def edit_message(
        self,
        chat_id: str,
@@ -815,8 +633,6 @@ class SlackAdapter(BasePlatformAdapter):
                ts=message_id,
                text=formatted,
            )
-            if finalize:
-                await self.stop_typing(chat_id)
            return SendResult(success=True, message_id=message_id)
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error(
@@ -857,7 +673,7 @@ class SlackAdapter(BasePlatformAdapter):
            # in an assistant-enabled context. Falls back to reactions.
            logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)

-    async def stop_typing(self, chat_id: str, metadata=None) -> None:
+    async def stop_typing(self, chat_id: str) -> None:
        """Clear the assistant thread status indicator."""
        if not self._app:
            return
@@ -967,111 +783,6 @@ class SlackAdapter(BasePlatformAdapter):

        raise last_exc

-    async def send_multiple_images(
-        self,
-        chat_id: str,
-        images: List[Tuple[str, str]],
-        metadata: Optional[Dict[str, Any]] = None,
-        human_delay: float = 0.0,
-    ) -> None:
-        """Send a batch of images as a single Slack message with multiple file uploads.
-
-        Uses ``files_upload_v2`` with its ``file_uploads`` parameter so all
-        images show up attached to one ``initial_comment`` message instead
-        of N separate messages. Falls back to the base per-image loop on
-        any failure.
-
-        The batch limit is 10 file uploads per call (Slack server-side cap).
-        """
-        if not self._app:
-            return
-        if not images:
-            return
-
-        try:
-            import httpx as _httpx
-            from urllib.parse import unquote as _unquote
-            from tools.url_safety import is_safe_url as _is_safe_url
-        except Exception:
-            await super().send_multiple_images(chat_id, images, metadata, human_delay)
-            return
-
-        thread_ts = self._resolve_thread_ts(None, metadata)
-
-        CHUNK = 10
-        chunks = [images[i:i + CHUNK] for i in range(0, len(images), CHUNK)]
-
-        for chunk_idx, chunk in enumerate(chunks):
-            if human_delay > 0 and chunk_idx > 0:
-                await asyncio.sleep(human_delay)
-
-            file_uploads: List[Dict[str, Any]] = []
-            initial_comment_parts: List[str] = []
-            try:
-                async with _httpx.AsyncClient(timeout=30.0, follow_redirects=True) as http_client:
-                    for image_url, alt_text in chunk:
-                        if alt_text:
-                            initial_comment_parts.append(alt_text)
-
-                        if image_url.startswith("file://"):
-                            local_path = _unquote(image_url[7:])
-                            if not os.path.exists(local_path):
-                                logger.warning("[Slack] Skipping missing image: %s", local_path)
-                                continue
-                            file_uploads.append({
-                                "file": local_path,
-                                "filename": os.path.basename(local_path),
-                            })
-                        else:
-                            if not _is_safe_url(image_url):
-                                logger.warning("[Slack] Blocked unsafe image URL in batch")
-                                continue
-                            try:
-                                response = await http_client.get(image_url)
-                                response.raise_for_status()
-                                ext = "png"
-                                ct = response.headers.get("content-type", "")
-                                if "jpeg" in ct or "jpg" in ct:
-                                    ext = "jpg"
-                                elif "gif" in ct:
-                                    ext = "gif"
-                                elif "webp" in ct:
-                                    ext = "webp"
-                                file_uploads.append({
-                                    "content": response.content,
-                                    "filename": f"image_{len(file_uploads)}.{ext}",
-                                })
-                            except Exception as dl_err:
-                                logger.warning(
-                                    "[Slack] Download failed for %s: %s",
-                                    safe_url_for_log(image_url), dl_err,
-                                )
-                                continue
-
-                if not file_uploads:
-                    continue
-
-                initial_comment = "\n".join(initial_comment_parts) if initial_comment_parts else ""
-                logger.info(
-                    "[Slack] Sending %d image(s) in single files_upload_v2 (chunk %d/%d)",
-                    len(file_uploads), chunk_idx + 1, len(chunks),
-                )
-                result = await self._get_client(chat_id).files_upload_v2(
-                    channel=chat_id,
-                    file_uploads=file_uploads,
-                    initial_comment=initial_comment,
-                    thread_ts=thread_ts,
-                )
-                self._record_uploaded_file_thread(chat_id, thread_ts)
-                _ = result
-            except Exception as e:
-                logger.warning(
-                    "[Slack] Multi-image files_upload_v2 failed (chunk %d/%d), falling back to per-image: %s",
-                    chunk_idx + 1, len(chunks), e,
-                    exc_info=True,
-                )
-                await super().send_multiple_images(chat_id, chunk, metadata, human_delay=human_delay)
-
    def _record_uploaded_file_thread(self, chat_id: str, thread_ts: Optional[str]) -> None:
        """Treat successful file uploads as bot participation in a thread."""
        if not thread_ts:
@@ -1144,7 +855,7 @@ class SlackAdapter(BasePlatformAdapter):
            return _ph(f'<{url}|{label}>')

        text = re.sub(
-            r'(?<!!)\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)',
+            r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)',
            _convert_markdown_link,
            text,
        )
@@ -1191,11 +902,9 @@ class SlackAdapter(BasePlatformAdapter):
        )

        # 10) Convert italic: _text_ stays as _text_ (already Slack italic)
-        #     Single *text* → _text_ (Slack italic), but only when the
-        #     emphasized text touches non-whitespace on both sides so literal
-        #     delimiters like "a * b * c" are preserved.
+        #     Single *text* → _text_ (Slack italic)
        text = re.sub(
-            r'(?<!\*)\*(\S(?:[^*\n]*?\S)?)\*(?!\*)',
+            r'(?<!\*)\*([^*\n]+)\*(?!\*)',
            lambda m: _ph(f'_{m.group(1)}_'),
            text,
        )
@@ -2222,168 +1931,6 @@ class SlackAdapter(BasePlatformAdapter):
            logger.error("[Slack] send_exec_approval failed: %s", e, exc_info=True)
            return SendResult(success=False, error=str(e))

-    async def send_slash_confirm(
-        self, chat_id: str, title: str, message: str, session_key: str,
-        confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        """Send a Block Kit three-option slash-command confirmation prompt."""
-        if not self._app:
-            return SendResult(success=False, error="Not connected")
-
-        try:
-            body = message[:2900] + "..." if len(message) > 2900 else message
-            thread_ts = self._resolve_thread_ts(None, metadata)
-            # Encode session_key and confirm_id into the button value so the
-            # callback handler can resolve without extra bookkeeping.
-            value = f"{session_key}|{confirm_id}"
-
-            blocks = [
-                {
-                    "type": "section",
-                    "text": {
-                        "type": "mrkdwn",
-                        "text": f"*{title or 'Confirm'}*\n\n{body}",
-                    },
-                },
-                {
-                    "type": "actions",
-                    "elements": [
-                        {
-                            "type": "button",
-                            "text": {"type": "plain_text", "text": "Approve Once"},
-                            "style": "primary",
-                            "action_id": "hermes_confirm_once",
-                            "value": value,
-                        },
-                        {
-                            "type": "button",
-                            "text": {"type": "plain_text", "text": "Always Approve"},
-                            "action_id": "hermes_confirm_always",
-                            "value": value,
-                        },
-                        {
-                            "type": "button",
-                            "text": {"type": "plain_text", "text": "Cancel"},
-                            "style": "danger",
-                            "action_id": "hermes_confirm_cancel",
-                            "value": value,
-                        },
-                    ],
-                },
-            ]
-
-            kwargs: Dict[str, Any] = {
-                "channel": chat_id,
-                "text": f"{title or 'Confirm'}: {body[:100]}",
-                "blocks": blocks,
-            }
-            if thread_ts:
-                kwargs["thread_ts"] = thread_ts
-
-            result = await self._get_client(chat_id).chat_postMessage(**kwargs)
-            return SendResult(success=True, message_id=result.get("ts", ""), raw_response=result)
-        except Exception as e:
-            logger.error("[Slack] send_slash_confirm failed: %s", e, exc_info=True)
-            return SendResult(success=False, error=str(e))
-
-    async def _handle_slash_confirm_action(self, ack, body, action) -> None:
-        """Handle a slash-confirm button click from Block Kit."""
-        await ack()
-
-        action_id = action.get("action_id", "")
-        value = action.get("value", "")
-        message = body.get("message", {})
-        msg_ts = message.get("ts", "")
-        channel_id = body.get("channel", {}).get("id", "")
-        user_name = body.get("user", {}).get("name", "unknown")
-        user_id = body.get("user", {}).get("id", "")
-
-        # Authorization — reuse the exec-approval allowlist.
-        allowed_csv = os.getenv("SLACK_ALLOWED_USERS", "").strip()
-        if allowed_csv:
-            allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
-            if "*" not in allowed_ids and user_id not in allowed_ids:
-                logger.warning(
-                    "[Slack] Unauthorized slash-confirm click by %s (%s) — ignoring",
-                    user_name, user_id,
-                )
-                return
-
-        # Parse session_key|confirm_id back out
-        if "|" not in value:
-            logger.warning("[Slack] Malformed slash-confirm value: %s", value)
-            return
-        session_key, confirm_id = value.split("|", 1)
-
-        choice_map = {
-            "hermes_confirm_once": "once",
-            "hermes_confirm_always": "always",
-            "hermes_confirm_cancel": "cancel",
-        }
-        choice = choice_map.get(action_id, "cancel")
-
-        label_map = {
-            "once": f"✅ Approved once by {user_name}",
-            "always": f"🔒 Always approved by {user_name}",
-            "cancel": f"❌ Cancelled by {user_name}",
-        }
-        decision_text = label_map.get(choice, f"Resolved by {user_name}")
-
-        # Pull original prompt body out of the section block so we can show
-        # the decision inline without losing context.
-        original_text = ""
-        for block in message.get("blocks", []):
-            if block.get("type") == "section":
-                original_text = block.get("text", {}).get("text", "")
-                break
-
-        updated_blocks = [
-            {
-                "type": "section",
-                "text": {
-                    "type": "mrkdwn",
-                    "text": original_text or "Confirmation prompt",
-                },
-            },
-            {
-                "type": "context",
-                "elements": [
-                    {"type": "mrkdwn", "text": decision_text},
-                ],
-            },
-        ]
-
-        try:
-            await self._get_client(channel_id).chat_update(
-                channel=channel_id,
-                ts=msg_ts,
-                text=decision_text,
-                blocks=updated_blocks,
-            )
-        except Exception as e:
-            logger.warning("[Slack] Failed to update slash-confirm message: %s", e)
-
-        # Resolve via the module-level primitive and post any follow-up.
-        try:
-            from tools import slash_confirm as _slash_confirm_mod
-            result_text = await _slash_confirm_mod.resolve(session_key, confirm_id, choice)
-            if result_text:
-                post_kwargs: Dict[str, Any] = {
-                    "channel": channel_id,
-                    "text": result_text,
-                }
-                # Inherit the thread so the reply stays in the same place.
-                thread_ts = message.get("thread_ts") or msg_ts
-                if thread_ts:
-                    post_kwargs["thread_ts"] = thread_ts
-                await self._get_client(channel_id).chat_postMessage(**post_kwargs)
-            logger.info(
-                "Slack button resolved slash-confirm for session %s (choice=%s, user=%s)",
-                session_key, choice, user_name,
-            )
-        except Exception as exc:
-            logger.error("Failed to resolve slash-confirm from Slack button: %s", exc, exc_info=True)
-
    async def _handle_approval_action(self, ack, body, action) -> None:
        """Handle an approval button click from Block Kit."""
        await ack()
@@ -2701,14 +2248,9 @@ class SlackAdapter(BasePlatformAdapter):
            # gateway command dispatcher by prepending the slash.
            text = f"/{slash_name} {text}".strip()

-        # Slack slash commands can originate from DMs or shared channels.
-        # Preserve DM semantics only for DM channel IDs; shared channels must
-        # keep group semantics so different users do not collide into one
-        # session key.
-        is_dm = str(channel_id).startswith("D")
        source = self.build_source(
            chat_id=channel_id,
-            chat_type="dm" if is_dm else "group",
+            chat_type="dm",  # Slash commands are always in DM-like context
            user_id=user_id,
        )

@@ -2719,26 +2261,7 @@ class SlackAdapter(BasePlatformAdapter):
            raw_message=command,
        )

-        # Stash the Slack response_url so the first reply for this
-        # channel+user can be routed ephemerally (replaces the initial
-        # "Running /cmd…" ack shown by handle_hermes_command).
-        # Only stash for COMMAND events (text starts with "/") — free-form
-        # questions via "/hermes <question>" must produce public replies so
-        # the whole channel can see the agent's answer.
-        response_url = command.get("response_url", "")
-        if response_url and user_id and channel_id and text.startswith("/"):
-            self._slash_command_contexts[(channel_id, user_id)] = {
-                "response_url": response_url,
-                "ts": time.monotonic(),
-            }
-
-        # Set the ContextVar so send() can match the correct stashed
-        # response_url even when multiple users slash concurrently.
-        _slash_user_id_token = _slash_user_id.set(user_id or None)
-        try:
-            await self.handle_message(event)
-        finally:
-            _slash_user_id.reset(_slash_user_id_token)
+        await self.handle_message(event)

    def _has_active_session_for_thread(
        self,
@@ -2899,13 +2422,6 @@ class SlackAdapter(BasePlatformAdapter):
            raw = os.getenv("SLACK_FREE_RESPONSE_CHANNELS", "")
        if isinstance(raw, list):
            return {str(part).strip() for part in raw if str(part).strip()}
-        # Coerce non-list scalars (str/int/float) to str before splitting.
-        # A bare numeric YAML value (`free_response_channels: 1234567890`) is
-        # loaded as int and was previously falling through the isinstance(str)
-        # branch to return an empty set.  str() here accepts whatever scalar
-        # the YAML loader hands us without changing existing string/CSV
-        # semantics.
-        s = str(raw).strip() if raw is not None else ""
-        if s:
-            return {part.strip() for part in s.split(",") if part.strip()}
+        if isinstance(raw, str) and raw.strip():
+            return {part.strip() for part in raw.split(",") if part.strip()}
        return set()
@@ -84,7 +84,6 @@ from gateway.platforms.telegram_network import (
    discover_fallback_ips,
    parse_fallback_ip_env,
 )
-from utils import atomic_replace


 def check_telegram_requirements() -> bool:
@@ -123,12 +122,12 @@ def _strip_mdv2(text: str) -> str:


 # ---------------------------------------------------------------------------
-# Markdown table → Telegram-friendly row groups
+# Markdown table → code block conversion
 # ---------------------------------------------------------------------------
 # Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
 # so pipe tables render as noisy backslash-pipe text with no alignment.
-# Reformating each row into a bold heading plus bullet list keeps the content
-# readable on mobile clients while preserving the source data.
+# Wrapping the table in a fenced code block makes Telegram render it as
+# monospace preformatted text with columns intact.

 # Matches a GFM table delimiter row: optional outer pipes, cells containing
 # only dashes (with optional leading/trailing colons for alignment) separated
@@ -145,49 +144,13 @@ def _is_table_row(line: str) -> bool:
    return bool(stripped) and '|' in stripped


-def _split_markdown_table_row(line: str) -> list[str]:
-    """Split a simple GFM table row into stripped cell values."""
-    stripped = line.strip()
-    if stripped.startswith("|"):
-        stripped = stripped[1:]
-    if stripped.endswith("|"):
-        stripped = stripped[:-1]
-    return [cell.strip() for cell in stripped.split("|")]
-
-
-def _render_table_block_for_telegram(table_block: list[str]) -> str:
-    """Render a detected GFM table as Telegram-friendly row groups."""
-    if len(table_block) < 3:
-        return "\n".join(table_block)
-
-    headers = _split_markdown_table_row(table_block[0])
-    if len(headers) < 2:
-        return "\n".join(table_block)
-
-    rendered_rows: list[str] = []
-    for index, row in enumerate(table_block[2:], start=1):
-        cells = _split_markdown_table_row(row)
-        if len(cells) < len(headers):
-            cells.extend([""] * (len(headers) - len(cells)))
-        elif len(cells) > len(headers):
-            cells = cells[: len(headers)]
-
-        heading = next((cell for cell in cells if cell), f"Row {index}")
-        rendered_rows.append(f"**{heading}**")
-        rendered_rows.extend(
-            f"• {header}: {value}" for header, value in zip(headers, cells)
-        )
-
-    return "\n\n".join(rendered_rows)
-
-
 def _wrap_markdown_tables(text: str) -> str:
-    """Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.
+    """Wrap GFM-style pipe tables in ``` fences so Telegram renders them.

    Detected by a row containing '|' immediately followed by a delimiter
    row matching :data:`_TABLE_SEPARATOR_RE`.  Subsequent pipe-containing
-    non-blank lines are consumed as the table body and rewritten as
-    per-row bullet groups. Tables inside existing fenced code blocks are left
+    non-blank lines are consumed as the table body and included in the
+    wrapped block.  Tables inside existing fenced code blocks are left
    alone.
    """
    if '|' not in text or '-' not in text:
@@ -224,7 +187,9 @@ def _wrap_markdown_tables(text: str) -> str:
            while j < len(lines) and _is_table_row(lines[j]):
                table_block.append(lines[j])
                j += 1
-            out.append(_render_table_block_for_telegram(table_block))
+            out.append('```')
+            out.extend(table_block)
+            out.append('```')
            i = j
            continue

@@ -237,14 +202,14 @@ def _wrap_markdown_tables(text: str) -> str:
 class TelegramAdapter(BasePlatformAdapter):
    """
    Telegram bot adapter.
-
+    
    Handles:
    - Receiving messages from users and groups
    - Sending responses with Telegram markdown
    - Forum topics (thread_id support)
    - Media messages
    """
-
+    
    # Telegram message limits
    MAX_MESSAGE_LENGTH = 4096
    # Threshold for detecting Telegram client-side message splits.
@@ -252,7 +217,7 @@ class TelegramAdapter(BasePlatformAdapter):
    _SPLIT_THRESHOLD = 4000
    MEDIA_GROUP_WAIT_SECONDS = 0.8
    _GENERAL_TOPIC_THREAD_ID = "1"
-
+    
    def __init__(self, config: PlatformConfig):
        super().__init__(config, Platform.TELEGRAM)
        self._app: Optional[Application] = None
@@ -286,57 +251,15 @@ class TelegramAdapter(BasePlatformAdapter):
        self._model_picker_state: Dict[str, dict] = {}
        # Approval button state: message_id → session_key
        self._approval_state: Dict[int, str] = {}
-        # Slash-confirm button state: confirm_id → session_key (for /reload-mcp
-        # and any other slash-confirm prompts; see GatewayRunner._request_slash_confirm).
-        self._slash_confirm_state: Dict[str, str] = {}

-    def _is_callback_user_authorized(
-        self,
-        user_id: str,
-        *,
-        chat_id: Optional[str] = None,
-        chat_type: Optional[str] = None,
-        thread_id: Optional[str] = None,
-        user_name: Optional[str] = None,
-    ) -> bool:
+    @staticmethod
+    def _is_callback_user_authorized(user_id: str) -> bool:
        """Return whether a Telegram inline-button caller may perform gated actions."""
-        normalized_user_id = str(user_id or "").strip()
-        if not normalized_user_id:
-            return False
-
-        runner = getattr(getattr(self, "_message_handler", None), "__self__", None)
-        auth_fn = getattr(runner, "_is_user_authorized", None)
-        if callable(auth_fn):
-            try:
-                from gateway.session import SessionSource
-
-                normalized_chat_type = str(chat_type or "dm").strip().lower() or "dm"
-                if normalized_chat_type == "private":
-                    normalized_chat_type = "dm"
-                elif normalized_chat_type == "supergroup":
-                    normalized_chat_type = "forum" if thread_id is not None else "group"
-
-                source = SessionSource(
-                    platform=Platform.TELEGRAM,
-                    chat_id=str(chat_id or normalized_user_id),
-                    chat_type=normalized_chat_type,
-                    user_id=normalized_user_id,
-                    user_name=str(user_name).strip() if user_name else None,
-                    thread_id=str(thread_id) if thread_id is not None else None,
-                )
-                return bool(auth_fn(source))
-            except Exception:
-                logger.debug(
-                    "[Telegram] Falling back to env-only callback auth for user %s",
-                    normalized_user_id,
-                    exc_info=True,
-                )
-
        allowed_csv = os.getenv("TELEGRAM_ALLOWED_USERS", "").strip()
        if not allowed_csv:
            return True
        allowed_ids = {uid.strip() for uid in allowed_csv.split(",") if uid.strip()}
-        return "*" in allowed_ids or normalized_user_id in allowed_ids
+        return "*" in allowed_ids or user_id in allowed_ids

    @classmethod
    def _metadata_thread_id(cls, metadata: Optional[Dict[str, Any]]) -> Optional[str]:
@@ -411,49 +334,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
        return {"disable_web_page_preview": True}

-    async def _drain_polling_connections(self) -> None:
-        """Reset the httpx connection pool used for getUpdates polling.
-
-        Network errors (especially through proxies like sing-box) can leave
-        httpx connections in a half-closed state that still occupy pool slots.
-        After enough reconnect cycles the pool fills up entirely, causing
-        ``Pool timeout: All connections in the connection pool are occupied.``
-
-        We reset ONLY ``_request[0]`` (the getUpdates request) — the general
-        request (``_request[1]``) is left untouched so concurrent
-        ``send_message`` / ``edit_message`` calls are never interrupted.
-
-        Implementation note: accesses ``Bot._request[0]`` which is the
-        get-updates ``BaseRequest`` in the PTB 22.x internal tuple
-        ``(get_updates_request, general_request)``.  There is no public
-        accessor for the polling request; review if upgrading to PTB 23+.
-        """
-        if not (self._app and self._app.bot):
-            return
-        try:
-            # PTB 22.x: _request is a (get_updates, general) tuple;
-            # no public accessor exists for the polling request.
-            polling_req = self._app.bot._request[0]  # noqa: SLF001
-        except Exception:
-            return
-        try:
-            await polling_req.shutdown()
-        except Exception:
-            logger.debug(
-                "[%s] Polling request shutdown failed (non-fatal)",
-                self.name, exc_info=True,
-            )
-        try:
-            await polling_req.initialize()
-            logger.debug(
-                "[%s] Polling request pool drained before reconnect", self.name
-            )
-        except Exception:
-            logger.debug(
-                "[%s] Polling request re-initialize failed (non-fatal)",
-                self.name, exc_info=True,
-            )
-
    async def _handle_polling_network_error(self, error: Exception) -> None:
        """Reconnect polling after a transient network interruption.

@@ -499,8 +379,6 @@ class TelegramAdapter(BasePlatformAdapter):
        except Exception:
            pass

-        await self._drain_polling_connections()
-
        try:
            await self._app.updater.start_polling(
                allowed_updates=Update.ALL_TYPES,
@@ -548,7 +426,6 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception:
                pass
            await asyncio.sleep(RETRY_DELAY)
-            await self._drain_polling_connections()
            try:
                await self._app.updater.start_polling(
                    allowed_updates=Update.ALL_TYPES,
@@ -677,7 +554,7 @@ class TelegramAdapter(BasePlatformAdapter):
                        _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
                        f.flush()
                        os.fsync(f.fileno())
-                    atomic_replace(tmp_path, config_path)
+                    os.replace(tmp_path, config_path)
                except BaseException:
                    try:
                        os.unlink(tmp_path)
@@ -761,20 +638,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    # Persist thread_id to config so we don't recreate on next restart
                    self._persist_dm_topic_thread_id(int(chat_id), topic_name, thread_id)

-                    # Send a seed message so the topic is visible in Telegram's client.
-                    # Empty topics are hidden by the client UI until they contain a message.
-                    try:
-                        await self._bot.send_message(
-                            chat_id=int(chat_id),
-                            message_thread_id=thread_id,
-                            text=f"\U0001f4cc {topic_name}",
-                        )
-                    except Exception as seed_err:
-                        logger.debug(
-                            "[%s] Could not send seed message to topic '%s': %s",
-                            self.name, topic_name, seed_err,
-                        )
-
    async def connect(self) -> bool:
        """Connect to Telegram via polling or webhook.

@@ -1050,7 +913,7 @@ class TelegramAdapter(BasePlatformAdapter):
            self._set_fatal_error("telegram_connect_error", message, retryable=True)
            logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
            return False
-
+    
    async def disconnect(self) -> None:
        """Stop polling/webhook, cancel pending album flushes, and disconnect."""
        pending_media_group_tasks = list(self._media_group_tasks.values())
@@ -1374,7 +1237,6 @@ class TelegramAdapter(BasePlatformAdapter):
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
-        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an inline-keyboard update prompt (Yes / No buttons).

@@ -1392,14 +1254,11 @@ class TelegramAdapter(BasePlatformAdapter):
                    InlineKeyboardButton("✗ No", callback_data="update_prompt:n"),
                ]
            ])
-            thread_id = self._metadata_thread_id(metadata)
-            message_thread_id = self._message_thread_id_for_send(thread_id)
            msg = await self._bot.send_message(
                chat_id=int(chat_id),
                text=text,
                parse_mode=ParseMode.MARKDOWN,
                reply_markup=keyboard,
-                message_thread_id=message_thread_id,
                **self._link_preview_kwargs(),
            )
            return SendResult(success=True, message_id=str(msg.message_id))
@@ -1471,48 +1330,6 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.warning("[%s] send_exec_approval failed: %s", self.name, e)
            return SendResult(success=False, error=str(e))

-    async def send_slash_confirm(
-        self, chat_id: str, title: str, message: str, session_key: str,
-        confirm_id: str, metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        """Render a three-button slash-command confirmation prompt."""
-        if not self._bot:
-            return SendResult(success=False, error="Not connected")
-
-        try:
-            # Message body: render as plain text (message already contains
-            # markdown formatting from the gateway primitive).
-            preview = message if len(message) <= 3800 else message[:3800] + "..."
-
-            keyboard = InlineKeyboardMarkup([
-                [
-                    InlineKeyboardButton("✅ Approve Once", callback_data=f"sc:once:{confirm_id}"),
-                    InlineKeyboardButton("🔒 Always Approve", callback_data=f"sc:always:{confirm_id}"),
-                ],
-                [
-                    InlineKeyboardButton("❌ Cancel", callback_data=f"sc:cancel:{confirm_id}"),
-                ],
-            ])
-
-            thread_id = self._metadata_thread_id(metadata)
-            kwargs: Dict[str, Any] = {
-                "chat_id": int(chat_id),
-                "text": preview,
-                "parse_mode": ParseMode.MARKDOWN,
-                "reply_markup": keyboard,
-                **self._link_preview_kwargs(),
-            }
-            message_thread_id = self._message_thread_id_for_send(thread_id)
-            if message_thread_id is not None:
-                kwargs["message_thread_id"] = message_thread_id
-
-            msg = await self._bot.send_message(**kwargs)
-            self._slash_confirm_state[confirm_id] = session_key
-            return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:
-            logger.warning("[%s] send_slash_confirm failed: %s", self.name, e)
-            return SendResult(success=False, error=str(e))
-
    async def send_model_picker(
        self,
        chat_id: str,
@@ -1817,12 +1634,6 @@ class TelegramAdapter(BasePlatformAdapter):
        if not query or not query.data:
            return
        data = query.data
-        query_message = getattr(query, "message", None)
-        query_chat_id = getattr(query_message, "chat_id", None)
-        query_chat = getattr(query_message, "chat", None)
-        query_chat_type = getattr(query_chat, "type", None)
-        query_thread_id = getattr(query_message, "message_thread_id", None)
-        query_user_name = getattr(query.from_user, "first_name", None)

        # --- Model picker callbacks ---
        if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
@@ -1844,13 +1655,7 @@ class TelegramAdapter(BasePlatformAdapter):

                # Only authorized users may click approval buttons.
                caller_id = str(getattr(query.from_user, "id", ""))
-                if not self._is_callback_user_authorized(
-                    caller_id,
-                    chat_id=query_chat_id,
-                    chat_type=str(query_chat_type) if query_chat_type is not None else None,
-                    thread_id=str(query_thread_id) if query_thread_id is not None else None,
-                    user_name=query_user_name,
-                ):
+                if not self._is_callback_user_authorized(caller_id):
                    await query.answer(text="⛔ You are not authorized to approve commands.")
                    return

@@ -1893,86 +1698,12 @@ class TelegramAdapter(BasePlatformAdapter):
                    logger.error("Failed to resolve gateway approval from Telegram button: %s", exc)
            return

-        # --- Slash-confirm callbacks (sc:choice:confirm_id) ---
-        if data.startswith("sc:"):
-            parts = data.split(":", 2)
-            if len(parts) == 3:
-                choice = parts[1]  # once, always, cancel
-                confirm_id = parts[2]
-
-                caller_id = str(getattr(query.from_user, "id", ""))
-                if not self._is_callback_user_authorized(
-                    caller_id,
-                    chat_id=query_chat_id,
-                    chat_type=str(query_chat_type) if query_chat_type is not None else None,
-                    thread_id=str(query_thread_id) if query_thread_id is not None else None,
-                    user_name=query_user_name,
-                ):
-                    await query.answer(text="⛔ You are not authorized to answer this prompt.")
-                    return
-
-                session_key = self._slash_confirm_state.pop(confirm_id, None)
-                if not session_key:
-                    await query.answer(text="This prompt has already been resolved.")
-                    return
-
-                label_map = {
-                    "once": "✅ Approved once",
-                    "always": "🔒 Always approve",
-                    "cancel": "❌ Cancelled",
-                }
-                user_display = getattr(query.from_user, "first_name", "User")
-                label = label_map.get(choice, "Resolved")
-
-                await query.answer(text=label)
-
-                try:
-                    await query.edit_message_text(
-                        text=f"{label} by {user_display}",
-                        parse_mode=ParseMode.MARKDOWN,
-                        reply_markup=None,
-                    )
-                except Exception:
-                    pass
-
-                # Resolve via the module-level primitive.  The runner stored
-                # a handler keyed by session_key; we run it on the event
-                # loop and (if it returns a string) send it as a follow-up
-                # message in the same chat.
-                try:
-                    from tools import slash_confirm as _slash_confirm_mod
-                    result_text = await _slash_confirm_mod.resolve(
-                        session_key, confirm_id, choice,
-                    )
-                    if result_text and query.message:
-                        # Inherit the prompt message's thread so the reply
-                        # lands in the same supergroup topic / reply chain.
-                        thread_id = getattr(query.message, "message_thread_id", None)
-                        send_kwargs: Dict[str, Any] = {
-                            "chat_id": int(query.message.chat_id),
-                            "text": result_text,
-                            "parse_mode": ParseMode.MARKDOWN,
-                            **self._link_preview_kwargs(),
-                        }
-                        if thread_id is not None:
-                            send_kwargs["message_thread_id"] = thread_id
-                        await self._bot.send_message(**send_kwargs)
-                except Exception as exc:
-                    logger.error("[%s] slash-confirm callback failed: %s", self.name, exc, exc_info=True)
-            return
-
        # --- Update prompt callbacks ---
        if not data.startswith("update_prompt:"):
            return
        answer = data.split(":", 1)[1]  # "y" or "n"
        caller_id = str(getattr(query.from_user, "id", ""))
-        if not self._is_callback_user_authorized(
-            caller_id,
-            chat_id=query_chat_id,
-            chat_type=str(query_chat_type) if query_chat_type is not None else None,
-            thread_id=str(query_thread_id) if query_thread_id is not None else None,
-            user_name=query_user_name,
-        ):
+        if not self._is_callback_user_authorized(caller_id):
            await query.answer(text="⛔ You are not authorized to answer update prompts.")
            return
        await query.answer(text=f"Sent '{answer}' to the update process.")
@@ -2032,9 +1763,8 @@ class TelegramAdapter(BasePlatformAdapter):
                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
            
            with open(audio_path, "rb") as audio_file:
-                ext = os.path.splitext(audio_path)[1].lower()
-                # .ogg / .opus files -> send as voice (round playable bubble)
-                if ext in (".ogg", ".opus"):
+                # .ogg / .opus files (any letter case) -> send as voice (round playable bubble)
+                if audio_path.lower().endswith((".ogg", ".opus")):
                    _voice_thread = self._metadata_thread_id(metadata)
                    msg = await self._bot.send_voice(
                        chat_id=int(chat_id),
@@ -2043,8 +1773,8 @@ class TelegramAdapter(BasePlatformAdapter):
                        reply_to_message_id=int(reply_to) if reply_to else None,
                        message_thread_id=self._message_thread_id_for_send(_voice_thread),
                    )
-                elif ext in (".mp3", ".m4a"):
-                    # Telegram's Bot API sendAudio only accepts MP3 / M4A.
+                else:
+                    # .mp3 and others -> send as audio file
                    _audio_thread = self._metadata_thread_id(metadata)
                    msg = await self._bot.send_audio(
                        chat_id=int(chat_id),
@@ -2053,16 +1783,6 @@ class TelegramAdapter(BasePlatformAdapter):
                        reply_to_message_id=int(reply_to) if reply_to else None,
                        message_thread_id=self._message_thread_id_for_send(_audio_thread),
                    )
-                else:
-                    # Formats Telegram can't play natively (.wav, .flac, ...)
-                    # — fall back to document delivery instead of raising.
-                    return await self.send_document(
-                        chat_id=chat_id,
-                        file_path=audio_path,
-                        caption=caption,
-                        reply_to=reply_to,
-                        metadata=metadata,
-                    )
            return SendResult(success=True, message_id=str(msg.message_id))
        except Exception as e:
            logger.error(
@@ -2072,118 +1792,7 @@ class TelegramAdapter(BasePlatformAdapter):
                exc_info=True,
            )
            return await super().send_voice(chat_id, audio_path, caption, reply_to)
-
-    async def send_multiple_images(
-        self,
-        chat_id: str,
-        images: List[tuple],
-        metadata: Optional[Dict[str, Any]] = None,
-        human_delay: float = 0.0,
-    ) -> None:
-        """Send a batch of images natively via Telegram's media group API.
-
-        Telegram's ``send_media_group`` bundles up to 10 photos/videos into
-        a single album. Larger batches are chunked. Animated GIFs cannot
-        go into a media group (they require ``send_animation``), so they
-        are peeled off and sent individually via the base default path.
-
-        URL-based photos go into the group directly; local files are
-        opened as byte streams. On failure the whole batch falls back to
-        the base adapter's per-image loop.
-        """
-        if not self._bot:
-            return
-        if not images:
-            return
-
-        try:
-            from telegram import InputMediaPhoto
-        except Exception as exc:  # pragma: no cover - missing SDK
-            logger.warning(
-                "[%s] InputMediaPhoto unavailable, falling back to per-image send: %s",
-                self.name, exc,
-            )
-            await super().send_multiple_images(chat_id, images, metadata, human_delay)
-            return
-
-        # Peel off animations — they need send_animation, not send_media_group
-        animations: List[tuple] = []
-        photos: List[tuple] = []
-        for image_url, alt_text in images:
-            if not image_url.startswith("file://") and self._is_animation_url(image_url):
-                animations.append((image_url, alt_text))
-            else:
-                photos.append((image_url, alt_text))
-
-        # Animations: route through the base default (per-image send_animation)
-        if animations:
-            await super().send_multiple_images(
-                chat_id, animations, metadata, human_delay=human_delay,
-            )
-
-        if not photos:
-            return
-
-        from urllib.parse import unquote as _unquote
-        _thread = self._metadata_thread_id(metadata)
-        _thread_id = self._message_thread_id_for_send(_thread)
-
-        # Chunk into groups of 10 (Telegram's album limit)
-        CHUNK = 10
-        chunks = [photos[i:i + CHUNK] for i in range(0, len(photos), CHUNK)]
-
-        for chunk_idx, chunk in enumerate(chunks):
-            if human_delay > 0 and chunk_idx > 0:
-                await asyncio.sleep(human_delay)
-
-            media: List[Any] = []
-            opened_files: List[Any] = []
-            try:
-                for image_url, alt_text in chunk:
-                    caption = alt_text[:1024] if alt_text else None
-                    if image_url.startswith("file://"):
-                        local_path = _unquote(image_url[7:])
-                        if not os.path.exists(local_path):
-                            logger.warning(
-                                "[%s] Skipping missing image in media group: %s",
-                                self.name, local_path,
-                            )
-                            continue
-                        fh = open(local_path, "rb")
-                        opened_files.append(fh)
-                        media.append(InputMediaPhoto(media=fh, caption=caption))
-                    else:
-                        media.append(InputMediaPhoto(media=image_url, caption=caption))
-
-                if not media:
-                    continue
-
-                logger.info(
-                    "[%s] Sending media group of %d photo(s) (chunk %d/%d)",
-                    self.name, len(media), chunk_idx + 1, len(chunks),
-                )
-                await self._bot.send_media_group(
-                    chat_id=int(chat_id),
-                    media=media,
-                    message_thread_id=_thread_id,
-                )
-            except Exception as e:
-                logger.warning(
-                    "[%s] send_media_group failed (chunk %d/%d), falling back to per-image: %s",
-                    self.name, chunk_idx + 1, len(chunks), e,
-                    exc_info=True,
-                )
-                # Fallback: send each photo in this chunk individually
-                await super().send_multiple_images(
-                    chat_id, chunk, metadata, human_delay=human_delay,
-                )
-            finally:
-                for fh in opened_files:
-                    try:
-                        fh.close()
-                    except Exception:
-                        pass
-
+    
    async def send_image_file(
        self,
        chat_id: str,
@@ -2350,7 +1959,7 @@ class TelegramAdapter(BasePlatformAdapter):
                )
                # Final fallback: send URL as text
                return await super().send_image(chat_id, image_url, caption, reply_to)
-
+    
    async def send_animation(
        self,
        chat_id: str,
@@ -2412,7 +2021,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    e,
                    exc_info=True,
                )
-
+    
    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
        """Get information about a Telegram chat."""
        if not self._bot:
@@ -2446,7 +2055,7 @@ class TelegramAdapter(BasePlatformAdapter):
                exc_info=True,
            )
            return {"name": str(chat_id), "type": "dm", "error": str(e)}
-
+    
    def format_message(self, content: str) -> str:
        """
        Convert standard markdown to Telegram MarkdownV2 format.
@@ -2471,8 +2080,10 @@ class TelegramAdapter(BasePlatformAdapter):

        text = content

-        # 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
-        #    before the normal MarkdownV2 conversions run.
+        # 0) Pre-wrap GFM-style pipe tables in ``` fences.  Telegram can't
+        #    render tables natively, but fenced code blocks render as
+        #    monospace preformatted text with columns intact.  The wrapped
+        #    tables then flow through step (1) below as protected regions.
        text = _wrap_markdown_tables(text)

        # 1) Protect fenced code blocks (``` ... ```)
@@ -2618,7 +2229,7 @@ class TelegramAdapter(BasePlatformAdapter):
        text = ''.join(_safe_parts)

        return text
-
+    
    # ── Group mention gating ──────────────────────────────────────────────

    def _telegram_require_mention(self) -> bool:
@@ -2833,7 +2444,7 @@ class TelegramAdapter(BasePlatformAdapter):
        event = self._build_message_event(update.message, MessageType.TEXT, update_id=update.update_id)
        event.text = self._clean_bot_trigger_text(event.text)
        self._enqueue_text_event(event)
-
+    
    async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming command messages."""
        if not update.message or not update.message.text:
@@ -2843,7 +2454,7 @@ class TelegramAdapter(BasePlatformAdapter):
        
        event = self._build_message_event(update.message, MessageType.COMMAND, update_id=update.update_id)
        await self.handle_message(event)
-
+    
    async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming location/venue pin messages."""
        if not update.message:
@@ -3201,7 +2812,7 @@ class TelegramAdapter(BasePlatformAdapter):
            return

        await self.handle_message(event)
-
+    
    async def _queue_media_group_event(self, media_group_id: str, event: MessageEvent) -> None:
        """Buffer Telegram media-group items so albums arrive as one logical event.

@@ -202,22 +202,26 @@ class WebhookAdapter(BasePlatformAdapter):
        if deliver_type == "github_comment":
            return await self._deliver_github_comment(content, delivery)

-        # Cross-platform delivery — any platform with a gateway adapter.
-        # Check both built-in names and plugin-registered platforms.
-        _BUILTIN_DELIVER_PLATFORMS = {
-            "telegram", "discord", "slack", "signal", "sms", "whatsapp",
-            "matrix", "mattermost", "homeassistant", "email", "dingtalk",
-            "feishu", "wecom", "wecom_callback", "weixin", "bluebubbles",
-            "qqbot", "yuanbao",
-        }
-        _is_known_platform = deliver_type in _BUILTIN_DELIVER_PLATFORMS
-        if not _is_known_platform:
-            try:
-                from gateway.platform_registry import platform_registry
-                _is_known_platform = platform_registry.is_registered(deliver_type)
-            except Exception:
-                pass
-        if self.gateway_runner and _is_known_platform:
+        # Cross-platform delivery — any platform with a gateway adapter
+        if self.gateway_runner and deliver_type in (
+            "telegram",
+            "discord",
+            "slack",
+            "signal",
+            "sms",
+            "whatsapp",
+            "matrix",
+            "mattermost",
+            "homeassistant",
+            "email",
+            "dingtalk",
+            "feishu",
+            "wecom",
+            "wecom_callback",
+            "weixin",
+            "bluebubbles",
+            "qqbot",
+        ):
            return await self._deliver_cross_platform(
                deliver_type, content, delivery
            )
@@ -89,21 +89,8 @@ MAX_CONSECUTIVE_FAILURES = 3
 RETRY_DELAY_SECONDS = 2
 BACKOFF_DELAY_SECONDS = 30
 SESSION_EXPIRED_ERRCODE = -14
-RATE_LIMIT_ERRCODE = -2  # iLink frequency limit — backoff and retry
 MESSAGE_DEDUP_TTL_SECONDS = 300

-
-def _is_stale_session_ret(
-    ret: "Optional[int]", errcode: "Optional[int]", errmsg: "Optional[str]",
-) -> bool:
-    """True when iLink returns ret=-2 / errcode=-2 with 'unknown error',
-    which is a stale-session signal (same as errcode=-14) rather than
-    a genuine rate limit."""
-    if ret != RATE_LIMIT_ERRCODE and errcode != RATE_LIMIT_ERRCODE:
-        return False
-    return (errmsg or "").lower() == "unknown error"
-
-
 MEDIA_IMAGE = 1
 MEDIA_VIDEO = 2
 MEDIA_FILE = 3
@@ -1126,7 +1113,7 @@ async def qr_login(
 class WeixinAdapter(BasePlatformAdapter):
    """Native Hermes adapter for Weixin personal accounts."""

-    MAX_MESSAGE_LENGTH = 2000
+    MAX_MESSAGE_LENGTH = 4000

    # WeChat does not support editing sent messages — streaming must use the
    # fallback "send-final-only" path so the cursor (▉) is never left visible.
@@ -1151,10 +1138,10 @@ class WeixinAdapter(BasePlatformAdapter):
            extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
        ).strip().rstrip("/")
        self._send_chunk_delay_seconds = float(
-            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
+            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
        )
        self._send_chunk_retries = int(
-            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
+            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
        )
        self._send_chunk_retry_delay_seconds = float(
            extra.get("send_chunk_retry_delay_seconds")
@@ -1222,17 +1209,6 @@ class WeixinAdapter(BasePlatformAdapter):
        self._mark_connected()
        _LIVE_ADAPTERS[self._token] = self
        logger.info("[%s] Connected account=%s base=%s", self.name, _safe_id(self._account_id), self._base_url)
-        if self._group_policy != "disabled":
-            logger.warning(
-                "[%s] WEIXIN_GROUP_POLICY=%s is set, but QR-login connects an iLink bot "
-                "identity (e.g. ...@im.bot) which typically cannot be invited into ordinary "
-                "WeChat groups. iLink usually does not deliver ordinary-group events for "
-                "these accounts, so group messages may never reach Hermes regardless of this "
-                "policy. If group delivery doesn't work, the limitation is on the iLink side, "
-                "not in Hermes.",
-                self.name,
-                self._group_policy,
-            )
        return True

    async def disconnect(self) -> None:
@@ -1277,8 +1253,7 @@ class WeixinAdapter(BasePlatformAdapter):
                ret = response.get("ret", 0)
                errcode = response.get("errcode", 0)
                if ret not in (0, None) or errcode not in (0, None):
-                    if (ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE
-                            or _is_stale_session_ret(ret, errcode, response.get("errmsg"))):
+                    if ret == SESSION_EXPIRED_ERRCODE or errcode == SESSION_EXPIRED_ERRCODE:
                        logger.error("[%s] Session expired; pausing for 10 minutes", self.name)
                        await asyncio.sleep(600)
                        consecutive_failures = 0
@@ -1543,7 +1518,6 @@ class WeixinAdapter(BasePlatformAdapter):
                        is_session_expired = (
                            ret == SESSION_EXPIRED_ERRCODE
                            or errcode == SESSION_EXPIRED_ERRCODE
-                            or _is_stale_session_ret(ret, errcode, resp.get("errmsg"))
                        )
                        # Session expired — strip token and retry once
                        if is_session_expired and not retried_without_token and context_token:
@@ -1557,28 +1531,6 @@ class WeixinAdapter(BasePlatformAdapter):
                                self.name, _safe_id(chat_id),
                            )
                            continue
-                        # Rate limit (-2) — backoff and retry
-                        is_rate_limited = (
-                            ret == RATE_LIMIT_ERRCODE
-                            or errcode == RATE_LIMIT_ERRCODE
-                        )
-                        if is_rate_limited:
-                            errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
-                            # Record the error so we raise a descriptive
-                            # RuntimeError (instead of AssertionError) if the
-                            # loop exhausts with the server still rate-limiting.
-                            last_error = RuntimeError(
-                                f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
-                            )
-                            if attempt >= self._send_chunk_retries:
-                                break
-                            wait = self._send_chunk_retry_delay_seconds * 3  # 3x backoff for rate limit
-                            logger.warning(
-                                "[%s] rate limited for %s; backing off %.1fs before retry",
-                                self.name, _safe_id(chat_id), wait,
-                            )
-                            await asyncio.sleep(wait)
-                            continue
                        errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
                        raise RuntimeError(
                            f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
@@ -1620,7 +1572,7 @@ class WeixinAdapter(BasePlatformAdapter):
        _, image_cleaned = self.extract_images(cleaned_content)
        local_files, final_content = self.extract_local_files(image_cleaned)

-        _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a", ".flac"}
+        _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
        _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".3gp"}
        _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}

@@ -90,7 +90,7 @@ from gateway.platforms.yuanbao_proto import (
    encode_get_group_member_list,
    next_seq_no,
 )
-from gateway.session import build_session_key
+from gateway.session import SessionSource, build_session_key

 logger = logging.getLogger(__name__)

@@ -1896,12 +1896,10 @@ class OwnerCommandMiddleware(InboundMiddleware):
        if cmd not in cls.ALLOWLIST:
            return None, None, False

-        # Sender identity check: bot owner <-> push.from_account == push.bot_owner_id.
-        # The allowlisted commands (/approve, /deny, /stop, /reset, ...) are
-        # privileged — leaking them to non-owners lets any group member approve
-        # a dangerous tool call, kill the owner's task, or wipe session state.
-        owner_id = str((push or {}).get("bot_owner_id") or "").strip()
-        is_owner = bool(owner_id) and owner_id == from_account
+        # Sender identity check: is_owner only when push.bot_owner_id is non-empty
+        # and equals from_account (never unconditionally True).
+        owner_id = str((push or {}).get("bot_owner_id") or "").strip()
+        is_owner = bool(owner_id) and owner_id == from_account
        return cmd, cmd_line, is_owner

    async def handle(self, ctx: InboundContext, next_fn) -> None:
@@ -21,10 +21,12 @@ import hashlib
 import hmac
 import logging
 import os
+import re
 import secrets
 import struct
 import time
 import urllib.parse
+from datetime import datetime, timezone, timedelta
 from typing import Optional, Any

 import httpx
@@ -19,8 +19,9 @@ yuanbao_proto.py - Yuanbao WebSocket 协议编解码（纯 Python 实现）
 from __future__ import annotations

 import logging
+import struct
 import threading
-from typing import Optional
+from typing import Optional, Union

 logger = logging.getLogger(__name__)

@@ -1,150 +0,0 @@
-"""Gateway runtime-metadata footer.
-
-Renders a compact footer showing runtime state (model, context %, cwd) and
-appends it to the FINAL message of an agent turn when enabled.  Off by default
-to keep replies minimal.
-
-Config (``~/.hermes/config.yaml``)::
-
-    display:
-      runtime_footer:
-        enabled: true                       # off by default
-        fields: [model, context_pct, cwd]   # order shown; drop any to hide
-
-Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
-Users can toggle the global setting with ``/footer on|off`` from both the CLI
-and any gateway platform.
-
-The footer is appended to the final response text in ``gateway/run.py`` right
-before returning the response to the adapter send path — so it only lands on
-the final message a user sees, not on tool-progress updates or streaming
-partials.  When streaming is on and the final text has already been delivered
-piecemeal, the footer is sent as a separate trailing message via
-``send_trailing_footer()``.
-"""
-
-from __future__ import annotations
-
-import os
-from pathlib import Path
-from typing import Any, Iterable, Optional
-
-_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
-_SEP = " · "
-
-
-def _home_relative_cwd(cwd: str) -> str:
-    """Return *cwd* with ``$HOME`` collapsed to ``~``.  Empty string if unset."""
-    if not cwd:
-        return ""
-    try:
-        home = os.path.expanduser("~")
-        p = os.path.abspath(cwd)
-        if home and (p == home or p.startswith(home + os.sep)):
-            return "~" + p[len(home):]
-        return p
-    except Exception:
-        return cwd
-
-
-def _model_short(model: Optional[str]) -> str:
-    """Drop ``vendor/`` prefix for readability (``openai/gpt-5.4`` → ``gpt-5.4``)."""
-    if not model:
-        return ""
-    return model.rsplit("/", 1)[-1]
-
-
-def resolve_footer_config(
-    user_config: dict[str, Any] | None,
-    platform_key: str | None = None,
-) -> dict[str, Any]:
-    """Resolve effective runtime-footer config for *platform_key*.
-
-    Merge order (later wins):
-        1. Built-in defaults (enabled=False)
-        2. ``display.runtime_footer``
-        3. ``display.platforms.<platform_key>.runtime_footer``
-    """
-    resolved = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
-    cfg = (user_config or {}).get("display") or {}
-
-    global_cfg = cfg.get("runtime_footer")
-    if isinstance(global_cfg, dict):
-        if "enabled" in global_cfg:
-            resolved["enabled"] = bool(global_cfg.get("enabled"))
-        if isinstance(global_cfg.get("fields"), list) and global_cfg["fields"]:
-            resolved["fields"] = [str(f) for f in global_cfg["fields"]]
-
-    if platform_key:
-        platforms = cfg.get("platforms") or {}
-        plat_cfg = platforms.get(platform_key)
-        if isinstance(plat_cfg, dict):
-            plat_footer = plat_cfg.get("runtime_footer")
-            if isinstance(plat_footer, dict):
-                if "enabled" in plat_footer:
-                    resolved["enabled"] = bool(plat_footer.get("enabled"))
-                if isinstance(plat_footer.get("fields"), list) and plat_footer["fields"]:
-                    resolved["fields"] = [str(f) for f in plat_footer["fields"]]
-
-    return resolved
-
-
-def format_runtime_footer(
-    *,
-    model: Optional[str],
-    context_tokens: int,
-    context_length: Optional[int],
-    cwd: Optional[str] = None,
-    fields: Iterable[str] = _DEFAULT_FIELDS,
-) -> str:
-    """Render the footer line, or return "" if no fields have data.
-
-    Fields are skipped silently when their underlying data is missing — a
-    partially-populated footer is better than a line with ``?%`` or empty slots.
-    """
-    parts: list[str] = []
-    for field in fields:
-        if field == "model":
-            m = _model_short(model)
-            if m:
-                parts.append(m)
-        elif field == "context_pct":
-            if context_length and context_length > 0 and context_tokens >= 0:
-                pct = max(0, min(100, round((context_tokens / context_length) * 100)))
-                parts.append(f"{pct}%")
-        elif field == "cwd":
-            rel = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
-            if rel:
-                parts.append(rel)
-        # Unknown field names are silently ignored.
-
-    if not parts:
-        return ""
-    return _SEP.join(parts)
-
-
-def build_footer_line(
-    *,
-    user_config: dict[str, Any] | None,
-    platform_key: str | None,
-    model: Optional[str],
-    context_tokens: int,
-    context_length: Optional[int],
-    cwd: Optional[str] = None,
-) -> str:
-    """Top-level entry point used by gateway/run.py.
-
-    Returns the footer text (empty string when disabled or no data).  Callers
-    append this to the final response themselves, preserving a single blank
-    line of separation.
-    """
-    cfg = resolve_footer_config(user_config, platform_key)
-    if not cfg.get("enabled"):
-        return ""
-    return format_runtime_footer(
-        model=model,
-        context_tokens=context_tokens,
-        context_length=context_length,
-        cwd=cwd,
-        fields=cfg.get("fields") or _DEFAULT_FIELDS,
-    )
@@ -62,9 +62,8 @@ from .config import (
 )
 from .whatsapp_identity import (
    canonical_whatsapp_identifier,
-    normalize_whatsapp_identifier,  # noqa: F401 - re-exported for gateway.session callers
+    normalize_whatsapp_identifier,
 )
-from utils import atomic_replace


@dataclass
@@ -235,7 +234,7 @@ def build_session_context_prompt(
 ) -> str:
    """
    Build the dynamic system prompt section that tells the agent about its context.
-
+    
    This is injected into the system prompt so the agent knows:
    - Where messages are coming from
    - What platforms are connected
@@ -247,23 +246,13 @@ def build_session_context_prompt(
    Platforms like Discord are excluded because mentions need real IDs.
    Routing still uses the original values (they stay in SessionSource).
    """
-    # Only apply redaction on platforms where IDs aren't needed for mentions.
-    # Check both the hardcoded set (builtins) and the plugin registry.
-    _is_pii_safe = context.source.platform in _PII_SAFE_PLATFORMS
-    if not _is_pii_safe:
-        try:
-            from gateway.platform_registry import platform_registry
-            entry = platform_registry.get(context.source.platform.value)
-            if entry and entry.pii_safe:
-                _is_pii_safe = True
-        except Exception:
-            pass
-    redact_pii = redact_pii and _is_pii_safe
+    # Only apply redaction on platforms where IDs aren't needed for mentions
+    redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
    lines = [
        "## Current Session Context",
        "",
    ]
-
+    
    # Source info
    platform_name = context.source.platform.value.title()
    if context.source.platform == Platform.LOCAL:
@@ -288,7 +277,7 @@ def build_session_context_prompt(
        else:
            desc = src.description
        lines.append(f"**Source:** {platform_name} ({desc})")
-
+    
    # Channel topic (if available - provides context about the channel's purpose)
    if context.source.chat_topic:
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")
@@ -313,7 +302,7 @@ def build_session_context_prompt(
        if redact_pii:
            uid = _hash_sender_id(uid)
        lines.append(f"**User ID:** {uid}")
-
+    
    # Platform-specific behavioral notes
    if context.source.platform == Platform.SLACK:
        lines.append("")
@@ -379,9 +368,9 @@ def build_session_context_prompt(
    for p in context.connected_platforms:
        if p != Platform.LOCAL:
            platforms_list.append(f"{p.value}: Connected ✓")
-
+    
    lines.append(f"**Connected Platforms:** {', '.join(platforms_list)}")
-
+    
    # Home channels
    if context.home_channels:
        lines.append("")
@@ -389,11 +378,11 @@ def build_session_context_prompt(
        for platform, home in context.home_channels.items():
            hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
            lines.append(f"  - {platform.value}: {home.name} (ID: {hc_id})")
-
+    
    # Delivery options for scheduled tasks
    lines.append("")
    lines.append("**Delivery options for scheduled tasks:**")
-
+    
    from hermes_constants import display_hermes_home

    # Origin delivery
@@ -409,15 +398,15 @@ def build_session_context_prompt(
    lines.append(
        f"- `\"local\"` → Save to local files only ({display_hermes_home()}/cron/output/)"
    )
-
+    
    # Platform home channels
    for platform, home in context.home_channels.items():
        lines.append(f"- `\"{platform.value}\"` → Home channel ({home.name})")
-
+    
    # Note about explicit targeting
    lines.append("")
    lines.append("*For explicit targeting, use `\"platform:chat_id\"` format if the user provides a specific chat ID.*")
-
+    
    return "\n".join(lines)


@@ -458,15 +447,6 @@ class SessionEntry:
    was_auto_reset: bool = False
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
-
-    # Set by reset_session() when the user explicitly sends /new or /reset.
-    # Consumed once by _handle_message_with_agent to trigger topic/channel
-    # skill re-injection on the first message of the new session.  We can't
-    # reuse was_auto_reset for this because that flag fires the "session
-    # expired due to inactivity" user-facing notice and a misleading
-    # context-note prepend — both wrong for an explicit manual reset.
-    # See issue #6508.
-    is_fresh_reset: bool = False
    
    # Set by the background expiry watcher after it finalizes an expired
    # session (invoking on_session_finalize hooks and evicting the cached
@@ -517,7 +497,6 @@ class SessionEntry:
                if self.last_resume_marked_at
                else None
            ),
-            "is_fresh_reset": self.is_fresh_reset,
        }
        if self.origin:
            result["origin"] = self.origin.to_dict()
@@ -566,7 +545,6 @@ class SessionEntry:
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
            last_resume_marked_at=last_resume_marked_at,
-            is_fresh_reset=data.get("is_fresh_reset", False),
        )


@@ -727,7 +705,7 @@ class SessionStore:
                json.dump(data, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
-            atomic_replace(tmp_path, sessions_file)
+            os.replace(tmp_path, sessions_file)
        except BaseException:
            try:
                os.unlink(tmp_path)
@@ -1143,7 +1121,6 @@ class SessionStore:
                display_name=old_entry.display_name,
                platform=old_entry.platform,
                chat_type=old_entry.chat_type,
-                is_fresh_reset=True,
            )

            self._entries[session_key] = new_entry
@@ -1280,11 +1257,25 @@ class SessionStore:
        Used by /retry, /undo, and /compress to persist modified conversation history.
        Rewrites both SQLite and legacy JSONL storage.
        """
-        # SQLite: replace atomically so a mid-rewrite failure doesn't leave
-        # the session half-empty in the DB while JSONL still has history.
+        # SQLite: clear old messages and re-insert
        if self._db:
            try:
-                self._db.replace_messages(session_id, messages)
+                self._db.clear_messages(session_id)
+                for msg in messages:
+                    role = msg.get("role", "unknown")
+                    self._db.append_message(
+                        session_id=session_id,
+                        role=role,
+                        content=msg.get("content"),
+                        tool_name=msg.get("tool_name"),
+                        tool_calls=msg.get("tool_calls"),
+                        tool_call_id=msg.get("tool_call_id"),
+                        reasoning=msg.get("reasoning") if role == "assistant" else None,
+                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
+                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
+                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
+                        codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
+                    )
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
        
@@ -21,7 +21,6 @@ from datetime import datetime, timezone
 from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional
-from utils import atomic_json_write

 if sys.platform == "win32":
    import msvcrt
@@ -35,10 +34,6 @@ _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
 _GATEWAY_LOCK_FILENAME = "gateway.lock"
 _gateway_lock_handle = None
-# Windows byte-range locks are mandatory for other readers. Lock a byte well
-# past the JSON payload so runtime status / PID readers can still read the file
-# while another process holds the mutual-exclusion lock.
-_WINDOWS_LOCK_OFFSET = 1024 * 1024


 def _get_pid_path() -> Path:
@@ -210,7 +205,8 @@ def _read_json_file(path: Path) -> Optional[dict[str, Any]]:


 def _write_json_file(path: Path, payload: dict[str, Any]) -> None:
-    atomic_json_write(path, payload, indent=None, separators=(",", ":"))
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload))


 def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
@@ -290,7 +286,7 @@ def _try_acquire_file_lock(handle) -> bool:
            if handle.tell() == 0:
                handle.write("\n")
                handle.flush()
-            handle.seek(_WINDOWS_LOCK_OFFSET)
+            handle.seek(0)
            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
        else:
            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
@@ -302,7 +298,7 @@ def _try_acquire_file_lock(handle) -> bool:
 def _release_file_lock(handle) -> None:
    try:
        if _IS_WINDOWS:
-            handle.seek(_WINDOWS_LOCK_OFFSET)
+            handle.seek(0)
            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
        else:
            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
@@ -91,20 +91,11 @@ class GatewayStreamConsumer:
        chat_id: str,
        config: Optional[StreamConsumerConfig] = None,
        metadata: Optional[dict] = None,
-        on_new_message: Optional[callable] = None,
    ):
        self.adapter = adapter
        self.chat_id = chat_id
        self.cfg = config or StreamConsumerConfig()
        self.metadata = metadata
-        # Fired whenever a fresh content bubble is created on the platform
-        # (first-send of a new message, commentary, overflow chunk, or
-        # fallback continuation). The gateway uses this to linearize the
-        # tool-progress bubble: when content resumes after a tool batch,
-        # the next tool.started should open a NEW progress bubble below
-        # the content, not edit the old bubble above it.
-        # Called with no arguments. Exceptions are swallowed.
-        self._on_new_message = on_new_message
        self._queue: queue.Queue = queue.Queue()
        self._accumulated = ""
        self._message_id: Optional[str] = None
@@ -155,16 +146,6 @@ class GatewayStreamConsumer:
        if text:
            self._queue.put((_COMMENTARY, text))

-    def _notify_new_message(self) -> None:
-        """Fire the on_new_message callback, swallowing any errors."""
-        cb = self._on_new_message
-        if cb is None:
-            return
-        try:
-            cb()
-        except Exception:
-            logger.debug("on_new_message callback error", exc_info=True)
-
    def _reset_segment_state(self, *, preserve_no_edit: bool = False) -> None:
        if preserve_no_edit and self._message_id == "__no_edit__":
            return
@@ -548,9 +529,6 @@ class GatewayStreamConsumer:
                self._message_id = str(result.message_id)
                self._already_sent = True
                self._last_sent_text = text
-                # Fresh content bubble — close off any stale tool bubble
-                # above so the next tool starts a new bubble below.
-                self._notify_new_message()
                return str(result.message_id)
            else:
                self._edit_supported = False
@@ -683,9 +661,6 @@ class GatewayStreamConsumer:
            sent_any_chunk = True
            last_successful_chunk = chunk
            last_message_id = result.message_id or last_message_id
-            # Each fallback chunk is a fresh platform message — notify
-            # so any stale tool-progress bubble gets closed off.
-            self._notify_new_message()

        self._message_id = last_message_id
        self._already_sent = True
@@ -769,11 +744,6 @@ class GatewayStreamConsumer:
            # tool..."), not the final response. Setting already_sent would cause
            # the final response to be incorrectly suppressed when there are
            # multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
-            if result.success:
-                # Commentary counts as fresh content — close off any
-                # stale tool bubble above it so the next tool starts a
-                # new bubble below.
-                self._notify_new_message()
            return result.success
        except Exception as e:
            logger.error("Commentary send error: %s", e)
@@ -1003,11 +973,6 @@ class GatewayStreamConsumer:
                        # every delta/tool boundary when platforms accept a
                        # message but do not return an editable message id.
                        self._message_id = "__no_edit__"
-                    # Notify the gateway that a fresh content bubble was
-                    # created so any accumulated tool-progress bubble above
-                    # gets closed off — the next tool fires into a new
-                    # bubble below, preserving chronological order.
-                    self._notify_new_message()
                    return True
                else:
                    # Initial send failed — disable streaming for this session
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.12.0"
-__release_date__ = "2026.4.30"
+__version__ = "0.11.0"
+__release_date__ = "2026.4.23"
@@ -1,373 +0,0 @@
-"""
-Top-level argparse construction for the hermes CLI.
-
-Lives in its own module so other modules (e.g. ``relaunch.py``) can
-introspect the parser to discover which flags exist without running the
-``main`` fn.
-
-Only the top-level parser and the ``chat`` subparser live here. Every other
-subparser (model, gateway, sessions, …) is built inline in ``main.py``
-because its dispatch is tightly coupled to module-level ``cmd_*`` functions.
-"""
-
-import argparse
-
-
-# `--profile` / `-p` is consumed by ``main._apply_profile_override`` before
-# argparse runs (it sets ``HERMES_HOME`` and strips itself from ``sys.argv``),
-# so it isn't on the parser. Listed here so all "carry over on relaunch"
-# metadata lives in one file.
-PRE_ARGPARSE_INHERITED_FLAGS: list[tuple[str, bool]] = [
-    ("--profile", True),
-    ("-p", True),
-]
-
-
-def _inherited_flag(parser, *args, **kwargs):
-    """Register a flag that ``hermes_cli.relaunch`` should carry over when
-    the CLI re-execs itself (e.g. after ``sessions browse`` picks a session,
-    or after the setup wizard launches chat).
-
-    Equivalent to ``parser.add_argument(...)`` plus tagging the resulting
-    Action with ``inherit_on_relaunch = True`` so the relaunch table builder
-    can find it via introspection.
-    """
-    action = parser.add_argument(*args, **kwargs)
-    action.inherit_on_relaunch = True
-    return action
-
-
-_EPILOGUE = """
-Examples:
-    hermes                        Start interactive chat
-    hermes chat -q "Hello"        Single query mode
-    hermes -c                     Resume the most recent session
-    hermes -c "my project"        Resume a session by name (latest in lineage)
-    hermes --resume <session_id>  Resume a specific session by ID
-    hermes setup                  Run setup wizard
-    hermes logout                 Clear stored authentication
-    hermes auth add <provider>    Add a pooled credential
-    hermes auth list              List pooled credentials
-    hermes auth remove <p> <t>    Remove pooled credential by index, id, or label
-    hermes auth reset <provider>  Clear exhaustion status for a provider
-    hermes model                  Select default model
-    hermes fallback [list]        Show fallback provider chain
-    hermes fallback add           Add a fallback provider (same picker as `hermes model`)
-    hermes fallback remove        Remove a fallback provider from the chain
-    hermes config                 View configuration
-    hermes config edit            Edit config in $EDITOR
-    hermes config set model gpt-4 Set a config value
-    hermes gateway                Run messaging gateway
-    hermes -s hermes-agent-dev,github-auth
-    hermes -w                     Start in isolated git worktree
-    hermes gateway install        Install gateway background service
-    hermes sessions list          List past sessions
-    hermes sessions browse        Interactive session picker
-    hermes sessions rename ID T   Rename/title a session
-    hermes logs                   View agent.log (last 50 lines)
-    hermes logs -f                Follow agent.log in real time
-    hermes logs errors            View errors.log
-    hermes logs --since 1h        Lines from the last hour
-    hermes debug share             Upload debug report for support
-    hermes update                 Update to latest version
-
-For more help on a command:
-    hermes <command> --help
-"""
-
-
-def build_top_level_parser():
-    """Build the top-level parser, the subparsers action, and the ``chat`` subparser.
-
-    Returns ``(parser, subparsers, chat_parser)``. The caller wires
-    ``chat_parser.set_defaults(func=cmd_chat)`` and continues registering
-    other subparsers via ``subparsers.add_parser(...)``.
-    """
-    parser = argparse.ArgumentParser(
-        prog="hermes",
-        description="Hermes Agent - AI assistant with tool-calling capabilities",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog=_EPILOGUE,
-    )
-
-    parser.add_argument(
-        "--version", "-V", action="store_true", help="Show version and exit"
-    )
-    parser.add_argument(
-        "-z",
-        "--oneshot",
-        metavar="PROMPT",
-        default=None,
-        help=(
-            "One-shot mode: send a single prompt and print ONLY the final "
-            "response text to stdout. No banner, no spinner, no tool "
-            "previews, no session_id line. Tools, memory, rules, and "
-            "AGENTS.md in the CWD are loaded as normal; approvals are "
-            "auto-bypassed. Intended for scripts / pipes."
-        ),
-    )
-    # --model / --provider are accepted at the top level so they can pair
-    # with -z without needing the `chat` subcommand.  If neither -z nor a
-    # subcommand consumes them, they fall through harmlessly as None.
-    # Mirrors `hermes chat --model ... --provider ...` semantics.
-    _inherited_flag(
-        parser,
-        "-m",
-        "--model",
-        default=None,
-        help=(
-            "Model override for this invocation (e.g. anthropic/claude-sonnet-4.6). "
-            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_MODEL env var."
-        ),
-    )
-    _inherited_flag(
-        parser,
-        "--provider",
-        default=None,
-        help=(
-            "Provider override for this invocation (e.g. openrouter, anthropic). "
-            "Applies to -z/--oneshot and --tui. Also settable via HERMES_INFERENCE_PROVIDER env var."
-        ),
-    )
-    parser.add_argument(
-        "-t",
-        "--toolsets",
-        default=None,
-        help="Comma-separated toolsets to enable for this invocation. Applies to -z/--oneshot and --tui.",
-    )
-    parser.add_argument(
-        "--resume",
-        "-r",
-        metavar="SESSION",
-        default=None,
-        help="Resume a previous session by ID or title",
-    )
-    parser.add_argument(
-        "--continue",
-        "-c",
-        dest="continue_last",
-        nargs="?",
-        const=True,
-        default=None,
-        metavar="SESSION_NAME",
-        help="Resume a session by name, or the most recent if no name given",
-    )
-    parser.add_argument(
-        "--worktree",
-        "-w",
-        action="store_true",
-        default=False,
-        help="Run in an isolated git worktree (for parallel agents)",
-    )
-    _inherited_flag(
-        parser,
-        "--accept-hooks",
-        action="store_true",
-        default=False,
-        help=(
-            "Auto-approve any unseen shell hooks declared in config.yaml "
-            "without a TTY prompt.  Equivalent to HERMES_ACCEPT_HOOKS=1 or "
-            "hooks_auto_accept: true in config.yaml.  Use on CI / headless "
-            "runs that can't prompt."
-        ),
-    )
-    _inherited_flag(
-        parser,
-        "--skills",
-        "-s",
-        action="append",
-        default=None,
-        help="Preload one or more skills for the session (repeat flag or comma-separate)",
-    )
-    _inherited_flag(
-        parser,
-        "--yolo",
-        action="store_true",
-        default=False,
-        help="Bypass all dangerous command approval prompts (use at your own risk)",
-    )
-    _inherited_flag(
-        parser,
-        "--pass-session-id",
-        action="store_true",
-        default=False,
-        help="Include the session ID in the agent's system prompt",
-    )
-    _inherited_flag(
-        parser,
-        "--ignore-user-config",
-        action="store_true",
-        default=False,
-        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
-    )
-    _inherited_flag(
-        parser,
-        "--ignore-rules",
-        action="store_true",
-        default=False,
-        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
-    )
-    _inherited_flag(
-        parser,
-        "--tui",
-        action="store_true",
-        default=False,
-        help="Launch the modern TUI instead of the classic REPL",
-    )
-    _inherited_flag(
-        parser,
-        "--dev",
-        dest="tui_dev",
-        action="store_true",
-        default=False,
-        help="With --tui: run TypeScript sources via tsx (skip dist build)",
-    )
-
-    subparsers = parser.add_subparsers(dest="command", help="Command to run")
-
-    # =========================================================================
-    # chat command
-    # =========================================================================
-    chat_parser = subparsers.add_parser(
-        "chat",
-        help="Interactive chat with the agent",
-        description="Start an interactive chat session with Hermes Agent",
-    )
-    chat_parser.add_argument(
-        "-q", "--query", help="Single query (non-interactive mode)"
-    )
-    chat_parser.add_argument(
-        "--image", help="Optional local image path to attach to a single query"
-    )
-    _inherited_flag(
-        chat_parser,
-        "-m", "--model", help="Model to use (e.g., anthropic/claude-sonnet-4)",
-    )
-    chat_parser.add_argument(
-        "-t", "--toolsets", help="Comma-separated toolsets to enable"
-    )
-    _inherited_flag(
-        chat_parser,
-        "-s",
-        "--skills",
-        action="append",
-        default=argparse.SUPPRESS,
-        help="Preload one or more skills for the session (repeat flag or comma-separate)",
-    )
-    _inherited_flag(
-        chat_parser,
-        "--provider",
-        # No `choices=` here: user-defined providers from config.yaml `providers:`
-        # are also valid values, and runtime resolution (resolve_runtime_provider)
-        # handles validation/error reporting consistently with the top-level
-        # `--provider` flag.
-        default=None,
-        help="Inference provider (default: auto). Built-in or a user-defined name from `providers:` in config.yaml.",
-    )
-    chat_parser.add_argument(
-        "-v", "--verbose", action="store_true", help="Verbose output"
-    )
-    chat_parser.add_argument(
-        "-Q",
-        "--quiet",
-        action="store_true",
-        help="Quiet mode for programmatic use: suppress banner, spinner, and tool previews. Only output the final response and session info.",
-    )
-    chat_parser.add_argument(
-        "--resume",
-        "-r",
-        metavar="SESSION_ID",
-        default=argparse.SUPPRESS,
-        help="Resume a previous session by ID (shown on exit)",
-    )
-    chat_parser.add_argument(
-        "--continue",
-        "-c",
-        dest="continue_last",
-        nargs="?",
-        const=True,
-        default=argparse.SUPPRESS,
-        metavar="SESSION_NAME",
-        help="Resume a session by name, or the most recent if no name given",
-    )
-    chat_parser.add_argument(
-        "--worktree",
-        "-w",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Run in an isolated git worktree (for parallel agents on the same repo)",
-    )
-    _inherited_flag(
-        chat_parser,
-        "--accept-hooks",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help=(
-            "Auto-approve any unseen shell hooks declared in config.yaml "
-            "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
-            "hooks_auto_accept: in config.yaml)."
-        ),
-    )
-    chat_parser.add_argument(
-        "--checkpoints",
-        action="store_true",
-        default=False,
-        help="Enable filesystem checkpoints before destructive file operations (use /rollback to restore)",
-    )
-    chat_parser.add_argument(
-        "--max-turns",
-        type=int,
-        default=None,
-        metavar="N",
-        help="Maximum tool-calling iterations per conversation turn (default: 90, or agent.max_turns in config)",
-    )
-    _inherited_flag(
-        chat_parser,
-        "--yolo",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Bypass all dangerous command approval prompts (use at your own risk)",
-    )
-    _inherited_flag(
-        chat_parser,
-        "--pass-session-id",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Include the session ID in the agent's system prompt",
-    )
-    _inherited_flag(
-        chat_parser,
-        "--ignore-user-config",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
-    )
-    _inherited_flag(
-        chat_parser,
-        "--ignore-rules",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
-    )
-    chat_parser.add_argument(
-        "--source",
-        default=None,
-        help="Session source tag for filtering (default: cli). Use 'tool' for third-party integrations that should not appear in user session lists.",
-    )
-    _inherited_flag(
-        chat_parser,
-        "--tui",
-        action="store_true",
-        default=False,
-        help="Launch the modern TUI instead of the classic REPL",
-    )
-    _inherited_flag(
-        chat_parser,
-        "--dev",
-        dest="tui_dev",
-        action="store_true",
-        default=False,
-        help="With --tui: run TypeScript sources via tsx (skip dist build)",
-    )
-
-    return parser, subparsers, chat_parser
@@ -43,7 +43,6 @@ import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
 from hermes_constants import OPENROUTER_BASE_URL
-from utils import atomic_replace, atomic_yaml_write, is_truthy_value

 logger = logging.getLogger(__name__)

@@ -72,14 +71,6 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60  # 30 minutes
 ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120       # refresh 2 min before expiry
 DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1     # poll at most every 1s
 DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
-MINIMAX_OAUTH_CLIENT_ID = "78257093-7e40-4613-99e0-527b14b39113"
-MINIMAX_OAUTH_SCOPE = "group_id profile model.completion"
-MINIMAX_OAUTH_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:user_code"
-MINIMAX_OAUTH_GLOBAL_BASE = "https://api.minimax.io"
-MINIMAX_OAUTH_CN_BASE = "https://api.minimaxi.com"
-MINIMAX_OAUTH_GLOBAL_INFERENCE = "https://api.minimax.io/anthropic"
-MINIMAX_OAUTH_CN_INFERENCE = "https://api.minimaxi.com/anthropic"
-MINIMAX_OAUTH_REFRESH_SKEW_SECONDS = 60
 DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
@@ -118,12 +109,6 @@ SERVICE_PROVIDER_NAMES: Dict[str, str] = {
 DEFAULT_GEMINI_CLOUDCODE_BASE_URL = "cloudcode-pa://google"
 GEMINI_OAUTH_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 60  # refresh 60s before expiry

-# LM Studio's default no-auth mode still requires *some* non-empty bearer for
-# the API-key code paths (auxiliary_client, runtime resolver) to treat the
-# provider as configured. This sentinel is sent only to LM Studio, never to
-# any remote service.
-LMSTUDIO_NOAUTH_PLACEHOLDER = "dummy-lm-api-key"
-

 # =============================================================================
 # Provider Registry
@@ -134,7 +119,7 @@ class ProviderConfig:
    """Describes a known inference provider."""
    id: str
    name: str
-    auth_type: str  # "oauth_device_code", "oauth_external", "oauth_minimax", or "api_key"
+    auth_type: str  # "oauth_device_code", "oauth_external", or "api_key"
    portal_base_url: str = ""
    inference_base_url: str = ""
    client_id: str = ""
@@ -174,14 +159,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="oauth_external",
        inference_base_url=DEFAULT_GEMINI_CLOUDCODE_BASE_URL,
    ),
-    "lmstudio": ProviderConfig(
-        id="lmstudio",
-        name="LM Studio",
-        auth_type="api_key",
-        inference_base_url="http://127.0.0.1:1234/v1",
-        api_key_env_vars=("LM_API_KEY",),
-        base_url_env_var="LM_BASE_URL",
-    ),
    "copilot": ProviderConfig(
        id="copilot",
        name="GitHub Copilot",
@@ -263,17 +240,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("MINIMAX_API_KEY",),
        base_url_env_var="MINIMAX_BASE_URL",
    ),
-    "minimax-oauth": ProviderConfig(
-        id="minimax-oauth",
-        name="MiniMax (OAuth \u00b7 minimax.io)",
-        auth_type="oauth_minimax",
-        portal_base_url=MINIMAX_OAUTH_GLOBAL_BASE,
-        inference_base_url=MINIMAX_OAUTH_GLOBAL_INFERENCE,
-        client_id=MINIMAX_OAUTH_CLIENT_ID,
-        scope=MINIMAX_OAUTH_SCOPE,
-        extra={"region": "global", "cn_portal_base_url": MINIMAX_OAUTH_CN_BASE,
-               "cn_inference_base_url": MINIMAX_OAUTH_CN_INFERENCE},
-    ),
    "anthropic": ProviderConfig(
        id="anthropic",
        name="Anthropic",
@@ -382,14 +348,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XIAOMI_API_KEY",),
        base_url_env_var="XIAOMI_BASE_URL",
    ),
-    "tencent-tokenhub": ProviderConfig(
-        id="tencent-tokenhub",
-        name="Tencent TokenHub",
-        auth_type="api_key",
-        inference_base_url="https://tokenhub.tencentmaas.com/v1",
-        api_key_env_vars=("TOKENHUB_API_KEY",),
-        base_url_env_var="TOKENHUB_BASE_URL",
-    ),
    "ollama-cloud": ProviderConfig(
        id="ollama-cloud",
        name="Ollama Cloud",
@@ -416,6 +374,37 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
    ),
 }

+# Auto-extend PROVIDER_REGISTRY with any api-key provider registered in
+# providers/ that is not already declared above.  New providers only need a
+# providers/*.py file — no edits to this file required.
+try:
+    from providers import list_providers as _list_providers_for_registry
+    for _pp in _list_providers_for_registry():
+        if _pp.name in PROVIDER_REGISTRY:
+            continue
+        if _pp.auth_type != "api_key" or not _pp.env_vars:
+            continue
+        # Skip providers that need custom token resolution (copilot, kimi, zai)
+        # — those are already fully declared above.
+        if _pp.name in {"copilot", "kimi-coding", "kimi-coding-cn", "zai"}:
+            continue
+        _api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_BASE_URL") and not v.endswith("_URL"))
+        _base_url_var = next((v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), None)
+        PROVIDER_REGISTRY[_pp.name] = ProviderConfig(
+            id=_pp.name,
+            name=_pp.display_name or _pp.name,
+            auth_type="api_key",
+            inference_base_url=_pp.base_url,
+            api_key_env_vars=_api_key_vars or _pp.env_vars,
+            base_url_env_var=_base_url_var or "",
+        )
+        # Also register aliases so resolve_provider() resolves them
+        for _alias in _pp.aliases:
+            if _alias not in PROVIDER_REGISTRY:
+                PROVIDER_REGISTRY[_alias] = PROVIDER_REGISTRY[_pp.name]
+except Exception:
+    pass
+

 # =============================================================================
 # Anthropic Key Helper
@@ -862,7 +851,7 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
            handle.write(payload)
            handle.flush()
            os.fsync(handle.fileno())
-        atomic_replace(tmp_path, auth_file)
+        os.replace(tmp_path, auth_file)
        try:
            dir_fd = os.open(str(auth_file.parent), os.O_RDONLY)
        except OSError:
@@ -1172,7 +1161,6 @@ def resolve_provider(
        "arcee-ai": "arcee", "arceeai": "arcee",
        "gmi-cloud": "gmi", "gmicloud": "gmi",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
-        "minimax-portal": "minimax-oauth", "minimax-global": "minimax-oauth", "minimax_oauth": "minimax-oauth",
        "alibaba_coding": "alibaba-coding-plan", "alibaba-coding": "alibaba-coding-plan",
        "alibaba_coding_plan": "alibaba-coding-plan",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -1184,17 +1172,26 @@ def resolve_provider(
        "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "google-gemini-cli": "google-gemini-cli", "gemini-cli": "google-gemini-cli", "gemini-oauth": "google-gemini-cli",
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
-        "tencent": "tencent-tokenhub", "tokenhub": "tencent-tokenhub",
-        "tencent-cloud": "tencent-tokenhub", "tencentmaas": "tencent-tokenhub",
        "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
-        "lmstudio": "lmstudio", "lm-studio": "lmstudio", "lm_studio": "lmstudio",
        # Local server aliases — route through the generic custom provider
+        "lmstudio": "custom", "lm-studio": "custom", "lm_studio": "custom",
        "ollama": "custom", "ollama_cloud": "ollama-cloud",
        "vllm": "custom", "llamacpp": "custom",
        "llama.cpp": "custom", "llama-cpp": "custom",
    }
+    # Extend with aliases declared in providers/*.py that aren't already mapped.
+    # This keeps providers/ as the single source for new aliases while the
+    # hardcoded dict above remains authoritative for existing ones.
+    try:
+        from providers import list_providers as _lp
+        for _pp in _lp():
+            for _alias in _pp.aliases:
+                if _alias not in _PROVIDER_ALIASES:
+                    _PROVIDER_ALIASES[_alias] = _pp.name
+    except Exception:
+        pass
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)

    if normalized == "openrouter":
@@ -1237,11 +1234,8 @@ def resolve_provider(
            continue
        # GitHub tokens are commonly present for repo/tool access but should not
        # hijack inference auto-selection unless the user explicitly chooses
-        # Copilot/GitHub Models as the provider. LM Studio is a local server
-        # whose availability isn't implied by LM_API_KEY presence (it may be
-        # offline, and the no-auth setup uses a placeholder value), so it
-        # also requires explicit selection.
-        if pid in ("copilot", "lmstudio"):
+        # Copilot/GitHub Models as the provider.
+        if pid == "copilot":
            continue
        for env_var in pconfig.api_key_env_vars:
            if has_usable_secret(os.getenv(env_var, "")):
@@ -2480,8 +2474,8 @@ def _resolve_verify(
    tls_state = tls_state if isinstance(tls_state, dict) else {}

    effective_insecure = (
-        is_truthy_value(insecure, default=False) if insecure is not None
-        else is_truthy_value(tls_state.get("insecure", False), default=False)
+        bool(insecure) if insecure is not None
+        else bool(tls_state.get("insecure", False))
    )
    effective_ca = (
        ca_bundle
@@ -3519,13 +3513,6 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    key_source = ""
    api_key, key_source = _resolve_api_key_provider_secret(provider_id, pconfig)

-    # No-auth LM Studio: substitute a placeholder so runtime / auxiliary_client
-    # see the local server as configured. doctor still reports unconfigured
-    # because get_api_key_provider_status uses the raw secret resolver.
-    if not api_key and provider_id == "lmstudio":
-        api_key = LMSTUDIO_NOAUTH_PLACEHOLDER
-        key_source = key_source or "default"
-
    env_url = ""
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()
@@ -3653,7 +3640,7 @@ def _update_config_for_provider(

    config["model"] = model_cfg

-    atomic_yaml_write(config_path, config, sort_keys=False)
+    config_path.write_text(yaml.safe_dump(config, sort_keys=False))
    return config_path


@@ -3712,7 +3699,7 @@ def _reset_config_provider() -> Path:
        model["provider"] = "auto"
        if "base_url" in model:
            model["base_url"] = OPENROUTER_BASE_URL
-    atomic_yaml_write(config_path, config, sort_keys=False)
+    config_path.write_text(yaml.safe_dump(config, sort_keys=False))
    return config_path


@@ -4136,326 +4123,6 @@ def _codex_device_code_login() -> Dict[str, Any]:
    }


-# ==================== MiniMax Portal OAuth ====================
-
-def _minimax_pkce_pair() -> tuple:
-    """Generate (code_verifier, code_challenge_S256, state) for MiniMax OAuth."""
-    import secrets
-    verifier = secrets.token_urlsafe(64)[:96]
-    challenge = base64.urlsafe_b64encode(
-        hashlib.sha256(verifier.encode()).digest()
-    ).decode().rstrip("=")
-    state = secrets.token_urlsafe(16)
-    return verifier, challenge, state
-
-
-def _minimax_request_user_code(
-    client: httpx.Client, *, portal_base_url: str, client_id: str,
-    code_challenge: str, state: str,
-) -> Dict[str, Any]:
-    response = client.post(
-        f"{portal_base_url}/oauth/code",
-        data={
-            "response_type": "code",
-            "client_id": client_id,
-            "scope": MINIMAX_OAUTH_SCOPE,
-            "code_challenge": code_challenge,
-            "code_challenge_method": "S256",
-            "state": state,
-        },
-        headers={
-            "Content-Type": "application/x-www-form-urlencoded",
-            "Accept": "application/json",
-            "x-request-id": str(uuid.uuid4()),
-        },
-    )
-    if response.status_code != 200:
-        raise AuthError(
-            f"MiniMax OAuth authorization failed: {response.text or response.reason_phrase}",
-            provider="minimax-oauth", code="authorization_failed",
-        )
-    payload = response.json()
-    for field in ("user_code", "verification_uri", "expired_in"):
-        if field not in payload:
-            raise AuthError(
-                f"MiniMax OAuth response missing field: {field}",
-                provider="minimax-oauth", code="authorization_incomplete",
-            )
-    if payload.get("state") != state:
-        raise AuthError(
-            "MiniMax OAuth state mismatch (possible CSRF).",
-            provider="minimax-oauth", code="state_mismatch",
-        )
-    return payload
-
-
-def _minimax_poll_token(
-    client: httpx.Client, *, portal_base_url: str, client_id: str,
-    user_code: str, code_verifier: str, expired_in: int, interval_ms: Optional[int],
-) -> Dict[str, Any]:
-    # OpenClaw treats expired_in as a unix-ms timestamp (Date.now() < expireTimeMs).
-    # Defensive parsing: if it's small enough to be a duration, treat as seconds.
-    import time as _time
-    now_ms = int(_time.time() * 1000)
-    if expired_in > now_ms // 2:
-        # Looks like a unix-ms timestamp.
-        deadline = expired_in / 1000.0
-    else:
-        # Treat as duration in seconds from now.
-        deadline = _time.time() + max(1, expired_in)
-    interval = max(2.0, (interval_ms or 2000) / 1000.0)
-
-    while _time.time() < deadline:
-        response = client.post(
-            f"{portal_base_url}/oauth/token",
-            data={
-                "grant_type": MINIMAX_OAUTH_GRANT_TYPE,
-                "client_id": client_id,
-                "user_code": user_code,
-                "code_verifier": code_verifier,
-            },
-            headers={
-                "Content-Type": "application/x-www-form-urlencoded",
-                "Accept": "application/json",
-            },
-        )
-        try:
-            payload = response.json() if response.text else {}
-        except Exception:
-            payload = {}
-
-        if response.status_code != 200:
-            msg = (payload.get("base_resp", {}) or {}).get("status_msg") or response.text
-            raise AuthError(
-                f"MiniMax OAuth error: {msg or 'unknown'}",
-                provider="minimax-oauth", code="token_exchange_failed",
-            )
-
-        status = payload.get("status")
-        if status == "error":
-            raise AuthError(
-                "MiniMax OAuth reported an error. Please try again later.",
-                provider="minimax-oauth", code="authorization_denied",
-            )
-        if status == "success":
-            if not all(payload.get(k) for k in ("access_token", "refresh_token", "expired_in")):
-                raise AuthError(
-                    "MiniMax OAuth success payload missing required token fields.",
-                    provider="minimax-oauth", code="token_incomplete",
-                )
-            return payload
-        # "pending" or any other status -> keep polling
-        _time.sleep(interval)
-
-    raise AuthError(
-        "MiniMax OAuth timed out before authorization completed.",
-        provider="minimax-oauth", code="timeout",
-    )
-
-
-def _minimax_save_auth_state(auth_state: Dict[str, Any]) -> None:
-    """Persist MiniMax OAuth state to Hermes auth store (~/.hermes/auth.json)."""
-    with _auth_store_lock():
-        auth_store = _load_auth_store()
-        _save_provider_state(auth_store, "minimax-oauth", auth_state)
-        _save_auth_store(auth_store)
-
-
-def _minimax_oauth_login(
-    *, region: str = "global", open_browser: bool = True,
-    timeout_seconds: float = 15.0,
-) -> Dict[str, Any]:
-    """Run MiniMax OAuth flow, persist tokens, return auth state dict."""
-    pconfig = PROVIDER_REGISTRY["minimax-oauth"]
-    if region == "cn":
-        portal_base_url = pconfig.extra["cn_portal_base_url"]
-        inference_base_url = pconfig.extra["cn_inference_base_url"]
-    else:
-        portal_base_url = pconfig.portal_base_url
-        inference_base_url = pconfig.inference_base_url
-
-    verifier, challenge, state = _minimax_pkce_pair()
-
-    if _is_remote_session():
-        open_browser = False
-
-    print(f"Starting Hermes login via MiniMax ({region}) OAuth...")
-    print(f"Portal: {portal_base_url}")
-
-    with httpx.Client(timeout=httpx.Timeout(timeout_seconds),
-                      headers={"Accept": "application/json"}) as client:
-        code_data = _minimax_request_user_code(
-            client, portal_base_url=portal_base_url,
-            client_id=pconfig.client_id,
-            code_challenge=challenge, state=state,
-        )
-        verification_url = str(code_data["verification_uri"])
-        user_code = str(code_data["user_code"])
-
-        print()
-        print("To continue:")
-        print(f"  1. Open: {verification_url}")
-        print(f"  2. If prompted, enter code: {user_code}")
-        if open_browser:
-            if webbrowser.open(verification_url):
-                print("  (Opened browser for verification)")
-            else:
-                print("  Could not open browser automatically -- use the URL above.")
-
-        interval_raw = code_data.get("interval")
-        interval_ms = int(interval_raw) if interval_raw is not None else None
-        print("Waiting for approval...")
-
-        token_data = _minimax_poll_token(
-            client, portal_base_url=portal_base_url,
-            client_id=pconfig.client_id,
-            user_code=user_code, code_verifier=verifier,
-            expired_in=int(code_data["expired_in"]),
-            interval_ms=interval_ms,
-        )
-
-    now = datetime.now(timezone.utc)
-    expires_in_s = int(token_data["expired_in"])
-    expires_at = now.timestamp() + expires_in_s
-
-    auth_state = {
-        "provider": "minimax-oauth",
-        "region": region,
-        "portal_base_url": portal_base_url,
-        "inference_base_url": inference_base_url,
-        "client_id": pconfig.client_id,
-        "scope": MINIMAX_OAUTH_SCOPE,
-        "token_type": token_data.get("token_type", "Bearer"),
-        "access_token": token_data["access_token"],
-        "refresh_token": token_data["refresh_token"],
-        "resource_url": token_data.get("resource_url"),
-        "obtained_at": now.isoformat(),
-        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
-        "expires_in": expires_in_s,
-    }
-
-    _minimax_save_auth_state(auth_state)
-    print("\u2713 MiniMax OAuth login successful.")
-    if msg := token_data.get("notification_message"):
-        print(f"Note from MiniMax: {msg}")
-    return auth_state
-
-
-def _refresh_minimax_oauth_state(
-    state: Dict[str, Any], *, timeout_seconds: float = 15.0,
-    force: bool = False,
-) -> Dict[str, Any]:
-    """Refresh MiniMax OAuth access token if close to expiry (or forced)."""
-    if not state.get("refresh_token"):
-        raise AuthError(
-            "MiniMax OAuth state has no refresh_token; please re-login.",
-            provider="minimax-oauth", code="no_refresh_token", relogin_required=True,
-        )
-    try:
-        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
-    except Exception:
-        expires_at = 0.0
-    now = time.time()
-    if not force and (expires_at - now) > MINIMAX_OAUTH_REFRESH_SKEW_SECONDS:
-        return state
-
-    portal_base_url = state["portal_base_url"]
-    with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client:
-        response = client.post(
-            f"{portal_base_url}/oauth/token",
-            data={
-                "grant_type": "refresh_token",
-                "client_id": state["client_id"],
-                "refresh_token": state["refresh_token"],
-            },
-            headers={
-                "Content-Type": "application/x-www-form-urlencoded",
-                "Accept": "application/json",
-            },
-        )
-    if response.status_code != 200:
-        body = response.text.lower()
-        relogin = any(m in body for m in
-                      ("invalid_grant", "refresh_token_reused", "invalid_refresh_token"))
-        raise AuthError(
-            f"MiniMax OAuth refresh failed: {response.text or response.reason_phrase}",
-            provider="minimax-oauth", code="refresh_failed",
-            relogin_required=relogin,
-        )
-    payload = response.json()
-    if payload.get("status") != "success":
-        raise AuthError(
-            "MiniMax OAuth refresh did not return success.",
-            provider="minimax-oauth", code="refresh_failed",
-            relogin_required=True,
-        )
-    now_dt = datetime.now(timezone.utc)
-    expires_in_s = int(payload["expired_in"])
-    new_state = dict(state)
-    new_state.update({
-        "access_token": payload["access_token"],
-        "refresh_token": payload.get("refresh_token", state["refresh_token"]),
-        "obtained_at": now_dt.isoformat(),
-        "expires_at": datetime.fromtimestamp(now_dt.timestamp() + expires_in_s,
-                                             tz=timezone.utc).isoformat(),
-        "expires_in": expires_in_s,
-    })
-    _minimax_save_auth_state(new_state)
-    return new_state
-
-
-def resolve_minimax_oauth_runtime_credentials(
-    *, min_token_ttl_seconds: int = MINIMAX_OAUTH_REFRESH_SKEW_SECONDS,
-) -> Dict[str, Any]:
-    """Return {provider, api_key, base_url, source} for minimax-oauth."""
-    state = get_provider_auth_state("minimax-oauth")
-    if not state or not state.get("access_token"):
-        raise AuthError(
-            "Not logged into MiniMax OAuth. Run `hermes model` and select "
-            "MiniMax (OAuth).",
-            provider="minimax-oauth", code="not_logged_in", relogin_required=True,
-        )
-    state = _refresh_minimax_oauth_state(state)
-    return {
-        "provider": "minimax-oauth",
-        "api_key": state["access_token"],
-        "base_url": state["inference_base_url"].rstrip("/"),
-        "source": "oauth",
-    }
-
-
-def get_minimax_oauth_auth_status() -> Dict[str, Any]:
-    """Return auth status dict for MiniMax OAuth provider."""
-    state = get_provider_auth_state("minimax-oauth")
-    if not state or not state.get("access_token"):
-        return {"logged_in": False, "provider": "minimax-oauth"}
-    try:
-        expires_at = datetime.fromisoformat(state.get("expires_at", "")).timestamp()
-        token_valid = (expires_at - time.time()) > 0
-    except Exception:
-        token_valid = bool(state.get("access_token"))
-    return {
-        "logged_in": token_valid,
-        "provider": "minimax-oauth",
-        "region": state.get("region", "global"),
-        "expires_at": state.get("expires_at"),
-    }
-
-
-def _login_minimax_oauth(args, pconfig: ProviderConfig) -> None:
-    """CLI entry for MiniMax OAuth login."""
-    region = getattr(args, "region", None) or "global"
-    open_browser = not getattr(args, "no_browser", False)
-    timeout = getattr(args, "timeout", None) or 15.0
-    try:
-        _minimax_oauth_login(
-            region=region, open_browser=open_browser, timeout_seconds=timeout,
-        )
-    except AuthError as exc:
-        print(format_auth_error(exc))
-        raise SystemExit(1)
-
-
 def _nous_device_code_login(
    *,
    portal_base_url: Optional[str] = None,
@@ -33,7 +33,7 @@ from hermes_constants import OPENROUTER_BASE_URL


 # Providers that support OAuth login in addition to API keys.
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"}
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"}


 def _get_custom_provider_names() -> list:
@@ -170,7 +170,7 @@ def auth_add_command(args) -> None:
        if provider.startswith(CUSTOM_POOL_PREFIX):
            requested_type = AUTH_TYPE_API_KEY
        else:
-            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli", "minimax-oauth"} else AUTH_TYPE_API_KEY
+            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth", "google-gemini-cli"} else AUTH_TYPE_API_KEY

    pool = load_pool(provider)

@@ -333,27 +333,6 @@ def auth_add_command(args) -> None:
        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
        return

-    if provider == "minimax-oauth":
-        from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials
-        creds = resolve_minimax_oauth_runtime_credentials()
-        label = (getattr(args, "label", None) or "").strip() or label_from_token(
-            creds["api_key"],
-            _oauth_default_label(provider, len(pool.entries()) + 1),
-        )
-        entry = PooledCredential(
-            provider=provider,
-            id=uuid.uuid4().hex[:6],
-            label=label,
-            auth_type=AUTH_TYPE_OAUTH,
-            priority=0,
-            source=f"{SOURCE_MANUAL}:minimax_oauth",
-            access_token=creds["api_key"],
-            base_url=creds.get("base_url"),
-        )
-        pool.add_entry(entry)
-        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
-        return
-
    raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")


@@ -34,7 +34,7 @@ from dataclasses import dataclass, field
 from typing import Optional
 from urllib import request as urllib_request
 from urllib.error import HTTPError, URLError
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlunparse

 logger = logging.getLogger(__name__)

@@ -696,78 +696,6 @@ def run_quick_backup(args) -> None:
        print("No state files found to snapshot.")


-# ---------------------------------------------------------------------------
-# Shared full-zip backup helper
-# ---------------------------------------------------------------------------
-
-def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
-    """Write a full zip snapshot of ``hermes_root`` to ``out_path``.
-
-    Uses the same exclusion rules and SQLite safe-copy as :func:`run_backup`.
-    Returns the output path on success, None on failure (nothing to back up,
-    or write error — caller should surface the outcome but not raise).
-    """
-    files_to_add: list[tuple[Path, Path]] = []
-    try:
-        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
-            dp = Path(dirpath)
-            # Prune excluded directories in-place so os.walk doesn't descend
-            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
-
-            for fname in filenames:
-                fpath = dp / fname
-                try:
-                    rel = fpath.relative_to(hermes_root)
-                except ValueError:
-                    continue
-
-                if _should_exclude(rel):
-                    continue
-
-                # Skip the output zip itself if it already exists inside root.
-                try:
-                    if fpath.resolve() == out_path.resolve():
-                        continue
-                except (OSError, ValueError):
-                    pass
-
-                files_to_add.append((fpath, rel))
-    except OSError as exc:
-        logger.warning("Full-zip backup: walk failed: %s", exc)
-        return None
-
-    if not files_to_add:
-        return None
-
-    try:
-        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
-            for abs_path, rel_path in files_to_add:
-                try:
-                    if abs_path.suffix == ".db":
-                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
-                            tmp_db = Path(tmp.name)
-                        try:
-                            if _safe_copy_db(abs_path, tmp_db):
-                                zf.write(tmp_db, arcname=str(rel_path))
-                        finally:
-                            tmp_db.unlink(missing_ok=True)
-                    else:
-                        zf.write(abs_path, arcname=str(rel_path))
-                except (PermissionError, OSError, ValueError) as exc:
-                    logger.debug("Skipping %s in zip backup: %s", rel_path, exc)
-                    continue
-    except OSError as exc:
-        logger.warning("Full-zip backup: zip write failed: %s", exc)
-        # Best-effort cleanup of partial file
-        try:
-            out_path.unlink(missing_ok=True)
-        except OSError:
-            pass
-        return None
-
-    return out_path
-
-
 # ---------------------------------------------------------------------------
 # Pre-update auto-backup
 # ---------------------------------------------------------------------------
@@ -840,87 +768,64 @@ def create_pre_update_backup(
    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
    out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"

-    result = _write_full_zip_backup(out_path, hermes_root)
-    if result is None:
+    # Collect files (same logic as run_backup, minus the chatty progress prints)
+    files_to_add: list[tuple[Path, Path]] = []
+    try:
+        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
+            dp = Path(dirpath)
+            # Prune excluded directories in-place so os.walk doesn't descend
+            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
+
+            for fname in filenames:
+                fpath = dp / fname
+                try:
+                    rel = fpath.relative_to(hermes_root)
+                except ValueError:
+                    continue
+
+                if _should_exclude(rel):
+                    continue
+
+                # Skip the output zip itself if it already exists
+                try:
+                    if fpath.resolve() == out_path.resolve():
+                        continue
+                except (OSError, ValueError):
+                    pass
+
+                files_to_add.append((fpath, rel))
+    except OSError as exc:
+        logger.warning("Pre-update backup: walk failed: %s", exc)
+        return None
+
+    if not files_to_add:
+        return None
+
+    try:
+        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
+            for abs_path, rel_path in files_to_add:
+                try:
+                    if abs_path.suffix == ".db":
+                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                            tmp_db = Path(tmp.name)
+                        try:
+                            if _safe_copy_db(abs_path, tmp_db):
+                                zf.write(tmp_db, arcname=str(rel_path))
+                        finally:
+                            tmp_db.unlink(missing_ok=True)
+                    else:
+                        zf.write(abs_path, arcname=str(rel_path))
+                except (PermissionError, OSError, ValueError) as exc:
+                    logger.debug("Skipping %s in pre-update backup: %s", rel_path, exc)
+                    continue
+    except OSError as exc:
+        logger.warning("Pre-update backup: zip write failed: %s", exc)
+        # Best-effort cleanup of partial file
+        try:
+            out_path.unlink(missing_ok=True)
+        except OSError:
+            pass
        return None

    _prune_pre_update_backups(backup_dir, keep=keep)
    return out_path
-
-
-# ---------------------------------------------------------------------------
-# Pre-migration auto-backup (used by `hermes claw migrate`)
-# ---------------------------------------------------------------------------
-
-_PRE_MIGRATION_PREFIX = "pre-migration-"
-_PRE_MIGRATION_DEFAULT_KEEP = 5
-
-
-def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
-    """Remove oldest pre-migration backups beyond the keep limit.
-
-    Only touches files matching ``pre-migration-*.zip`` so other backups in
-    the same directory are never touched.
-    """
-    if keep < 0:
-        keep = 0
-    if not backup_dir.exists():
-        return 0
-
-    backups = sorted(
-        (p for p in backup_dir.iterdir()
-         if p.is_file() and p.name.startswith(_PRE_MIGRATION_PREFIX) and p.suffix.lower() == ".zip"),
-        key=lambda p: p.name,
-        reverse=True,
-    )
-
-    deleted = 0
-    for p in backups[keep:]:
-        try:
-            p.unlink()
-            deleted += 1
-        except OSError as exc:
-            logger.warning("Failed to prune pre-migration backup %s: %s", p.name, exc)
-
-    return deleted
-
-
-def create_pre_migration_backup(
-    hermes_home: Optional[Path] = None,
-    keep: int = _PRE_MIGRATION_DEFAULT_KEEP,
-) -> Optional[Path]:
-    """Create a full zip backup of HERMES_HOME under ``backups/`` before a
-    ``hermes claw migrate`` apply.
-
-    Shares implementation with :func:`create_pre_update_backup` via
-    ``_write_full_zip_backup`` — same exclusions, same SQLite safe-copy,
-    restorable with ``hermes import <archive>``.  Writes to
-    ``<HERMES_HOME>/backups/pre-migration-<timestamp>.zip`` and auto-prunes
-    old pre-migration backups.
-
-    Returns the path to the created zip, or ``None`` if nothing was found
-    to back up (fresh install) or the write failed.  Never raises — the
-    caller decides whether to abort or proceed.
-    """
-    hermes_root = hermes_home or get_default_hermes_root()
-    if not hermes_root.is_dir():
-        return None
-
-    # Reuses the shared backups/ directory so `hermes import` and the
-    # update-backup listing pick up pre-migration archives too.
-    backup_dir = _pre_update_backup_dir(hermes_root)
-    try:
-        backup_dir.mkdir(parents=True, exist_ok=True)
-    except OSError as exc:
-        logger.warning("Could not create pre-migration backup dir %s: %s", backup_dir, exc)
-        return None
-
-    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
-    out_path = backup_dir / f"{_PRE_MIGRATION_PREFIX}{stamp}.zip"
-
-    result = _write_full_zip_backup(out_path, hermes_root)
-    if result is None:
-        return None
-
-    _prune_pre_migration_backups(backup_dir, keep=keep)
-    return out_path
@@ -5,7 +5,6 @@ Pure display functions with no HermesCLI state dependency.

 import json
 import logging
-import os
 import shutil
 import subprocess
 import threading
@@ -123,36 +122,35 @@ def get_available_skills() -> Dict[str, List[str]]:
 # Cache update check results for 6 hours to avoid repeated git fetches
 _UPDATE_CHECK_CACHE_SECONDS = 6 * 3600

-# Sentinel returned when we know an update exists but can't count commits
-# (e.g. nix-built hermes — no local git history to count against).
-UPDATE_AVAILABLE_NO_COUNT = -1

-_UPSTREAM_REPO_URL = "https://github.com/NousResearch/hermes-agent.git"
+def check_for_updates() -> Optional[int]:
+    """Check how many commits behind origin/main the local repo is.

-
-def _check_via_rev(local_rev: str) -> Optional[int]:
-    """Compare an embedded git revision to upstream main via ls-remote.
-
-    Returns 0 if up-to-date, ``UPDATE_AVAILABLE_NO_COUNT`` if behind,
-    or ``None`` on failure.
+    Does a ``git fetch`` at most once every 6 hours (cached to
+    ``~/.hermes/.update_check``).  Returns the number of commits behind,
+    or ``None`` if the check fails or isn't applicable.
    """
+    hermes_home = get_hermes_home()
+    repo_dir = hermes_home / "hermes-agent"
+    cache_file = hermes_home / ".update_check"
+
+    # Must be a git repo — fall back to project root for dev installs
+    if not (repo_dir / ".git").exists():
+        repo_dir = Path(__file__).parent.parent.resolve()
+    if not (repo_dir / ".git").exists():
+        return None
+
+    # Read cache
+    now = time.time()
    try:
-        result = subprocess.run(
-            ["git", "ls-remote", _UPSTREAM_REPO_URL, "refs/heads/main"],
-            capture_output=True, text=True, timeout=10,
-        )
+        if cache_file.exists():
+            cached = json.loads(cache_file.read_text())
+            if now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS:
+                return cached.get("behind")
    except Exception:
-        return None
-    if result.returncode != 0 or not result.stdout:
-        return None
-    upstream_rev = result.stdout.split()[0]
-    if not upstream_rev:
-        return None
-    return 0 if upstream_rev == local_rev else UPDATE_AVAILABLE_NO_COUNT
+        pass

-
-def _check_via_local_git(repo_dir: Path) -> Optional[int]:
-    """Count commits behind origin/main in a local checkout."""
+    # Fetch latest refs from origin (updates remote-tracking refs only; the working tree is untouched)
    try:
        subprocess.run(
            ["git", "fetch", "origin", "--quiet"],
@@ -162,6 +160,7 @@ def _check_via_local_git(repo_dir: Path) -> Optional[int]:
    except Exception:
        pass  # Offline or timeout — use stale refs, that's fine

+    # Count commits behind
    try:
        result = subprocess.run(
            ["git", "rev-list", "--count", "HEAD..origin/main"],
@@ -169,52 +168,15 @@ def _check_via_local_git(repo_dir: Path) -> Optional[int]:
            cwd=str(repo_dir),
        )
        if result.returncode == 0:
-            return int(result.stdout.strip())
+            behind = int(result.stdout.strip())
+        else:
+            behind = None
    except Exception:
-        pass
-    return None
+        behind = None

-
-def check_for_updates() -> Optional[int]:
-    """Check whether a Hermes update is available.
-
-    Two paths: if ``HERMES_REVISION`` is set (nix builds embed it), compare
-    it to upstream main via ``git ls-remote``. Otherwise look for a local
-    git checkout and count commits behind ``origin/main``.
-
-    Returns the number of commits behind, ``UPDATE_AVAILABLE_NO_COUNT`` (-1)
-    if behind but the count is unknown, ``0`` if up-to-date, or ``None`` if
-    the check failed or doesn't apply. Cached for 6 hours.
-    """
-    hermes_home = get_hermes_home()
-    cache_file = hermes_home / ".update_check"
-    embedded_rev = os.environ.get("HERMES_REVISION") or None
-
-    # Read cache — invalidate if the embedded rev has changed since last check
-    now = time.time()
+    # Write cache
    try:
-        if cache_file.exists():
-            cached = json.loads(cache_file.read_text())
-            if (
-                now - cached.get("ts", 0) < _UPDATE_CHECK_CACHE_SECONDS
-                and cached.get("rev") == embedded_rev
-            ):
-                return cached.get("behind")
-    except Exception:
-        pass
-
-    if embedded_rev:
-        behind = _check_via_rev(embedded_rev)
-    else:
-        repo_dir = hermes_home / "hermes-agent"
-        if not (repo_dir / ".git").exists():
-            repo_dir = Path(__file__).parent.parent.resolve()
-        if not (repo_dir / ".git").exists():
-            return None
-        behind = _check_via_local_git(repo_dir)
-
-    try:
-        cache_file.write_text(json.dumps({"ts": now, "behind": behind, "rev": embedded_rev}))
+        cache_file.write_text(json.dumps({"ts": now, "behind": behind}))
    except Exception:
        pass

@@ -587,29 +549,20 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    # Update check — use prefetched result if available
    try:
        behind = get_update_result(timeout=0.5)
-        if behind is not None and behind != 0:
-            from hermes_cli.config import get_managed_update_command, recommended_update_command
-            if behind > 0:
-                commits_word = "commit" if behind == 1 else "commits"
-                right_lines.append(
-                    f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
-                    f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
-                )
-            else:
-                # UPDATE_AVAILABLE_NO_COUNT: nix-built hermes; we know an update
-                # exists but not by how much, and we don't know how the user
-                # installed it (nix run, profile, system flake, home-manager).
-                managed_cmd = get_managed_update_command()
-                line = "[bold yellow]⚠ update available[/]"
-                if managed_cmd:
-                    line += f"[dim yellow] — run [bold]{managed_cmd}[/bold][/]"
-                right_lines.append(line)
+        if behind and behind > 0:
+            from hermes_cli.config import recommended_update_command
+            commits_word = "commit" if behind == 1 else "commits"
+            right_lines.append(
+                f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
+                f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
+            )
    except Exception:
        pass  # Never break the banner over an update check

    right_content = "\n".join(right_lines)
    layout_table.add_row(left_content, right_content)

+    agent_name = _skin_branding("agent_name", "Hermes Agent")
    title_color = _skin_color("banner_title", "#FFD700")
    border_color = _skin_color("banner_border", "#CD7F32")
    version_label = format_banner_version_label()
@@ -1,138 +0,0 @@
-"""Shared helpers for attaching Hermes to a local Chrome CDP port."""
-
-from __future__ import annotations
-
-import os
-import platform
-import shlex
-import shutil
-import subprocess
-
-from hermes_constants import get_hermes_home
-
-
-DEFAULT_BROWSER_CDP_PORT = 9222
-DEFAULT_BROWSER_CDP_URL = f"http://127.0.0.1:{DEFAULT_BROWSER_CDP_PORT}"
-
-_DARWIN_APPS = (
-    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
-    "/Applications/Chromium.app/Contents/MacOS/Chromium",
-    "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser",
-    "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
-)
-
-_WINDOWS_INSTALL_PARTS = (
-    ("Google", "Chrome", "Application", "chrome.exe"),
-    ("Chromium", "Application", "chrome.exe"),
-    ("Chromium", "Application", "chromium.exe"),
-    ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"),
-    ("Microsoft", "Edge", "Application", "msedge.exe"),
-)
-
-_LINUX_BIN_NAMES = (
-    "google-chrome", "google-chrome-stable", "chromium-browser",
-    "chromium", "brave-browser", "microsoft-edge",
-)
-
-_WINDOWS_BIN_NAMES = (
-    "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe",
-    "chrome", "msedge", "brave", "chromium",
-)
-
-
-def get_chrome_debug_candidates(system: str) -> list[str]:
-    candidates: list[str] = []
-    seen: set[str] = set()
-
-    def add(path: str | None) -> None:
-        if not path:
-            return
-        normalized = os.path.normcase(os.path.normpath(path))
-        if normalized in seen or not os.path.isfile(path):
-            return
-        candidates.append(path)
-        seen.add(normalized)
-
-    def add_install_paths(bases: tuple[str | None, ...]) -> None:
-        for base in filter(None, bases):
-            for parts in _WINDOWS_INSTALL_PARTS:
-                add(os.path.join(base, *parts))
-
-    if system == "Darwin":
-        for app in _DARWIN_APPS:
-            add(app)
-        return candidates
-
-    if system == "Windows":
-        for name in _WINDOWS_BIN_NAMES:
-            add(shutil.which(name))
-        add_install_paths((
-            os.environ.get("ProgramFiles"),
-            os.environ.get("ProgramFiles(x86)"),
-            os.environ.get("LOCALAPPDATA"),
-        ))
-        return candidates
-
-    for name in _LINUX_BIN_NAMES:
-        add(shutil.which(name))
-    add_install_paths(("/mnt/c/Program Files", "/mnt/c/Program Files (x86)"))
-    return candidates
-
-
-def chrome_debug_data_dir() -> str:
-    return str(get_hermes_home() / "chrome-debug")
-
-
-def _chrome_debug_args(port: int) -> list[str]:
-    return [
-        f"--remote-debugging-port={port}",
-        f"--user-data-dir={chrome_debug_data_dir()}",
-        "--no-first-run",
-        "--no-default-browser-check",
-    ]
-
-
-def manual_chrome_debug_command(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> str | None:
-    system = system or platform.system()
-    candidates = get_chrome_debug_candidates(system)
-
-    if candidates:
-        argv = [candidates[0], *_chrome_debug_args(port)]
-        return subprocess.list2cmdline(argv) if system == "Windows" else shlex.join(argv)
-
-    if system == "Darwin":
-        data_dir = chrome_debug_data_dir()
-        return (
-            f'open -a "Google Chrome" --args --remote-debugging-port={port} '
-            f'--user-data-dir="{data_dir}" --no-first-run --no-default-browser-check'
-        )
-
-    return None
-
-
-def _detach_kwargs(system: str) -> dict:
-    if system != "Windows":
-        return {"start_new_session": True}
-    flags = getattr(subprocess, "DETACHED_PROCESS", 0) | getattr(
-        subprocess, "CREATE_NEW_PROCESS_GROUP", 0
-    )
-    return {"creationflags": flags} if flags else {}
-
-
-def try_launch_chrome_debug(port: int = DEFAULT_BROWSER_CDP_PORT, system: str | None = None) -> bool:
-    system = system or platform.system()
-    candidates = get_chrome_debug_candidates(system)
-    if not candidates:
-        return False
-
-    os.makedirs(chrome_debug_data_dir(), exist_ok=True)
-    try:
-        subprocess.Popen(
-            [candidates[0], *_chrome_debug_args(port)],
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.DEVNULL,
-            **_detach_kwargs(system),
-        )
-        return True
-    except Exception:
-        return False
@@ -4,8 +4,7 @@ Usage:
    hermes claw migrate              # Preview then migrate (always shows preview first)
    hermes claw migrate --dry-run    # Preview only, no changes
    hermes claw migrate --yes        # Skip confirmation prompt
-    hermes claw migrate --preset full --overwrite --migrate-secrets  # Full run w/ secrets
-    hermes claw migrate --no-backup  # Skip pre-migration snapshot
+    hermes claw migrate --preset full --overwrite  # Full migration, overwrite conflicts
    hermes claw cleanup              # Archive leftover OpenClaw directories
    hermes claw cleanup --dry-run    # Preview what would be archived
 """
@@ -16,7 +15,6 @@ import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
-from typing import Optional

 from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
 from hermes_constants import get_optional_skills_dir
@@ -323,13 +321,10 @@ def _cmd_migrate(args):
    migrate_secrets = getattr(args, "migrate_secrets", False)
    workspace_target = getattr(args, "workspace_target", None)
    skill_conflict = getattr(args, "skill_conflict", "skip")
-    no_backup = getattr(args, "no_backup", False)

-    # Secrets are never included implicitly — they must be explicitly requested
-    # via --migrate-secrets, even under --preset full.  This mirrors OpenClaw's
-    # migrate-hermes posture (two-phase: run once without secrets, rerun with
-    # --include-secrets) and prevents a --preset full invocation from silently
-    # importing API keys that the user may not have intended to copy.
+    # The "full" preset always migrates secrets, even when --migrate-secrets was not passed
+    if preset == "full":
+        migrate_secrets = True

    print()
    print(
@@ -436,24 +431,15 @@ def _cmd_migrate(args):

    preview_summary = preview_report.get("summary", {})
    preview_count = preview_summary.get("migrated", 0)
-    preview_conflicts = preview_summary.get("conflict", 0)

-    # "Nothing to migrate" means nothing migrated AND nothing blocked by
-    # conflicts.  If there are conflicts, we still want to show the plan and
-    # surface the refusal/--overwrite guidance instead of silently bailing.
-    if preview_count == 0 and preview_conflicts == 0:
+    if preview_count == 0:
        print()
        print_info("Nothing to migrate from OpenClaw.")
        _print_migration_report(preview_report, dry_run=True)
        return

    print()
-    if preview_count > 0:
-        print_header(f"Migration Preview — {preview_count} item(s) would be imported")
-    else:
-        print_header(
-            f"Migration Preview — {preview_conflicts} conflict(s), nothing would be imported"
-        )
+    print_header(f"Migration Preview — {preview_count} item(s) would be imported")
    print_info("No changes have been made yet. Review the list below:")
    _print_migration_report(preview_report, dry_run=True)

@@ -461,24 +447,6 @@ def _cmd_migrate(args):
    if dry_run:
        return

-    # ── Phase 1b: Refuse if the plan has conflicts and --overwrite is not set ─
-    # Modelled on OpenClaw's assertConflictFreePlan() — apply is a safe no-op
-    # on conflicts unless the user explicitly opts in to overwriting.  Without
-    # this guard, the user would answer "yes, proceed" and silently end up
-    # with a migration that skipped every conflicting item.
-    if preview_conflicts > 0 and not overwrite:
-        print()
-        print_error(
-            f"Plan has {preview_conflicts} conflict(s). Refusing to apply."
-        )
-        print_info(
-            "Each conflict is an item whose target already exists in ~/.hermes/. "
-            "Re-run with --overwrite to replace conflicting targets (item-level "
-            "backups are written to the migration report directory)."
-        )
-        print_info("Or re-run with --dry-run to review the full plan.")
-        return
-
    # ── Phase 2: Confirm and execute ───────────────────────────
    print()
    if not auto_yes:
@@ -490,32 +458,6 @@ def _cmd_migrate(args):
            print_info("Migration cancelled.")
            return

-    # ── Phase 2b: Pre-apply backup of the Hermes home ─────────
-    # Delegates to hermes_cli.backup.create_pre_migration_backup(), which
-    # shares implementation with the pre-update backup (same exclusion
-    # rules, same SQLite safe-copy, zip format) so the archive is
-    # restorable with `hermes import`.  Mirrors OpenClaw's
-    # createPreMigrationBackup posture — one atomic restore point before
-    # any mutation, auto-pruned to the last 5 pre-migration zips.
-    backup_archive: Optional[Path] = None
-    if not no_backup:
-        try:
-            from hermes_cli.backup import create_pre_migration_backup, _format_size
-            backup_archive = create_pre_migration_backup(hermes_home=hermes_home)
-            if backup_archive:
-                size_str = _format_size(backup_archive.stat().st_size)
-                print()
-                print_success(f"Pre-migration backup: {backup_archive} ({size_str})")
-                print_info(f"Restore with: hermes import {backup_archive.name}")
-        except Exception as e:
-            print()
-            print_error(f"Could not create pre-migration backup: {e}")
-            print_info(
-                "Re-run with --no-backup to skip, or free up disk space under the Hermes home."
-            )
-            logger.debug("Pre-migration backup error", exc_info=True)
-            return
-
    try:
        migrator = mod.Migrator(
            source_root=source_dir.resolve(),
@@ -534,9 +476,6 @@ def _cmd_migrate(args):
        print()
        print_error(f"Migration failed: {e}")
        logger.debug("OpenClaw migration error", exc_info=True)
-        if backup_archive:
-            print_info(f"A pre-migration backup is available at: {backup_archive}")
-            print_info(f"Restore with: hermes import {backup_archive.name}")
        return

    # Print results
@@ -19,8 +19,6 @@ from collections.abc import Callable, Mapping
 from dataclasses import dataclass
 from typing import Any

-from utils import is_truthy_value
-
 # prompt_toolkit is an optional CLI dependency — only needed for
 # SlashCommandCompleter and SlashCommandAutoSuggest.  Gateway and test
 # environments that lack it must still be able to import this module
@@ -68,7 +66,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
               cli_only=True),
    CommandDef("history", "Show conversation history", "Session",
               cli_only=True),
-    CommandDef("recap", "Summarize recent activity in this session", "Session"),
    CommandDef("save", "Save the current conversation", "Session",
               cli_only=True),
    CommandDef("retry", "Retry the last message (resend to agent)", "Session"),
@@ -96,8 +93,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
               aliases=("q",), args_hint="<prompt>"),
    CommandDef("steer", "Inject a message after the next tool call without interrupting", "Session",
               args_hint="<prompt>"),
-    CommandDef("goal", "Set a standing goal Hermes works on across turns until achieved", "Session",
-               args_hint="[text | pause | resume | clear | status]"),
    CommandDef("status", "Show session info", "Session"),
    CommandDef("profile", "Show active profile name and home directory", "Info"),
    CommandDef("sethome", "Set this chat as the home channel", "Session",
@@ -120,9 +115,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
               "Configuration", cli_only=True,
               gateway_config_gate="display.tool_progress_command"),
-    CommandDef("footer", "Toggle gateway runtime-metadata footer on final replies",
-               "Configuration", args_hint="[on|off|status]",
-               subcommands=("on", "off", "status")),
    CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
               "Configuration"),
    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
@@ -133,9 +125,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
               subcommands=("normal", "fast", "status", "on", "off")),
    CommandDef("skin", "Show or change the display skin/theme", "Configuration",
               cli_only=True, args_hint="[name]"),
-    CommandDef("indicator", "Pick the TUI busy-indicator style", "Configuration",
-               cli_only=True, args_hint="[kaomoji|emoji|unicode|ascii]",
-               subcommands=("kaomoji", "emoji", "unicode", "ascii")),
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
@@ -153,20 +142,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
-    CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
-               "Tools & Skills", args_hint="[subcommand]",
-               subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
-    CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
-               "Tools & Skills", args_hint="[subcommand]",
-               subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
-                            "claim", "comment", "complete", "block", "unblock", "archive",
-                            "tail", "dispatch", "context", "init", "gc")),
    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
               cli_only=True),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
               aliases=("reload_mcp",)),
-    CommandDef("reload-skills", "Re-scan ~/.hermes/skills/ for newly installed or removed skills",
-               "Tools & Skills", aliases=("reload_skills",)),
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
               cli_only=True, args_hint="[connect|disconnect|status]",
               subcommands=("connect", "disconnect", "status")),
@@ -320,7 +299,6 @@ ACTIVE_SESSION_BYPASS_COMMANDS: frozenset[str] = frozenset(
        "new",
        "profile",
        "queue",
-        "recap",
        "restart",
        "status",
        "steer",
@@ -377,7 +355,7 @@ def _resolve_config_gates() -> set[str]:
            else:
                val = None
                break
-        if is_truthy_value(val, default=False):
+        if val:
            result.add(cmd.name)
    return result

@@ -840,13 +818,6 @@ def discord_skill_commands_by_category(
 _SLACK_MAX_SLASH_COMMANDS = 50
 _SLACK_NAME_LIMIT = 32
 _SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]")
-_SLACK_RESERVED_COMMANDS = frozenset({
-    # Built-in Slack slash commands that cannot be registered by apps.
-    # https://slack.com/help/articles/201259356-Use-built-in-slash-commands
-    "me", "status", "away", "dnd", "shrug", "remind", "msg", "feed",
-    "who", "collapse", "expand", "leave", "join", "open", "search",
-    "topic", "mute", "pro", "shortcuts",
-})


 def _sanitize_slack_name(raw: str) -> str:
@@ -873,10 +844,6 @@ def slack_native_slashes() -> list[tuple[str, str, str]]:
    documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work).
    Plugin-registered slash commands are included too.

-    Commands whose sanitized name collides with a Slack built-in
-    (e.g. ``/status``, ``/me``, ``/join``) are silently skipped.  Users
-    can still reach them via ``/hermes <command>``.
-
    Results are clamped to Slack's 50-command limit with duplicate-name
    avoidance. ``/hermes`` is always reserved as the first entry so the
    legacy ``/hermes <subcommand>`` form keeps working for anything that
@@ -894,8 +861,6 @@ def slack_native_slashes() -> list[tuple[str, str, str]]:
        slack_name = _sanitize_slack_name(name)
        if not slack_name or slack_name in seen:
            return
-        if slack_name in _SLACK_RESERVED_COMMANDS:
-            return
        if len(entries) >= _SLACK_MAX_SLASH_COMMANDS:
            return
        # Slack description cap is 2000 chars; keep it short.
@@ -978,42 +943,6 @@ def slack_subcommand_map() -> dict[str, str]:
 # Autocomplete
 # ---------------------------------------------------------------------------

-
-# Per-process cache for /model<space> LM Studio autocomplete. Probing on
-# every keystroke would block the UI; a short TTL keeps it live without
-# hammering the server.
-_LMSTUDIO_COMPLETION_CACHE: tuple[float, list[str]] | None = None
-
-
-def _lmstudio_completion_models() -> list[str]:
-    """Locally-loaded LM Studio models for /model autocomplete (cached, gated)."""
-    global _LMSTUDIO_COMPLETION_CACHE
-    # Gate: don't probe 127.0.0.1 on every keystroke for users who don't use LM Studio.
-    if not (os.environ.get("LM_API_KEY") or os.environ.get("LM_BASE_URL")):
-        try:
-            from hermes_cli.auth import _load_auth_store
-            store = _load_auth_store() or {}
-            if "lmstudio" not in (store.get("providers") or {}) \
-               and "lmstudio" not in (store.get("credential_pool") or {}):
-                return []
-        except Exception:
-            return []
-    now = time.time()
-    if _LMSTUDIO_COMPLETION_CACHE and (now - _LMSTUDIO_COMPLETION_CACHE[0]) < 30.0:
-        return _LMSTUDIO_COMPLETION_CACHE[1]
-    try:
-        from hermes_cli.models import fetch_lmstudio_models
-        models = fetch_lmstudio_models(
-            api_key=os.environ.get("LM_API_KEY", ""),
-            base_url=os.environ.get("LM_BASE_URL") or "http://127.0.0.1:1234/v1",
-            timeout=0.8,
-        )
-    except Exception:
-        models = []
-    _LMSTUDIO_COMPLETION_CACHE = (now, models)
-    return models
-
-
 class SlashCommandCompleter(Completer):
    """Autocomplete for built-in slash commands, subcommands, and skill commands."""

@@ -1437,19 +1366,6 @@ class SlashCommandCompleter(Completer):
                    )
        except Exception:
            pass
-        # LM Studio: surface locally-loaded models. Gated on the user actually
-        # having LM Studio configured (env var or auth-store entry) so we
-        # don't probe 127.0.0.1 on every keystroke for users who don't use it.
-        for name in _lmstudio_completion_models():
-            if name in seen:
-                continue
-            if name.startswith(sub_lower) and name != sub_lower:
-                yield Completion(
-                    name,
-                    start_position=-len(sub_text),
-                    display=name,
-                    display_meta="LM Studio",
-                )

    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
--- a/Show More
+++ b/Show More