feat: add fast-path setup for nous account

adds a nous account specific fast flow & autolaunches into chat if gateway isn't set up
change: always run setup on no-config run
2026-04-24 00:07:23 -04:00 · 2026-04-24 00:06:48 -04:00 · 2026-04-23 19:40:43 -05:00 · 2026-04-23 19:38:33 -05:00 · 2026-04-23 19:35:18 -05:00 · 2026-04-23 19:32:21 -05:00
667 changed files with 76448 additions and 14799 deletions
@@ -14,3 +14,6 @@ node_modules
 .env

 *.md
+
+# Runtime data (bind-mounted at /opt/data; must not leak into build context)
+data/
@@ -0,0 +1,8 @@
+name: 'Setup Nix'
+description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
+
+runs:
+  using: composite
+  steps:
+    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
+    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
@@ -3,8 +3,13 @@ name: Docker Build and Publish
 on:
  push:
    branches: [main]
-  pull_request:
-    branches: [main]
+    paths:
+      - '**/*.py'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'Dockerfile'
+      - 'docker/**'
+      - '.github/workflows/docker-publish.yml'
  release:
    types: [published]

@@ -49,6 +54,14 @@ jobs:

      - name: Test image starts
        run: |
+          # The image runs as the hermes user (UID 10000).  GitHub Actions
+          # creates /tmp/hermes-test root-owned by default, which hermes
+          # can't write to — chown it to match the in-container UID before
+          # bind-mounting.  Real users doing `docker run -v ~/.hermes:...`
+          # with their own UID hit the same issue and have their own
+          # remediations (HERMES_UID env var, or chown locally).
+          mkdir -p /tmp/hermes-test
+          sudo chown -R 10000:10000 /tmp/hermes-test
          docker run --rm \
            -v /tmp/hermes-test:/opt/data \
            --entrypoint /opt/hermes/docker/entrypoint.sh \
@@ -0,0 +1,68 @@
+name: Nix Lockfile Check
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-check-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Resolve head SHA
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
+      - name: Check lockfile hashes
+        id: check
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      - name: Post sticky PR comment (stale)
+        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
+
+      - name: Clear sticky PR comment (resolved)
+        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Fail if stale
+        if: steps.check.outputs.stale == 'true'
+        run: exit 1
@@ -0,0 +1,149 @@
+name: Nix Lockfile Fix
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to fix (leave empty to run on the selected branch)'
+        required: false
+        type: string
+  issue_comment:
+    types: [edited]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  fix:
+    # Run on manual dispatch OR when a task-list checkbox in the sticky
+    # lockfile-check comment flips from `[ ]` to `[x]`.
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'issue_comment'
+       && github.event.issue.pull_request != null
+       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
+       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - name: Authorize & resolve PR
+        id: resolve
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
+        with:
+          script: |
+            // 1. Verify the actor has write access — applies to both checkbox
+            //    clicks and manual dispatch.
+            const { data: perm } =
+              await github.rest.repos.getCollaboratorPermissionLevel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                username: context.actor,
+              });
+            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
+              core.setFailed(
+                `${context.actor} lacks write access (has: ${perm.permission})`
+              );
+              return;
+            }
+
+            // 2. Resolve which ref to check out.
+            let prNumber = '';
+            if (context.eventName === 'issue_comment') {
+              prNumber = String(context.payload.issue.number);
+            } else if (context.eventName === 'workflow_dispatch') {
+              prNumber = context.payload.inputs.pr_number || '';
+            }
+
+            if (!prNumber) {
+              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
+              core.setOutput('repo', context.repo.repo);
+              core.setOutput('owner', context.repo.owner);
+              core.setOutput('pr', '');
+              return;
+            }
+
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: Number(prNumber),
+            });
+            core.setOutput('ref', pr.head.ref);
+            core.setOutput('repo', pr.head.repo.name);
+            core.setOutput('owner', pr.head.repo.owner.login);
+            core.setOutput('pr', String(pr.number));
+
+      # Wipe the sticky lockfile-check comment to a "running" state as soon
+      # as the job is authorized, so the user sees their click was picked up
+      # before the ~minute of nix build work.
+      - name: Mark sticky as running
+        if: steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### 🔄 Applying lockfile fix…
+
+            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
+          ref: ${{ steps.resolve.outputs.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          fetch-depth: 0
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): refresh npm lockfile hashes"
+          git push
+
+      - name: Update sticky (applied)
+        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile fix applied
+
+            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (already current)
+        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile hashes already current
+
+            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (failed)
+        if: failure() && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ❌ Lockfile fix failed
+
+            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
@@ -4,15 +4,6 @@ on:
  push:
    branches: [main]
  pull_request:
-    paths:
-      - 'flake.nix'
-      - 'flake.lock'
-      - 'nix/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'hermes_cli/**'
-      - 'run_agent.py'
-      - 'acp_adapter/**'

 permissions:
  contents: read
@@ -29,9 +20,8 @@ jobs:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25  # v22
-      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39  # v13
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: ./.github/actions/nix-setup
      - name: Check flake
        if: runner.os == 'Linux'
        run: nix flake check --print-build-logs
@@ -3,14 +3,31 @@ name: Supply Chain Audit
 on:
  pull_request:
    types: [opened, synchronize, reopened]
+    paths:
+      - '**/*.py'
+      - '**/*.pth'
+      - '**/setup.py'
+      - '**/setup.cfg'
+      - '**/sitecustomize.py'
+      - '**/usercustomize.py'
+      - '**/__init__.pth'

 permissions:
  pull-requests: write
  contents: read

+# Narrow, high-signal scanner. Only fires on critical indicators of supply
+# chain attacks (e.g. the litellm-style payloads). Low-signal heuristics
+# (plain base64, plain exec/eval, dependency/Dockerfile/workflow edits,
+# Actions version unpinning, outbound POST/PUT) were intentionally
+# removed — they fired on nearly every PR and trained reviewers to ignore
+# the scanner. Keep this file's checks ruthlessly narrow: if you find
+# yourself adding WARNING-tier patterns here again, make a separate
+# advisory-only workflow instead.
+
 jobs:
  scan:
-    name: Scan PR for supply chain risks
+    name: Scan PR for critical supply chain risks
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
@@ -18,7 +35,7 @@ jobs:
        with:
          fetch-depth: 0

-      - name: Scan diff for suspicious patterns
+      - name: Scan diff for critical patterns
        id: scan
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -28,19 +45,19 @@ jobs:
          BASE="${{ github.event.pull_request.base.sha }}"
          HEAD="${{ github.event.pull_request.head.sha }}"

-          # Get the full diff (added lines only)
+          # Added lines only, excluding lockfiles.
          DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)

          FINDINGS=""
-          CRITICAL=false

          # --- .pth files (auto-execute on Python startup) ---
+          # The exact mechanism used in the litellm supply chain attack:
+          # https://github.com/BerriAI/litellm/issues/24512
          PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
          if [ -n "$PTH_FILES" ]; then
-            CRITICAL=true
            FINDINGS="${FINDINGS}
          ### 🚨 CRITICAL: .pth file added or modified
-          Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
+          Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required.

          **Files:**
          \`\`\`
@@ -49,13 +66,12 @@ jobs:
          "
          fi

-          # --- base64 + exec/eval combo (the litellm attack pattern) ---
+          # --- base64 decode + exec/eval on the same line (the litellm attack pattern) ---
          B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
          if [ -n "$B64_EXEC_HITS" ]; then
-            CRITICAL=true
            FINDINGS="${FINDINGS}
          ### 🚨 CRITICAL: base64 decode + exec/eval combo
-          This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
+          Base64-decoded strings passed directly to exec/eval — the signature of hidden credential-stealing payloads.

          **Matches:**
          \`\`\`
@@ -64,41 +80,12 @@ jobs:
          "
          fi

-          # --- base64 decode/encode (alone — legitimate uses exist) ---
-          B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
-          if [ -n "$B64_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: base64 encoding/decoding detected
-          Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
-
-          **Matches (first 20):**
-          \`\`\`
-          ${B64_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- exec/eval with string arguments ---
-          EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
-          if [ -n "$EXEC_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: exec() or eval() usage
-          Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
-
-          **Matches (first 20):**
-          \`\`\`
-          ${EXEC_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- subprocess with encoded/obfuscated commands ---
-          PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
+          # --- subprocess with encoded/obfuscated command argument ---
+          PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|\\x[0-9a-f]{2}|chr\(' | head -10 || true)
          if [ -n "$PROC_HITS" ]; then
-            CRITICAL=true
            FINDINGS="${FINDINGS}
          ### 🚨 CRITICAL: subprocess with encoded/obfuscated command
-          Subprocess calls with encoded arguments are a strong indicator of payload execution.
+          Subprocess calls whose command strings are base64- or hex-encoded are a strong indicator of payload execution.

          **Matches:**
          \`\`\`
@@ -107,25 +94,12 @@ jobs:
          "
          fi

-          # --- Network calls to non-standard domains ---
-          EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
-          if [ -n "$EXFIL_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: Outbound network calls (POST/PUT)
-          Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
-
-          **Matches (first 10):**
-          \`\`\`
-          ${EXFIL_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- setup.py / setup.cfg install hooks ---
-          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
+          # --- Install-hook files (setup.py/sitecustomize/usercustomize/__init__.pth) ---
+          # These execute during pip install or interpreter startup.
+          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(^|/)(setup\.py|setup\.cfg|sitecustomize\.py|usercustomize\.py|__init__\.pth)$' || true)
          if [ -n "$SETUP_HITS" ]; then
            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: Install hook files modified
+          ### 🚨 CRITICAL: Install-hook file added or modified
          These files can execute code during package installation or interpreter startup.

          **Files:**
@@ -135,114 +109,31 @@ jobs:
          "
          fi

-          # --- Compile/marshal/pickle (code object injection) ---
-          MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
-          if [ -n "$MARSHAL_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: marshal/pickle/compile usage
-          These can deserialize or construct executable code objects.
-
-          **Matches:**
-          \`\`\`
-          ${MARSHAL_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- CI/CD workflow files modified ---
-          WORKFLOW_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '\.github/workflows/.*\.ya?ml$' || true)
-          if [ -n "$WORKFLOW_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: CI/CD workflow files modified
-          Changes to workflow files can alter build pipelines, inject steps, or modify permissions. Verify no unauthorized actions or secrets access were added.
-
-          **Files:**
-          \`\`\`
-          ${WORKFLOW_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- Dockerfile / container build files modified ---
-          DOCKER_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -iE '(Dockerfile|\.dockerignore|docker-compose)' || true)
-          if [ -n "$DOCKER_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: Container build files modified
-          Changes to Dockerfiles or compose files can alter base images, add build steps, or expose ports. Verify base image pins and build commands.
-
-          **Files:**
-          \`\`\`
-          ${DOCKER_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- Dependency manifest files modified ---
-          DEP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(pyproject\.toml|requirements.*\.txt|package\.json|Gemfile|go\.mod|Cargo\.toml)$' || true)
-          if [ -n "$DEP_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: Dependency manifest files modified
-          Changes to dependency files can introduce new packages or change version pins. Verify all dependency changes are intentional and from trusted sources.
-
-          **Files:**
-          \`\`\`
-          ${DEP_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- GitHub Actions version unpinning (mutable tags instead of SHAs) ---
-          ACTIONS_UNPIN=$(echo "$DIFF" | grep -n '^\+' | grep 'uses:' | grep -v '#' | grep -E '@v[0-9]' | head -10 || true)
-          if [ -n "$ACTIONS_UNPIN" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: GitHub Actions with mutable version tags
-          Actions should be pinned to full commit SHAs (not \`@v4\`, \`@v5\`). Mutable tags can be retargeted silently if a maintainer account is compromised.
-
-          **Matches:**
-          \`\`\`
-          ${ACTIONS_UNPIN}
-          \`\`\`
-          "
-          fi
-
-          # --- Output results ---
          if [ -n "$FINDINGS" ]; then
            echo "found=true" >> "$GITHUB_OUTPUT"
-            if [ "$CRITICAL" = true ]; then
-              echo "critical=true" >> "$GITHUB_OUTPUT"
-            else
-              echo "critical=false" >> "$GITHUB_OUTPUT"
-            fi
-            # Write findings to a file (multiline env vars are fragile)
            echo "$FINDINGS" > /tmp/findings.md
          else
            echo "found=false" >> "$GITHUB_OUTPUT"
-            echo "critical=false" >> "$GITHUB_OUTPUT"
          fi

-      - name: Post warning comment
+      - name: Post critical finding comment
        if: steps.scan.outputs.found == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
-          SEVERITY="⚠️ Supply Chain Risk Detected"
-          if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
-            SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
-          fi
+          BODY="## 🚨 CRITICAL Supply Chain Risk Detected

-          BODY="## ${SEVERITY}
-
-          This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
+          This PR contains a pattern that has been used in real supply chain attacks. A maintainer must review the flagged code carefully before merging.

          $(cat /tmp/findings.md)

          ---
-          *Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
+          *Scanner only fires on high-signal indicators: .pth files, base64+exec/eval combos, subprocess with encoded commands, or install-hook files. Low-signal warnings were removed intentionally — if you're seeing this comment, the finding is worth inspecting.*"

          gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"

      - name: Fail on critical findings
-        if: steps.scan.outputs.critical == 'true'
+        if: steps.scan.outputs.found == 'true'
        run: |
          echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
          exit 1
@@ -3,8 +3,14 @@ name: Tests
 on:
  push:
    branches: [main]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'
  pull_request:
    branches: [main]
+    paths-ignore:
+      - '**/*.md'
+      - 'docs/**'

 permissions:
  contents: read
@@ -17,7 +23,7 @@ concurrency:
 jobs:
  test:
    runs-on: ubuntu-latest
-    timeout-minutes: 10
+    timeout-minutes: 20
    steps:
      - name: Checkout code
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
@@ -1,3 +1,4 @@
+.DS_Store
 /venv/
 /_pycache/
 *.pyc*
@@ -54,6 +55,11 @@ environments/benchmarks/evals/
 # Web UI build output
 hermes_cli/web_dist/

+# Web UI assets — synced from @nous-research/ui at build time via
+# `npm run sync-assets` (see web/package.json).
+web/public/fonts/
+web/public/ds-assets/
+
 # Release script temp files
 .release_notes.md
 mini-swe-agent/
@@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent
 ## Development Environment

 ```bash
-source venv/bin/activate  # ALWAYS activate before running Python
+# Prefer .venv; fall back to venv if that's what your checkout has.
+source .venv/bin/activate   # or: source venv/bin/activate
 ```

+`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
+`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
+main checkout).
+
 ## Project Structure

+File counts shift constantly — don't treat the tree below as exhaustive.
+The canonical source is the filesystem. The notes call out the load-bearing
+entry points you'll actually edit.
+
 ```
 hermes-agent/
-├── run_agent.py          # AIAgent class — core conversation loop
+├── run_agent.py          # AIAgent class — core conversation loop (~12k LOC)
 ├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
 ├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
-├── cli.py                # HermesCLI class — interactive CLI orchestrator
+├── cli.py                # HermesCLI class — interactive CLI orchestrator (~11k LOC)
 ├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
-├── agent/                # Agent internals
-│   ├── prompt_builder.py     # System prompt assembly
-│   ├── context_compressor.py # Auto context compression
-│   ├── prompt_caching.py     # Anthropic prompt caching
-│   ├── auxiliary_client.py   # Auxiliary LLM client (vision, summarization)
-│   ├── model_metadata.py     # Model context lengths, token estimation
-│   ├── models_dev.py         # models.dev registry integration (provider-aware context)
-│   ├── display.py            # KawaiiSpinner, tool preview formatting
-│   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
-│   └── trajectory.py         # Trajectory saving helpers
-├── hermes_cli/           # CLI subcommands and setup
-│   ├── main.py           # Entry point — all `hermes` subcommands
-│   ├── config.py         # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
-│   ├── commands.py       # Slash command definitions + SlashCommandCompleter
-│   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
-│   ├── setup.py          # Interactive setup wizard
-│   ├── skin_engine.py    # Skin/theme engine — CLI visual customization
-│   ├── skills_config.py  # `hermes skills` — enable/disable skills per platform
-│   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
-│   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
-│   ├── models.py         # Model catalog, provider model lists
-│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
-│   └── auth.py           # Provider credential resolution
-├── tools/                # Tool implementations (one file per tool)
-│   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
-│   ├── approval.py       # Dangerous command detection
-│   ├── terminal_tool.py  # Terminal orchestration
-│   ├── process_registry.py # Background process management
-│   ├── file_tools.py     # File read/write/search/patch
-│   ├── web_tools.py      # Web search/extract (Parallel + Firecrawl)
-│   ├── browser_tool.py   # Browserbase browser automation
-│   ├── code_execution_tool.py # execute_code sandbox
-│   ├── delegate_tool.py  # Subagent delegation
-│   ├── mcp_tool.py       # MCP client (~1050 lines)
+├── hermes_constants.py   # get_hermes_home(), display_hermes_home() — profile-aware paths
+├── hermes_logging.py     # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
+├── batch_runner.py       # Parallel batch processing
+├── agent/                # Agent internals (provider adapters, memory, caching, compression, etc.)
+├── hermes_cli/           # CLI subcommands, setup wizard, plugins loader, skin engine
+├── tools/                # Tool implementations — auto-discovered via tools/registry.py
 │   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
-├── gateway/              # Messaging platform gateway
-│   ├── run.py            # Main loop, slash commands, message dispatch
-│   ├── session.py        # SessionStore — conversation persistence
-│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
+├── gateway/              # Messaging gateway — run.py + session.py + platforms/
+│   ├── platforms/        # Adapter per platform (telegram, discord, slack, whatsapp,
+│   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
+│   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
+│   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
+│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
+├── plugins/              # Plugin system (see "Plugins" section below)
+│   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
+│   ├── context_engine/   # Context-engine plugins
+│   └── <others>/         # Dashboard, image-gen, disk-cleanup, examples, ...
+├── optional-skills/      # Heavier/niche skills shipped but NOT active by default
+├── skills/               # Built-in skills bundled with the repo
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
-│   ├── src/entry.tsx        # TTY gate + render()
-│   ├── src/app.tsx          # Main state machine and UI
-│   ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
-│   ├── src/app/             # Decomposed app logic (event handler, slash handler, stores, hooks)
-│   ├── src/components/      # Ink components (branding, markdown, prompts, pickers, etc.)
-│   ├── src/hooks/           # useCompletion, useInputHistory, useQueue, useVirtualHistory
-│   └── src/lib/             # Pure helpers (history, osc52, text, rpc, messages)
+│   └── src/              # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
 ├── tui_gateway/          # Python JSON-RPC backend for the TUI
-│   ├── entry.py             # stdio entrypoint
-│   ├── server.py            # RPC handlers and session logic
-│   ├── render.py            # Optional rich/ANSI bridge
-│   └── slash_worker.py      # Persistent HermesCLI subprocess for slash commands
 ├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
-├── cron/                 # Scheduler (jobs.py, scheduler.py)
+├── cron/                 # Scheduler — jobs.py, scheduler.py
 ├── environments/         # RL training environments (Atropos)
-├── tests/                # Pytest suite (~3000 tests)
-└── batch_runner.py       # Parallel batch processing
+├── scripts/              # run_tests.sh, release.py, auxiliary scripts
+├── website/              # Docusaurus docs site
+└── tests/                # Pytest suite (~15k tests across ~700 files as of Apr 2026)
 ```

-**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
+**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
+**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
+`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
+Browse with `hermes logs [--follow] [--level ...] [--session ...]`.

 ## File Dependency Chain

@@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/

 ## AIAgent Class (run_agent.py)

+The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
+session context, budget, credential pool, etc.). The signature below is the
+minimum subset you'll usually touch — read `run_agent.py` for the full list.
+
 ```python
 class AIAgent:
    def __init__(self,
-        model: str = "anthropic/claude-opus-4.6",
-        max_iterations: int = 90,
+        base_url: str = None,
+        api_key: str = None,
+        provider: str = None,
+        api_mode: str = None,              # "chat_completions" | "codex_responses" | ...
+        model: str = "",                   # empty → resolved from config/provider later
+        max_iterations: int = 90,          # tool-calling iterations (shared with subagents)
        enabled_toolsets: list = None,
        disabled_toolsets: list = None,
        quiet_mode: bool = False,
        save_trajectories: bool = False,
-        platform: str = None,           # "cli", "telegram", etc.
+        platform: str = None,              # "cli", "telegram", etc.
        session_id: str = None,
        skip_context_files: bool = False,
        skip_memory: bool = False,
-        # ... plus provider, api_mode, callbacks, routing params
+        credential_pool=None,
+        # ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
+        # checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
    ): ...

    def chat(self, message: str) -> str:
@@ -120,10 +113,13 @@ class AIAgent:

 ### Agent Loop

-The core loop is inside `run_conversation()` — entirely synchronous:
+The core loop is inside `run_conversation()` — entirely synchronous, with
+interrupt checks, budget tracking, and a one-turn grace call:

 ```python
-while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
+while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
+        or self._budget_grace_call:
+    if self._interrupt_requested: break
    response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
    if response.tool_calls:
        for tool_call in response.tool_calls:
@@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining >
        return response.content
 ```

-Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
+Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
+Reasoning content is stored in `assistant_msg["reasoning"]`.

 ---

@@ -280,7 +277,7 @@ The registry handles schema collection, dispatch, availability checking, and err

 **State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.

-**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
+**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.

 ---

@@ -288,9 +285,13 @@ The registry handles schema collection, dispatch, availability checking, and err

 ### config.yaml options:
 1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
-2. Bump `_config_version` (currently 5) to trigger migration for existing users
+2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
+   ONLY if you need to actively migrate/transform existing user config
+   (renaming keys, changing structure). Adding a new key to an existing
+   section is handled automatically by the deep-merge and does NOT require
+   a version bump.

-### .env variables:
+### .env variables (SECRETS ONLY — API keys, tokens, passwords):
 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
 "NEW_API_KEY": {
@@ -302,13 +303,29 @@ The registry handles schema collection, dispatch, availability checking, and err
 },
 ```

-### Config loaders (two separate systems):
+Non-secret settings (timeouts, thresholds, feature flags, paths, display
+preferences) belong in `config.yaml`, not `.env`. If internal code needs an
+env var mirror for backward compatibility, bridge it from `config.yaml` to
+the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
+
+### Config loaders (three paths — know which one you're in):

 | Loader | Used by | Location |
 |--------|---------|----------|
-| `load_cli_config()` | CLI mode | `cli.py` |
-| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
-| Direct YAML load | Gateway | `gateway/run.py` |
+| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
+| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
+| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
+
+If you add a new key and the CLI sees it but the gateway doesn't (or vice
+versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
+
+### Working directory:
+- **CLI** — uses the process's current directory (`os.getcwd()`).
+- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
+  to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
+  removed** — the config loader prints a deprecation warning if it's set in
+  `.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
+  `terminal.cwd` in `config.yaml`.

 ---

@@ -401,7 +418,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.

 ---

+## Plugins
+
+Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
+repo-shipped plugins can be discovered alongside user-installed ones in
+`~/.hermes/plugins/` and pip-installed entry points.
+
+### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
+
+`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
+and pip entry points. Each plugin exposes a `register(ctx)` function that
+can:
+
+- Register Python-callback lifecycle hooks:
+  `pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
+  `on_session_start`, `on_session_end`
+- Register new tools via `ctx.register_tool(...)`
+- Register CLI subcommands via `ctx.register_cli_command(...)` — the
+  plugin's argparse tree is wired into `hermes` at startup so
+  `hermes <pluginname> <subcmd>` works with no change to `main.py`
+
+Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
+(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
+as a side effect of importing `model_tools.py`. Code paths that read plugin
+state without importing `model_tools.py` first must call `discover_plugins()`
+explicitly (it's idempotent).
+
+### Memory-provider plugins (`plugins/memory/<name>/`)
+
+Separate discovery system for pluggable memory backends. Current built-in
+providers include **honcho, mem0, supermemory, byterover, hindsight,
+holographic, openviking, retaindb**.
+
+Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
+and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
+`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
+`post_setup(hermes_home, config)` for setup-wizard integration.
+
+**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
+defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
+it at argparse setup time and wires it into `hermes <plugin>`. The
+framework only exposes CLI commands for the **currently active** memory
+provider (read from `memory.provider` in config.yaml), so disabled
+providers don't clutter `hermes --help`.
+
+**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
+(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
+If a plugin needs a capability the framework doesn't expose, expand the
+generic plugin surface (new hook, new ctx method) — never hardcode
+plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
+honcho argparse from `main.py` for exactly this reason.
+
+### Dashboard / context-engine / image-gen plugin directories
+
+`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
+etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
+Context engines plug into `agent/context_engine.py`; image-gen providers
+into `agent/image_gen_provider.py`.
+
+---
+
+## Skills
+
+Two parallel surfaces:
+
+- **`skills/`** — built-in skills shipped and loadable by default.
+  Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
+- **`optional-skills/`** — heavier or niche skills shipped with the repo but
+  NOT active by default. Installed explicitly via
+  `hermes skills install official/<category>/<skill>`. Adapter lives in
+  `tools/skills_hub.py` (`OptionalSkillSource`). Categories include
+  `autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
+  `devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
+  `research`, `security`, `web-development`.
+
+When reviewing skill PRs, check which directory they target — heavy-dep or
+niche skills belong in `optional-skills/`.
+
+### SKILL.md frontmatter
+
+Standard fields: `name`, `description`, `version`, `platforms`
+(OS-gating list: `[macos]`, `[linux, macos]`, ...),
+`metadata.hermes.tags`, `metadata.hermes.category`,
+`metadata.hermes.config` (config.yaml settings the skill needs — stored
+under `skills.config.<key>`, prompted during setup, injected at load time).
+
+---
+
 ## Important Policies
+
 ### Prompt Caching Must Not Break

 Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
@@ -411,9 +516,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i

 Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.

-### Working Directory Behavior
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
+Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
+must be **cache-aware**: default to deferred invalidation (change takes
+effect next session), with an opt-in `--now` flag for immediate
+invalidation. See `/skills install --now` for the canonical pattern.

 ### Background Process Notifications (Gateway)

@@ -435,7 +541,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
 `HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).

 The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
-`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
+`HERMES_HOME` before any module imports. All `get_hermes_home()` references
 automatically scope to the active profile.

 ### Rules for profile-safe code
@@ -492,8 +598,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
 for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
 has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.

-### DO NOT use `simple_term_menu` for interactive menus
-Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
+### DO NOT introduce new `simple_term_menu` usage
+Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
+the preferred UI is curses (stdlib) because `simple_term_menu` has
+ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
+interactive menus must use `hermes_cli/curses_ui.py` — see
+`hermes_cli/tools_config.py` for the canonical pattern.

 ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
 Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
@@ -504,6 +614,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
 ### DO NOT hardcode cross-tool references in schema descriptions
 Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.

+### The gateway has TWO message guards — both must bypass approval/control commands
+When an agent is running, messages pass through two sequential guards:
+(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
+`_pending_messages` when `session_key in self._active_sessions`, and
+(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
+`/queue`, `/status`, `/approve`, `/deny` before they reach
+`running_agent.interrupt()`. Any new command that must reach the runner
+while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
+guards and be dispatched inline, not via `_process_message_background()`
+(which races session lifecycle).
+
+### Squash merges from stale branches silently revert recent fixes
+Before squash-merging a PR, ensure the branch is up to date with `main`
+(`git fetch origin main && git reset --hard origin/main` in the worktree,
+then re-apply the PR's commits). A stale branch's version of an unrelated
+file will silently overwrite recent fixes on main when squashed. Verify
+with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
+red flag.
+
+### Don't wire in dead code without E2E validation
+Unused code that was never shipped was dead for a reason. Before wiring an
+unused module into a live code path, E2E test the real resolution chain
+with actual imports (not mocks) against a temp `HERMES_HOME`.
+
 ### Tests must not write to `~/.hermes/`
 The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.

@@ -559,10 +693,59 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
 pytest directly), at minimum activate the venv and pass `-n 4`:

 ```bash
-source venv/bin/activate
+source .venv/bin/activate   # or: source venv/bin/activate
 python -m pytest tests/ -q -n 4
 ```

 Worker count above 4 will surface test-ordering flakes that CI never sees.

 Always run the full suite before pushing changes.
+
+### Don't write change-detector tests
+
+A test is a **change-detector** if it fails whenever data that is **expected
+to change** gets updated — model catalogs, config version numbers,
+enumeration counts, hardcoded lists of provider models. These tests add no
+behavioral coverage; they just guarantee that routine source updates break
+CI and cost engineering time to "fix."
+
+**Do not write:**
+
+```python
+# catalog snapshot — breaks every model release
+assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
+assert "MiniMax-M2.7" in models
+
+# config version literal — breaks every schema bump
+assert DEFAULT_CONFIG["_config_version"] == 21
+
+# enumeration count — breaks every time a skill/provider is added
+assert len(_PROVIDER_MODELS["huggingface"]) == 8
+```
+
+**Do write:**
+
+```python
+# behavior: does the catalog plumbing work at all?
+assert "gemini" in _PROVIDER_MODELS
+assert len(_PROVIDER_MODELS["gemini"]) >= 1
+
+# behavior: does migration bump the user's version to current latest?
+assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
+
+# invariant: no plan-only model leaks into the legacy list
+assert not (set(moonshot_models) & coding_plan_only_models)
+
+# invariant: every model in the catalog has a context-length entry
+for m in _PROVIDER_MODELS["huggingface"]:
+    assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
+```
+
+The rule: if the test reads like a snapshot of current data, delete it. If
+it reads like a contract about how two pieces of data must relate, keep it.
+When a PR adds a new provider/model and you want a test, make the test
+assert the relationship (e.g. "catalog entries all have context lengths"),
+not the specific names.
+
+Reviewers should reject new change-detector tests; authors should convert
+them into invariants before re-requesting review.
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
 We value contributions in this order:

 1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
-2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
+2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
 3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
 4. **Performance and robustness** — retry logic, error handling, graceful degradation.
 5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite

 | Requirement | Notes |
 |-------------|-------|
-| **Git** | With `--recurse-submodules` support |
+| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
 | **Python 3.11+** | uv will install it if missing |
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
-| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
+| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

 ### Clone and install

@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
 touch ~/.hermes/.env

 # Add at minimum an LLM provider key:
-echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
+echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
 ```

 ### Run
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl

 ## Cross-Platform Compatibility

-Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
+Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:

 ### Critical rules

@@ -597,7 +597,7 @@ refactor/description   # Code restructuring

 1. **Run tests**: `pytest tests/ -v`
 2. **Test manually**: Run `hermes` and exercise the code path you changed
-3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
+3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.

 ### PR description
@@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # Install system dependencies in one layer, clear APT cache
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -27,12 +27,10 @@ WORKDIR /opt/hermes
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
-COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
 COPY web/package.json web/package-lock.json web/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
-    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
    (cd web && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

@@ -52,5 +50,6 @@ RUN uv venv && \
 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
+ENV PATH="/opt/data/.local/bin:${PATH}"
 VOLUME [ "/opt/data" ]
 ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
 | Set a personality | `/personality [name]` | `/personality [name]` |
 | Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
 | Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
-| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
+| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
 | Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
 | Platform-specific status | `/platforms` | `/status`, `/sethome` |

@@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
-python -m pytest tests/ -q
+scripts/run_tests.sh
 ```

-> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
-> ```bash
-> git submodule update --init tinker-atropos
-> uv pip install -e "./tinker-atropos"
-> ```
+> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.

 ---

@@ -173,7 +169,6 @@ python -m pytest tests/ -q
 - 💬 [Discord](https://discord.gg/NousResearch)
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---
@@ -0,0 +1,453 @@
+# Hermes Agent v0.11.0 (v2026.4.23)
+
+**Release Date:** April 23, 2026
+**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
+
+> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
+
+This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
+
+---
+
+## ✨ Highlights
+
+- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
+
+- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
+
+- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
+
+- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
+
+- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
+
+- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
+
+- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
+
+- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
+
+- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
+
+- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+
+- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
+
+- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
+
+- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Transport Layer (NEW)
+- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
+- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
+- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
+- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
+- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
+
+### Provider & Model Support
+- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
+- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
+- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
+- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
+- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
+- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
+- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
+- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
+- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
+- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
+- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
+- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
+- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
+- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
+- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
+- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
+- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
+- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
+- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
+- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
+- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
+- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
+- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
+- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
+- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
+- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
+- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
+- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
+- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
+- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
+- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
+- Fix: stream reasoning content through OpenAI-compatible providers that emit it
+
+### Agent Loop & Conversation
+- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
+- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
+- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
+- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
+- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
+- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
+- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
+- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
+- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
+- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
+- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
+- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
+- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
+- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
+- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
+- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
+- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
+- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
+- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
+- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
+- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
+- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
+- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
+
+### Session & Memory
+- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
+- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
+- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
+- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
+- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
+- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
+- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
+
+---
+
+## 🖥️ New Ink-based TUI
+
+A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
+
+### TUI Foundations
+- New TUI based on Ink + Python JSON-RPC backend
+- Prettier + ESLint + vitest tooling for `ui-tui/`
+- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
+- Persistent `_SlashWorker` subprocess for slash command dispatch
+
+### UX & Features
+- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
+- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
+- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
+- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
+- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
+- Sticky composer that freezes during scroll
+- OSC-52 clipboard support for copy across SSH sessions
+- Virtualized history rendering for performance
+- Slash command autocomplete via `complete.slash` RPC
+- Path autocomplete via `complete.path` RPC
+- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
+
+### Structural Refactors
+- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
+- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
+- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### New Platforms
+- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
+
+### Telegram
+- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
+- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
+- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
+- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
+- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
+- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
+- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
+- Fix: Markdown special char escaping in `send_exec_approval`
+- Fix: parentheses in URLs during MarkdownV2 link conversion
+- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
+- Many platform hint / streaming / session-key fixes
+
+### Discord
+- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
+- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
+- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
+- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
+- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
+- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
+- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
+
+### Feishu
+- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
+- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
+- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
+
+### DingTalk
+- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
+- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
+- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
+
+### WhatsApp
+- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
+- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
+
+### WeCom / Weixin
+- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
+
+### Signal
+- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
+
+### Slack
+- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
+
+### BlueBubbles (iMessage)
+- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
+
+### Gateway Core
+- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
+- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
+- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
+- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
+- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
+- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
+- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
+- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
+- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
+- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
+- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
+- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
+- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
+- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
+- Fix: unlink stale PID + lock files on cleanup
+- Fix: force-unlink stale PID file after `--replace` takeover
+
+---
+
+## 🔧 Tool System
+
+### Plugin Surface (major expansion)
+- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
+- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
+- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
+- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
+- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
+- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
+- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
+- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
+- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
+- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
+
+### Browser
+- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
+- Camofox hardening + connection stability across the window
+
+### Execute Code
+- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
+
+### Image Generation
+- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
+- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
+- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
+- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
+
+### TTS / STT / Voice
+- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
+- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
+- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
+- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
+- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
+
+### Webhook / Cron
+- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
+- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
+- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
+
+### Delegate
+- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
+- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+
+### File / Patch
+- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
+
+### API Server
+- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
+- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
+
+### Docker / Podman
+- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
+- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
+- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
+
+### MCP
+- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skill System
+- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
+- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
+- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
+- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
+- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
+- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
+- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
+
+### New Skills
+- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
+- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
+- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
+- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
+- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
+- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
+- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
+- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
+- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
+- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
+- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
+- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
+
+---
+
+## 📊 Web Dashboard
+
+- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
+- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
+- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
+- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
+- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
+- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
+- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
+- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
+- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
+- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
+
+---
+
+## 🖱️ CLI & User Experience
+
+- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
+- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
+- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
+- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
+- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
+- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
+- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
+- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
+- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
+- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
+
+---
+
+## 🔒 Security & Reliability
+
+- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
+- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
+- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
+- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
+- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
+- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
+- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
+- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
+- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
+
+---
+
+## 🐛 Notable Bug Fixes
+
+The `fix:` category in this window covers 482 PRs. Highlights:
+
+- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
+- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
+- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
+- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
+- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
+- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
+- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
+- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
+- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
+- Doctor checks for Kimi China credentials fixed
+- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
+- Rapid Telegram follow-ups no longer get cut off
+
+---
+
+## 🧪 Testing & CI
+
+- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
+- Hermetic test parity (`scripts/run_tests.sh`) held across this window
+- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
+
+---
+
+## 📚 Documentation
+
+- Atropos + wandb links in user guide
+- ACP / VS Code / Zed / JetBrains integration docs refresh
+- Webhook subscription docs updated for direct-delivery mode
+- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
+- Transport layer developer guide added
+- Website removed Discussions link from README
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** (Teknium)
+
+### Top Community Contributors (by merged PR count)
+- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
+- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
+- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
+- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
+- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
+- **@ethernet8023** — 3 PRs
+- **@benbarclay** — 3 PRs
+- **@Aslaaen** — 2 PRs
+
+### Also contributing
+@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
+
+### All Contributors (alphabetical)
+
+@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
+@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
+@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
+@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
+@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
+@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
+@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
+@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
+@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
+@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
+@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
+@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
+@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
+@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
+@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
+@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
+@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
+@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
+@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
+@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
+@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
+@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
+@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
+@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
+@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
+@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
+@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
+@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
+@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
+@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
+@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
+@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
+@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
+
+Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
+
+
+---
+
+**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
@@ -20,6 +20,46 @@ from pathlib import Path
 from hermes_constants import get_hermes_home


+# Methods clients send as periodic liveness probes. They are not part of the
+# ACP schema, so the acp router correctly returns JSON-RPC -32601 to the
+# caller — but the supervisor task that dispatches the request then surfaces
+# the raised RequestError via ``logging.exception("Background task failed")``,
+# which dumps a traceback to stderr every probe interval. Clients like
+# acp-bridge already treat the -32601 response as "agent alive", so the
+# traceback is pure noise. We keep the protocol response intact and only
+# silence the stderr noise for this specific benign case.
+_BENIGN_PROBE_METHODS = frozenset({"ping", "health", "healthcheck"})
+
+
+class _BenignProbeMethodFilter(logging.Filter):
+    """Suppress acp 'Background task failed' tracebacks caused by unknown
+    liveness-probe methods (e.g. ``ping``) while leaving every other
+    background-task error — including method_not_found for any non-probe
+    method — visible in stderr.
+    """
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        if record.getMessage() != "Background task failed":
+            return True
+        exc_info = record.exc_info
+        if not exc_info:
+            return True
+        exc = exc_info[1]
+        # Imported lazily so this module stays importable when the optional
+        # ``agent-client-protocol`` dependency is not installed.
+        try:
+            from acp.exceptions import RequestError
+        except ImportError:
+            return True
+        if not isinstance(exc, RequestError):
+            return True
+        if getattr(exc, "code", None) != -32601:
+            return True
+        data = getattr(exc, "data", None)
+        method = data.get("method") if isinstance(data, dict) else None
+        return method not in _BENIGN_PROBE_METHODS
+
+
 def _setup_logging() -> None:
    """Route all logging to stderr so stdout stays clean for ACP stdio."""
    handler = logging.StreamHandler(sys.stderr)
@@ -29,6 +69,7 @@ def _setup_logging() -> None:
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
+    handler.addFilter(_BenignProbeMethodFilter())
    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(handler)
@@ -63,6 +63,9 @@ def make_approval_callback(
            logger.warning("Permission request timed out or failed: %s", exc)
            return "deny"

+        if response is None:
+            return "deny"
+
        outcome = response.outcome
        if isinstance(outcome, AllowedOutcome):
            option_id = outcome.option_id
@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import logging
+import os
 from collections import defaultdict, deque
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Deque, Optional
@@ -51,7 +52,7 @@ try:
 except ImportError:
    from acp.schema import AuthMethod as AuthMethodAgent  # type: ignore[attr-defined]

-from acp_adapter.auth import detect_provider, has_provider
+from acp_adapter.auth import detect_provider
 from acp_adapter.events import (
    make_message_cb,
    make_step_cb,
@@ -71,6 +72,11 @@ except Exception:
 # Thread pool for running AIAgent (synchronous) in parallel.
 _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")

+# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
+# does not expose a client-side limit, so this is a fixed cap that clients
+# paginate against using `cursor` / `next_cursor`.
+_LIST_SESSIONS_PAGE_SIZE = 50
+

 def _extract_text(
    prompt: list[
@@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent):
        )

    async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
-        if has_provider():
-            return AuthenticateResponse()
-        return None
+        # Only accept authenticate() calls whose method_id matches the
+        # provider we advertised in initialize(). Without this check,
+        # authenticate() would acknowledge any method_id as long as the
+        # server has provider credentials configured — harmless under
+        # Hermes' threat model (ACP is stdio-only, local-trust), but poor
+        # API hygiene and confusing if ACP ever grows multi-method auth.
+        provider = detect_provider()
+        if not provider:
+            return None
+        if not isinstance(method_id, str) or method_id.strip().lower() != provider:
+            return None
+        return AuthenticateResponse()

    # ---- Session management -------------------------------------------------

@@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent):
        cwd: str | None = None,
        **kwargs: Any,
    ) -> ListSessionsResponse:
+        """List ACP sessions with optional ``cwd`` filtering and cursor pagination.
+
+        ``cwd`` is passed through to ``SessionManager.list_sessions`` which already
+        normalizes and filters by working directory. ``cursor`` is a ``session_id``
+        previously returned as ``next_cursor``; results resume after that entry.
+        Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
+        results remain, ``next_cursor`` is set to the last returned ``session_id``.
+        """
        infos = self.session_manager.list_sessions(cwd=cwd)
+
+        if cursor:
+            for idx, s in enumerate(infos):
+                if s["session_id"] == cursor:
+                    infos = infos[idx + 1:]
+                    break
+            else:
+                # Unknown cursor -> empty page (do not fall back to full list).
+                infos = []
+
+        has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
+        infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
+
        sessions = []
        for s in infos:
            updated_at = s.get("updated_at")
@@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent):
                    updated_at=updated_at,
                )
            )
-        return ListSessionsResponse(sessions=sessions)
+
+        next_cursor = sessions[-1].session_id if has_more and sessions else None
+        return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)

    # ---- Prompt (core) ------------------------------------------------------

@@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent):
        agent.step_callback = step_cb
        agent.message_callback = message_cb

-        if approval_cb:
-            try:
-                from tools import terminal_tool as _terminal_tool
-                previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
-                _terminal_tool.set_approval_callback(approval_cb)
-            except Exception:
-                logger.debug("Could not set ACP approval callback", exc_info=True)
+        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
+        # Set it INSIDE _run_agent so the TLS write happens in the executor
+        # thread — setting it here would write to the event-loop thread's TLS,
+        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
+        # takes the CLI-interactive path (which calls the registered
+        # callback via prompt_dangerous_approval) instead of the
+        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
+        # ACP's conn.request_permission maps cleanly to the interactive
+        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
+        # which requires a notify_cb registered in _gateway_notify_cbs.
+        previous_approval_cb = None
+        previous_interactive = None

        def _run_agent() -> dict:
+            nonlocal previous_approval_cb, previous_interactive
+            if approval_cb:
+                try:
+                    from tools import terminal_tool as _terminal_tool
+                    previous_approval_cb = _terminal_tool._get_approval_callback()
+                    _terminal_tool.set_approval_callback(approval_cb)
+                except Exception:
+                    logger.debug("Could not set ACP approval callback", exc_info=True)
+            # Signal to tools.approval that we have an interactive callback
+            # and the non-interactive auto-approve path must not fire.
+            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
+            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
                    user_message=user_text,
@@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent):
                logger.exception("Agent error in session %s", session_id)
                return {"final_response": f"Error: {e}", "messages": state.history}
            finally:
+                # Restore HERMES_INTERACTIVE.
+                if previous_interactive is None:
+                    os.environ.pop("HERMES_INTERACTIVE", None)
+                else:
+                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
                if approval_cb:
                    try:
                        from tools import terminal_tool as _terminal_tool
@@ -613,8 +673,8 @@ class HermesACPAgent(acp.Agent):
            await self._conn.session_update(
                session_id=session_id,
                update=AvailableCommandsUpdate(
-                    sessionUpdate="available_commands_update",
-                    availableCommands=self._available_commands(),
+                    session_update="available_commands_update",
+                    available_commands=self._available_commands(),
                ),
            )
        except Exception:
@@ -0,0 +1,326 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+import httpx
+
+from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
+from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
+from hermes_cli.runtime_provider import resolve_runtime_provider
+
+
+def _utc_now() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+@dataclass(frozen=True)
+class AccountUsageWindow:
+    label: str
+    used_percent: Optional[float] = None
+    reset_at: Optional[datetime] = None
+    detail: Optional[str] = None
+
+
+@dataclass(frozen=True)
+class AccountUsageSnapshot:
+    provider: str
+    source: str
+    fetched_at: datetime
+    title: str = "Account limits"
+    plan: Optional[str] = None
+    windows: tuple[AccountUsageWindow, ...] = ()
+    details: tuple[str, ...] = ()
+    unavailable_reason: Optional[str] = None
+
+    @property
+    def available(self) -> bool:
+        return bool(self.windows or self.details) and not self.unavailable_reason
+
+
+def _title_case_slug(value: Optional[str]) -> Optional[str]:
+    cleaned = str(value or "").strip()
+    if not cleaned:
+        return None
+    return cleaned.replace("_", " ").replace("-", " ").title()
+
+
+def _parse_dt(value: Any) -> Optional[datetime]:
+    if value in (None, ""):
+        return None
+    if isinstance(value, (int, float)):
+        return datetime.fromtimestamp(float(value), tz=timezone.utc)
+    if isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return None
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        try:
+            dt = datetime.fromisoformat(text)
+            return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
+        except ValueError:
+            return None
+    return None
+
+
+def _format_reset(dt: Optional[datetime]) -> str:
+    if not dt:
+        return "unknown"
+    local_dt = dt.astimezone()
+    delta = dt - _utc_now()
+    total_seconds = int(delta.total_seconds())
+    if total_seconds <= 0:
+        return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+    hours, rem = divmod(total_seconds, 3600)
+    minutes = rem // 60
+    if hours >= 24:
+        days, hours = divmod(hours, 24)
+        rel = f"in {days}d {hours}h"
+    elif hours > 0:
+        rel = f"in {hours}h {minutes}m"
+    else:
+        rel = f"in {minutes}m"
+    return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+
+
+def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
+    if not snapshot:
+        return []
+    header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
+    lines = [header]
+    if snapshot.plan:
+        lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
+    else:
+        lines.append(f"Provider: {snapshot.provider}")
+    for window in snapshot.windows:
+        if window.used_percent is None:
+            base = f"{window.label}: unavailable"
+        else:
+            remaining = max(0, round(100 - float(window.used_percent)))
+            used = max(0, round(float(window.used_percent)))
+            base = f"{window.label}: {remaining}% remaining ({used}% used)"
+        if window.reset_at:
+            base += f" • resets {_format_reset(window.reset_at)}"
+        elif window.detail:
+            base += f" • {window.detail}"
+        lines.append(base)
+    for detail in snapshot.details:
+        lines.append(detail)
+    if snapshot.unavailable_reason:
+        lines.append(f"Unavailable: {snapshot.unavailable_reason}")
+    return lines
+
+
+def _resolve_codex_usage_url(base_url: str) -> str:
+    normalized = (base_url or "").strip().rstrip("/")
+    if not normalized:
+        normalized = "https://chatgpt.com/backend-api/codex"
+    if normalized.endswith("/codex"):
+        normalized = normalized[: -len("/codex")]
+    if "/backend-api" in normalized:
+        return normalized + "/wham/usage"
+    return normalized + "/api/codex/usage"
+
+
+def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
+    creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
+    token_data = _read_codex_tokens()
+    tokens = token_data.get("tokens") or {}
+    account_id = str(tokens.get("account_id", "") or "").strip() or None
+    headers = {
+        "Authorization": f"Bearer {creds['api_key']}",
+        "Accept": "application/json",
+        "User-Agent": "codex-cli",
+    }
+    if account_id:
+        headers["ChatGPT-Account-Id"] = account_id
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
+        response.raise_for_status()
+    payload = response.json() or {}
+    rate_limit = payload.get("rate_limit") or {}
+    windows: list[AccountUsageWindow] = []
+    for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
+        window = rate_limit.get(key) or {}
+        used = window.get("used_percent")
+        if used is None:
+            continue
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=float(used),
+                reset_at=_parse_dt(window.get("reset_at")),
+            )
+        )
+    details: list[str] = []
+    credits = payload.get("credits") or {}
+    if credits.get("has_credits"):
+        balance = credits.get("balance")
+        if isinstance(balance, (int, float)):
+            details.append(f"Credits balance: ${float(balance):.2f}")
+        elif credits.get("unlimited"):
+            details.append("Credits balance: unlimited")
+    return AccountUsageSnapshot(
+        provider="openai-codex",
+        source="usage_api",
+        fetched_at=_utc_now(),
+        plan=_title_case_slug(payload.get("plan_type")),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
+    token = (resolve_anthropic_token() or "").strip()
+    if not token:
+        return None
+    if not _is_oauth_token(token):
+        return AccountUsageSnapshot(
+            provider="anthropic",
+            source="oauth_usage_api",
+            fetched_at=_utc_now(),
+            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
+        )
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+        "anthropic-beta": "oauth-2025-04-20",
+        "User-Agent": "claude-code/2.1.0",
+    }
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
+        response.raise_for_status()
+    payload = response.json() or {}
+    windows: list[AccountUsageWindow] = []
+    mapping = (
+        ("five_hour", "Current session"),
+        ("seven_day", "Current week"),
+        ("seven_day_opus", "Opus week"),
+        ("seven_day_sonnet", "Sonnet week"),
+    )
+    for key, label in mapping:
+        window = payload.get(key) or {}
+        util = window.get("utilization")
+        if util is None:
+            continue
+        used = float(util) * 100 if float(util) <= 1 else float(util)
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=used,
+                reset_at=_parse_dt(window.get("resets_at")),
+            )
+        )
+    details: list[str] = []
+    extra = payload.get("extra_usage") or {}
+    if extra.get("is_enabled"):
+        used_credits = extra.get("used_credits")
+        monthly_limit = extra.get("monthly_limit")
+        currency = extra.get("currency") or "USD"
+        if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
+            details.append(
+                f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
+            )
+    return AccountUsageSnapshot(
+        provider="anthropic",
+        source="oauth_usage_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
+    runtime = resolve_runtime_provider(
+        requested="openrouter",
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )
+    token = str(runtime.get("api_key", "") or "").strip()
+    if not token:
+        return None
+    normalized = str(runtime.get("base_url", "") or "").rstrip("/")
+    credits_url = f"{normalized}/credits"
+    key_url = f"{normalized}/key"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+    }
+    with httpx.Client(timeout=10.0) as client:
+        credits_resp = client.get(credits_url, headers=headers)
+        credits_resp.raise_for_status()
+        credits = (credits_resp.json() or {}).get("data") or {}
+        try:
+            key_resp = client.get(key_url, headers=headers)
+            key_resp.raise_for_status()
+            key_data = (key_resp.json() or {}).get("data") or {}
+        except Exception:
+            key_data = {}
+    total_credits = float(credits.get("total_credits") or 0.0)
+    total_usage = float(credits.get("total_usage") or 0.0)
+    details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
+    windows: list[AccountUsageWindow] = []
+    limit = key_data.get("limit")
+    limit_remaining = key_data.get("limit_remaining")
+    limit_reset = str(key_data.get("limit_reset") or "").strip()
+    usage = key_data.get("usage")
+    if (
+        isinstance(limit, (int, float))
+        and float(limit) > 0
+        and isinstance(limit_remaining, (int, float))
+        and 0 <= float(limit_remaining) <= float(limit)
+    ):
+        limit_value = float(limit)
+        remaining_value = float(limit_remaining)
+        used_percent = ((limit_value - remaining_value) / limit_value) * 100
+        detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
+        if limit_reset:
+            detail_parts.append(f"resets {limit_reset}")
+        windows.append(
+            AccountUsageWindow(
+                label="API key quota",
+                used_percent=used_percent,
+                detail=" • ".join(detail_parts),
+            )
+        )
+    if isinstance(usage, (int, float)):
+        usage_parts = [f"API key usage: ${float(usage):.2f} total"]
+        for value, label in (
+            (key_data.get("usage_daily"), "today"),
+            (key_data.get("usage_weekly"), "this week"),
+            (key_data.get("usage_monthly"), "this month"),
+        ):
+            if isinstance(value, (int, float)) and float(value) > 0:
+                usage_parts.append(f"${float(value):.2f} {label}")
+        details.append(" • ".join(usage_parts))
+    return AccountUsageSnapshot(
+        provider="openrouter",
+        source="credits_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def fetch_account_usage(
+    provider: Optional[str],
+    *,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> Optional[AccountUsageSnapshot]:
+    normalized = str(provider or "").strip().lower()
+    if normalized in {"", "auto", "custom"}:
+        return None
+    try:
+        if normalized == "openai-codex":
+            return _fetch_codex_account_usage()
+        if normalized == "anthropic":
+            return _fetch_anthropic_account_usage()
+        if normalized == "openrouter":
+            return _fetch_openrouter_account_usage(base_url, api_key)
+    except Exception:
+        return None
+    return None
@@ -17,8 +17,8 @@ import os
 from pathlib import Path

 from hermes_constants import get_hermes_home
-from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
+from utils import normalize_proxy_env_vars

 try:
    import anthropic as _anthropic_sdk
@@ -116,6 +116,63 @@ def _get_anthropic_max_output(model: str) -> int:
    return best_val


+def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
+    """Return ``value`` floored to a positive int, or ``None`` if it is not a
+    finite positive number. Ported from openclaw/openclaw#66664.
+
+    Anthropic's Messages API rejects ``max_tokens`` values that are 0,
+    negative, non-integer, or non-finite with HTTP 400. Python's ``or``
+    idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
+    negative ints and fractional floats (``-1``, ``0.5``) through to the
+    API, producing a user-visible failure instead of a local error.
+    """
+    # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
+    # silently become 1 and ``False`` doesn't become 0.
+    if isinstance(value, bool):
+        return None
+    if not isinstance(value, (int, float)):
+        return None
+    try:
+        import math
+        if not math.isfinite(value):
+            return None
+    except Exception:
+        return None
+    floored = int(value)  # truncates toward zero for floats
+    return floored if floored > 0 else None
+
+
+def _resolve_anthropic_messages_max_tokens(
+    requested,
+    model: str,
+    context_length: Optional[int] = None,
+) -> int:
+    """Resolve the ``max_tokens`` budget for an Anthropic Messages call.
+
+    Prefers ``requested`` when it is a positive finite number; otherwise
+    falls back to the model's output ceiling. Raises ``ValueError`` if no
+    positive budget can be resolved (should not happen with current model
+    table defaults, but guards against a future regression where
+    ``_get_anthropic_max_output`` could return ``0``).
+
+    Separately, callers apply a context-window clamp — this resolver does
+    not, to keep the positive-value contract independent of endpoint
+    specifics.
+
+    Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
+    """
+    resolved = _resolve_positive_anthropic_max_tokens(requested)
+    if resolved is not None:
+        return resolved
+    fallback = _get_anthropic_max_output(model)
+    if fallback > 0:
+        return fallback
+    raise ValueError(
+        f"Anthropic Messages adapter requires a positive max_tokens value for "
+        f"model {model!r}; got {requested!r} and no model default resolved."
+    )
+
+
 def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6+ models that support adaptive thinking."""
    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -265,6 +322,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    return True  # Any other endpoint is a third-party proxy


+def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
+    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -308,6 +373,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
+
+    normalize_proxy_env_vars()
+
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
@@ -319,9 +387,18 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

-    if _requires_bearer_auth(normalized_base_url):
+    if _is_kimi_coding_endpoint(base_url):
+        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
+        # to be recognized as a valid Coding Agent. Without it, returns 403.
+        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
+        kwargs["api_key"] = api_key
+        kwargs["default_headers"] = {
+            "User-Agent": "claude-code/0.1.0",
+            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
+        }
+    elif _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer even for regular API keys. Route those endpoints
+        # Authorization: Bearer *** for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1062,6 +1139,31 @@ def convert_messages_to_anthropic(
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
+            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+            # tool-call messages to carry reasoning_content when thinking is
+            # enabled server-side.  Preserve it as a thinking block so Kimi
+            # can validate the message history.  See hermes-agent#13848.
+            #
+            # Accept empty string "" — _copy_reasoning_content_for_api()
+            # injects "" as a tier-3 fallback for Kimi tool-call messages
+            # that had no reasoning.  Kimi requires the field to exist, even
+            # if empty.
+            #
+            # Prepend (not append): Anthropic protocol requires thinking
+            # blocks before text and tool_use blocks.
+            #
+            # Guard: only add when reasoning_details didn't already contribute
+            # thinking blocks.  On native Anthropic, reasoning_details produces
+            # signed thinking blocks — adding another unsigned one from
+            # reasoning_content would create a duplicate (same text) that gets
+            # downgraded to a spurious text block on the last assistant message.
+            reasoning_content = m.get("reasoning_content")
+            _already_has_thinking = any(
+                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
+                for b in blocks
+            )
+            if isinstance(reasoning_content, str) and not _already_has_thinking:
+                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
@@ -1217,6 +1319,7 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
+    _is_kimi = _is_kimi_coding_endpoint(base_url)

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1228,7 +1331,25 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_third_party or idx != last_assistant_idx:
+        if _is_kimi:
+            # Kimi's /coding endpoint enables thinking server-side and
+            # requires unsigned thinking blocks on replayed assistant
+            # tool-call messages.  Strip signed Anthropic blocks (Kimi
+            # can't validate signatures) but preserve the unsigned ones
+            # we synthesised from reasoning_content above.
+            new_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    new_content.append(b)
+                    continue
+                if b.get("signature") or b.get("data"):
+                    # Anthropic-signed block — Kimi can't validate, strip
+                    continue
+                # Unsigned thinking (synthesised from reasoning_content) —
+                # keep it: Kimi needs it for message-history validation.
+                new_content.append(b)
+            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
+        elif _is_third_party or idx != last_assistant_idx:
            # Third-party endpoint: strip ALL thinking blocks from every
            # assistant message — signatures are Anthropic-proprietary.
            # Direct Anthropic: strip from non-latest assistant messages only.
@@ -1326,7 +1447,12 @@ def build_anthropic_kwargs(

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
-    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
+    # Use the resolver helper so non-positive values (negative ints,
+    # fractional floats, NaN, non-numeric) fail locally with a clear error
+    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
+    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
+        max_tokens, model, context_length=context_length
+    )

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
@@ -1405,11 +1531,25 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
+    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
+    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
+    # validates the message history and requires every prior assistant
+    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
+    # Anthropic path never populates that field, and
+    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
+    # on third-party endpoints — so the request fails with HTTP 400
+    # "thinking is enabled but reasoning_content is missing in assistant
+    # tool call message at index N".  Kimi's reasoning is driven server-side
+    # on the /coding route, so skip Anthropic's thinking parameter entirely
+    # for that host.  (Kimi on chat_completions enables thinking via
+    # extra_body in the ChatCompletionsTransport — see #13503.)
+    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    if reasoning_config and isinstance(reasoning_config, dict):
+    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
@@ -1458,70 +1598,4 @@ def build_anthropic_kwargs(
    return kwargs


-def normalize_anthropic_response(
-    response,
-    strip_tool_prefix: bool = False,
-) -> Tuple[SimpleNamespace, str]:
-    """Normalize Anthropic response to match the shape expected by AIAgent.

-    Returns (assistant_message, finish_reason) where assistant_message has
-    .content, .tool_calls, and .reasoning attributes.
-
-    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
-    added to tool names for OAuth Claude Code compatibility.
-    """
-    text_parts = []
-    reasoning_parts = []
-    reasoning_details = []
-    tool_calls = []
-
-    for block in response.content:
-        if block.type == "text":
-            text_parts.append(block.text)
-        elif block.type == "thinking":
-            reasoning_parts.append(block.thinking)
-            block_dict = _to_plain_data(block)
-            if isinstance(block_dict, dict):
-                reasoning_details.append(block_dict)
-        elif block.type == "tool_use":
-            name = block.name
-            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
-                name = name[len(_MCP_TOOL_PREFIX):]
-            tool_calls.append(
-                SimpleNamespace(
-                    id=block.id,
-                    type="function",
-                    function=SimpleNamespace(
-                        name=name,
-                        arguments=json.dumps(block.input),
-                    ),
-                )
-            )
-
-    # Map Anthropic stop_reason to OpenAI finish_reason.
-    # Newer stop reasons added in Claude 4.5+ / 4.7:
-    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
-    #   - model_context_window_exceeded: hit context limit (not max_tokens)
-    # Both need distinct handling upstream — a refusal should surface to the
-    # user with a clear message, and a context-window overflow should trigger
-    # compression/truncation rather than be treated as normal end-of-turn.
-    stop_reason_map = {
-        "end_turn": "stop",
-        "tool_use": "tool_calls",
-        "max_tokens": "length",
-        "stop_sequence": "stop",
-        "refusal": "content_filter",
-        "model_context_window_exceeded": "length",
-    }
-    finish_reason = stop_reason_map.get(response.stop_reason, "stop")
-
-    return (
-        SimpleNamespace(
-            content="\n".join(text_parts) if text_parts else None,
-            tool_calls=tool_calls or None,
-            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
-            reasoning_content=None,
-            reasoning_details=reasoning_details or None,
-        ),
-        finish_reason,
-    )
@@ -48,6 +48,7 @@ from openai import OpenAI
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars

 logger = logging.getLogger(__name__)

@@ -95,51 +96,37 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
    return _PROVIDER_ALIASES.get(normalized, normalized)


-_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
-    "kimi-for-coding": 0.6,
-}
-
-# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
-# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
-# value 0.6.  Any other value will result in an error."  The same lock applies
-# to the other k2.* models served on that endpoint.  Enumerated explicitly so
-# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
-# the standard chat API and third parties) are NOT clamped.
-# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
-_KIMI_INSTANT_MODELS: frozenset = frozenset({
-    "kimi-k2.5",
-    "kimi-k2-turbo-preview",
-    "kimi-k2-0905-preview",
-})
-_KIMI_THINKING_MODELS: frozenset = frozenset({
-    "kimi-k2-thinking",
-    "kimi-k2-thinking-turbo",
-})
+# Sentinel: when returned by _fixed_temperature_for_model(), callers must
+# strip the ``temperature`` key from API kwargs entirely so the provider's
+# server-side default applies.  Kimi/Moonshot models manage temperature
+# internally — sending *any* value (even the "correct" one) can conflict
+# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
+OMIT_TEMPERATURE: object = object()


-def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts.
+def _is_kimi_model(model: Optional[str]) -> bool:
+    """True for any Kimi / Moonshot model that manages temperature server-side."""
+    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
+    return bare.startswith("kimi-") or bare == "kimi"

-    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
-    the k2.5 family.  Non-thinking variants require exactly 0.6; thinking
-    variants require 1.0.  An optional ``vendor/`` prefix (e.g.
-    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.

-    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
-    which is the separate non-coding K2 family with variable temperature.
+def _fixed_temperature_for_model(
+    model: Optional[str],
+    base_url: Optional[str] = None,
+) -> "Optional[float] | object":
+    """Return a temperature directive for models with strict contracts.
+
+    Returns:
+        ``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
+            provider chooses its own default.  Used for all Kimi / Moonshot
+            models whose gateway selects temperature server-side.
+        ``float`` — a specific value the caller must use (reserved for future
+            models with fixed-temperature contracts).
+        ``None`` — no override; caller should use its own default.
    """
-    normalized = (model or "").strip().lower()
-    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
-    if fixed is not None:
-        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
-        return fixed
-    bare = normalized.rsplit("/", 1)[-1]
-    if bare in _KIMI_THINKING_MODELS:
-        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
-        return 1.0
-    if bare in _KIMI_INSTANT_MODELS:
-        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
-        return 0.6
+    if _is_kimi_model(model):
+        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
+        return OMIT_TEMPERATURE
    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
@@ -147,6 +134,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
+    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
@@ -163,7 +151,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
 # differs from their main chat model, map it here.  The vision auto-detect
 # "exotic provider" branch checks this before falling back to the main model.
 _PROVIDER_VISION_MODELS: Dict[str, str] = {
-    "xiaomi": "mimo-v2-omni",
+    "xiaomi": "mimo-v2.5",
    "zai": "glm-5v-turbo",
 }

@@ -174,6 +162,16 @@ _OR_HEADERS = {
    "X-OpenRouter-Categories": "productivity,cli-agent",
 }

+# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
+# referrerUrl and X-Title maps to appName in the gateway's analytics.
+from hermes_cli import __version__ as _HERMES_VERSION
+
+_AI_GATEWAY_HEADERS = {
+    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
+    "X-Title": "Hermes Agent",
+    "User-Agent": f"HermesAgent/{_HERMES_VERSION}",
+}
+
 # Nous Portal extra_body for product attribution.
 # Callers should pass this as extra_body in chat.completions.create()
 # when the auxiliary client is backed by Nous Portal.
@@ -185,8 +183,6 @@ auxiliary_is_nous: bool = False
 # Default auxiliary models per provider
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
-_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
-_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -577,7 +573,8 @@ class _AnthropicCompletionsAdapter:
        self._is_oauth = is_oauth

    def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
+        from agent.anthropic_adapter import build_anthropic_kwargs
+        from agent.transports import get_transport

        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
@@ -614,7 +611,19 @@ class _AnthropicCompletionsAdapter:
                anthropic_kwargs["temperature"] = temperature

        response = self._client.messages.create(**anthropic_kwargs)
-        assistant_message, finish_reason = normalize_anthropic_response(response)
+        _transport = get_transport("anthropic_messages")
+        _nr = _transport.normalize_response(
+            response, strip_tool_prefix=self._is_oauth
+        )
+
+        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
+        # .function.arguments) via properties, so no wrapping needed.
+        assistant_message = SimpleNamespace(
+            content=_nr.content,
+            tool_calls=_nr.tool_calls,
+            reasoning=_nr.reasoning,
+        )
+        finish_reason = _nr.finish_reason

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -731,6 +740,33 @@ def _nous_base_url() -> str:
    return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)


+def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
+    """Return fresh Nous runtime credentials when available.
+
+    This mirrors the main agent's 401 recovery path and keeps auxiliary
+    clients aligned with the singleton auth store + mint flow instead of
+    relying only on whatever raw tokens happen to be sitting in auth.json
+    or the credential pool.
+    """
+    try:
+        from hermes_cli.auth import resolve_nous_runtime_credentials
+
+        creds = resolve_nous_runtime_credentials(
+            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+            timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+            force_mint=force_refresh,
+        )
+    except Exception as exc:
+        logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
+        return None
+
+    api_key = str(creds.get("api_key") or "").strip()
+    base_url = str(creds.get("base_url") or "").strip().rstrip("/")
+    if not api_key or not base_url:
+        return None
+    return api_key, base_url
+
+
 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store.

@@ -814,10 +850,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            if model is None:
                continue  # skip provider if we don't know a valid aux model
            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
+            if provider_id == "gemini":
+                from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
+
+                if is_native_gemini_base_url(base_url):
+                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
            extra = {}
-            if "api.kimi.com" in base_url.lower():
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-            elif "api.githubcopilot.com" in base_url.lower():
+            if base_url_host_matches(base_url, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
@@ -835,10 +876,15 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        if model is None:
            continue  # skip provider if we don't know a valid aux model
        logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
+        if provider_id == "gemini":
+            from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
+
+            if is_native_gemini_base_url(base_url):
+                return GeminiNativeClient(api_key=api_key, base_url=base_url), model
        extra = {}
-        if "api.kimi.com" in base_url.lower():
-            extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-        elif "api.githubcopilot.com" in base_url.lower():
+        if base_url_host_matches(base_url, "api.kimi.com"):
+            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
@@ -870,6 +916,19 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
                   default_headers=_OR_HEADERS), _OPENROUTER_MODEL


+def _describe_openrouter_unavailable() -> str:
+    """Return a more precise OpenRouter auth failure reason for logs."""
+    pool_present, entry = _select_pool_entry("openrouter")
+    if pool_present:
+        if entry is None:
+            return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
+        if not _pool_runtime_api_key(entry):
+            return "OpenRouter credential pool entry is missing a runtime API key"
+    if not str(os.getenv("OPENROUTER_API_KEY") or "").strip():
+        return "OPENROUTER_API_KEY not set"
+    return "no usable OpenRouter credentials found"
+
+
 def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
    # Check cross-session rate limit guard before attempting Nous —
    # if another session already recorded a 429, skip Nous entirely
@@ -887,29 +946,50 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
        pass

    nous = _read_nous_auth()
-    if not nous:
+    runtime = _resolve_nous_runtime_api(force_refresh=False)
+    if runtime is None and not nous:
        return None, None
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-    if nous.get("source") == "pool":
-        model = "gemini-3-flash"
-    else:
-        model = _NOUS_MODEL
-    # Free-tier users can't use paid auxiliary models — use the free
-    # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
+
+    # Ask the Portal which model it currently recommends for this task type.
+    # The /api/nous/recommended-models endpoint is the authoritative source:
+    # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
+    # auto-detects the caller's tier via check_nous_free_tier().  Fall back to
+    # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
+    # or returns a null recommendation for this task type.
+    model = _NOUS_MODEL
    try:
-        from hermes_cli.models import check_nous_free_tier
-        if check_nous_free_tier():
-            model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
-            logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
-                         model, "vision" if vision else "text")
-    except Exception:
-        pass
+        from hermes_cli.models import get_nous_recommended_aux_model
+        recommended = get_nous_recommended_aux_model(vision=vision)
+        if recommended:
+            model = recommended
+            logger.debug(
+                "Auxiliary/%s: using Portal-recommended model %s",
+                "vision" if vision else "text", model,
+            )
+        else:
+            logger.debug(
+                "Auxiliary/%s: no Portal recommendation, falling back to %s",
+                "vision" if vision else "text", model,
+            )
+    except Exception as exc:
+        logger.debug(
+            "Auxiliary/%s: recommended-models lookup failed (%s); "
+            "falling back to %s",
+            "vision" if vision else "text", exc, model,
+        )
+
+    if runtime is not None:
+        api_key, base_url = runtime
+    else:
+        api_key = _nous_api_key(nous or {})
+        base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
    return (
        OpenAI(
-            api_key=_nous_api_key(nous),
-            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
+            api_key=api_key,
+            base_url=base_url,
        ),
        model,
    )
@@ -987,7 +1067,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
        return None, None, None

    custom_base = custom_base.strip().rstrip("/")
-    if "openrouter.ai" in custom_base.lower():
+    if base_url_host_matches(custom_base, "openrouter.ai"):
        # requested='custom' falls back to OpenRouter when no custom endpoint is
        # configured. Treat that as "no custom endpoint" for auxiliary routing.
        return None, None, None
@@ -1021,6 +1101,8 @@ def _validate_proxy_env_urls() -> None:
    """
    from urllib.parse import urlparse

+    normalize_proxy_env_vars()
+
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = str(os.environ.get(key) or "").strip()
@@ -1055,7 +1137,7 @@ def _validate_base_url(base_url: str) -> None:
        ) from exc


-def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
+def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
    runtime = _resolve_custom_runtime()
    if len(runtime) == 2:
        custom_base, custom_key = runtime
@@ -1071,6 +1153,23 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
    if custom_mode == "codex_responses":
        real_client = OpenAI(api_key=custom_key, base_url=custom_base)
        return CodexAuxiliaryClient(real_client, model), model
+    if custom_mode == "anthropic_messages":
+        # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
+        # LiteLLM proxies, etc.).  Must NEVER be treated as OAuth —
+        # Anthropic OAuth claims only apply to api.anthropic.com.
+        try:
+            from agent.anthropic_adapter import build_anthropic_client
+            real_client = build_anthropic_client(custom_key, custom_base)
+        except ImportError:
+            logger.warning(
+                "Custom endpoint declares api_mode=anthropic_messages but the "
+                "anthropic SDK is not installed — falling back to OpenAI-wire."
+            )
+            return OpenAI(api_key=custom_key, base_url=custom_base), model
+        return (
+            AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
+            model,
+        )
    return OpenAI(api_key=custom_key, base_url=custom_base), model


@@ -1234,6 +1333,15 @@ def _is_connection_error(exc: Exception) -> bool:
    return False


+def _is_auth_error(exc: Exception) -> bool:
+    """Detect auth failures that should trigger provider-specific refresh."""
+    status = getattr(exc, "status_code", None)
+    if status == 401:
+        return True
+    err_lower = str(exc).lower()
+    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
+
+
 def _try_payment_fallback(
    failed_provider: str,
    task: str = None,
@@ -1391,6 +1499,13 @@ def _to_async_client(sync_client, model: str):
        return AsyncCodexAuxiliaryClient(sync_client), model
    if isinstance(sync_client, AnthropicAuxiliaryClient):
        return AsyncAnthropicAuxiliaryClient(sync_client), model
+    try:
+        from agent.gemini_native_adapter import GeminiNativeClient, AsyncGeminiNativeClient
+
+        if isinstance(sync_client, GeminiNativeClient):
+            return AsyncGeminiNativeClient(sync_client), model
+    except ImportError:
+        pass
    try:
        from agent.copilot_acp_client import CopilotACPClient
        if isinstance(sync_client, CopilotACPClient):
@@ -1402,15 +1517,15 @@ def _to_async_client(sync_client, model: str):
        "api_key": sync_client.api_key,
        "base_url": str(sync_client.base_url),
    }
-    base_lower = str(sync_client.base_url).lower()
-    if "openrouter" in base_lower:
+    sync_base_url = str(sync_client.base_url)
+    if base_url_host_matches(sync_base_url, "openrouter.ai"):
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
-    elif "api.githubcopilot.com" in base_lower:
+    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
        from hermes_cli.models import copilot_default_headers

        async_kwargs["default_headers"] = copilot_default_headers()
-    elif "api.kimi.com" in base_lower:
-        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
+        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
    return AsyncOpenAI(**async_kwargs), model


@@ -1486,8 +1601,7 @@ def resolve_provider_client(
        # Auto-detect: api.openai.com + codex model name pattern
        if api_mode and api_mode != "codex_responses":
            return False  # explicit non-codex mode
-        normalized_base = (base_url_str or "").strip().lower()
-        if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
+        if base_url_hostname(base_url_str) == "api.openai.com":
            model_lower = (model_str or "").lower()
            if "codex" in model_lower:
                return True
@@ -1526,8 +1640,10 @@ def resolve_provider_client(
    if provider == "openrouter":
        client, default = _try_openrouter()
        if client is None:
-            logger.warning("resolve_provider_client: openrouter requested "
-                           "but OPENROUTER_API_KEY not set")
+            logger.warning(
+                "resolve_provider_client: openrouter requested but %s",
+                _describe_openrouter_unavailable(),
+            )
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
        return (_to_async_client(client, final_model) if async_mode
@@ -1535,7 +1651,13 @@ def resolve_provider_client(

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
    if provider == "nous":
-        client, default = _try_nous()
+        # Detect vision tasks: either explicit model override from
+        # _PROVIDER_VISION_MODELS, or caller passed a known vision model.
+        _is_vision = (
+            model in _PROVIDER_VISION_MODELS.values()
+            or (model or "").strip().lower() == "mimo-v2-omni"
+        )
+        client, default = _try_nous(vision=_is_vision)
        if client is None:
            logger.warning("resolve_provider_client: nous requested "
                           "but Nous Portal not configured (run: hermes auth)")
@@ -1591,9 +1713,9 @@ def resolve_provider_client(
                provider,
            )
            extra = {}
-            if "api.kimi.com" in custom_base.lower():
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-            elif "api.githubcopilot.com" in custom_base.lower():
+            if base_url_host_matches(custom_base, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers
                extra["default_headers"] = copilot_default_headers()
            client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
@@ -1687,11 +1809,20 @@ def resolve_provider_client(
        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
        final_model = _normalize_resolved_model(model or default_model, provider)

+        if provider == "gemini":
+            from agent.gemini_native_adapter import GeminiNativeClient, is_native_gemini_base_url
+
+            if is_native_gemini_base_url(base_url):
+                client = GeminiNativeClient(api_key=api_key, base_url=base_url)
+                logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
+                return (_to_async_client(client, final_model) if async_mode
+                        else (client, final_model))
+
        # Provider-specific headers
        headers = {}
-        if "api.kimi.com" in base_url.lower():
-            headers["User-Agent"] = "KimiCLI/1.30.0"
-        elif "api.githubcopilot.com" in base_url.lower():
+        if base_url_host_matches(base_url, "api.kimi.com"):
+            headers["User-Agent"] = "claude-code/0.1.0"
+        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

            headers.update(copilot_default_headers())
@@ -1922,24 +2053,35 @@ def resolve_vision_provider_client(
        #      _PROVIDER_VISION_MODELS provides per-provider vision model
        #      overrides when the provider has a dedicated multimodal model
        #      that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
-        #      zai → glm-5v-turbo).
+        #      zai → glm-5v-turbo). Nous is the exception: it has a dedicated
+        #      strict vision backend with tier-aware defaults, so it must not
+        #      fall through to the user's text chat model here.
        #   2. OpenRouter  (vision-capable aggregator fallback)
        #   3. Nous Portal (vision-capable aggregator fallback)
        #   4. Stop
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
-            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
-            rpc_client, rpc_model = resolve_provider_client(
-                main_provider, vision_model,
-                api_mode=resolved_api_mode)
-            if rpc_client is not None:
-                logger.info(
-                    "Vision auto-detect: using main provider %s (%s)",
-                    main_provider, rpc_model or vision_model,
-                )
-                return _finalize(
-                    main_provider, rpc_client, rpc_model or vision_model)
+            if main_provider == "nous":
+                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
+                if sync_client is not None:
+                    logger.info(
+                        "Vision auto-detect: using main provider %s (%s)",
+                        main_provider, default_model or resolved_model or main_model,
+                    )
+                    return _finalize(main_provider, sync_client, default_model)
+            else:
+                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
+                rpc_client, rpc_model = resolve_provider_client(
+                    main_provider, vision_model,
+                    api_mode=resolved_api_mode)
+                if rpc_client is not None:
+                    logger.info(
+                        "Vision auto-detect: using main provider %s (%s)",
+                        main_provider, rpc_model or vision_model,
+                    )
+                    return _finalize(
+                        main_provider, rpc_client, rpc_model or vision_model)

        # Fall back through aggregators (uses their dedicated vision model,
        # not the user's main model) when main provider has no client.
@@ -1986,7 +2128,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
    # Only use max_completion_tokens for direct OpenAI custom endpoints
    if (not or_key
            and _read_nous_auth() is None
-            and "api.openai.com" in custom_base.lower()):
+            and base_url_hostname(custom_base) == "api.openai.com"):
        return {"max_completion_tokens": value}
    return {"max_tokens": value}

@@ -2014,6 +2156,76 @@ _client_cache_lock = threading.Lock()
 _CLIENT_CACHE_MAX_SIZE = 64  # safety belt — evict oldest when exceeded


+def _client_cache_key(
+    provider: str,
+    *,
+    async_mode: bool,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+    api_mode: Optional[str] = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> tuple:
+    runtime = _normalize_main_runtime(main_runtime)
+    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+
+
+def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
+    with _client_cache_lock:
+        old_entry = _client_cache.get(cache_key)
+        if old_entry is not None and old_entry[0] is not client:
+            _force_close_async_httpx(old_entry[0])
+            try:
+                close_fn = getattr(old_entry[0], "close", None)
+                if callable(close_fn):
+                    close_fn()
+            except Exception:
+                pass
+        _client_cache[cache_key] = (client, default_model, bound_loop)
+
+
+def _refresh_nous_auxiliary_client(
+    *,
+    cache_provider: str,
+    model: Optional[str],
+    async_mode: bool,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+    api_mode: Optional[str] = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> Tuple[Optional[Any], Optional[str]]:
+    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
+    runtime = _resolve_nous_runtime_api(force_refresh=True)
+    if runtime is None:
+        return None, model
+
+    fresh_key, fresh_base_url = runtime
+    sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
+    final_model = model
+
+    current_loop = None
+    if async_mode:
+        try:
+            import asyncio as _aio
+            current_loop = _aio.get_event_loop()
+        except RuntimeError:
+            pass
+        client, final_model = _to_async_client(sync_client, final_model or "")
+    else:
+        client = sync_client
+
+    cache_key = _client_cache_key(
+        cache_provider,
+        async_mode=async_mode,
+        base_url=base_url,
+        api_key=api_key,
+        api_mode=api_mode,
+        main_runtime=main_runtime,
+    )
+    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
+    return client, final_model
+
+
 def neuter_async_httpx_del() -> None:
    """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.

@@ -2115,7 +2327,7 @@ def cleanup_stale_async_clients() -> None:

 def _is_openrouter_client(client: Any) -> bool:
    for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
-        if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
+        if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
            return True
    return False

@@ -2167,8 +2379,14 @@ def _get_cached_client(
        except RuntimeError:
            pass
    runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+    cache_key = _client_cache_key(
+        provider,
+        async_mode=async_mode,
+        base_url=base_url,
+        api_key=api_key,
+        api_mode=api_mode,
+        main_runtime=main_runtime,
+    )
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2237,7 +2455,6 @@ def _resolve_task_provider_model(
    to "custom" and the task uses that direct endpoint. api_mode is one of
    "chat_completions", "codex_responses", or None (auto-detect).
    """
-    config = {}
    cfg_provider = None
    cfg_model = None
    cfg_base_url = None
@@ -2245,16 +2462,7 @@ def _resolve_task_provider_model(
    cfg_api_mode = None

    if task:
-        try:
-            from hermes_cli.config import load_config
-            config = load_config()
-        except ImportError:
-            config = {}
-
-        aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
-        task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
-        if not isinstance(task_config, dict):
-            task_config = {}
+        task_config = _get_auxiliary_task_config(task)
        cfg_provider = str(task_config.get("provider", "")).strip() or None
        cfg_model = str(task_config.get("model", "")).strip() or None
        cfg_base_url = str(task_config.get("base_url", "")).strip() or None
@@ -2284,17 +2492,25 @@ def _resolve_task_provider_model(
 _DEFAULT_AUX_TIMEOUT = 30.0


-def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
-    """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*."""
+def _get_auxiliary_task_config(task: str) -> Dict[str, Any]:
+    """Return the config dict for auxiliary.<task>, or {} when unavailable."""
    if not task:
-        return default
+        return {}
    try:
        from hermes_cli.config import load_config
        config = load_config()
    except ImportError:
-        return default
+        return {}
    aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
    task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
+    return task_config if isinstance(task_config, dict) else {}
+
+
+def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float:
+    """Read timeout from auxiliary.{task}.timeout in config, falling back to *default*."""
+    if not task:
+        return default
+    task_config = _get_auxiliary_task_config(task)
    raw = task_config.get("timeout")
    if raw is not None:
        try:
@@ -2304,6 +2520,15 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float
    return default


+def _get_task_extra_body(task: str) -> Dict[str, Any]:
+    """Read auxiliary.<task>.extra_body and return a shallow copy when valid."""
+    task_config = _get_auxiliary_task_config(task)
+    raw = task_config.get("extra_body")
+    if isinstance(raw, dict):
+        return dict(raw)
+    return {}
+
+
 # ---------------------------------------------------------------------------
 # Anthropic-compatible endpoint detection + image block conversion
 # ---------------------------------------------------------------------------
@@ -2391,8 +2616,10 @@ def _build_call_kwargs(
        "timeout": timeout,
    }

-    fixed_temperature = _fixed_temperature_for_model(model)
-    if fixed_temperature is not None:
+    fixed_temperature = _fixed_temperature_for_model(model, base_url)
+    if fixed_temperature is OMIT_TEMPERATURE:
+        temperature = None  # strip — let server choose
+    elif fixed_temperature is not None:
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
@@ -2412,7 +2639,7 @@ def _build_call_kwargs(
        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
        if provider == "custom":
            custom_base = base_url or _current_custom_base_url()
-            if "api.openai.com" in custom_base.lower():
+            if base_url_hostname(custom_base) == "api.openai.com":
                kwargs["max_completion_tokens"] = max_tokens
            else:
                kwargs["max_tokens"] = max_tokens
@@ -2504,6 +2731,8 @@ def call_llm(
    """
    resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
        task, provider, model, base_url, api_key)
+    effective_extra_body = _get_task_extra_body(task)
+    effective_extra_body.update(extra_body or {})

    if task == "vision":
        effective_provider, client, final_model = resolve_vision_provider_client(
@@ -2572,11 +2801,14 @@ def call_llm(
                     task, resolved_provider or "auto", final_model or "default",
                     f" at {_base_info}" if _base_info and "openrouter" not in _base_info else "")

+    # Pass the client's actual base_url (not just resolved_base_url) so
+    # endpoint-specific temperature overrides can distinguish
+    # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes.
    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=effective_timeout, extra_body=extra_body,
-        base_url=resolved_base_url)
+        tools=tools, timeout=effective_timeout, extra_body=effective_extra_body,
+        base_url=_base_info or resolved_base_url)

    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
    _client_base = str(getattr(client, "base_url", "") or "")
@@ -2602,6 +2834,29 @@ def call_llm(
                    raise
                first_err = retry_err

+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=False,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                main_runtime=main_runtime,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    refreshed_client.chat.completions.create(**kwargs), task)
+
        # ── Payment / credit exhaustion fallback ──────────────────────
        # When the resolved provider returns 402 or a credit-related error,
        # try alternative providers instead of giving up.  This handles the
@@ -2630,7 +2885,8 @@ def call_llm(
                    fb_label, fb_model, messages,
                    temperature=temperature, max_tokens=max_tokens,
                    tools=tools, timeout=effective_timeout,
-                    extra_body=extra_body)
+                    extra_body=effective_extra_body,
+                    base_url=str(getattr(fb_client, "base_url", "") or ""))
                return _validate_llm_response(
                    fb_client.chat.completions.create(**fb_kwargs), task)
        raise
@@ -2712,6 +2968,8 @@ async def async_call_llm(
    """
    resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
        task, provider, model, base_url, api_key)
+    effective_extra_body = _get_task_extra_body(task)
+    effective_extra_body.update(extra_body or {})

    if task == "vision":
        effective_provider, client, final_model = resolve_vision_provider_client(
@@ -2765,14 +3023,17 @@ async def async_call_llm(

    effective_timeout = timeout if timeout is not None else _get_task_timeout(task)

+    # Pass the client's actual base_url (not just resolved_base_url) so
+    # endpoint-specific temperature overrides can distinguish
+    # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes.
+    _client_base = str(getattr(client, "base_url", "") or "")
    kwargs = _build_call_kwargs(
        resolved_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=effective_timeout, extra_body=extra_body,
-        base_url=resolved_base_url)
+        tools=tools, timeout=effective_timeout, extra_body=effective_extra_body,
+        base_url=_client_base or resolved_base_url)

    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
-    _client_base = str(getattr(client, "base_url", "") or "")
    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])

@@ -2794,6 +3055,28 @@ async def async_call_llm(
                    raise
                first_err = retry_err

+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=True,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    await refreshed_client.chat.completions.create(**kwargs), task)
+
        # ── Payment / connection fallback (mirrors sync call_llm) ─────
        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
        is_auto = resolved_provider in ("auto", "", None)
@@ -2808,7 +3091,8 @@ async def async_call_llm(
                    fb_label, fb_model, messages,
                    temperature=temperature, max_tokens=max_tokens,
                    tools=tools, timeout=effective_timeout,
-                    extra_body=extra_body)
+                    extra_body=effective_extra_body,
+                    base_url=str(getattr(fb_client, "base_url", "") or ""))
                # Convert sync fallback client to async
                async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
                if async_fb_model and async_fb_model != fb_kwargs.get("model"):
@@ -0,0 +1,813 @@
+"""Codex Responses API adapter.
+
+Pure format-conversion and normalization logic for the OpenAI Responses API
+(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints).
+
+Extracted from run_agent.py to isolate Responses API-specific logic from the
+core agent loop. All functions are stateless — they operate on the data passed
+in and return transformed results.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import re
+import uuid
+from types import SimpleNamespace
+from typing import Any, Dict, List, Optional
+
+from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Multimodal content helpers
+# ---------------------------------------------------------------------------
+
+def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
+    """Convert chat-style multimodal content to Responses API input parts.
+
+    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
+    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
+
+    Returns an empty list when ``content`` is not a list or contains no
+    recognized parts — callers fall back to the string path.
+    """
+    if not isinstance(content, list):
+        return []
+    converted: List[Dict[str, Any]] = []
+    for part in content:
+        if isinstance(part, str):
+            if part:
+                converted.append({"type": "input_text", "text": part})
+            continue
+        if not isinstance(part, dict):
+            continue
+        ptype = str(part.get("type") or "").strip().lower()
+        if ptype in {"text", "input_text", "output_text"}:
+            text = part.get("text")
+            if isinstance(text, str) and text:
+                converted.append({"type": "input_text", "text": text})
+            continue
+        if ptype in {"image_url", "input_image"}:
+            image_ref = part.get("image_url")
+            detail = part.get("detail")
+            if isinstance(image_ref, dict):
+                url = image_ref.get("url")
+                detail = image_ref.get("detail", detail)
+            else:
+                url = image_ref
+            if not isinstance(url, str) or not url:
+                continue
+            image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
+            if isinstance(detail, str) and detail.strip():
+                image_part["detail"] = detail.strip()
+            converted.append(image_part)
+    return converted
+
+
+def _summarize_user_message_for_log(content: Any) -> str:
+    """Return a short text summary of a user message for logging/trajectory.
+
+    Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
+    parts from the API server.  Logging, spinner previews, and trajectory
+    files all want a plain string — this helper extracts the first chunk of
+    text and notes any attached images.  Returns an empty string for empty
+    lists and ``str(content)`` for unexpected scalar types.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        text_bits: List[str] = []
+        image_count = 0
+        for part in content:
+            if isinstance(part, str):
+                if part:
+                    text_bits.append(part)
+                continue
+            if not isinstance(part, dict):
+                continue
+            ptype = str(part.get("type") or "").strip().lower()
+            if ptype in {"text", "input_text", "output_text"}:
+                text = part.get("text")
+                if isinstance(text, str) and text:
+                    text_bits.append(text)
+            elif ptype in {"image_url", "input_image"}:
+                image_count += 1
+        summary = " ".join(text_bits).strip()
+        if image_count:
+            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
+            summary = f"{note} {summary}" if summary else note
+        return summary
+    try:
+        return str(content)
+    except Exception:
+        return ""
+
+
+# ---------------------------------------------------------------------------
+# ID helpers
+# ---------------------------------------------------------------------------
+
+def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
+    """Generate a deterministic call_id from tool call content.
+
+    Used as a fallback when the API doesn't provide a call_id.
+    Deterministic IDs prevent cache invalidation — random UUIDs would
+    make every API call's prefix unique, breaking OpenAI's prompt cache.
+    """
+    seed = f"{fn_name}:{arguments}:{index}"
+    digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12]
+    return f"call_{digest}"
+
+
+def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
+    """Split a stored tool id into (call_id, response_item_id)."""
+    if not isinstance(raw_id, str):
+        return None, None
+    value = raw_id.strip()
+    if not value:
+        return None, None
+    if "|" in value:
+        call_id, response_item_id = value.split("|", 1)
+        call_id = call_id.strip() or None
+        response_item_id = response_item_id.strip() or None
+        return call_id, response_item_id
+    if value.startswith("fc_"):
+        return None, value
+    return value, None
+
+
+def _derive_responses_function_call_id(
+    call_id: str,
+    response_item_id: Optional[str] = None,
+) -> str:
+    """Build a valid Responses `function_call.id` (must start with `fc_`)."""
+    if isinstance(response_item_id, str):
+        candidate = response_item_id.strip()
+        if candidate.startswith("fc_"):
+            return candidate
+
+    source = (call_id or "").strip()
+    if source.startswith("fc_"):
+        return source
+    if source.startswith("call_") and len(source) > len("call_"):
+        return f"fc_{source[len('call_'):]}"
+
+    sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
+    if sanitized.startswith("fc_"):
+        return sanitized
+    if sanitized.startswith("call_") and len(sanitized) > len("call_"):
+        return f"fc_{sanitized[len('call_'):]}"
+    if sanitized:
+        return f"fc_{sanitized[:48]}"
+
+    seed = source or str(response_item_id or "") or uuid.uuid4().hex
+    digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
+    return f"fc_{digest}"
+
+
+# ---------------------------------------------------------------------------
+# Schema conversion
+# ---------------------------------------------------------------------------
+
+def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
+    """Convert chat-completions tool schemas to Responses function-tool schemas."""
+    if not tools:
+        return None
+
+    converted: List[Dict[str, Any]] = []
+    for item in tools:
+        fn = item.get("function", {}) if isinstance(item, dict) else {}
+        name = fn.get("name")
+        if not isinstance(name, str) or not name.strip():
+            continue
+        converted.append({
+            "type": "function",
+            "name": name,
+            "description": fn.get("description", ""),
+            "strict": False,
+            "parameters": fn.get("parameters", {"type": "object", "properties": {}}),
+        })
+    return converted or None
+
+
+# ---------------------------------------------------------------------------
+# Message format conversion
+# ---------------------------------------------------------------------------
+
+def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Convert internal chat-style messages to Responses input items."""
+    items: List[Dict[str, Any]] = []
+    seen_item_ids: set = set()
+
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue
+        role = msg.get("role")
+        if role == "system":
+            continue
+
+        if role in {"user", "assistant"}:
+            content = msg.get("content", "")
+            if isinstance(content, list):
+                content_parts = _chat_content_to_responses_parts(content)
+                content_text = "".join(
+                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
+                )
+            else:
+                content_parts = []
+                content_text = str(content) if content is not None else ""
+
+            if role == "assistant":
+                # Replay encrypted reasoning items from previous turns
+                # so the API can maintain coherent reasoning chains.
+                codex_reasoning = msg.get("codex_reasoning_items")
+                has_codex_reasoning = False
+                if isinstance(codex_reasoning, list):
+                    for ri in codex_reasoning:
+                        if isinstance(ri, dict) and ri.get("encrypted_content"):
+                            item_id = ri.get("id")
+                            if item_id and item_id in seen_item_ids:
+                                continue
+                            # Strip the "id" field — with store=False the
+                            # Responses API cannot look up items by ID and
+                            # returns 404.  The encrypted_content blob is
+                            # self-contained for reasoning chain continuity.
+                            replay_item = {k: v for k, v in ri.items() if k != "id"}
+                            items.append(replay_item)
+                            if item_id:
+                                seen_item_ids.add(item_id)
+                            has_codex_reasoning = True
+
+                if content_parts:
+                    items.append({"role": "assistant", "content": content_parts})
+                elif content_text.strip():
+                    items.append({"role": "assistant", "content": content_text})
+                elif has_codex_reasoning:
+                    # The Responses API requires a following item after each
+                    # reasoning item (otherwise: missing_following_item error).
+                    # When the assistant produced only reasoning with no visible
+                    # content, emit an empty assistant message as the required
+                    # following item.
+                    items.append({"role": "assistant", "content": ""})
+
+                tool_calls = msg.get("tool_calls")
+                if isinstance(tool_calls, list):
+                    for tc in tool_calls:
+                        if not isinstance(tc, dict):
+                            continue
+                        fn = tc.get("function", {})
+                        fn_name = fn.get("name")
+                        if not isinstance(fn_name, str) or not fn_name.strip():
+                            continue
+
+                        embedded_call_id, embedded_response_item_id = _split_responses_tool_id(
+                            tc.get("id")
+                        )
+                        call_id = tc.get("call_id")
+                        if not isinstance(call_id, str) or not call_id.strip():
+                            call_id = embedded_call_id
+                        if not isinstance(call_id, str) or not call_id.strip():
+                            if (
+                                isinstance(embedded_response_item_id, str)
+                                and embedded_response_item_id.startswith("fc_")
+                                and len(embedded_response_item_id) > len("fc_")
+                            ):
+                                call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
+                            else:
+                                _raw_args = str(fn.get("arguments", "{}"))
+                                call_id = _deterministic_call_id(fn_name, _raw_args, len(items))
+                        call_id = call_id.strip()
+
+                        arguments = fn.get("arguments", "{}")
+                        if isinstance(arguments, dict):
+                            arguments = json.dumps(arguments, ensure_ascii=False)
+                        elif not isinstance(arguments, str):
+                            arguments = str(arguments)
+                        arguments = arguments.strip() or "{}"
+
+                        items.append({
+                            "type": "function_call",
+                            "call_id": call_id,
+                            "name": fn_name,
+                            "arguments": arguments,
+                        })
+                continue
+
+            # Non-assistant (user) role: emit multimodal parts when present,
+            # otherwise fall back to the text payload.
+            if content_parts:
+                items.append({"role": role, "content": content_parts})
+            else:
+                items.append({"role": role, "content": content_text})
+            continue
+
+        if role == "tool":
+            raw_tool_call_id = msg.get("tool_call_id")
+            call_id, _ = _split_responses_tool_id(raw_tool_call_id)
+            if not isinstance(call_id, str) or not call_id.strip():
+                if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
+                    call_id = raw_tool_call_id.strip()
+            if not isinstance(call_id, str) or not call_id.strip():
+                continue
+            items.append({
+                "type": "function_call_output",
+                "call_id": call_id,
+                "output": str(msg.get("content", "") or ""),
+            })
+
+    return items
+
+
+# ---------------------------------------------------------------------------
+# Input preflight / validation
+# ---------------------------------------------------------------------------
+
+def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
+    if not isinstance(raw_items, list):
+        raise ValueError("Codex Responses input must be a list of input items.")
+
+    normalized: List[Dict[str, Any]] = []
+    seen_ids: set = set()
+    for idx, item in enumerate(raw_items):
+        if not isinstance(item, dict):
+            raise ValueError(f"Codex Responses input[{idx}] must be an object.")
+
+        item_type = item.get("type")
+        if item_type == "function_call":
+            call_id = item.get("call_id")
+            name = item.get("name")
+            if not isinstance(call_id, str) or not call_id.strip():
+                raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
+            if not isinstance(name, str) or not name.strip():
+                raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
+
+            arguments = item.get("arguments", "{}")
+            if isinstance(arguments, dict):
+                arguments = json.dumps(arguments, ensure_ascii=False)
+            elif not isinstance(arguments, str):
+                arguments = str(arguments)
+            arguments = arguments.strip() or "{}"
+
+            normalized.append(
+                {
+                    "type": "function_call",
+                    "call_id": call_id.strip(),
+                    "name": name.strip(),
+                    "arguments": arguments,
+                }
+            )
+            continue
+
+        if item_type == "function_call_output":
+            call_id = item.get("call_id")
+            if not isinstance(call_id, str) or not call_id.strip():
+                raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
+            output = item.get("output", "")
+            if output is None:
+                output = ""
+            if not isinstance(output, str):
+                output = str(output)
+
+            normalized.append(
+                {
+                    "type": "function_call_output",
+                    "call_id": call_id.strip(),
+                    "output": output,
+                }
+            )
+            continue
+
+        if item_type == "reasoning":
+            encrypted = item.get("encrypted_content")
+            if isinstance(encrypted, str) and encrypted:
+                item_id = item.get("id")
+                if isinstance(item_id, str) and item_id:
+                    if item_id in seen_ids:
+                        continue
+                    seen_ids.add(item_id)
+                reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
+                # Do NOT include the "id" in the outgoing item — with
+                # store=False (our default) the API tries to resolve the
+                # id server-side and returns 404.  The id is still used
+                # above for local deduplication via seen_ids.
+                summary = item.get("summary")
+                if isinstance(summary, list):
+                    reasoning_item["summary"] = summary
+                else:
+                    reasoning_item["summary"] = []
+                normalized.append(reasoning_item)
+            continue
+
+        role = item.get("role")
+        if role in {"user", "assistant"}:
+            content = item.get("content", "")
+            if content is None:
+                content = ""
+            if isinstance(content, list):
+                # Multimodal content from ``_chat_messages_to_responses_input``
+                # is already in Responses format (``input_text`` / ``input_image``).
+                # Validate each part and pass through.
+                validated: List[Dict[str, Any]] = []
+                for part_idx, part in enumerate(content):
+                    if isinstance(part, str):
+                        if part:
+                            validated.append({"type": "input_text", "text": part})
+                        continue
+                    if not isinstance(part, dict):
+                        raise ValueError(
+                            f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
+                        )
+                    ptype = str(part.get("type") or "").strip().lower()
+                    if ptype in {"input_text", "text", "output_text"}:
+                        text = part.get("text", "")
+                        if not isinstance(text, str):
+                            text = str(text or "")
+                        validated.append({"type": "input_text", "text": text})
+                    elif ptype in {"input_image", "image_url"}:
+                        image_ref = part.get("image_url", "")
+                        detail = part.get("detail")
+                        if isinstance(image_ref, dict):
+                            url = image_ref.get("url", "")
+                            detail = image_ref.get("detail", detail)
+                        else:
+                            url = image_ref
+                        if not isinstance(url, str):
+                            url = str(url or "")
+                        image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
+                        if isinstance(detail, str) and detail.strip():
+                            image_part["detail"] = detail.strip()
+                        validated.append(image_part)
+                    else:
+                        raise ValueError(
+                            f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
+                        )
+                normalized.append({"role": role, "content": validated})
+                continue
+            if not isinstance(content, str):
+                content = str(content)
+
+            normalized.append({"role": role, "content": content})
+            continue
+
+        raise ValueError(
+            f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
+        )
+
+    return normalized
+
+
+def _preflight_codex_api_kwargs(
+    api_kwargs: Any,
+    *,
+    allow_stream: bool = False,
+) -> Dict[str, Any]:
+    if not isinstance(api_kwargs, dict):
+        raise ValueError("Codex Responses request must be a dict.")
+
+    required = {"model", "instructions", "input"}
+    missing = [key for key in required if key not in api_kwargs]
+    if missing:
+        raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
+
+    model = api_kwargs.get("model")
+    if not isinstance(model, str) or not model.strip():
+        raise ValueError("Codex Responses request 'model' must be a non-empty string.")
+    model = model.strip()
+
+    instructions = api_kwargs.get("instructions")
+    if instructions is None:
+        instructions = ""
+    if not isinstance(instructions, str):
+        instructions = str(instructions)
+    instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
+
+    normalized_input = _preflight_codex_input_items(api_kwargs.get("input"))
+
+    tools = api_kwargs.get("tools")
+    normalized_tools = None
+    if tools is not None:
+        if not isinstance(tools, list):
+            raise ValueError("Codex Responses request 'tools' must be a list when provided.")
+        normalized_tools = []
+        for idx, tool in enumerate(tools):
+            if not isinstance(tool, dict):
+                raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
+            if tool.get("type") != "function":
+                raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
+
+            name = tool.get("name")
+            parameters = tool.get("parameters")
+            if not isinstance(name, str) or not name.strip():
+                raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
+            if not isinstance(parameters, dict):
+                raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
+
+            description = tool.get("description", "")
+            if description is None:
+                description = ""
+            if not isinstance(description, str):
+                description = str(description)
+
+            strict = tool.get("strict", False)
+            if not isinstance(strict, bool):
+                strict = bool(strict)
+
+            normalized_tools.append(
+                {
+                    "type": "function",
+                    "name": name.strip(),
+                    "description": description,
+                    "strict": strict,
+                    "parameters": parameters,
+                }
+            )
+
+    store = api_kwargs.get("store", False)
+    if store is not False:
+        raise ValueError("Codex Responses contract requires 'store' to be false.")
+
+    allowed_keys = {
+        "model", "instructions", "input", "tools", "store",
+        "reasoning", "include", "max_output_tokens", "temperature",
+        "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
+        "extra_headers",
+    }
+    normalized: Dict[str, Any] = {
+        "model": model,
+        "instructions": instructions,
+        "input": normalized_input,
+        "store": False,
+    }
+    if normalized_tools is not None:
+        normalized["tools"] = normalized_tools
+
+    # Pass through reasoning config
+    reasoning = api_kwargs.get("reasoning")
+    if isinstance(reasoning, dict):
+        normalized["reasoning"] = reasoning
+    include = api_kwargs.get("include")
+    if isinstance(include, list):
+        normalized["include"] = include
+    service_tier = api_kwargs.get("service_tier")
+    if isinstance(service_tier, str) and service_tier.strip():
+        normalized["service_tier"] = service_tier.strip()
+
+    # Pass through max_output_tokens and temperature
+    max_output_tokens = api_kwargs.get("max_output_tokens")
+    if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
+        normalized["max_output_tokens"] = int(max_output_tokens)
+    temperature = api_kwargs.get("temperature")
+    if isinstance(temperature, (int, float)):
+        normalized["temperature"] = float(temperature)
+
+    # Pass through tool_choice, parallel_tool_calls, prompt_cache_key
+    for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
+        val = api_kwargs.get(passthrough_key)
+        if val is not None:
+            normalized[passthrough_key] = val
+
+    extra_headers = api_kwargs.get("extra_headers")
+    if extra_headers is not None:
+        if not isinstance(extra_headers, dict):
+            raise ValueError("Codex Responses request 'extra_headers' must be an object.")
+        normalized_headers: Dict[str, str] = {}
+        for key, value in extra_headers.items():
+            if not isinstance(key, str) or not key.strip():
+                raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
+            if value is None:
+                continue
+            normalized_headers[key.strip()] = str(value)
+        if normalized_headers:
+            normalized["extra_headers"] = normalized_headers
+
+    if allow_stream:
+        stream = api_kwargs.get("stream")
+        if stream is not None and stream is not True:
+            raise ValueError("Codex Responses 'stream' must be true when set.")
+        if stream is True:
+            normalized["stream"] = True
+        allowed_keys.add("stream")
+    elif "stream" in api_kwargs:
+        raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
+
+    unexpected = sorted(key for key in api_kwargs if key not in allowed_keys)
+    if unexpected:
+        raise ValueError(
+            f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
+        )
+
+    return normalized
+
+
+# ---------------------------------------------------------------------------
+# Response extraction helpers
+# ---------------------------------------------------------------------------
+
+def _extract_responses_message_text(item: Any) -> str:
+    """Extract assistant text from a Responses message output item."""
+    content = getattr(item, "content", None)
+    if not isinstance(content, list):
+        return ""
+
+    chunks: List[str] = []
+    for part in content:
+        ptype = getattr(part, "type", None)
+        if ptype not in {"output_text", "text"}:
+            continue
+        text = getattr(part, "text", None)
+        if isinstance(text, str) and text:
+            chunks.append(text)
+    return "".join(chunks).strip()
+
+
+def _extract_responses_reasoning_text(item: Any) -> str:
+    """Extract a compact reasoning text from a Responses reasoning item."""
+    summary = getattr(item, "summary", None)
+    if isinstance(summary, list):
+        chunks: List[str] = []
+        for part in summary:
+            text = getattr(part, "text", None)
+            if isinstance(text, str) and text:
+                chunks.append(text)
+        if chunks:
+            return "\n".join(chunks).strip()
+    text = getattr(item, "text", None)
+    if isinstance(text, str) and text:
+        return text.strip()
+    return ""
+
+
+# ---------------------------------------------------------------------------
+# Full response normalization
+# ---------------------------------------------------------------------------
+
+def _normalize_codex_response(response: Any) -> tuple[Any, str]:
+    """Normalize a Responses API object to an assistant_message-like object."""
+    output = getattr(response, "output", None)
+    if not isinstance(output, list) or not output:
+        # The Codex backend can return empty output when the answer was
+        # delivered entirely via stream events. Check output_text as a
+        # last-resort fallback before raising.
+        out_text = getattr(response, "output_text", None)
+        if isinstance(out_text, str) and out_text.strip():
+            logger.debug(
+                "Codex response has empty output but output_text is present (%d chars); "
+                "synthesizing output item.", len(out_text.strip()),
+            )
+            output = [SimpleNamespace(
+                type="message", role="assistant", status="completed",
+                content=[SimpleNamespace(type="output_text", text=out_text.strip())],
+            )]
+            response.output = output
+        else:
+            raise RuntimeError("Responses API returned no output items")
+
+    response_status = getattr(response, "status", None)
+    if isinstance(response_status, str):
+        response_status = response_status.strip().lower()
+    else:
+        response_status = None
+
+    if response_status in {"failed", "cancelled"}:
+        error_obj = getattr(response, "error", None)
+        if isinstance(error_obj, dict):
+            error_msg = error_obj.get("message") or str(error_obj)
+        else:
+            error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
+        raise RuntimeError(error_msg)
+
+    content_parts: List[str] = []
+    reasoning_parts: List[str] = []
+    reasoning_items_raw: List[Dict[str, Any]] = []
+    tool_calls: List[Any] = []
+    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
+    saw_commentary_phase = False
+    saw_final_answer_phase = False
+
+    for item in output:
+        item_type = getattr(item, "type", None)
+        item_status = getattr(item, "status", None)
+        if isinstance(item_status, str):
+            item_status = item_status.strip().lower()
+        else:
+            item_status = None
+
+        if item_status in {"queued", "in_progress", "incomplete"}:
+            has_incomplete_items = True
+
+        if item_type == "message":
+            item_phase = getattr(item, "phase", None)
+            if isinstance(item_phase, str):
+                normalized_phase = item_phase.strip().lower()
+                if normalized_phase in {"commentary", "analysis"}:
+                    saw_commentary_phase = True
+                elif normalized_phase in {"final_answer", "final"}:
+                    saw_final_answer_phase = True
+            message_text = _extract_responses_message_text(item)
+            if message_text:
+                content_parts.append(message_text)
+        elif item_type == "reasoning":
+            reasoning_text = _extract_responses_reasoning_text(item)
+            if reasoning_text:
+                reasoning_parts.append(reasoning_text)
+            # Capture the full reasoning item for multi-turn continuity.
+            # encrypted_content is an opaque blob the API needs back on
+            # subsequent turns to maintain coherent reasoning chains.
+            encrypted = getattr(item, "encrypted_content", None)
+            if isinstance(encrypted, str) and encrypted:
+                raw_item = {"type": "reasoning", "encrypted_content": encrypted}
+                item_id = getattr(item, "id", None)
+                if isinstance(item_id, str) and item_id:
+                    raw_item["id"] = item_id
+                # Capture summary — required by the API when replaying reasoning items
+                summary = getattr(item, "summary", None)
+                if isinstance(summary, list):
+                    raw_summary = []
+                    for part in summary:
+                        text = getattr(part, "text", None)
+                        if isinstance(text, str):
+                            raw_summary.append({"type": "summary_text", "text": text})
+                    raw_item["summary"] = raw_summary
+                reasoning_items_raw.append(raw_item)
+        elif item_type == "function_call":
+            if item_status in {"queued", "in_progress", "incomplete"}:
+                continue
+            fn_name = getattr(item, "name", "") or ""
+            arguments = getattr(item, "arguments", "{}")
+            if not isinstance(arguments, str):
+                arguments = json.dumps(arguments, ensure_ascii=False)
+            raw_call_id = getattr(item, "call_id", None)
+            raw_item_id = getattr(item, "id", None)
+            embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
+            call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
+            if not isinstance(call_id, str) or not call_id.strip():
+                call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
+            call_id = call_id.strip()
+            response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
+            response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
+            tool_calls.append(SimpleNamespace(
+                id=call_id,
+                call_id=call_id,
+                response_item_id=response_item_id,
+                type="function",
+                function=SimpleNamespace(name=fn_name, arguments=arguments),
+            ))
+        elif item_type == "custom_tool_call":
+            fn_name = getattr(item, "name", "") or ""
+            arguments = getattr(item, "input", "{}")
+            if not isinstance(arguments, str):
+                arguments = json.dumps(arguments, ensure_ascii=False)
+            raw_call_id = getattr(item, "call_id", None)
+            raw_item_id = getattr(item, "id", None)
+            embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
+            call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
+            if not isinstance(call_id, str) or not call_id.strip():
+                call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
+            call_id = call_id.strip()
+            response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
+            response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
+            tool_calls.append(SimpleNamespace(
+                id=call_id,
+                call_id=call_id,
+                response_item_id=response_item_id,
+                type="function",
+                function=SimpleNamespace(name=fn_name, arguments=arguments),
+            ))
+
+    final_text = "\n".join([p for p in content_parts if p]).strip()
+    if not final_text and hasattr(response, "output_text"):
+        out_text = getattr(response, "output_text", "")
+        if isinstance(out_text, str):
+            final_text = out_text.strip()
+
+    assistant_message = SimpleNamespace(
+        content=final_text,
+        tool_calls=tool_calls,
+        reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
+        reasoning_content=None,
+        reasoning_details=None,
+        codex_reasoning_items=reasoning_items_raw or None,
+    )
+
+    if tool_calls:
+        finish_reason = "tool_calls"
+    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
+        finish_reason = "incomplete"
+    elif reasoning_items_raw and not final_text:
+        # Response contains only reasoning (encrypted thinking state) with
+        # no visible content or tool calls.  The model is still thinking and
+        # needs another turn to produce the actual answer.  Marking this as
+        # "stop" would send it into the empty-content retry loop which burns
+        # 3 retries then fails — treat it as incomplete instead so the Codex
+        # continuation path handles it correctly.
+        finish_reason = "incomplete"
+    else:
+        finish_reason = "stop"
+    return assistant_message, finish_reason
@@ -31,6 +31,7 @@ from agent.model_metadata import (
    get_model_context_length,
    estimate_messages_tokens_rough,
 )
+from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

@@ -63,6 +64,47 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _content_text_for_contains(content: Any) -> str:
+    """Return a best-effort text view of message content.
+
+    Used only for substring checks when we need to know whether we've already
+    appended a note to a message. Keeps multimodal lists intact elsewhere.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+        return "\n".join(part for part in parts if part)
+    return str(content)
+
+
+def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
+    """Append or prepend plain text to message content safely.
+
+    Compression sometimes needs to add a note or merge a summary into an
+    existing message. Message content may be plain text or a multimodal list of
+    blocks, so direct string concatenation is not always safe.
+    """
+    if content is None:
+        return text
+    if isinstance(content, str):
+        return text + content if prepend else content + text
+    if isinstance(content, list):
+        text_block = {"type": "text", "text": text}
+        return [text_block, *content] if prepend else [*content, text_block]
+    rendered = str(content)
+    return text + rendered if prepend else rendered + text
+
+
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -550,11 +592,15 @@ class ContextCompressor(ContextEngine):
        Includes tool call arguments and result content (up to
        ``_CONTENT_MAX`` chars per message) so the summarizer can preserve
        specific details like file paths, commands, and outputs.
+
+        All content is redacted before serialization to prevent secrets
+        (API keys, tokens, passwords) from leaking into the summary that
+        gets sent to the auxiliary model and persisted across compactions.
        """
        parts = []
        for msg in turns:
            role = msg.get("role", "unknown")
-            content = msg.get("content") or ""
+            content = redact_sensitive_text(msg.get("content") or "")

            # Tool results: keep enough content for the summarizer
            if role == "tool":
@@ -575,7 +621,7 @@ class ContextCompressor(ContextEngine):
                        if isinstance(tc, dict):
                            fn = tc.get("function", {})
                            name = fn.get("name", "?")
-                            args = fn.get("arguments", "")
+                            args = redact_sensitive_text(fn.get("arguments", ""))
                            # Truncate long arguments but keep enough for context
                            if len(args) > self._TOOL_ARGS_MAX:
                                args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -635,7 +681,11 @@ class ContextCompressor(ContextEngine):
            "only output the structured summary. "
            "Do NOT include any preamble, greeting, or prefix. "
            "Write the summary in the same language the user was using in the "
-            "conversation — do not translate or switch to English."
+            "conversation — do not translate or switch to English. "
+            "NEVER include API keys, tokens, passwords, secrets, credentials, "
+            "or connection strings in the summary — replace any that appear "
+            "with [REDACTED]. Note that the user had credentials present, but "
+            "do not preserve their values."
        )

        # Shared structured template (used by both paths).
@@ -692,7 +742,7 @@ Be specific with file paths, commands, line numbers, and results.]
 [What remains to be done — framed as context, not instructions]

 ## Critical Context
-[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
+[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]

 Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

@@ -732,7 +782,7 @@ Use this exact structure:
            prompt += f"""

 FOCUS TOPIC: "{focus_topic}"
-The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""

        try:
            call_kwargs = {
@@ -755,7 +805,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Handle cases where content is not a string (e.g., dict from llama.cpp)
            if not isinstance(content, str):
                content = str(content) if content else ""
-            summary = content.strip()
+            # Redact the summary output as well — the summarizer LLM may
+            # ignore prompt instructions and echo back secrets verbatim.
+            summary = redact_sensitive_text(content.strip())
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
@@ -796,7 +848,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(messages, summary_budget)  # retry immediately
+                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
@@ -1133,10 +1185,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content") or ""
+                existing = msg.get("content")
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in existing:
-                    msg["content"] = existing + "\n\n" + _compression_note
+                if _compression_note not in _content_text_for_contains(existing):
+                    msg["content"] = _append_text_to_content(
+                        existing,
+                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
+                    )
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1180,12 +1235,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                original = msg.get("content") or ""
-                msg["content"] = (
+                merged_prefix = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                    + original
+                )
+                msg["content"] = _append_text_to_content(
+                    msg.get("content"),
+                    merged_prefix,
+                    prepend=True,
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -483,9 +483,7 @@ def _rg_files(path: Path, cwd: Path, limit: int) -> list[Path] | None:
            text=True,
            timeout=10,
        )
-    except FileNotFoundError:
-        return None
-    except subprocess.TimeoutExpired:
+    except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
        return None
    if result.returncode != 0:
        return None
@@ -21,6 +21,9 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Any

+from agent.file_safety import get_read_block_error, is_write_denied
+from agent.redact import redact_sensitive_text
+
 ACP_MARKER_BASE_URL = "acp://copilot"
 _DEFAULT_TIMEOUT_SECONDS = 900.0

@@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    }


+def _permission_denied(message_id: Any) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "result": {
+            "outcome": {
+                "outcome": "cancelled",
+            }
+        },
+    }
+
+
 def _format_messages_as_prompt(
    messages: list[dict[str, Any]],
    model: str | None = None,
@@ -386,6 +401,8 @@ class CopilotACPClient:
        stderr_tail: deque[str] = deque(maxlen=40)

        def _stdout_reader() -> None:
+            if proc.stdout is None:
+                return
            for line in proc.stdout:
                try:
                    inbox.put(json.loads(line))
@@ -533,18 +550,13 @@ class CopilotACPClient:
        params = msg.get("params") or {}

        if method == "session/request_permission":
-            response = {
-                "jsonrpc": "2.0",
-                "id": message_id,
-                "result": {
-                    "outcome": {
-                        "outcome": "allow_once",
-                    }
-                },
-            }
+            response = _permission_denied(message_id)
        elif method == "fs/read_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                block_error = get_read_block_error(str(path))
+                if block_error:
+                    raise PermissionError(block_error)
                content = path.read_text() if path.exists() else ""
                line = params.get("line")
                limit = params.get("limit")
@@ -553,6 +565,8 @@ class CopilotACPClient:
                    start = line - 1
                    end = start + limit if isinstance(limit, int) and limit > 0 else None
                    content = "".join(lines[start:end])
+                if content:
+                    content = redact_sensitive_text(content)
                response = {
                    "jsonrpc": "2.0",
                    "id": message_id,
@@ -565,6 +579,10 @@ class CopilotACPClient:
        elif method == "fs/write_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                if is_write_denied(str(path)):
+                    raise PermissionError(
+                        f"Write denied: '{path}' is a protected system/credential file."
+                    )
                path.parent.mkdir(parents=True, exist_ok=True)
                path.write_text(str(params.get("content") or ""))
                response = {
@@ -983,6 +983,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
    active_sources: Set[str] = set()
    auth_store = _load_auth_store()

+    # Shared suppression gate — used at every upsert site so
+    # `hermes auth remove <provider> <N>` is stable across all source types.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_suppressed
+    except ImportError:
+        def _is_suppressed(_p, _s):  # type: ignore[misc]
+            return False
+
    if provider == "anthropic":
        # Only auto-discover external credentials (Claude Code, Hermes PKCE)
        # when the user has explicitly configured anthropic as their provider.
@@ -1002,13 +1010,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            ("claude_code", read_claude_code_credentials()),
        ):
            if creds and creds.get("accessToken"):
-                # Check if user explicitly removed this source
-                try:
-                    from hermes_cli.auth import is_source_suppressed
-                    if is_source_suppressed(provider, source_name):
-                        continue
-                except ImportError:
-                    pass
+                if _is_suppressed(provider, source_name):
+                    continue
                active_sources.add(source_name)
                changed |= _upsert_entry(
                    entries,
@@ -1026,7 +1029,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup

    elif provider == "nous":
        state = _load_provider_state(auth_store, "nous")
-        if state:
+        if state and not _is_suppressed(provider, "device_code"):
            active_sources.add("device_code")
            # Prefer a user-supplied label embedded in the singleton state
            # (set by persist_nous_credentials(label=...) when the user ran
@@ -1067,20 +1070,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token, source = resolve_copilot_token()
            if token:
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
-                active_sources.add(source_name)
-                pconfig = PROVIDER_REGISTRY.get(provider)
-                changed |= _upsert_entry(
-                    entries,
-                    provider,
-                    source_name,
-                    {
-                        "source": source_name,
-                        "auth_type": AUTH_TYPE_API_KEY,
-                        "access_token": token,
-                        "base_url": pconfig.inference_base_url if pconfig else "",
-                        "label": source,
-                    },
-                )
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    pconfig = PROVIDER_REGISTRY.get(provider)
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_API_KEY,
+                            "access_token": token,
+                            "base_url": pconfig.inference_base_url if pconfig else "",
+                            "label": source,
+                        },
+                    )
        except Exception as exc:
            logger.debug("Copilot token seed failed: %s", exc)

@@ -1096,20 +1100,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token = creds.get("api_key", "")
            if token:
                source_name = creds.get("source", "qwen-cli")
-                active_sources.add(source_name)
-                changed |= _upsert_entry(
-                    entries,
-                    provider,
-                    source_name,
-                    {
-                        "source": source_name,
-                        "auth_type": AUTH_TYPE_OAUTH,
-                        "access_token": token,
-                        "expires_at_ms": creds.get("expires_at_ms"),
-                        "base_url": creds.get("base_url", ""),
-                        "label": creds.get("auth_file", source_name),
-                    },
-                )
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_OAUTH,
+                            "access_token": token,
+                            "expires_at_ms": creds.get("expires_at_ms"),
+                            "base_url": creds.get("base_url", ""),
+                            "label": creds.get("auth_file", source_name),
+                        },
+                    )
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

@@ -1118,13 +1123,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # the device_code source as suppressed so it won't be re-seeded from
        # the Hermes auth store.  Without this gate the removal is instantly
        # undone on the next load_pool() call.
-        codex_suppressed = False
-        try:
-            from hermes_cli.auth import is_source_suppressed
-            codex_suppressed = is_source_suppressed(provider, "device_code")
-        except ImportError:
-            pass
-        if codex_suppressed:
+        if _is_suppressed(provider, "device_code"):
            return changed, active_sources

        state = _load_provider_state(auth_store, "openai-codex")
@@ -1158,10 +1157,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
    changed = False
    active_sources: Set[str] = set()
+    # Honour user suppression — `hermes auth remove <provider> <N>` for an
+    # env-seeded credential marks the env:<VAR> source as suppressed so it
+    # won't be re-seeded from the user's shell environment or ~/.hermes/.env.
+    # Without this gate the removal is silently undone on the next
+    # load_pool() call whenever the var is still exported by the shell.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
+    except ImportError:
+        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
+            return False
    if provider == "openrouter":
        token = os.getenv("OPENROUTER_API_KEY", "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
+            if _is_source_suppressed(provider, source):
+                return changed, active_sources
            active_sources.add(source)
            changed |= _upsert_entry(
                entries,
@@ -1198,6 +1209,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        if not token:
            continue
        source = f"env:{env_var}"
+        if _is_source_suppressed(provider, source):
+            continue
        active_sources.add(source)
        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
@@ -1242,6 +1255,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
    changed = False
    active_sources: Set[str] = set()

+    # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_suppressed
+    except ImportError:
+        def _is_suppressed(_p, _s):  # type: ignore[misc]
+            return False
+
    # Seed from the custom_providers config entry's api_key field
    cp_config = _get_custom_provider_config(pool_key)
    if cp_config:
@@ -1250,19 +1270,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
        name = str(cp_config.get("name") or "").strip()
        if api_key:
            source = f"config:{name}"
-            active_sources.add(source)
-            changed |= _upsert_entry(
-                entries,
-                pool_key,
-                source,
-                {
-                    "source": source,
-                    "auth_type": AUTH_TYPE_API_KEY,
-                    "access_token": api_key,
-                    "base_url": base_url,
-                    "label": name or source,
-                },
-            )
+            if not _is_suppressed(pool_key, source):
+                active_sources.add(source)
+                changed |= _upsert_entry(
+                    entries,
+                    pool_key,
+                    source,
+                    {
+                        "source": source,
+                        "auth_type": AUTH_TYPE_API_KEY,
+                        "access_token": api_key,
+                        "base_url": base_url,
+                        "label": name or source,
+                    },
+                )

    # Seed from model.api_key if model.provider=='custom' and model.base_url matches
    try:
@@ -1282,19 +1303,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
                matched_key = get_custom_provider_pool_key(model_base_url)
                if matched_key == pool_key:
                    source = "model_config"
-                    active_sources.add(source)
-                    changed |= _upsert_entry(
-                        entries,
-                        pool_key,
-                        source,
-                        {
-                            "source": source,
-                            "auth_type": AUTH_TYPE_API_KEY,
-                            "access_token": model_api_key,
-                            "base_url": model_base_url,
-                            "label": "model_config",
-                        },
-                    )
+                    if not _is_suppressed(pool_key, source):
+                        active_sources.add(source)
+                        changed |= _upsert_entry(
+                            entries,
+                            pool_key,
+                            source,
+                            {
+                                "source": source,
+                                "auth_type": AUTH_TYPE_API_KEY,
+                                "access_token": model_api_key,
+                                "base_url": model_base_url,
+                                "label": "model_config",
+                            },
+                        )
    except Exception:
        pass

@@ -0,0 +1,401 @@
+"""Unified removal contract for every credential source Hermes reads from.
+
+Hermes seeds its credential pool from many places:
+
+    env:<VAR>     — os.environ / ~/.hermes/.env
+    claude_code   — ~/.claude/.credentials.json
+    hermes_pkce   — ~/.hermes/.anthropic_oauth.json
+    device_code   — auth.json providers.<provider> (nous, openai-codex, ...)
+    qwen-cli      — ~/.qwen/oauth_creds.json
+    gh_cli        — gh auth token
+    config:<name> — custom_providers config entry
+    model_config  — model.api_key when model.provider == "custom"
+    manual        — user ran `hermes auth add`
+
+Each source has its own reader inside ``agent.credential_pool._seed_from_*``
+(which keep their existing shape — we haven't restructured them).  What we
+unify here is **removal**:
+
+    ``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
+
+Before this module, every source had an ad-hoc removal branch in
+``auth_remove_command``, and several sources had no branch at all — so
+``auth remove`` silently reverted on the next ``load_pool()`` call for
+qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
+custom-config sources.
+
+Now every source registers a ``RemovalStep`` that does exactly three things
+in the same shape:
+
+    1. Clean up whatever externally-readable state the source reads from
+       (.env line, auth.json block, OAuth file, etc.)
+    2. Suppress the ``(provider, source_id)`` in auth.json so the
+       corresponding ``_seed_from_*`` branch skips the upsert on re-load
+    3. Return ``RemovalResult`` describing what was cleaned and any
+       diagnostic hints the user should see (shell-exported env vars,
+       external credential files we deliberately don't delete, etc.)
+
+Adding a new credential source is:
+    - wire up a reader branch in ``_seed_from_*`` (existing pattern)
+    - gate that reader behind ``is_source_suppressed(provider, source_id)``
+    - register a ``RemovalStep`` here
+
+No more per-source if/elif chain in ``auth_remove_command``.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Callable, List, Optional
+
+
+@dataclass
+class RemovalResult:
+    """Outcome of removing a credential source.
+
+    Attributes:
+        cleaned: Short strings describing external state that was actually
+            mutated (``"Cleared XAI_API_KEY from .env"``,
+            ``"Cleared openai-codex OAuth tokens from auth store"``).
+            Printed as plain lines to the user.
+        hints: Diagnostic lines ABOUT state the user may need to clean up
+            themselves or is deliberately left intact (shell-exported env
+            var, Claude Code credential file we don't delete, etc.).
+            Printed as plain lines to the user.  Always non-destructive.
+        suppress: Whether to call ``suppress_credential_source`` after
+            cleanup so future ``load_pool`` calls skip this source.
+            Default True — almost every source needs this to stay sticky.
+            The only legitimate False is ``manual`` entries, which aren't
+            seeded from anywhere external.
+    """
+
+    cleaned: List[str] = field(default_factory=list)
+    hints: List[str] = field(default_factory=list)
+    suppress: bool = True
+
+
+@dataclass
+class RemovalStep:
+    """How to remove one specific credential source cleanly.
+
+    Attributes:
+        provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
+            Special value ``"*"`` means "matches any provider" — used for
+            sources like ``manual`` that aren't provider-specific.
+        source_id: Source identifier as it appears in
+            ``PooledCredential.source``.  May be a literal (``"claude_code"``)
+            or a prefix pattern matched via ``match_fn``.
+        match_fn: Optional predicate overriding literal ``source_id``
+            matching.  Gets the removed entry's source string.  Used for
+            ``env:*`` (any env-seeded key), ``config:*`` (any custom
+            pool), and ``manual:*`` (any manual-source variant).
+        remove_fn: ``(provider, removed_entry) -> RemovalResult``.  Does the
+            actual cleanup and returns what happened for the user.
+        description: One-line human-readable description for docs / tests.
+    """
+
+    provider: str
+    source_id: str
+    remove_fn: Callable[..., RemovalResult]
+    match_fn: Optional[Callable[[str], bool]] = None
+    description: str = ""
+
+    def matches(self, provider: str, source: str) -> bool:
+        if self.provider != "*" and self.provider != provider:
+            return False
+        if self.match_fn is not None:
+            return self.match_fn(source)
+        return source == self.source_id
+
+
+_REGISTRY: List[RemovalStep] = []
+
+
+def register(step: RemovalStep) -> RemovalStep:
+    _REGISTRY.append(step)
+    return step
+
+
+def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
+    """Return the first matching RemovalStep, or None if unregistered.
+
+    Unregistered sources fall through to the default remove path in
+    ``auth_remove_command``: the pool entry is already gone (that happens
+    before dispatch), no external cleanup, no suppression.  This is the
+    correct behaviour for ``manual`` entries — they were only ever stored
+    in the pool, nothing external to clean up.
+    """
+    for step in _REGISTRY:
+        if step.matches(provider, source):
+            return step
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Individual RemovalStep implementations — one per source.
+# ---------------------------------------------------------------------------
+# Each remove_fn is intentionally small and single-purpose.  Adding a new
+# credential source means adding ONE entry here — no other changes to
+# auth_remove_command.
+
+
+def _remove_env_source(provider: str, removed) -> RemovalResult:
+    """env:<VAR> — the most common case.
+
+    Handles three user situations:
+      1. Var lives only in ~/.hermes/.env  → clear it
+      2. Var lives only in the user's shell (shell profile, systemd
+         EnvironmentFile, launchd plist) → hint them where to unset it
+      3. Var lives in both → clear from .env, hint about shell
+    """
+    from hermes_cli.config import get_env_path, remove_env_value
+
+    result = RemovalResult()
+    env_var = removed.source[len("env:"):]
+    if not env_var:
+        return result
+
+    # Detect shell vs .env BEFORE remove_env_value pops os.environ.
+    env_in_process = bool(os.getenv(env_var))
+    env_in_dotenv = False
+    try:
+        env_path = get_env_path()
+        if env_path.exists():
+            env_in_dotenv = any(
+                line.strip().startswith(f"{env_var}=")
+                for line in env_path.read_text(errors="replace").splitlines()
+            )
+    except OSError:
+        pass
+    shell_exported = env_in_process and not env_in_dotenv
+
+    cleared = remove_env_value(env_var)
+    if cleared:
+        result.cleaned.append(f"Cleared {env_var} from .env")
+
+    if shell_exported:
+        result.hints.extend([
+            f"Note: {env_var} is still set in your shell environment "
+            f"(not in ~/.hermes/.env).",
+            "  Unset it there (shell profile, systemd EnvironmentFile, "
+            "launchd plist, etc.) or it will keep being visible to Hermes.",
+            f"  The pool entry is now suppressed — Hermes will ignore "
+            f"{env_var} until you run `hermes auth add {provider}`.",
+        ])
+    else:
+        result.hints.append(
+            f"Suppressed env:{env_var} — it will not be re-seeded even "
+            f"if the variable is re-exported later."
+        )
+    return result
+
+
+def _remove_claude_code(provider: str, removed) -> RemovalResult:
+    """~/.claude/.credentials.json is owned by Claude Code itself.
+
+    We don't delete it — the user's Claude Code install still needs to
+    work.  We just suppress it so Hermes stops reading it.
+    """
+    return RemovalResult(hints=[
+        "Suppressed claude_code credential — it will not be re-seeded.",
+        "Note: Claude Code credentials still live in ~/.claude/.credentials.json",
+        "Run `hermes auth add anthropic` to re-enable if needed.",
+    ])
+
+
+def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
+    """~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
+    from hermes_constants import get_hermes_home
+
+    result = RemovalResult()
+    oauth_file = get_hermes_home() / ".anthropic_oauth.json"
+    if oauth_file.exists():
+        try:
+            oauth_file.unlink()
+            result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
+        except OSError as exc:
+            result.hints.append(f"Could not delete {oauth_file}: {exc}")
+    return result
+
+
+def _clear_auth_store_provider(provider: str) -> bool:
+    """Delete auth_store.providers[provider].  Returns True if deleted."""
+    from hermes_cli.auth import (
+        _auth_store_lock,
+        _load_auth_store,
+        _save_auth_store,
+    )
+
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        providers_dict = auth_store.get("providers")
+        if isinstance(providers_dict, dict) and provider in providers_dict:
+            del providers_dict[provider]
+            _save_auth_store(auth_store)
+            return True
+    return False
+
+
+def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
+    """Nous OAuth lives in auth.json providers.nous — clear it and suppress.
+
+    We suppress in addition to clearing because nothing else stops the
+    user's next `hermes login` run from writing providers.nous again
+    before they decide to.  Suppression forces them to go through
+    `hermes auth add nous` to re-engage, which is the documented re-add
+    path and clears the suppression atomically.
+    """
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    return result
+
+
+def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
+    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
+
+    refresh_codex_oauth_pure() writes both every time, so clearing only
+    the Hermes auth store is not enough — _seed_from_singletons() would
+    re-import from ~/.codex/auth.json on the next load_pool() call and
+    the removal would be instantly undone.  We suppress instead of
+    deleting Codex CLI's file, so the Codex CLI itself keeps working.
+
+    The canonical source name in ``_seed_from_singletons`` is
+    ``"device_code"`` (no prefix).  Entries may show up in the pool as
+    either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
+    via ``hermes auth add openai-codex``), but in both cases the re-seed
+    gate lives at the ``"device_code"`` suppression key.  We suppress
+    that canonical key here; the central dispatcher also suppresses
+    ``removed.source`` which is fine — belt-and-suspenders, idempotent.
+    """
+    from hermes_cli.auth import suppress_credential_source
+
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    # Suppress the canonical re-seed source, not just whatever source the
+    # removed entry had.  Otherwise `manual:device_code` removals wouldn't
+    # block the `device_code` re-seed path.
+    suppress_credential_source(provider, "device_code")
+    result.hints.extend([
+        "Suppressed openai-codex device_code source — it will not be re-seeded.",
+        "Note: Codex CLI credentials still live in ~/.codex/auth.json",
+        "Run `hermes auth add openai-codex` to re-enable if needed.",
+    ])
+    return result
+
+
+def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
+    """~/.qwen/oauth_creds.json is owned by the Qwen CLI.
+
+    Same pattern as claude_code — suppress, don't delete.  The user's
+    Qwen CLI install still reads from that file.
+    """
+    return RemovalResult(hints=[
+        "Suppressed qwen-cli credential — it will not be re-seeded.",
+        "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
+        "Run `hermes auth add qwen-oauth` to re-enable if needed.",
+    ])
+
+
+def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
+    """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
+
+    Copilot is special: the same token can be seeded as multiple source
+    entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
+    ``_seed_from_env``), so removing one entry without suppressing the
+    others lets the duplicates resurrect.  We suppress ALL known copilot
+    sources here so removal is stable regardless of which entry the
+    user clicked.
+
+    We don't touch the user's gh CLI or shell state — just suppress so
+    Hermes stops picking the token up.
+    """
+    # Suppress ALL copilot source variants up-front so no path resurrects
+    # the pool entry.  The central dispatcher in auth_remove_command will
+    # ALSO suppress removed.source, but it's idempotent so double-calling
+    # is harmless.
+    from hermes_cli.auth import suppress_credential_source
+    suppress_credential_source(provider, "gh_cli")
+    for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
+        suppress_credential_source(provider, f"env:{env_var}")
+
+    return RemovalResult(hints=[
+        "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
+        "Note: Your gh CLI / shell environment is unchanged.",
+        "Run `hermes auth add copilot` to re-enable if needed.",
+    ])
+
+
+def _remove_custom_config(provider: str, removed) -> RemovalResult:
+    """Custom provider pools are seeded from custom_providers config or
+    model.api_key.  Both are in config.yaml — modifying that from here
+    is more invasive than suppression.  We suppress; the user can edit
+    config.yaml if they want to remove the key from disk entirely.
+    """
+    source_label = removed.source
+    return RemovalResult(hints=[
+        f"Suppressed {source_label} — it will not be re-seeded.",
+        "Note: The underlying value in config.yaml is unchanged.  Edit it "
+        "directly if you want to remove the credential from disk.",
+    ])
+
+
+def _register_all_sources() -> None:
+    """Called once on module import.
+
+    ORDER MATTERS — ``find_removal_step`` returns the first match.  Put
+    provider-specific steps before the generic ``env:*`` step so that e.g.
+    copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
+    doesn't touch the user's shell), not the generic env-var removal
+    (which would try to clear .env).
+    """
+    register(RemovalStep(
+        provider="copilot", source_id="gh_cli",
+        match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
+        remove_fn=_remove_copilot_gh,
+        description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
+    ))
+    register(RemovalStep(
+        provider="*", source_id="env:",
+        match_fn=lambda src: src.startswith("env:"),
+        remove_fn=_remove_env_source,
+        description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
+    ))
+    register(RemovalStep(
+        provider="anthropic", source_id="claude_code",
+        remove_fn=_remove_claude_code,
+        description="~/.claude/.credentials.json",
+    ))
+    register(RemovalStep(
+        provider="anthropic", source_id="hermes_pkce",
+        remove_fn=_remove_hermes_pkce,
+        description="~/.hermes/.anthropic_oauth.json",
+    ))
+    register(RemovalStep(
+        provider="nous", source_id="device_code",
+        remove_fn=_remove_nous_device_code,
+        description="auth.json providers.nous",
+    ))
+    register(RemovalStep(
+        provider="openai-codex", source_id="device_code",
+        match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
+        remove_fn=_remove_codex_device_code,
+        description="auth.json providers.openai-codex + ~/.codex/auth.json",
+    ))
+    register(RemovalStep(
+        provider="qwen-oauth", source_id="qwen-cli",
+        remove_fn=_remove_qwen_cli,
+        description="~/.qwen/oauth_creds.json",
+    ))
+    register(RemovalStep(
+        provider="*", source_id="config:",
+        match_fn=lambda src: src.startswith("config:") or src == "model_config",
+        remove_fn=_remove_custom_config,
+        description="Custom provider config.yaml api_key field",
+    ))
+
+
+_register_all_sources()
@@ -225,9 +225,11 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
            content = _oneline(args.get("content", ""))
            return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
        elif action == "replace":
-            return f"~{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
+            old = _oneline(args.get("old_text") or "") or "<missing old_text>"
+            return f"~{target}: \"{old[:20]}\""
        elif action == "remove":
-            return f"-{target}: \"{_oneline(args.get('old_text', '')[:20])}\""
+            old = _oneline(args.get("old_text") or "") or "<missing old_text>"
+            return f"-{target}: \"{old[:20]}\""
        return action

    if tool_name == "send_message":
@@ -939,9 +941,13 @@ def get_cute_tool_message(
        if action == "add":
            return _wrap(f"┊ 🧠 memory    +{target}: \"{_trunc(args.get('content', ''), 30)}\"  {dur}")
        elif action == "replace":
-            return _wrap(f"┊ 🧠 memory    ~{target}: \"{_trunc(args.get('old_text', ''), 20)}\"  {dur}")
+            old = args.get("old_text") or ""
+            old = old if old else "<missing old_text>"
+            return _wrap(f"┊ 🧠 memory    ~{target}: \"{_trunc(old, 20)}\"  {dur}")
        elif action == "remove":
-            return _wrap(f"┊ 🧠 memory    -{target}: \"{_trunc(args.get('old_text', ''), 20)}\"  {dur}")
+            old = args.get("old_text") or ""
+            old = old if old else "<missing old_text>"
+            return _wrap(f"┊ 🧠 memory    -{target}: \"{_trunc(old, 20)}\"  {dur}")
        return _wrap(f"┊ 🧠 memory    {action}  {dur}")
    if tool_name == "skills_list":
        return _wrap(f"┊ 📚 skills    list {args.get('category', 'all')}  {dur}")
@@ -220,12 +220,25 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
+    # SSL/TLS transport errors — transient mid-stream handshake/record
+    # failures that should retry rather than surface as a stalled session.
+    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
+    # the type names here so provider-wrapped SSL errors (e.g. when the
+    # SDK re-raises without preserving the exception chain) still classify
+    # as transport rather than falling through to the unknown bucket.
+    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
+    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level)
+# Server disconnect patterns (no status code, but transport-level).
+# These are the "ambiguous" patterns — a plain connection close could be
+# transient transport hiccup OR server-side context overflow rejection
+# (common when the API gateway disconnects instead of returning an HTTP
+# error for oversized requests).  A large session + one of these patterns
+# triggers the context-overflow-with-compression recovery path.
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -236,6 +249,40 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

+# SSL/TLS transient failure patterns — intentionally distinct from
+# _SERVER_DISCONNECT_PATTERNS above.
+#
+# An SSL alert mid-stream is almost always a transport-layer hiccup
+# (flaky network, mid-session TLS renegotiation failure, load balancer
+# dropping the connection) — NOT a server-side context overflow signal.
+# So we want the retry path but NOT the compression path; lumping these
+# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
+# expensive) context compression on any large-session SSL hiccup.
+#
+# The OpenSSL library constructs error codes by prepending a format string
+# to the uppercased alert reason; OpenSSL 3.x changed the separator
+# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+# which silently stopped matching anything explicit.  Matching on the
+# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
+# survives future OpenSSL format churn without code changes.
+_SSL_TRANSIENT_PATTERNS = [
+    # Space-separated (human-readable form, Python ssl module, most SDKs)
+    "bad record mac",
+    "ssl alert",
+    "tls alert",
+    "ssl handshake failure",
+    "tlsv1 alert",
+    "sslv3 alert",
+    # Underscore-separated (OpenSSL error code tokens, e.g.
+    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
+    "bad_record_mac",
+    "ssl_alert",
+    "tls_alert",
+    "tls_alert_internal_error",
+    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
+    "[ssl:",
+]
+

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -255,9 +302,10 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. Transport error heuristics
+      5. SSL/TLS transient alert patterns → retry as timeout
      6. Server disconnect + large session → context overflow
-      7. Fallback: unknown (retryable with backoff)
+      7. Transport error heuristics
+      8. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -290,7 +338,7 @@ def classify_api_error(
    if isinstance(body, dict):
        _err_obj = body.get("error", {})
        if isinstance(_err_obj, dict):
-            _body_msg = (_err_obj.get("message") or "").lower()
+            _body_msg = str(_err_obj.get("message") or "").lower()
            # Parse metadata.raw for wrapped provider errors
            _metadata = _err_obj.get("metadata", {})
            if isinstance(_metadata, dict):
@@ -302,11 +350,11 @@ def classify_api_error(
                        if isinstance(_inner, dict):
                            _inner_err = _inner.get("error", {})
                            if isinstance(_inner_err, dict):
-                                _metadata_msg = (_inner_err.get("message") or "").lower()
+                                _metadata_msg = str(_inner_err.get("message") or "").lower()
                    except (json.JSONDecodeError, TypeError):
                        pass
        if not _body_msg:
-            _body_msg = (body.get("message") or "").lower()
+            _body_msg = str(body.get("message") or "").lower()
    # Combine all message sources for pattern matching
    parts = [_raw_msg]
    if _body_msg and _body_msg not in _raw_msg:
@@ -388,7 +436,18 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. Server disconnect + large session → context overflow ─────
+    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
+    # SSL alerts mid-stream are transport hiccups, not server-side context
+    # overflow signals.  Classify before the disconnect check so a large
+    # session doesn't incorrectly trigger context compression when the real
+    # cause is a flaky TLS handshake.  Also matches when the error is
+    # wrapped in a generic exception whose message string carries the SSL
+    # alert text but the type isn't ssl.SSLError (happens with some SDKs
+    # that re-raise without chaining).
+    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
+        return _result(FailoverReason.timeout, retryable=True)
+
+    # ── 6. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -405,12 +464,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 6. Transport / timeout heuristics ───────────────────────────
+    # ── 7. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Fallback: unknown ────────────────────────────────────────
+    # ── 8. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -470,11 +529,16 @@ def _classify_by_status(
                retryable=False,
                should_fallback=True,
            )
-        # Generic 404 — could be model or endpoint
+        # Generic 404 with no "model not found" signal — could be a wrong
+        # endpoint path (common with local llama.cpp / Ollama / vLLM when
+        # the URL is slightly misconfigured), a proxy routing glitch, or
+        # a transient backend issue.  Classifying these as model_not_found
+        # silently falls back to a different provider and tells the model
+        # the model is missing, which is wrong and wastes a turn.  Treat
+        # as unknown so the retry loop surfaces the real error instead.
        return result_fn(
-            FailoverReason.model_not_found,
-            retryable=False,
-            should_fallback=True,
+            FailoverReason.unknown,
+            retryable=True,
        )

    if status_code == 413:
@@ -606,10 +670,10 @@ def _classify_400(
    if isinstance(body, dict):
        err_obj = body.get("error", {})
        if isinstance(err_obj, dict):
-            err_body_msg = (err_obj.get("message") or "").strip().lower()
+            err_body_msg = str(err_obj.get("message") or "").strip().lower()
        # Responses API (and some providers) use flat body: {"message": "..."}
        if not err_body_msg:
-            err_body_msg = (body.get("message") or "").strip().lower()
+            err_body_msg = str(body.get("message") or "").strip().lower()
    is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "")
    is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80

@@ -0,0 +1,111 @@
+"""Shared file safety rules used by both tools and ACP shims."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Optional
+
+
+def _hermes_home_path() -> Path:
+    """Resolve the active HERMES_HOME (profile-aware) without circular imports."""
+    try:
+        from hermes_constants import get_hermes_home  # local import to avoid cycles
+        return get_hermes_home()
+    except Exception:
+        return Path(os.path.expanduser("~/.hermes"))
+
+
+def build_write_denied_paths(home: str) -> set[str]:
+    """Return exact sensitive paths that must never be written."""
+    hermes_home = _hermes_home_path()
+    return {
+        os.path.realpath(p)
+        for p in [
+            os.path.join(home, ".ssh", "authorized_keys"),
+            os.path.join(home, ".ssh", "id_rsa"),
+            os.path.join(home, ".ssh", "id_ed25519"),
+            os.path.join(home, ".ssh", "config"),
+            str(hermes_home / ".env"),
+            os.path.join(home, ".bashrc"),
+            os.path.join(home, ".zshrc"),
+            os.path.join(home, ".profile"),
+            os.path.join(home, ".bash_profile"),
+            os.path.join(home, ".zprofile"),
+            os.path.join(home, ".netrc"),
+            os.path.join(home, ".pgpass"),
+            os.path.join(home, ".npmrc"),
+            os.path.join(home, ".pypirc"),
+            "/etc/sudoers",
+            "/etc/passwd",
+            "/etc/shadow",
+        ]
+    }
+
+
+def build_write_denied_prefixes(home: str) -> list[str]:
+    """Return sensitive directory prefixes that must never be written."""
+    return [
+        os.path.realpath(p) + os.sep
+        for p in [
+            os.path.join(home, ".ssh"),
+            os.path.join(home, ".aws"),
+            os.path.join(home, ".gnupg"),
+            os.path.join(home, ".kube"),
+            "/etc/sudoers.d",
+            "/etc/systemd",
+            os.path.join(home, ".docker"),
+            os.path.join(home, ".azure"),
+            os.path.join(home, ".config", "gh"),
+        ]
+    ]
+
+
+def get_safe_write_root() -> Optional[str]:
+    """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
+    root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
+    if not root:
+        return None
+    try:
+        return os.path.realpath(os.path.expanduser(root))
+    except Exception:
+        return None
+
+
+def is_write_denied(path: str) -> bool:
+    """Return True if path is blocked by the write denylist or safe root."""
+    home = os.path.realpath(os.path.expanduser("~"))
+    resolved = os.path.realpath(os.path.expanduser(str(path)))
+
+    if resolved in build_write_denied_paths(home):
+        return True
+    for prefix in build_write_denied_prefixes(home):
+        if resolved.startswith(prefix):
+            return True
+
+    safe_root = get_safe_write_root()
+    if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
+        return True
+
+    return False
+
+
+def get_read_block_error(path: str) -> Optional[str]:
+    """Return an error message when a read targets internal Hermes cache files."""
+    resolved = Path(path).expanduser().resolve()
+    hermes_home = _hermes_home_path().resolve()
+    blocked_dirs = [
+        hermes_home / "skills" / ".hub" / "index-cache",
+        hermes_home / "skills" / ".hub",
+    ]
+    for blocked in blocked_dirs:
+        try:
+            resolved.relative_to(blocked)
+        except ValueError:
+            continue
+        return (
+            f"Access denied: {path} is an internal Hermes cache file "
+            "and cannot be read directly to prevent prompt injection. "
+            "Use the skills_list or skill_view tools instead."
+        )
+    return None
@@ -39,6 +39,7 @@ from typing import Any, Dict, Iterator, List, Optional
 import httpx

 from agent import google_oauth
+from agent.gemini_schema import sanitize_gemini_tool_parameters
 from agent.google_code_assist import (
    CODE_ASSIST_ENDPOINT,
    FREE_TIER_ID,
@@ -205,7 +206,7 @@ def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
            decl["description"] = str(fn["description"])
        params = fn.get("parameters")
        if isinstance(params, dict):
-            decl["parameters"] = params
+            decl["parameters"] = sanitize_gemini_tool_parameters(params)
        declarations.append(decl)
    if not declarations:
        return []
@@ -504,9 +505,16 @@ def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
 def _translate_stream_event(
    event: Dict[str, Any],
    model: str,
-    tool_call_indices: Dict[str, int],
+    tool_call_counter: List[int],
 ) -> List[_GeminiStreamChunk]:
-    """Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s)."""
+    """Unwrap Code Assist envelope and emit OpenAI-shaped chunk(s).
+
+    ``tool_call_counter`` is a single-element list used as a mutable counter
+    across events in the same stream. Each ``functionCall`` part gets a
+    fresh, unique OpenAI ``index`` — keying by function name would collide
+    whenever the model issues parallel calls to the same tool (e.g. reading
+    three files in one turn).
+    """
    inner = event.get("response") if isinstance(event.get("response"), dict) else event
    candidates = inner.get("candidates") or []
    if not candidates:
@@ -532,7 +540,8 @@ def _translate_stream_event(
        fc = part.get("functionCall")
        if isinstance(fc, dict) and fc.get("name"):
            name = str(fc["name"])
-            idx = tool_call_indices.setdefault(name, len(tool_call_indices))
+            idx = tool_call_counter[0]
+            tool_call_counter[0] += 1
            try:
                args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
            except (TypeError, ValueError):
@@ -549,7 +558,7 @@ def _translate_stream_event(
    finish_reason_raw = str(cand.get("finishReason") or "")
    if finish_reason_raw:
        mapped = _map_gemini_finish_reason(finish_reason_raw)
-        if tool_call_indices:
+        if tool_call_counter[0] > 0:
            mapped = "tool_calls"
        chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
    return chunks
@@ -733,9 +742,9 @@ class GeminiCloudCodeClient:
                        # Materialize error body for better diagnostics
                        response.read()
                        raise _gemini_http_error(response)
-                    tool_call_indices: Dict[str, int] = {}
+                    tool_call_counter: List[int] = [0]
                    for event in _iter_sse_events(response):
-                        for chunk in _translate_stream_event(event, model, tool_call_indices):
+                        for chunk in _translate_stream_event(event, model, tool_call_counter):
                            yield chunk
            except httpx.HTTPError as exc:
                raise CodeAssistError(
@@ -790,7 +799,8 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
+    _raw_details = err_obj.get("details")
+    err_details_list = _raw_details if isinstance(_raw_details, list) else []

    # Extract google.rpc.ErrorInfo reason + metadata.  There may be more
    # than one ErrorInfo (rare), so we pick the first one with a reason.
@@ -0,0 +1,847 @@
+"""OpenAI-compatible facade over Google AI Studio's native Gemini API.
+
+Hermes keeps ``api_mode='chat_completions'`` for the ``gemini`` provider so the
+main agent loop can keep using its existing OpenAI-shaped message flow.
+This adapter is the transport shim that converts those OpenAI-style
+``messages[]`` / ``tools[]`` requests into Gemini's native
+``models/{model}:generateContent`` schema and converts the responses back.
+
+Why this exists
+---------------
+Google's OpenAI-compatible endpoint has been brittle for Hermes's multi-turn
+agent/tool loop (auth churn, tool-call replay quirks, thought-signature
+requirements).  The native Gemini API is the canonical path and avoids the
+OpenAI-compat layer entirely.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import base64
+import json
+import logging
+import time
+import uuid
+from types import SimpleNamespace
+from typing import Any, Dict, Iterator, List, Optional
+
+import httpx
+
+from agent.gemini_schema import sanitize_gemini_tool_parameters
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
+
+
+def is_native_gemini_base_url(base_url: str) -> bool:
+    """Return True when the endpoint speaks Gemini's native REST API."""
+    normalized = str(base_url or "").strip().rstrip("/").lower()
+    if not normalized:
+        return False
+    if "generativelanguage.googleapis.com" not in normalized:
+        return False
+    return not normalized.endswith("/openai")
+
+
+class GeminiAPIError(Exception):
+    """Error shape compatible with Hermes retry/error classification."""
+
+    def __init__(
+        self,
+        message: str,
+        *,
+        code: str = "gemini_api_error",
+        status_code: Optional[int] = None,
+        response: Optional[httpx.Response] = None,
+        retry_after: Optional[float] = None,
+        details: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        super().__init__(message)
+        self.code = code
+        self.status_code = status_code
+        self.response = response
+        self.retry_after = retry_after
+        self.details = details or {}
+
+
+def _coerce_content_to_text(content: Any) -> str:
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        pieces: List[str] = []
+        for part in content:
+            if isinstance(part, str):
+                pieces.append(part)
+            elif isinstance(part, dict) and part.get("type") == "text":
+                text = part.get("text")
+                if isinstance(text, str):
+                    pieces.append(text)
+        return "\n".join(pieces)
+    return str(content)
+
+
+def _extract_multimodal_parts(content: Any) -> List[Dict[str, Any]]:
+    if not isinstance(content, list):
+        text = _coerce_content_to_text(content)
+        return [{"text": text}] if text else []
+
+    parts: List[Dict[str, Any]] = []
+    for item in content:
+        if isinstance(item, str):
+            parts.append({"text": item})
+            continue
+        if not isinstance(item, dict):
+            continue
+        ptype = item.get("type")
+        if ptype == "text":
+            text = item.get("text")
+            if isinstance(text, str) and text:
+                parts.append({"text": text})
+        elif ptype == "image_url":
+            url = ((item.get("image_url") or {}).get("url") or "")
+            if not isinstance(url, str) or not url.startswith("data:"):
+                continue
+            try:
+                header, encoded = url.split(",", 1)
+                mime = header.split(":", 1)[1].split(";", 1)[0]
+                raw = base64.b64decode(encoded)
+            except Exception:
+                continue
+            parts.append(
+                {
+                    "inlineData": {
+                        "mimeType": mime,
+                        "data": base64.b64encode(raw).decode("ascii"),
+                    }
+                }
+            )
+    return parts
+
+
+def _tool_call_extra_signature(tool_call: Dict[str, Any]) -> Optional[str]:
+    extra = tool_call.get("extra_content") or {}
+    if not isinstance(extra, dict):
+        return None
+    google = extra.get("google") or extra.get("thought_signature")
+    if isinstance(google, dict):
+        sig = google.get("thought_signature") or google.get("thoughtSignature")
+        return str(sig) if isinstance(sig, str) and sig else None
+    if isinstance(google, str) and google:
+        return google
+    return None
+
+
+def _translate_tool_call_to_gemini(tool_call: Dict[str, Any]) -> Dict[str, Any]:
+    fn = tool_call.get("function") or {}
+    args_raw = fn.get("arguments", "")
+    try:
+        args = json.loads(args_raw) if isinstance(args_raw, str) and args_raw else {}
+    except json.JSONDecodeError:
+        args = {"_raw": args_raw}
+    if not isinstance(args, dict):
+        args = {"_value": args}
+
+    part: Dict[str, Any] = {
+        "functionCall": {
+            "name": str(fn.get("name") or ""),
+            "args": args,
+        }
+    }
+    thought_signature = _tool_call_extra_signature(tool_call)
+    if thought_signature:
+        part["thoughtSignature"] = thought_signature
+    return part
+
+
+def _translate_tool_result_to_gemini(
+    message: Dict[str, Any],
+    tool_name_by_call_id: Optional[Dict[str, str]] = None,
+) -> Dict[str, Any]:
+    tool_name_by_call_id = tool_name_by_call_id or {}
+    tool_call_id = str(message.get("tool_call_id") or "")
+    name = str(
+        message.get("name")
+        or tool_name_by_call_id.get(tool_call_id)
+        or tool_call_id
+        or "tool"
+    )
+    content = _coerce_content_to_text(message.get("content"))
+    try:
+        parsed = json.loads(content) if content.strip().startswith(("{", "[")) else None
+    except json.JSONDecodeError:
+        parsed = None
+    response = parsed if isinstance(parsed, dict) else {"output": content}
+    return {
+        "functionResponse": {
+            "name": name,
+            "response": response,
+        }
+    }
+
+
+def _build_gemini_contents(messages: List[Dict[str, Any]]) -> tuple[List[Dict[str, Any]], Optional[Dict[str, Any]]]:
+    system_text_parts: List[str] = []
+    contents: List[Dict[str, Any]] = []
+    tool_name_by_call_id: Dict[str, str] = {}
+
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue
+        role = str(msg.get("role") or "user")
+
+        if role == "system":
+            system_text_parts.append(_coerce_content_to_text(msg.get("content")))
+            continue
+
+        if role in {"tool", "function"}:
+            contents.append(
+                {
+                    "role": "user",
+                    "parts": [
+                        _translate_tool_result_to_gemini(
+                            msg,
+                            tool_name_by_call_id=tool_name_by_call_id,
+                        )
+                    ],
+                }
+            )
+            continue
+
+        gemini_role = "model" if role == "assistant" else "user"
+        parts: List[Dict[str, Any]] = []
+
+        content_parts = _extract_multimodal_parts(msg.get("content"))
+        parts.extend(content_parts)
+
+        tool_calls = msg.get("tool_calls") or []
+        if isinstance(tool_calls, list):
+            for tool_call in tool_calls:
+                if isinstance(tool_call, dict):
+                    tool_call_id = str(tool_call.get("id") or tool_call.get("call_id") or "")
+                    tool_name = str(((tool_call.get("function") or {}).get("name") or ""))
+                    if tool_call_id and tool_name:
+                        tool_name_by_call_id[tool_call_id] = tool_name
+                    parts.append(_translate_tool_call_to_gemini(tool_call))
+
+        if parts:
+            contents.append({"role": gemini_role, "parts": parts})
+
+    system_instruction = None
+    joined_system = "\n".join(part for part in system_text_parts if part).strip()
+    if joined_system:
+        system_instruction = {"parts": [{"text": joined_system}]}
+    return contents, system_instruction
+
+
+def _translate_tools_to_gemini(tools: Any) -> List[Dict[str, Any]]:
+    if not isinstance(tools, list):
+        return []
+    declarations: List[Dict[str, Any]] = []
+    for tool in tools:
+        if not isinstance(tool, dict):
+            continue
+        fn = tool.get("function") or {}
+        if not isinstance(fn, dict):
+            continue
+        name = fn.get("name")
+        if not isinstance(name, str) or not name:
+            continue
+        decl: Dict[str, Any] = {"name": name}
+        description = fn.get("description")
+        if isinstance(description, str) and description:
+            decl["description"] = description
+        parameters = fn.get("parameters")
+        if isinstance(parameters, dict):
+            decl["parameters"] = sanitize_gemini_tool_parameters(parameters)
+        declarations.append(decl)
+    return [{"functionDeclarations": declarations}] if declarations else []
+
+
+def _translate_tool_choice_to_gemini(tool_choice: Any) -> Optional[Dict[str, Any]]:
+    if tool_choice is None:
+        return None
+    if isinstance(tool_choice, str):
+        if tool_choice == "auto":
+            return {"functionCallingConfig": {"mode": "AUTO"}}
+        if tool_choice == "required":
+            return {"functionCallingConfig": {"mode": "ANY"}}
+        if tool_choice == "none":
+            return {"functionCallingConfig": {"mode": "NONE"}}
+    if isinstance(tool_choice, dict):
+        fn = tool_choice.get("function") or {}
+        name = fn.get("name")
+        if isinstance(name, str) and name:
+            return {"functionCallingConfig": {"mode": "ANY", "allowedFunctionNames": [name]}}
+    return None
+
+
+def _normalize_thinking_config(config: Any) -> Optional[Dict[str, Any]]:
+    if not isinstance(config, dict) or not config:
+        return None
+    budget = config.get("thinkingBudget", config.get("thinking_budget"))
+    include = config.get("includeThoughts", config.get("include_thoughts"))
+    level = config.get("thinkingLevel", config.get("thinking_level"))
+    normalized: Dict[str, Any] = {}
+    if isinstance(budget, (int, float)):
+        normalized["thinkingBudget"] = int(budget)
+    if isinstance(include, bool):
+        normalized["includeThoughts"] = include
+    if isinstance(level, str) and level.strip():
+        normalized["thinkingLevel"] = level.strip().lower()
+    return normalized or None
+
+
+def build_gemini_request(
+    *,
+    messages: List[Dict[str, Any]],
+    tools: Any = None,
+    tool_choice: Any = None,
+    temperature: Optional[float] = None,
+    max_tokens: Optional[int] = None,
+    top_p: Optional[float] = None,
+    stop: Any = None,
+    thinking_config: Any = None,
+) -> Dict[str, Any]:
+    contents, system_instruction = _build_gemini_contents(messages)
+    request: Dict[str, Any] = {"contents": contents}
+    if system_instruction:
+        request["systemInstruction"] = system_instruction
+
+    gemini_tools = _translate_tools_to_gemini(tools)
+    if gemini_tools:
+        request["tools"] = gemini_tools
+
+    tool_config = _translate_tool_choice_to_gemini(tool_choice)
+    if tool_config:
+        request["toolConfig"] = tool_config
+
+    generation_config: Dict[str, Any] = {}
+    if temperature is not None:
+        generation_config["temperature"] = temperature
+    if max_tokens is not None:
+        generation_config["maxOutputTokens"] = max_tokens
+    if top_p is not None:
+        generation_config["topP"] = top_p
+    if stop:
+        generation_config["stopSequences"] = stop if isinstance(stop, list) else [str(stop)]
+    normalized_thinking = _normalize_thinking_config(thinking_config)
+    if normalized_thinking:
+        generation_config["thinkingConfig"] = normalized_thinking
+    if generation_config:
+        request["generationConfig"] = generation_config
+
+    return request
+
+
+def _map_gemini_finish_reason(reason: str) -> str:
+    mapping = {
+        "STOP": "stop",
+        "MAX_TOKENS": "length",
+        "SAFETY": "content_filter",
+        "RECITATION": "content_filter",
+        "OTHER": "stop",
+    }
+    return mapping.get(str(reason or "").upper(), "stop")
+
+
+def _tool_call_extra_from_part(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    sig = part.get("thoughtSignature")
+    if isinstance(sig, str) and sig:
+        return {"google": {"thought_signature": sig}}
+    return None
+
+
+def _empty_response(model: str) -> SimpleNamespace:
+    message = SimpleNamespace(
+        role="assistant",
+        content="",
+        tool_calls=None,
+        reasoning=None,
+        reasoning_content=None,
+        reasoning_details=None,
+    )
+    choice = SimpleNamespace(index=0, message=message, finish_reason="stop")
+    usage = SimpleNamespace(
+        prompt_tokens=0,
+        completion_tokens=0,
+        total_tokens=0,
+        prompt_tokens_details=SimpleNamespace(cached_tokens=0),
+    )
+    return SimpleNamespace(
+        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
+        object="chat.completion",
+        created=int(time.time()),
+        model=model,
+        choices=[choice],
+        usage=usage,
+    )
+
+
+def translate_gemini_response(resp: Dict[str, Any], model: str) -> SimpleNamespace:
+    candidates = resp.get("candidates") or []
+    if not isinstance(candidates, list) or not candidates:
+        return _empty_response(model)
+
+    cand = candidates[0] if isinstance(candidates[0], dict) else {}
+    content_obj = cand.get("content") if isinstance(cand, dict) else {}
+    parts = content_obj.get("parts") if isinstance(content_obj, dict) else []
+
+    text_pieces: List[str] = []
+    reasoning_pieces: List[str] = []
+    tool_calls: List[SimpleNamespace] = []
+
+    for index, part in enumerate(parts or []):
+        if not isinstance(part, dict):
+            continue
+        if part.get("thought") is True and isinstance(part.get("text"), str):
+            reasoning_pieces.append(part["text"])
+            continue
+        if isinstance(part.get("text"), str):
+            text_pieces.append(part["text"])
+            continue
+        fc = part.get("functionCall")
+        if isinstance(fc, dict) and fc.get("name"):
+            try:
+                args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False)
+            except (TypeError, ValueError):
+                args_str = "{}"
+            tool_call = SimpleNamespace(
+                id=f"call_{uuid.uuid4().hex[:12]}",
+                type="function",
+                index=index,
+                function=SimpleNamespace(name=str(fc["name"]), arguments=args_str),
+            )
+            extra_content = _tool_call_extra_from_part(part)
+            if extra_content:
+                tool_call.extra_content = extra_content
+            tool_calls.append(tool_call)
+
+    finish_reason = "tool_calls" if tool_calls else _map_gemini_finish_reason(str(cand.get("finishReason") or ""))
+    usage_meta = resp.get("usageMetadata") or {}
+    usage = SimpleNamespace(
+        prompt_tokens=int(usage_meta.get("promptTokenCount") or 0),
+        completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0),
+        total_tokens=int(usage_meta.get("totalTokenCount") or 0),
+        prompt_tokens_details=SimpleNamespace(
+            cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0),
+        ),
+    )
+    reasoning = "".join(reasoning_pieces) or None
+    message = SimpleNamespace(
+        role="assistant",
+        content="".join(text_pieces) if text_pieces else None,
+        tool_calls=tool_calls or None,
+        reasoning=reasoning,
+        reasoning_content=reasoning,
+        reasoning_details=None,
+    )
+    choice = SimpleNamespace(index=0, message=message, finish_reason=finish_reason)
+    return SimpleNamespace(
+        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
+        object="chat.completion",
+        created=int(time.time()),
+        model=model,
+        choices=[choice],
+        usage=usage,
+    )
+
+
+class _GeminiStreamChunk(SimpleNamespace):
+    pass
+
+
+def _make_stream_chunk(
+    *,
+    model: str,
+    content: str = "",
+    tool_call_delta: Optional[Dict[str, Any]] = None,
+    finish_reason: Optional[str] = None,
+    reasoning: str = "",
+) -> _GeminiStreamChunk:
+    delta_kwargs: Dict[str, Any] = {
+        "role": "assistant",
+        "content": None,
+        "tool_calls": None,
+        "reasoning": None,
+        "reasoning_content": None,
+    }
+    if content:
+        delta_kwargs["content"] = content
+    if tool_call_delta is not None:
+        tool_delta = SimpleNamespace(
+            index=tool_call_delta.get("index", 0),
+            id=tool_call_delta.get("id") or f"call_{uuid.uuid4().hex[:12]}",
+            type="function",
+            function=SimpleNamespace(
+                name=tool_call_delta.get("name") or "",
+                arguments=tool_call_delta.get("arguments") or "",
+            ),
+        )
+        extra_content = tool_call_delta.get("extra_content")
+        if isinstance(extra_content, dict):
+            tool_delta.extra_content = extra_content
+        delta_kwargs["tool_calls"] = [tool_delta]
+    if reasoning:
+        delta_kwargs["reasoning"] = reasoning
+        delta_kwargs["reasoning_content"] = reasoning
+    delta = SimpleNamespace(**delta_kwargs)
+    choice = SimpleNamespace(index=0, delta=delta, finish_reason=finish_reason)
+    return _GeminiStreamChunk(
+        id=f"chatcmpl-{uuid.uuid4().hex[:12]}",
+        object="chat.completion.chunk",
+        created=int(time.time()),
+        model=model,
+        choices=[choice],
+        usage=None,
+    )
+
+
+def _iter_sse_events(response: httpx.Response) -> Iterator[Dict[str, Any]]:
+    buffer = ""
+    for chunk in response.iter_text():
+        if not chunk:
+            continue
+        buffer += chunk
+        while "\n" in buffer:
+            line, buffer = buffer.split("\n", 1)
+            line = line.rstrip("\r")
+            if not line:
+                continue
+            if not line.startswith("data: "):
+                continue
+            data = line[6:]
+            if data == "[DONE]":
+                return
+            try:
+                payload = json.loads(data)
+            except json.JSONDecodeError:
+                logger.debug("Non-JSON Gemini SSE line: %s", data[:200])
+                continue
+            if isinstance(payload, dict):
+                yield payload
+
+
+def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: Dict[str, Dict[str, Any]]) -> List[_GeminiStreamChunk]:
+    candidates = event.get("candidates") or []
+    if not candidates:
+        return []
+    cand = candidates[0] if isinstance(candidates[0], dict) else {}
+    parts = ((cand.get("content") or {}).get("parts") or []) if isinstance(cand, dict) else []
+    chunks: List[_GeminiStreamChunk] = []
+
+    for part_index, part in enumerate(parts):
+        if not isinstance(part, dict):
+            continue
+        if part.get("thought") is True and isinstance(part.get("text"), str):
+            chunks.append(_make_stream_chunk(model=model, reasoning=part["text"]))
+            continue
+        if isinstance(part.get("text"), str) and part["text"]:
+            chunks.append(_make_stream_chunk(model=model, content=part["text"]))
+        fc = part.get("functionCall")
+        if isinstance(fc, dict) and fc.get("name"):
+            name = str(fc["name"])
+            try:
+                args_str = json.dumps(fc.get("args") or {}, ensure_ascii=False, sort_keys=True)
+            except (TypeError, ValueError):
+                args_str = "{}"
+            thought_signature = part.get("thoughtSignature") if isinstance(part.get("thoughtSignature"), str) else ""
+            call_key = json.dumps(
+                {
+                    "part_index": part_index,
+                    "name": name,
+                    "thought_signature": thought_signature,
+                },
+                sort_keys=True,
+            )
+            slot = tool_call_indices.get(call_key)
+            if slot is None:
+                slot = {
+                    "index": len(tool_call_indices),
+                    "id": f"call_{uuid.uuid4().hex[:12]}",
+                    "last_arguments": "",
+                }
+                tool_call_indices[call_key] = slot
+            emitted_arguments = args_str
+            last_arguments = str(slot.get("last_arguments") or "")
+            if last_arguments:
+                if args_str == last_arguments:
+                    emitted_arguments = ""
+                elif args_str.startswith(last_arguments):
+                    emitted_arguments = args_str[len(last_arguments):]
+            slot["last_arguments"] = args_str
+            chunks.append(
+                _make_stream_chunk(
+                    model=model,
+                    tool_call_delta={
+                        "index": slot["index"],
+                        "id": slot["id"],
+                        "name": name,
+                        "arguments": emitted_arguments,
+                        "extra_content": _tool_call_extra_from_part(part),
+                    },
+                )
+            )
+
+    finish_reason_raw = str(cand.get("finishReason") or "")
+    if finish_reason_raw:
+        mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw)
+        chunks.append(_make_stream_chunk(model=model, finish_reason=mapped))
+    return chunks
+
+
+def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
+    status = response.status_code
+    body_text = ""
+    body_json: Dict[str, Any] = {}
+    try:
+        body_text = response.text
+    except Exception:
+        body_text = ""
+    if body_text:
+        try:
+            parsed = json.loads(body_text)
+            if isinstance(parsed, dict):
+                body_json = parsed
+        except (ValueError, TypeError):
+            body_json = {}
+
+    err_obj = body_json.get("error") if isinstance(body_json, dict) else None
+    if not isinstance(err_obj, dict):
+        err_obj = {}
+    err_status = str(err_obj.get("status") or "").strip()
+    err_message = str(err_obj.get("message") or "").strip()
+    _raw_details = err_obj.get("details")
+    details_list = _raw_details if isinstance(_raw_details, list) else []
+
+    reason = ""
+    retry_after: Optional[float] = None
+    metadata: Dict[str, Any] = {}
+    for detail in details_list:
+        if not isinstance(detail, dict):
+            continue
+        type_url = str(detail.get("@type") or "")
+        if not reason and type_url.endswith("/google.rpc.ErrorInfo"):
+            reason_value = detail.get("reason")
+            if isinstance(reason_value, str):
+                reason = reason_value
+            md = detail.get("metadata")
+            if isinstance(md, dict):
+                metadata = md
+    header_retry = response.headers.get("Retry-After") or response.headers.get("retry-after")
+    if header_retry:
+        try:
+            retry_after = float(header_retry)
+        except (TypeError, ValueError):
+            retry_after = None
+
+    code = f"gemini_http_{status}"
+    if status == 401:
+        code = "gemini_unauthorized"
+    elif status == 429:
+        code = "gemini_rate_limited"
+    elif status == 404:
+        code = "gemini_model_not_found"
+
+    if err_message:
+        message = f"Gemini HTTP {status} ({err_status or 'error'}): {err_message}"
+    else:
+        message = f"Gemini returned HTTP {status}: {body_text[:500]}"
+
+    return GeminiAPIError(
+        message,
+        code=code,
+        status_code=status,
+        response=response,
+        retry_after=retry_after,
+        details={
+            "status": err_status,
+            "reason": reason,
+            "metadata": metadata,
+            "message": err_message,
+        },
+    )
+
+
+class _GeminiChatCompletions:
+    def __init__(self, client: "GeminiNativeClient"):
+        self._client = client
+
+    def create(self, **kwargs: Any) -> Any:
+        return self._client._create_chat_completion(**kwargs)
+
+
+class _AsyncGeminiChatCompletions:
+    def __init__(self, client: "AsyncGeminiNativeClient"):
+        self._client = client
+
+    async def create(self, **kwargs: Any) -> Any:
+        return await self._client._create_chat_completion(**kwargs)
+
+
+class _GeminiChatNamespace:
+    def __init__(self, client: "GeminiNativeClient"):
+        self.completions = _GeminiChatCompletions(client)
+
+
+class _AsyncGeminiChatNamespace:
+    def __init__(self, client: "AsyncGeminiNativeClient"):
+        self.completions = _AsyncGeminiChatCompletions(client)
+
+
+class GeminiNativeClient:
+    """Minimal OpenAI-SDK-compatible facade over Gemini's native REST API."""
+
+    def __init__(
+        self,
+        *,
+        api_key: str,
+        base_url: Optional[str] = None,
+        default_headers: Optional[Dict[str, str]] = None,
+        timeout: Any = None,
+        http_client: Optional[httpx.Client] = None,
+        **_: Any,
+    ) -> None:
+        self.api_key = api_key
+        normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
+        if normalized_base.endswith("/openai"):
+            normalized_base = normalized_base[: -len("/openai")]
+        self.base_url = normalized_base
+        self._default_headers = dict(default_headers or {})
+        self.chat = _GeminiChatNamespace(self)
+        self.is_closed = False
+        self._http = http_client or httpx.Client(
+            timeout=timeout or httpx.Timeout(connect=15.0, read=600.0, write=30.0, pool=30.0)
+        )
+
+    def close(self) -> None:
+        self.is_closed = True
+        try:
+            self._http.close()
+        except Exception:
+            pass
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def _headers(self) -> Dict[str, str]:
+        headers = {
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+            "x-goog-api-key": self.api_key,
+            "User-Agent": "hermes-agent (gemini-native)",
+        }
+        headers.update(self._default_headers)
+        return headers
+
+    @staticmethod
+    def _advance_stream_iterator(iterator: Iterator[_GeminiStreamChunk]) -> tuple[bool, Optional[_GeminiStreamChunk]]:
+        try:
+            return False, next(iterator)
+        except StopIteration:
+            return True, None
+
+    def _create_chat_completion(
+        self,
+        *,
+        model: str = "gemini-2.5-flash",
+        messages: Optional[List[Dict[str, Any]]] = None,
+        stream: bool = False,
+        tools: Any = None,
+        tool_choice: Any = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+        stop: Any = None,
+        extra_body: Optional[Dict[str, Any]] = None,
+        timeout: Any = None,
+        **_: Any,
+    ) -> Any:
+        thinking_config = None
+        if isinstance(extra_body, dict):
+            thinking_config = extra_body.get("thinking_config") or extra_body.get("thinkingConfig")
+
+        request = build_gemini_request(
+            messages=messages or [],
+            tools=tools,
+            tool_choice=tool_choice,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=top_p,
+            stop=stop,
+            thinking_config=thinking_config,
+        )
+
+        if stream:
+            return self._stream_completion(model=model, request=request, timeout=timeout)
+
+        url = f"{self.base_url}/models/{model}:generateContent"
+        response = self._http.post(url, json=request, headers=self._headers(), timeout=timeout)
+        if response.status_code != 200:
+            raise gemini_http_error(response)
+        try:
+            payload = response.json()
+        except ValueError as exc:
+            raise GeminiAPIError(
+                f"Invalid JSON from Gemini native API: {exc}",
+                code="gemini_invalid_json",
+                status_code=response.status_code,
+                response=response,
+            ) from exc
+        return translate_gemini_response(payload, model=model)
+
+    def _stream_completion(self, *, model: str, request: Dict[str, Any], timeout: Any = None) -> Iterator[_GeminiStreamChunk]:
+        url = f"{self.base_url}/models/{model}:streamGenerateContent?alt=sse"
+        stream_headers = dict(self._headers())
+        stream_headers["Accept"] = "text/event-stream"
+
+        def _generator() -> Iterator[_GeminiStreamChunk]:
+            try:
+                with self._http.stream("POST", url, json=request, headers=stream_headers, timeout=timeout) as response:
+                    if response.status_code != 200:
+                        response.read()
+                        raise gemini_http_error(response)
+                    tool_call_indices: Dict[str, Dict[str, Any]] = {}
+                    for event in _iter_sse_events(response):
+                        for chunk in translate_stream_event(event, model, tool_call_indices):
+                            yield chunk
+            except httpx.HTTPError as exc:
+                raise GeminiAPIError(
+                    f"Gemini streaming request failed: {exc}",
+                    code="gemini_stream_error",
+                ) from exc
+
+        return _generator()
+
+
+class AsyncGeminiNativeClient:
+    """Async wrapper used by auxiliary_client for native Gemini calls."""
+
+    def __init__(self, sync_client: GeminiNativeClient):
+        self._sync = sync_client
+        self.api_key = sync_client.api_key
+        self.base_url = sync_client.base_url
+        self.chat = _AsyncGeminiChatNamespace(self)
+
+    async def _create_chat_completion(self, **kwargs: Any) -> Any:
+        stream = bool(kwargs.get("stream"))
+        result = await asyncio.to_thread(self._sync.chat.completions.create, **kwargs)
+        if not stream:
+            return result
+
+        async def _async_stream() -> Any:
+            while True:
+                done, chunk = await asyncio.to_thread(self._sync._advance_stream_iterator, result)
+                if done:
+                    break
+                yield chunk
+
+        return _async_stream()
+
+    async def close(self) -> None:
+        await asyncio.to_thread(self._sync.close)
@@ -0,0 +1,85 @@
+"""Helpers for translating OpenAI-style tool schemas to Gemini's schema subset."""
+
+from __future__ import annotations
+
+from typing import Any, Dict, List
+
+# Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
+# object, which is only a subset of OpenAPI 3.0 / JSON Schema.  Strip fields
+# outside that subset before sending Hermes tool schemas to Google.
+_GEMINI_SCHEMA_ALLOWED_KEYS = {
+    "type",
+    "format",
+    "title",
+    "description",
+    "nullable",
+    "enum",
+    "maxItems",
+    "minItems",
+    "properties",
+    "required",
+    "minProperties",
+    "maxProperties",
+    "minLength",
+    "maxLength",
+    "pattern",
+    "example",
+    "anyOf",
+    "propertyOrdering",
+    "default",
+    "items",
+    "minimum",
+    "maximum",
+}
+
+
+def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
+    """Return a Gemini-compatible copy of a tool parameter schema.
+
+    Hermes tool schemas are OpenAI-flavored JSON Schema and may contain keys
+    such as ``$schema`` or ``additionalProperties`` that Google's Gemini
+    ``Schema`` object rejects.  This helper preserves the documented Gemini
+    subset and recursively sanitizes nested ``properties`` / ``items`` /
+    ``anyOf`` definitions.
+    """
+
+    if not isinstance(schema, dict):
+        return {}
+
+    cleaned: Dict[str, Any] = {}
+    for key, value in schema.items():
+        if key not in _GEMINI_SCHEMA_ALLOWED_KEYS:
+            continue
+        if key == "properties":
+            if not isinstance(value, dict):
+                continue
+            props: Dict[str, Any] = {}
+            for prop_name, prop_schema in value.items():
+                if not isinstance(prop_name, str):
+                    continue
+                props[prop_name] = sanitize_gemini_schema(prop_schema)
+            cleaned[key] = props
+            continue
+        if key == "items":
+            cleaned[key] = sanitize_gemini_schema(value)
+            continue
+        if key == "anyOf":
+            if not isinstance(value, list):
+                continue
+            cleaned[key] = [
+                sanitize_gemini_schema(item)
+                for item in value
+                if isinstance(item, dict)
+            ]
+            continue
+        cleaned[key] = value
+    return cleaned
+
+
+def sanitize_gemini_tool_parameters(parameters: Any) -> Dict[str, Any]:
+    """Normalize tool parameters to a valid Gemini object schema."""
+
+    cleaned = sanitize_gemini_schema(parameters)
+    if not cleaned:
+        return {"type": "object", "properties": {}}
+    return cleaned
@@ -0,0 +1,242 @@
+"""
+Image Generation Provider ABC
+=============================
+
+Defines the pluggable-backend interface for image generation. Providers register
+instances via ``PluginContext.register_image_gen_provider()``; the active one
+(selected via ``image_gen.provider`` in ``config.yaml``) services every
+``image_generate`` tool call.
+
+Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
+as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
+via ``plugins.enabled``).
+
+Response shape
+--------------
+All providers return a dict that :func:`success_response` / :func:`error_response`
+produce. The tool wrapper JSON-serializes it. Keys:
+
+    success        bool
+    image          str | None       URL or absolute file path
+    model          str              provider-specific model identifier
+    prompt         str              echoed prompt
+    aspect_ratio   str              "landscape" | "square" | "portrait"
+    provider       str              provider name (for diagnostics)
+    error          str              only when success=False
+    error_type     str              only when success=False
+"""
+
+from __future__ import annotations
+
+import abc
+import base64
+import datetime
+import logging
+import uuid
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
+DEFAULT_ASPECT_RATIO = "landscape"
+
+
+# ---------------------------------------------------------------------------
+# ABC
+# ---------------------------------------------------------------------------
+
+
+class ImageGenProvider(abc.ABC):
+    """Abstract base class for an image generation backend.
+
+    Subclasses must implement :meth:`generate`. Everything else has sane
+    defaults — override only what your provider needs.
+    """
+
+    @property
+    @abc.abstractmethod
+    def name(self) -> str:
+        """Stable short identifier used in ``image_gen.provider`` config.
+
+        Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
+        """
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
+        return self.name.title()
+
+    def is_available(self) -> bool:
+        """Return True when this provider can service calls.
+
+        Typically checks for a required API key. Default: True
+        (providers with no external dependencies are always available).
+        """
+        return True
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        """Return catalog entries for ``hermes tools`` model picker.
+
+        Each entry::
+
+            {
+                "id": "gpt-image-1.5",               # required
+                "display": "GPT Image 1.5",          # optional; defaults to id
+                "speed": "~10s",                     # optional
+                "strengths": "...",                  # optional
+                "price": "$...",                     # optional
+            }
+
+        Default: empty list (provider has no user-selectable models).
+        """
+        return []
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return provider metadata for the ``hermes tools`` picker.
+
+        Used by ``tools_config.py`` to inject this provider as a row in
+        the Image Generation provider list. Shape::
+
+            {
+                "name": "OpenAI",                     # picker label
+                "badge": "paid",                      # optional short tag
+                "tag": "One-line description...",     # optional subtitle
+                "env_vars": [                         # keys to prompt for
+                    {"key": "OPENAI_API_KEY",
+                     "prompt": "OpenAI API key",
+                     "url": "https://platform.openai.com/api-keys"},
+                ],
+            }
+
+        Default: minimal entry derived from ``display_name``. Override to
+        expose API key prompts and custom badges.
+        """
+        return {
+            "name": self.display_name,
+            "badge": "",
+            "tag": "",
+            "env_vars": [],
+        }
+
+    def default_model(self) -> Optional[str]:
+        """Return the default model id, or None if not applicable."""
+        models = self.list_models()
+        if models:
+            return models[0].get("id")
+        return None
+
+    @abc.abstractmethod
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Generate an image.
+
+        Implementations should return the dict from :func:`success_response`
+        or :func:`error_response`. ``kwargs`` may contain forward-compat
+        parameters future versions of the schema will expose — implementations
+        should ignore unknown keys.
+        """
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def resolve_aspect_ratio(value: Optional[str]) -> str:
+    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
+
+    Invalid values are coerced rather than rejected so the tool surface is
+    forgiving of agent mistakes.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_ASPECT_RATIO
+    v = value.strip().lower()
+    if v in VALID_ASPECT_RATIOS:
+        return v
+    return DEFAULT_ASPECT_RATIO
+
+
+def _images_cache_dir() -> Path:
+    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
+    from hermes_constants import get_hermes_home
+
+    path = get_hermes_home() / "cache" / "images"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def save_b64_image(
+    b64_data: str,
+    *,
+    prefix: str = "image",
+    extension: str = "png",
+) -> Path:
+    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
+
+    Returns the absolute :class:`Path` to the saved file.
+
+    Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
+    """
+    raw = base64.b64decode(b64_data)
+    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    short = uuid.uuid4().hex[:8]
+    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
+    path.write_bytes(raw)
+    return path
+
+
+def success_response(
+    *,
+    image: str,
+    model: str,
+    prompt: str,
+    aspect_ratio: str,
+    provider: str,
+    extra: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Build a uniform success response dict.
+
+    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
+    providers like OpenAI). Callers that need to pass through additional
+    backend-specific fields can supply ``extra``.
+    """
+    payload: Dict[str, Any] = {
+        "success": True,
+        "image": image,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
+    if extra:
+        for k, v in extra.items():
+            payload.setdefault(k, v)
+    return payload
+
+
+def error_response(
+    *,
+    error: str,
+    error_type: str = "provider_error",
+    provider: str = "",
+    model: str = "",
+    prompt: str = "",
+    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+) -> Dict[str, Any]:
+    """Build a uniform error response dict."""
+    return {
+        "success": False,
+        "image": None,
+        "error": error,
+        "error_type": error_type,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
@@ -0,0 +1,120 @@
+"""
+Image Generation Provider Registry
+==================================
+
+Central map of registered providers. Populated by plugins at import-time via
+``PluginContext.register_image_gen_provider()``; consumed by the
+``image_generate`` tool to dispatch each call to the active backend.
+
+Active selection
+----------------
+The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
+If unset, :func:`get_active_provider` applies fallback logic:
+
+1. If exactly one provider is registered, use it.
+2. Otherwise if a provider named ``fal`` is registered, use it (legacy
+   default — matches pre-plugin behavior).
+3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
+   the user at ``hermes tools``).
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from typing import Dict, List, Optional
+
+from agent.image_gen_provider import ImageGenProvider
+
+logger = logging.getLogger(__name__)
+
+
+_providers: Dict[str, ImageGenProvider] = {}
+_lock = threading.Lock()
+
+
+def register_provider(provider: ImageGenProvider) -> None:
+    """Register an image generation provider.
+
+    Re-registration (same ``name``) overwrites the previous entry and logs
+    a debug message — this makes hot-reload scenarios (tests, dev loops)
+    behave predictably.
+    """
+    if not isinstance(provider, ImageGenProvider):
+        raise TypeError(
+            f"register_provider() expects an ImageGenProvider instance, "
+            f"got {type(provider).__name__}"
+        )
+    name = provider.name
+    if not isinstance(name, str) or not name.strip():
+        raise ValueError("Image gen provider .name must be a non-empty string")
+    with _lock:
+        existing = _providers.get(name)
+        _providers[name] = provider
+    if existing is not None:
+        logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
+    else:
+        logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
+
+
+def list_providers() -> List[ImageGenProvider]:
+    """Return all registered providers, sorted by name."""
+    with _lock:
+        items = list(_providers.values())
+    return sorted(items, key=lambda p: p.name)
+
+
+def get_provider(name: str) -> Optional[ImageGenProvider]:
+    """Return the provider registered under *name*, or None."""
+    if not isinstance(name, str):
+        return None
+    with _lock:
+        return _providers.get(name.strip())
+
+
+def get_active_provider() -> Optional[ImageGenProvider]:
+    """Resolve the currently-active provider.
+
+    Reads ``image_gen.provider`` from config.yaml; falls back per the
+    module docstring.
+    """
+    configured: Optional[str] = None
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        if isinstance(section, dict):
+            raw = section.get("provider")
+            if isinstance(raw, str) and raw.strip():
+                configured = raw.strip()
+    except Exception as exc:
+        logger.debug("Could not read image_gen.provider from config: %s", exc)
+
+    with _lock:
+        snapshot = dict(_providers)
+
+    if configured:
+        provider = snapshot.get(configured)
+        if provider is not None:
+            return provider
+        logger.debug(
+            "image_gen.provider='%s' configured but not registered; falling back",
+            configured,
+        )
+
+    # Fallback: single-provider case
+    if len(snapshot) == 1:
+        return next(iter(snapshot.values()))
+
+    # Fallback: prefer legacy FAL for backward compat
+    if "fal" in snapshot:
+        return snapshot["fal"]
+
+    return None
+
+
+def _reset_for_tests() -> None:
+    """Clear the registry. **Test-only.**"""
+    with _lock:
+        _providers.clear()
@@ -124,6 +124,7 @@ class InsightsEngine:
        # Gather raw data
        sessions = self._get_sessions(cutoff, source)
        tool_usage = self._get_tool_usage(cutoff, source)
+        skill_usage = self._get_skill_usage(cutoff, source)
        message_stats = self._get_message_stats(cutoff, source)

        if not sessions:
@@ -135,6 +136,15 @@ class InsightsEngine:
                "models": [],
                "platforms": [],
                "tools": [],
+                "skills": {
+                    "summary": {
+                        "total_skill_loads": 0,
+                        "total_skill_edits": 0,
+                        "total_skill_actions": 0,
+                        "distinct_skills_used": 0,
+                    },
+                    "top_skills": [],
+                },
                "activity": {},
                "top_sessions": [],
            }
@@ -144,6 +154,7 @@ class InsightsEngine:
        models = self._compute_model_breakdown(sessions)
        platforms = self._compute_platform_breakdown(sessions)
        tools = self._compute_tool_breakdown(tool_usage)
+        skills = self._compute_skill_breakdown(skill_usage)
        activity = self._compute_activity_patterns(sessions)
        top_sessions = self._compute_top_sessions(sessions)

@@ -156,6 +167,7 @@ class InsightsEngine:
            "models": models,
            "platforms": platforms,
            "tools": tools,
+            "skills": skills,
            "activity": activity,
            "top_sessions": top_sessions,
        }
@@ -284,6 +296,82 @@ class InsightsEngine:
            for name, count in tool_counts.most_common()
        ]

+    def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
+        """Extract per-skill usage from assistant tool calls."""
+        skill_counts: Dict[str, Dict[str, Any]] = {}
+
+        if source:
+            cursor = self._conn.execute(
+                """SELECT m.tool_calls, m.timestamp
+                   FROM messages m
+                   JOIN sessions s ON s.id = m.session_id
+                   WHERE s.started_at >= ? AND s.source = ?
+                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
+                (cutoff, source),
+            )
+        else:
+            cursor = self._conn.execute(
+                """SELECT m.tool_calls, m.timestamp
+                   FROM messages m
+                   JOIN sessions s ON s.id = m.session_id
+                   WHERE s.started_at >= ?
+                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
+                (cutoff,),
+            )
+
+        for row in cursor.fetchall():
+            try:
+                calls = row["tool_calls"]
+                if isinstance(calls, str):
+                    calls = json.loads(calls)
+                if not isinstance(calls, list):
+                    continue
+            except (json.JSONDecodeError, TypeError):
+                continue
+
+            timestamp = row["timestamp"]
+            for call in calls:
+                if not isinstance(call, dict):
+                    continue
+                func = call.get("function", {})
+                tool_name = func.get("name")
+                if tool_name not in {"skill_view", "skill_manage"}:
+                    continue
+
+                args = func.get("arguments")
+                if isinstance(args, str):
+                    try:
+                        args = json.loads(args)
+                    except (json.JSONDecodeError, TypeError):
+                        continue
+                if not isinstance(args, dict):
+                    continue
+
+                skill_name = args.get("name")
+                if not isinstance(skill_name, str) or not skill_name.strip():
+                    continue
+
+                entry = skill_counts.setdefault(
+                    skill_name,
+                    {
+                        "skill": skill_name,
+                        "view_count": 0,
+                        "manage_count": 0,
+                        "last_used_at": None,
+                    },
+                )
+                if tool_name == "skill_view":
+                    entry["view_count"] += 1
+                else:
+                    entry["manage_count"] += 1
+
+                if timestamp is not None and (
+                    entry["last_used_at"] is None or timestamp > entry["last_used_at"]
+                ):
+                    entry["last_used_at"] = timestamp
+
+        return list(skill_counts.values())
+
    def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
        """Get aggregate message statistics."""
        if source:
@@ -475,6 +563,46 @@ class InsightsEngine:
            })
        return result

+    def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
+        """Process per-skill usage into summary + ranked list."""
+        total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
+        total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
+        total_skill_actions = total_skill_loads + total_skill_edits
+
+        top_skills = []
+        for skill in skill_usage:
+            total_count = skill["view_count"] + skill["manage_count"]
+            percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
+            top_skills.append({
+                "skill": skill["skill"],
+                "view_count": skill["view_count"],
+                "manage_count": skill["manage_count"],
+                "total_count": total_count,
+                "percentage": percentage,
+                "last_used_at": skill.get("last_used_at"),
+            })
+
+        top_skills.sort(
+            key=lambda s: (
+                s["total_count"],
+                s["view_count"],
+                s["manage_count"],
+                s["last_used_at"] or 0,
+                s["skill"],
+            ),
+            reverse=True,
+        )
+
+        return {
+            "summary": {
+                "total_skill_loads": total_skill_loads,
+                "total_skill_edits": total_skill_edits,
+                "total_skill_actions": total_skill_actions,
+                "distinct_skills_used": len(skill_usage),
+            },
+            "top_skills": top_skills,
+        }
+
    def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
        """Analyze activity patterns by day of week and hour."""
        day_counts = Counter()  # 0=Monday ... 6=Sunday
@@ -670,6 +798,28 @@ class InsightsEngine:
                lines.append(f"  ... and {len(report['tools']) - 15} more tools")
            lines.append("")

+        # Skill usage
+        skills = report.get("skills", {})
+        top_skills = skills.get("top_skills", [])
+        if top_skills:
+            lines.append("  🧠 Top Skills")
+            lines.append("  " + "─" * 56)
+            lines.append(f"  {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
+            for skill in top_skills[:10]:
+                last_used = "—"
+                if skill.get("last_used_at"):
+                    last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
+                lines.append(
+                    f"  {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
+                )
+            summary = skills.get("summary", {})
+            lines.append(
+                f"  Distinct skills: {summary.get('distinct_skills_used', 0)}  "
+                f"Loads: {summary.get('total_skill_loads', 0):,}  "
+                f"Edits: {summary.get('total_skill_edits', 0):,}"
+            )
+            lines.append("")
+
        # Activity patterns
        act = report.get("activity", {})
        if act.get("by_day"):
@@ -753,6 +903,18 @@ class InsightsEngine:
                lines.append(f"  {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)")
            lines.append("")

+        skills = report.get("skills", {})
+        if skills.get("top_skills"):
+            lines.append("**🧠 Top Skills:**")
+            for skill in skills["top_skills"][:5]:
+                suffix = ""
+                if skill.get("last_used_at"):
+                    suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
+                lines.append(
+                    f"  {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
+                )
+            lines.append("")
+
        # Activity summary
        act = report.get("activity", {})
        if act.get("busiest_day") and act.get("busiest_hour"):
@@ -4,6 +4,7 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

+import ipaddress
 import logging
 import re
 import time
@@ -14,6 +15,8 @@ from urllib.parse import urlparse
 import requests
 import yaml

+from utils import base_url_host_matches, base_url_hostname
+
 from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)
@@ -23,7 +26,7 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
@@ -34,7 +37,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
    "ollama",
-    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
+    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
    "xai", "x-ai", "x.ai", "grok",
@@ -49,6 +52,13 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


+# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
+# block, so without an explicit check Ollama reached over Tailscale (e.g.
+# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
+# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
+_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
+
+
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -113,10 +123,12 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
+    # GPT-5.5 (launched Apr 23 2026). Verified via live ChatGPT codex/models
+    # endpoint: bare slug `gpt-5.5`, no -pro/-mini variants. 400k context on Codex.
+    "gpt-5.5": 400000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
-    "gpt-5.3-codex-spark": 128000,    # Spark variant has reduced 128k context
    "gpt-5.1-chat": 128000,           # Chat variant has 128k context
    "gpt-5": 400000,                  # GPT-5.x base, mini, codex variants (400k)
    "gpt-4.1": 1047576,
@@ -124,6 +136,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
+    "gemma-4": 256000,  # Gemma 4 family
+    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
@@ -169,12 +183,15 @@ DEFAULT_CONTEXT_LENGTHS = {
    "Qwen/Qwen3.5-35B-A3B": 131072,
    "deepseek-ai/DeepSeek-V3.2": 65536,
    "moonshotai/Kimi-K2.5": 262144,
+    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
-    "XiaomiMiMo/MiMo-V2-Flash": 256000,
-    "mimo-v2-pro": 1000000,
-    "mimo-v2-omni": 256000,
-    "mimo-v2-flash": 256000,
+    "XiaomiMiMo/MiMo-V2-Flash": 262144,
+    "mimo-v2-pro": 1048576,
+    "mimo-v2.5-pro": 1048576,
+    "mimo-v2.5": 1048576,
+    "mimo-v2-omni": 262144,
+    "mimo-v2-flash": 262144,
    "zai-org/GLM-5": 202752,
 }

@@ -189,6 +206,7 @@ _CONTEXT_LENGTH_KEYS = (
    "max_seq_len",
    "n_ctx_train",
    "n_ctx",
+    "ctx_size",
 )

 _MAX_COMPLETION_KEYS = (
@@ -211,8 +229,15 @@ def _normalize_base_url(base_url: str) -> str:
    return (base_url or "").strip().rstrip("/")


+def _auth_headers(api_key: str = "") -> Dict[str, str]:
+    token = str(api_key or "").strip()
+    if not token:
+        return {}
+    return {"Authorization": f"Bearer {token}"}
+
+
 def _is_openrouter_base_url(base_url: str) -> bool:
-    return "openrouter.ai" in _normalize_base_url(base_url).lower()
+    return base_url_host_matches(base_url, "openrouter.ai")


 def _is_custom_endpoint(base_url: str) -> bool:
@@ -225,9 +250,12 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "chatgpt.com": "openai",
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
+    "open.bigmodel.cn": "zai",
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
+    "api.stepfun.ai": "stepfun",
+    "api.stepfun.com": "stepfun",
    "api.arcee.ai": "arcee",
    "api.minimax": "minimax",
    "dashscope.aliyuncs.com": "alibaba",
@@ -272,7 +300,15 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
+    """Return True if base_url points to a local machine.
+
+    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
+    container-internal DNS names (``host.docker.internal`` et al.),
+    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
+    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
+    is included so remote-but-trusted Ollama boxes reached over a
+    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
+    """
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -287,14 +323,17 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges and link-local
-    import ipaddress
+    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
    try:
        addr = ipaddress.ip_address(host)
-        return addr.is_private or addr.is_loopback or addr.is_link_local
+        if addr.is_private or addr.is_loopback or addr.is_link_local:
+            return True
+        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
+            return True
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
+    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -305,12 +344,14 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
+            if first == 100 and 64 <= second <= 127:
+                return True
        except ValueError:
            pass
    return False


-def detect_local_server_type(base_url: str) -> Optional[str]:
+def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
    """Detect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -322,8 +363,10 @@ def detect_local_server_type(base_url: str) -> Optional[str]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

+    headers = _auth_headers(api_key)
+
    try:
-        with httpx.Client(timeout=2.0) as client:
+        with httpx.Client(timeout=2.0, headers=headers) as client:
            # LM Studio exposes /api/v1/models — check first (most specific)
            try:
                r = client.get(f"{server_url}/api/v1/models")
@@ -510,6 +553,59 @@ def fetch_endpoint_model_metadata(
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    last_error: Optional[Exception] = None

+    if is_local_endpoint(normalized):
+        try:
+            if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
+                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
+                response = requests.get(
+                    server_url.rstrip("/") + "/api/v1/models",
+                    headers=headers,
+                    timeout=10,
+                )
+                response.raise_for_status()
+                payload = response.json()
+                cache: Dict[str, Dict[str, Any]] = {}
+                for model in payload.get("models", []):
+                    if not isinstance(model, dict):
+                        continue
+                    model_id = model.get("key") or model.get("id")
+                    if not model_id:
+                        continue
+                    entry: Dict[str, Any] = {"name": model.get("name", model_id)}
+
+                    context_length = None
+                    for inst in model.get("loaded_instances", []) or []:
+                        if not isinstance(inst, dict):
+                            continue
+                        cfg = inst.get("config", {})
+                        ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
+                        if isinstance(ctx, int) and ctx > 0:
+                            context_length = ctx
+                            break
+                    if context_length is None:
+                        context_length = _extract_context_length(model)
+                    if context_length is not None:
+                        entry["context_length"] = context_length
+
+                    max_completion_tokens = _extract_max_completion_tokens(model)
+                    if max_completion_tokens is not None:
+                        entry["max_completion_tokens"] = max_completion_tokens
+
+                    pricing = _extract_pricing(model)
+                    if pricing:
+                        entry["pricing"] = pricing
+
+                    _add_model_aliases(cache, model_id, entry)
+                    alt_id = model.get("id")
+                    if isinstance(alt_id, str) and alt_id and alt_id != model_id:
+                        _add_model_aliases(cache, alt_id, entry)
+
+                _endpoint_model_metadata_cache[normalized] = cache
+                _endpoint_model_metadata_cache_time[normalized] = time.time()
+                return cache
+        except Exception as exc:
+            last_error = exc
+
    for candidate in candidates:
        url = candidate.rstrip("/") + "/models"
        try:
@@ -716,7 +812,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
    return False


-def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
+def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
    """Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -734,14 +830,16 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
        server_url = server_url[:-3]

    try:
-        server_type = detect_local_server_type(base_url)
+        server_type = detect_local_server_type(base_url, api_key=api_key)
    except Exception:
        return None
    if server_type != "ollama":
        return None

+    headers = _auth_headers(api_key)
+
    try:
-        with httpx.Client(timeout=3.0) as client:
+        with httpx.Client(timeout=3.0, headers=headers) as client:
            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
            if resp.status_code != 200:
                return None
@@ -769,7 +867,7 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
    return None


-def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
+def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
    """Query a local server for the model's context length."""
    import httpx

@@ -782,13 +880,15 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

+    headers = _auth_headers(api_key)
+
    try:
-        server_type = detect_local_server_type(base_url)
+        server_type = detect_local_server_type(base_url, api_key=api_key)
    except Exception:
        server_type = None

    try:
-        with httpx.Client(timeout=3.0) as client:
+        with httpx.Client(timeout=3.0, headers=headers) as client:
            # Ollama: /api/show returns model details with context info
            if server_type == "ollama":
                resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -999,7 +1099,7 @@ def get_model_context_length(
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
-                local_ctx = _query_local_context_length(model, base_url)
+                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
                    save_context_length(model, base_url, local_ctx)
                    return local_ctx
@@ -1013,7 +1113,7 @@ def get_model_context_length(

    # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
    if provider == "anthropic" or (
-        base_url and "api.anthropic.com" in base_url
+        base_url and base_url_hostname(base_url) == "api.anthropic.com"
    ):
        ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
        if ctx:
@@ -1022,7 +1122,11 @@ def get_model_context_length(
    # 4b. AWS Bedrock — use static context length table.
    # Bedrock's ListFoundationModels doesn't expose context window sizes,
    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
+    if provider == "bedrock" or (
+        base_url
+        and base_url_hostname(base_url).startswith("bedrock-runtime.")
+        and base_url_host_matches(base_url, "amazonaws.com")
+    ):
        try:
            from agent.bedrock_adapter import get_bedrock_context_length
            return get_bedrock_context_length(model)
@@ -1069,7 +1173,7 @@ def get_model_context_length(

    # 9. Query local server as last resort
    if base_url and is_local_endpoint(base_url):
-        local_ctx = _query_local_context_length(model, base_url)
+        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
            save_context_length(model, base_url, local_ctx)
            return local_ctx
@@ -146,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
+    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
    "minimax-cn": "minimax-cn",
@@ -417,6 +418,9 @@ def list_provider_models(provider: str) -> List[str]:

    Returns an empty list if the provider is unknown or has no data.
    """
+    from hermes_cli.models import normalize_provider
+    provider = normalize_provider(provider) or provider
+    
    models = _get_provider_models(provider)
    if models is None:
        return []
@@ -0,0 +1,190 @@
+"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
+
+Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
+tool calling.  Requests that violate it fail with HTTP 400:
+
+    tools.function.parameters is not a valid moonshot flavored json schema,
+    details: <...>
+
+Known rejection modes documented at
+https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
+and MoonshotAI/kimi-cli#1595:
+
+1. Every property schema must carry a ``type``.  Standard JSON Schema allows
+   type to be omitted (the value is then unconstrained); Moonshot refuses.
+2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
+   the parent.  Presence of both causes "type should be defined in anyOf
+   items instead of the parent schema".
+
+The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
+handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
+applies at MCP registration time for all providers.
+"""
+
+from __future__ import annotations
+
+import copy
+from typing import Any, Dict, List
+
+# Keys whose values are maps of name → schema (not schemas themselves).
+# When we recurse, we walk the values of these maps as schemas, but we do
+# NOT apply the missing-type repair to the map itself.
+_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
+
+# Keys whose values are lists of schemas.
+_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
+
+# Keys whose values are a single nested schema.
+_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
+
+
+def _repair_schema(node: Any, is_schema: bool = True) -> Any:
+    """Recursively apply Moonshot repairs to a schema node.
+
+    ``is_schema=True`` means this dict is a JSON Schema node and gets the
+    missing-type + anyOf-parent repairs applied.  ``is_schema=False`` means
+    it's a container map (e.g. the value of ``properties``) and we only
+    recurse into its values.
+    """
+    if isinstance(node, list):
+        # Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
+        # every element is itself a schema.
+        return [_repair_schema(item, is_schema=True) for item in node]
+    if not isinstance(node, dict):
+        return node
+
+    # Walk the dict, deciding per-key whether recursion is into a schema
+    # node, a container map, or a scalar.
+    repaired: Dict[str, Any] = {}
+    for key, value in node.items():
+        if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
+            # Map of name → schema.  Don't treat the map itself as a schema
+            # (it has no type / properties of its own), but each value is.
+            repaired[key] = {
+                sub_key: _repair_schema(sub_val, is_schema=True)
+                for sub_key, sub_val in value.items()
+            }
+        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
+            repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
+        elif key in _SCHEMA_NODE_KEYS:
+            # items / not / additionalProperties: single nested schema.
+            # additionalProperties can also be a bool — leave those alone.
+            if isinstance(value, dict):
+                repaired[key] = _repair_schema(value, is_schema=True)
+            else:
+                repaired[key] = value
+        else:
+            # Scalars (description, title, format, enum values, etc.) pass through.
+            repaired[key] = value
+
+    if not is_schema:
+        return repaired
+
+    # Rule 2: when anyOf is present, type belongs only on the children.
+    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
+        repaired.pop("type", None)
+        return repaired
+
+    # Rule 1: property schemas without type need one.  $ref nodes are exempt
+    # — their type comes from the referenced definition.
+    if "$ref" in repaired:
+        return repaired
+    return _fill_missing_type(repaired)
+
+
+def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
+    """Infer a reasonable ``type`` if this schema node has none."""
+    if "type" in node and node["type"] not in (None, ""):
+        return node
+
+    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
+    # → type of first enum value, else fall back to ``string`` (safest scalar).
+    if "properties" in node or "required" in node or "additionalProperties" in node:
+        inferred = "object"
+    elif "items" in node or "prefixItems" in node:
+        inferred = "array"
+    elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
+        sample = node["enum"][0]
+        if isinstance(sample, bool):
+            inferred = "boolean"
+        elif isinstance(sample, int):
+            inferred = "integer"
+        elif isinstance(sample, float):
+            inferred = "number"
+        else:
+            inferred = "string"
+    else:
+        inferred = "string"
+
+    return {**node, "type": inferred}
+
+
+def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
+    """Normalize tool parameters to a Moonshot-compatible object schema.
+
+    Returns a deep-copied schema with the two flavored-JSON-Schema repairs
+    applied.  Input is not mutated.
+    """
+    if not isinstance(parameters, dict):
+        return {"type": "object", "properties": {}}
+
+    repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
+    if not isinstance(repaired, dict):
+        return {"type": "object", "properties": {}}
+
+    # Top-level must be an object schema
+    if repaired.get("type") != "object":
+        repaired["type"] = "object"
+    if "properties" not in repaired:
+        repaired["properties"] = {}
+
+    return repaired
+
+
+def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
+    if not tools:
+        return tools
+
+    sanitized: List[Dict[str, Any]] = []
+    any_change = False
+    for tool in tools:
+        if not isinstance(tool, dict):
+            sanitized.append(tool)
+            continue
+        fn = tool.get("function")
+        if not isinstance(fn, dict):
+            sanitized.append(tool)
+            continue
+        params = fn.get("parameters")
+        repaired = sanitize_moonshot_tool_parameters(params)
+        if repaired is not params:
+            any_change = True
+            new_fn = {**fn, "parameters": repaired}
+            sanitized.append({**tool, "function": new_fn})
+        else:
+            sanitized.append(tool)
+
+    return sanitized if any_change else tools
+
+
+def is_moonshot_model(model: str | None) -> bool:
+    """True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
+
+    Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
+    prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
+    Detection by model name covers Nous / OpenRouter / other aggregators that
+    route to Moonshot's inference, where the base URL is the aggregator's, not
+    ``api.moonshot.ai``.
+    """
+    if not model:
+        return False
+    bare = model.strip().lower()
+    # Last path segment (covers aggregator-prefixed slugs)
+    tail = bare.rsplit("/", 1)[-1]
+    if tail.startswith("kimi-") or tail == "kimi":
+        return True
+    # Vendor-prefixed forms commonly used on aggregators
+    if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
+        return True
+    return False
@@ -350,7 +350,13 @@ PLATFORM_HINTS = {
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
-        "renderable inside a terminal."
+        "renderable inside a terminal. "
+        "File delivery: there is no attachment channel — the user reads your "
+        "response directly in their terminal. Do NOT emit MEDIA:/path tags "
+        "(those are only intercepted on messaging platforms like Telegram, "
+        "Discord, Slack, etc.; on the CLI they render as literal text). "
+        "When referring to a file you created or changed, just state its "
+        "absolute path in plain text; the user can open it from there."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
@@ -364,6 +370,32 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
+    "mattermost": (
+        "You are in a Mattermost workspace communicating with your user. "
+        "Mattermost renders standard Markdown — headings, bold, italic, code "
+        "blocks, and tables all work. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
+        "attachments, audio and video as file attachments. "
+        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
+    ),
+    "matrix": (
+        "You are in a Matrix room communicating with your user. "
+        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
+        "the adapter converts your Markdown to HTML for rich display. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
+        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
+        "and other files as downloadable attachments."
+    ),
+    "feishu": (
+        "You are in a Feishu (Lark) workspace communicating with your user. "
+        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
+        "links are supported. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
+        "inline, audio files as voice messages, and other files as attachments."
+    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -13,6 +13,48 @@ import re

 logger = logging.getLogger(__name__)

+# Sensitive query-string parameter names (case-insensitive exact match).
+# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
+# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
+_SENSITIVE_QUERY_PARAMS = frozenset({
+    "access_token",
+    "refresh_token",
+    "id_token",
+    "token",
+    "api_key",
+    "apikey",
+    "client_secret",
+    "password",
+    "auth",
+    "jwt",
+    "session",
+    "secret",
+    "key",
+    "code",           # OAuth authorization codes
+    "signature",      # pre-signed URL signatures
+    "x-amz-signature",
+})
+
+# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
+# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
+# Ported from nearai/ironclaw#2529.
+_SENSITIVE_BODY_KEYS = frozenset({
+    "access_token",
+    "refresh_token",
+    "id_token",
+    "token",
+    "api_key",
+    "apikey",
+    "client_secret",
+    "password",
+    "auth",
+    "jwt",
+    "secret",
+    "private_key",
+    "authorization",
+    "key",
+})
+
 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
 # `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
 _REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -108,6 +150,30 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")

+# URLs containing query strings — matches `scheme://...?...[# or end]`.
+# Used to scan text for URLs whose query params may contain secrets.
+# Ported from nearai/ironclaw#2529.
+_URL_WITH_QUERY_RE = re.compile(
+    r"(https?|wss?|ftp)://"          # scheme
+    r"([^\s/?#]+)"                    # authority (may include userinfo)
+    r"([^\s?#]*)"                     # path
+    r"\?([^\s#]+)"                    # query (required)
+    r"(#\S*)?",                       # optional fragment
+)
+
+# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
+# (not just DB protocols already covered by _DB_CONNSTR_RE above).
+# Catches things like `https://user:token@api.example.com/v1/foo`.
+_URL_USERINFO_RE = re.compile(
+    r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
+)
+
+# Form-urlencoded body detection: conservative — only applies when the entire
+# text looks like a query string (k=v&k=v pattern with no newlines).
+_FORM_BODY_RE = re.compile(
+    r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
+)
+
 # Compile known prefix patterns into one alternation
 _PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -121,6 +187,72 @@ def _mask_token(token: str) -> str:
    return f"{token[:6]}...{token[-4:]}"


+def _redact_query_string(query: str) -> str:
+    """Redact sensitive parameter values in a URL query string.
+
+    Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
+    replaced with `***`. Non-sensitive keys pass through unchanged.
+    Empty or malformed pairs are preserved as-is.
+    """
+    if not query:
+        return query
+    parts = []
+    for pair in query.split("&"):
+        if "=" not in pair:
+            parts.append(pair)
+            continue
+        key, _, value = pair.partition("=")
+        if key.lower() in _SENSITIVE_QUERY_PARAMS:
+            parts.append(f"{key}=***")
+        else:
+            parts.append(pair)
+    return "&".join(parts)
+
+
+def _redact_url_query_params(text: str) -> str:
+    """Scan text for URLs with query strings and redact sensitive params.
+
+    Catches opaque tokens that don't match vendor prefix regexes, e.g.
+    `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`.
+    """
+    def _sub(m: re.Match) -> str:
+        scheme = m.group(1)
+        authority = m.group(2)
+        path = m.group(3)
+        query = _redact_query_string(m.group(4))
+        fragment = m.group(5) or ""
+        return f"{scheme}://{authority}{path}?{query}{fragment}"
+    return _URL_WITH_QUERY_RE.sub(_sub, text)
+
+
+def _redact_url_userinfo(text: str) -> str:
+    """Strip `user:password@` from HTTP/WS/FTP URLs.
+
+    DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
+    separately by `_DB_CONNSTR_RE`.
+    """
+    return _URL_USERINFO_RE.sub(
+        lambda m: f"{m.group(1)}://{m.group(2)}:***@",
+        text,
+    )
+
+
+def _redact_form_body(text: str) -> str:
+    """Redact sensitive values in a form-urlencoded body.
+
+    Only applies when the entire input looks like a pure form body
+    (k=v&k=v with no newlines, no other text). Single-line non-form
+    text passes through unchanged. This is a conservative pass — the
+    `_redact_url_query_params` function handles embedded query strings.
+    """
+    if not text or "\n" in text or "&" not in text:
+        return text
+    # The body-body form check is strict: only trigger on clean k=v&k=v.
+    if not _FORM_BODY_RE.match(text.strip()):
+        return text
+    return _redact_query_string(text.strip())
+
+
 def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

@@ -173,6 +305,16 @@ def redact_sensitive_text(text: str) -> str:
    # JWT tokens (eyJ... — base64-encoded JSON headers)
    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)

+    # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
+    # DB schemes are handled above by _DB_CONNSTR_RE.
+    text = _redact_url_userinfo(text)
+
+    # URL query params containing opaque tokens (?access_token=…&code=…)
+    text = _redact_url_query_params(text)
+
+    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
+    text = _redact_form_body(text)
+
    # Discord user/role mentions (<@snowflake_id>)
    text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)

@@ -0,0 +1,831 @@
+"""
+Shell-script hooks bridge.
+
+Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
+consent on first use of each ``(event, command)`` pair, and registers
+callbacks on the existing plugin hook manager so every existing
+``invoke_hook()`` site dispatches to the configured shell scripts — with
+zero changes to call sites.
+
+Design notes
+------------
+* Python plugins and shell hooks compose naturally: both flow through
+  :func:`hermes_cli.plugins.invoke_hook` and its aggregators.  Python
+  plugins are registered first (via ``discover_and_load()``) so their
+  block decisions win ties over shell-hook blocks.
+* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
+  with ``shell=False`` — no shell injection footguns.  Users that need
+  pipes/redirection wrap their logic in a script.
+* First-use consent is gated by the allowlist under
+  ``~/.hermes/shell-hooks-allowlist.json``.  Non-TTY callers must pass
+  ``accept_hooks=True`` (resolved from ``--accept-hooks``,
+  ``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
+  for registration to succeed without a prompt.
+* Registration is idempotent — safe to invoke from both the CLI entry
+  point (``hermes_cli/main.py``) and the gateway entry point
+  (``gateway/run.py``).
+
+Wire protocol
+-------------
+**stdin** (JSON, piped to the script)::
+
+    {
+        "hook_event_name": "pre_tool_call",
+        "tool_name":       "terminal",
+        "tool_input":      {"command": "rm -rf /"},
+        "session_id":      "sess_abc123",
+        "cwd":             "/home/user/project",
+        "extra":           {...}   # event-specific kwargs
+    }
+
+**stdout** (JSON, optional — anything else is ignored)::
+
+    # Block a pre_tool_call (either shape accepted; normalised internally):
+    {"decision": "block", "reason":  "Forbidden command"}   # Claude-Code-style
+    {"action":   "block", "message": "Forbidden command"}   # Hermes-canonical
+
+    # Inject context for pre_llm_call:
+    {"context": "Today is Friday"}
+
+    # Silent no-op:
+    <empty or any non-matching JSON object>
+"""
+
+from __future__ import annotations
+
+import difflib
+import json
+import logging
+import os
+import re
+import shlex
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+from contextlib import contextmanager
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
+
+try:
+    import fcntl  # POSIX only; Windows falls back to best-effort without flock.
+except ImportError:  # pragma: no cover
+    fcntl = None  # type: ignore[assignment]
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_TIMEOUT_SECONDS = 60
+MAX_TIMEOUT_SECONDS = 300
+ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
+
+# (event, matcher, command) triples that have been wired to the plugin
+# manager in the current process.  Matcher is part of the key because
+# the same script can legitimately register for different matchers under
+# the same event (e.g. one entry per tool the user wants to gate).
+# Second registration attempts for the exact same triple become no-ops
+# so the CLI and gateway can both call register_from_config() safely.
+_registered: Set[Tuple[str, Optional[str], str]] = set()
+_registered_lock = threading.Lock()
+
+# Intra-process lock for allowlist read-modify-write on platforms that
+# lack ``fcntl`` (non-POSIX).  Kept separate from ``_registered_lock``
+# because ``register_from_config`` already holds ``_registered_lock`` when
+# it triggers ``_record_approval`` — reusing it here would self-deadlock
+# (``threading.Lock`` is non-reentrant).  POSIX callers use the sibling
+# ``.lock`` file via ``fcntl.flock`` and bypass this.
+_allowlist_write_lock = threading.Lock()
+
+
+@dataclass
+class ShellHookSpec:
+    """Parsed and validated representation of a single ``hooks:`` entry."""
+
+    event: str
+    command: str
+    matcher: Optional[str] = None
+    timeout: int = DEFAULT_TIMEOUT_SECONDS
+    compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
+
+    def __post_init__(self) -> None:
+        # Strip whitespace introduced by YAML quirks (e.g. multi-line string
+        # folding) — a matcher of " terminal" would otherwise silently fail
+        # to match "terminal" without any diagnostic.
+        if isinstance(self.matcher, str):
+            stripped = self.matcher.strip()
+            self.matcher = stripped if stripped else None
+        if self.matcher:
+            try:
+                self.compiled_matcher = re.compile(self.matcher)
+            except re.error as exc:
+                logger.warning(
+                    "shell hook matcher %r is invalid (%s) — treating as "
+                    "literal equality", self.matcher, exc,
+                )
+                self.compiled_matcher = None
+
+    def matches_tool(self, tool_name: Optional[str]) -> bool:
+        if not self.matcher:
+            return True
+        if tool_name is None:
+            return False
+        if self.compiled_matcher is not None:
+            return self.compiled_matcher.fullmatch(tool_name) is not None
+        # compiled_matcher is None only when the regex failed to compile,
+        # in which case we already warned and fall back to literal equality.
+        return tool_name == self.matcher
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def register_from_config(
+    cfg: Optional[Dict[str, Any]],
+    *,
+    accept_hooks: bool = False,
+) -> List[ShellHookSpec]:
+    """Register every configured shell hook on the plugin manager.
+
+    ``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
+    output).  The ``hooks:`` key is read out of it.  Missing, empty, or
+    non-dict ``hooks`` is treated as zero configured hooks.
+
+    ``accept_hooks=True`` skips the TTY consent prompt — the caller is
+    promising that the user has opted in via a flag, env var, or config
+    setting.  ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
+    also honored inside this function so either CLI or gateway call sites
+    pick them up.
+
+    Returns the list of :class:`ShellHookSpec` entries that ended up wired
+    up on the plugin manager.  Skipped entries (unknown events, malformed,
+    not allowlisted, already registered) are logged but not returned.
+    """
+    if not isinstance(cfg, dict):
+        return []
+
+    effective_accept = _resolve_effective_accept(cfg, accept_hooks)
+
+    specs = _parse_hooks_block(cfg.get("hooks"))
+    if not specs:
+        return []
+
+    registered: List[ShellHookSpec] = []
+
+    # Import lazily — avoids circular imports at module-load time.
+    from hermes_cli.plugins import get_plugin_manager
+
+    manager = get_plugin_manager()
+
+    # Idempotence + allowlist read happen under the lock; the TTY
+    # prompt runs outside so other threads aren't parked on a blocking
+    # input().  Mutation re-takes the lock with a defensive idempotence
+    # re-check in case two callers ever race through the prompt.
+    for spec in specs:
+        key = (spec.event, spec.matcher, spec.command)
+        with _registered_lock:
+            if key in _registered:
+                continue
+            already_allowlisted = _is_allowlisted(spec.event, spec.command)
+
+        if not already_allowlisted:
+            if not _prompt_and_record(
+                spec.event, spec.command, accept_hooks=effective_accept,
+            ):
+                logger.warning(
+                    "shell hook for %s (%s) not allowlisted — skipped. "
+                    "Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
+                    "hooks_auto_accept: true, or approve at the TTY "
+                    "prompt next run.",
+                    spec.event, spec.command,
+                )
+                continue
+
+        with _registered_lock:
+            if key in _registered:
+                continue
+            manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
+            _registered.add(key)
+            registered.append(spec)
+            logger.info(
+                "shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
+                spec.event, spec.command, spec.matcher, spec.timeout,
+            )
+
+    return registered
+
+
+def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
+    """Return the parsed ``ShellHookSpec`` entries from config without
+    registering anything.  Used by ``hermes hooks list`` and ``doctor``."""
+    if not isinstance(cfg, dict):
+        return []
+    return _parse_hooks_block(cfg.get("hooks"))
+
+
+def reset_for_tests() -> None:
+    """Clear the idempotence set.  Test-only helper."""
+    with _registered_lock:
+        _registered.clear()
+
+
+# ---------------------------------------------------------------------------
+# Config parsing
+# ---------------------------------------------------------------------------
+
+def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
+    """Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
+
+    Malformed entries warn-and-skip — we never raise from config parsing
+    because a broken hook must not crash the agent.
+    """
+    from hermes_cli.plugins import VALID_HOOKS
+
+    if not isinstance(hooks_cfg, dict):
+        return []
+
+    specs: List[ShellHookSpec] = []
+
+    for event_name, entries in hooks_cfg.items():
+        if event_name not in VALID_HOOKS:
+            suggestion = difflib.get_close_matches(
+                str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
+            )
+            if suggestion:
+                logger.warning(
+                    "unknown hook event %r in hooks: config — did you mean %r?",
+                    event_name, suggestion[0],
+                )
+            else:
+                logger.warning(
+                    "unknown hook event %r in hooks: config (valid: %s)",
+                    event_name, ", ".join(sorted(VALID_HOOKS)),
+                )
+            continue
+
+        if entries is None:
+            continue
+
+        if not isinstance(entries, list):
+            logger.warning(
+                "hooks.%s must be a list of hook definitions; got %s",
+                event_name, type(entries).__name__,
+            )
+            continue
+
+        for i, raw in enumerate(entries):
+            spec = _parse_single_entry(event_name, i, raw)
+            if spec is not None:
+                specs.append(spec)
+
+    return specs
+
+
+def _parse_single_entry(
+    event: str, index: int, raw: Any,
+) -> Optional[ShellHookSpec]:
+    if not isinstance(raw, dict):
+        logger.warning(
+            "hooks.%s[%d] must be a mapping with a 'command' key; got %s",
+            event, index, type(raw).__name__,
+        )
+        return None
+
+    command = raw.get("command")
+    if not isinstance(command, str) or not command.strip():
+        logger.warning(
+            "hooks.%s[%d] is missing a non-empty 'command' field",
+            event, index,
+        )
+        return None
+
+    matcher = raw.get("matcher")
+    if matcher is not None and not isinstance(matcher, str):
+        logger.warning(
+            "hooks.%s[%d].matcher must be a string regex; ignoring",
+            event, index,
+        )
+        matcher = None
+
+    if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
+        logger.warning(
+            "hooks.%s[%d].matcher=%r will be ignored at runtime — the "
+            "matcher field is only honored for pre_tool_call / "
+            "post_tool_call.  The hook will fire on every %s event.",
+            event, index, matcher, event,
+        )
+        matcher = None
+
+    timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
+    try:
+        timeout = int(timeout_raw)
+    except (TypeError, ValueError):
+        logger.warning(
+            "hooks.%s[%d].timeout must be an int (got %r); using default %ds",
+            event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
+        )
+        timeout = DEFAULT_TIMEOUT_SECONDS
+
+    if timeout < 1:
+        logger.warning(
+            "hooks.%s[%d].timeout must be >=1; using default %ds",
+            event, index, DEFAULT_TIMEOUT_SECONDS,
+        )
+        timeout = DEFAULT_TIMEOUT_SECONDS
+
+    if timeout > MAX_TIMEOUT_SECONDS:
+        logger.warning(
+            "hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
+            event, index, timeout, MAX_TIMEOUT_SECONDS,
+        )
+        timeout = MAX_TIMEOUT_SECONDS
+
+    return ShellHookSpec(
+        event=event,
+        command=command.strip(),
+        matcher=matcher,
+        timeout=timeout,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Subprocess callback
+# ---------------------------------------------------------------------------
+
+_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
+
+
+def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
+    """Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
+
+    Returns a diagnostic dict with the same keys for every outcome
+    (``returncode``, ``stdout``, ``stderr``, ``timed_out``,
+    ``elapsed_seconds``, ``error``).  This is the single place the
+    subprocess is actually invoked — both the live callback path
+    (:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
+    go through it.
+    """
+    result: Dict[str, Any] = {
+        "returncode": None,
+        "stdout": "",
+        "stderr": "",
+        "timed_out": False,
+        "elapsed_seconds": 0.0,
+        "error": None,
+    }
+    try:
+        argv = shlex.split(os.path.expanduser(spec.command))
+    except ValueError as exc:
+        result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
+        return result
+    if not argv:
+        result["error"] = "empty command"
+        return result
+
+    t0 = time.monotonic()
+    try:
+        proc = subprocess.run(
+            argv,
+            input=stdin_json,
+            capture_output=True,
+            timeout=spec.timeout,
+            text=True,
+            shell=False,
+        )
+    except subprocess.TimeoutExpired:
+        result["timed_out"] = True
+        result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
+        return result
+    except FileNotFoundError:
+        result["error"] = "command not found"
+        return result
+    except PermissionError:
+        result["error"] = "command not executable"
+        return result
+    except Exception as exc:  # pragma: no cover — defensive
+        result["error"] = str(exc)
+        return result
+
+    result["returncode"] = proc.returncode
+    result["stdout"] = proc.stdout or ""
+    result["stderr"] = proc.stderr or ""
+    result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
+    return result
+
+
+def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
+    """Build the closure that ``invoke_hook()`` will call per firing."""
+
+    def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
+        # Matcher gate — only meaningful for tool-scoped events.
+        if spec.event in ("pre_tool_call", "post_tool_call"):
+            if not spec.matches_tool(kwargs.get("tool_name")):
+                return None
+
+        r = _spawn(spec, _serialize_payload(spec.event, kwargs))
+
+        if r["error"]:
+            logger.warning(
+                "shell hook failed (event=%s command=%s): %s",
+                spec.event, spec.command, r["error"],
+            )
+            return None
+        if r["timed_out"]:
+            logger.warning(
+                "shell hook timed out after %.2fs (event=%s command=%s)",
+                r["elapsed_seconds"], spec.event, spec.command,
+            )
+            return None
+
+        stderr = r["stderr"].strip()
+        if stderr:
+            logger.debug(
+                "shell hook stderr (event=%s command=%s): %s",
+                spec.event, spec.command, stderr[:400],
+            )
+        # Non-zero exits: log but still parse stdout so scripts that
+        # signal failure via exit code can also return a block directive.
+        if r["returncode"] != 0:
+            logger.warning(
+                "shell hook exited %d (event=%s command=%s); stderr=%s",
+                r["returncode"], spec.event, spec.command, stderr[:400],
+            )
+        return _parse_response(spec.event, r["stdout"])
+
+    _callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
+    _callback.__qualname__ = _callback.__name__
+    return _callback
+
+
+def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
+    """Render the stdin JSON payload.  Unserialisable values are
+    stringified via ``default=str`` rather than dropped."""
+    extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
+    try:
+        cwd = str(Path.cwd())
+    except OSError:
+        cwd = ""
+    payload = {
+        "hook_event_name": event,
+        "tool_name": kwargs.get("tool_name"),
+        "tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
+        "session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
+        "cwd": cwd,
+        "extra": extras,
+    }
+    return json.dumps(payload, ensure_ascii=False, default=str)
+
+
+def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
+    """Translate stdout JSON into a Hermes wire-shape dict.
+
+    For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
+    "reason": "..."}`` payload is translated into the canonical Hermes
+    ``{"action": "block", "message": "..."}`` shape expected by
+    :func:`hermes_cli.plugins.get_pre_tool_call_block_message`.  This is
+    the single most important correctness invariant in this module —
+    skipping the translation silently breaks every ``pre_tool_call``
+    block directive.
+
+    For ``pre_llm_call``, ``{"context": "..."}`` is passed through
+    unchanged to match the existing plugin-hook contract.
+
+    Anything else returns ``None``.
+    """
+    stdout = (stdout or "").strip()
+    if not stdout:
+        return None
+
+    try:
+        data = json.loads(stdout)
+    except json.JSONDecodeError:
+        logger.warning(
+            "shell hook stdout was not valid JSON (event=%s): %s",
+            event, stdout[:200],
+        )
+        return None
+
+    if not isinstance(data, dict):
+        return None
+
+    if event == "pre_tool_call":
+        if data.get("action") == "block":
+            message = data.get("message") or data.get("reason") or ""
+            if isinstance(message, str) and message:
+                return {"action": "block", "message": message}
+        if data.get("decision") == "block":
+            message = data.get("reason") or data.get("message") or ""
+            if isinstance(message, str) and message:
+                return {"action": "block", "message": message}
+        return None
+
+    context = data.get("context")
+    if isinstance(context, str) and context.strip():
+        return {"context": context}
+
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Allowlist / consent
+# ---------------------------------------------------------------------------
+
+def allowlist_path() -> Path:
+    """Path to the per-user shell-hook allowlist file."""
+    return get_hermes_home() / ALLOWLIST_FILENAME
+
+
+def load_allowlist() -> Dict[str, Any]:
+    """Return the parsed allowlist, or an empty skeleton if absent."""
+    try:
+        raw = json.loads(allowlist_path().read_text())
+    except (FileNotFoundError, json.JSONDecodeError, OSError):
+        return {"approvals": []}
+    if not isinstance(raw, dict):
+        return {"approvals": []}
+    approvals = raw.get("approvals")
+    if not isinstance(approvals, list):
+        raw["approvals"] = []
+    return raw
+
+
+def save_allowlist(data: Dict[str, Any]) -> None:
+    """Atomically persist the allowlist via per-process ``mkstemp`` +
+    ``os.replace``.  Cross-process read-modify-write races are handled
+    by :func:`_locked_update_approvals` (``fcntl.flock``).  On OSError
+    the failure is logged; the in-process hook still registers but
+    the approval won't survive across runs."""
+    p = allowlist_path()
+    try:
+        p.parent.mkdir(parents=True, exist_ok=True)
+        fd, tmp_path = tempfile.mkstemp(
+            prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
+        )
+        try:
+            with os.fdopen(fd, "w") as fh:
+                fh.write(json.dumps(data, indent=2, sort_keys=True))
+            os.replace(tmp_path, p)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
+    except OSError as exc:
+        logger.warning(
+            "Failed to persist shell hook allowlist to %s: %s. "
+            "The approval is in-memory for this run, but the next "
+            "startup will re-prompt (or skip registration on non-TTY "
+            "runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
+            p, exc,
+        )
+
+
+def _is_allowlisted(event: str, command: str) -> bool:
+    data = load_allowlist()
+    return any(
+        isinstance(e, dict)
+        and e.get("event") == event
+        and e.get("command") == command
+        for e in data.get("approvals", [])
+    )
+
+
+@contextmanager
+def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
+    """Serialise read-modify-write on the allowlist across processes.
+
+    Holds an exclusive ``flock`` on a sibling lock file for the duration
+    of the update so concurrent ``_record_approval``/``revoke`` callers
+    cannot clobber each other's changes (the race Codex reproduced with
+    20–50 simultaneous writers).  Falls back to an in-process lock on
+    platforms without ``fcntl``.
+    """
+    p = allowlist_path()
+    p.parent.mkdir(parents=True, exist_ok=True)
+    lock_path = p.with_suffix(p.suffix + ".lock")
+
+    if fcntl is None:  # pragma: no cover — non-POSIX fallback
+        with _allowlist_write_lock:
+            data = load_allowlist()
+            yield data
+            save_allowlist(data)
+        return
+
+    with open(lock_path, "a+") as lock_fh:
+        fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
+        try:
+            data = load_allowlist()
+            yield data
+            save_allowlist(data)
+        finally:
+            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
+
+
+def _prompt_and_record(
+    event: str, command: str, *, accept_hooks: bool,
+) -> bool:
+    """Decide whether to approve an unseen ``(event, command)`` pair.
+    Returns ``True`` iff the approval was granted and recorded.
+    """
+    if accept_hooks:
+        _record_approval(event, command)
+        logger.info(
+            "shell hook auto-approved via --accept-hooks / env / config: "
+            "%s -> %s", event, command,
+        )
+        return True
+
+    if not sys.stdin.isatty():
+        return False
+
+    print(
+        f"\n⚠ Hermes is about to register a shell hook that will run a\n"
+        f"  command on your behalf.\n\n"
+        f"    Event:   {event}\n"
+        f"    Command: {command}\n\n"
+        f"  Commands run with your full user credentials.  Only approve\n"
+        f"  commands you trust."
+    )
+    try:
+        answer = input("Allow this hook to run? [y/N]: ").strip().lower()
+    except (EOFError, KeyboardInterrupt):
+        print()  # keep the terminal tidy after ^C
+        return False
+
+    if answer in ("y", "yes"):
+        _record_approval(event, command)
+        return True
+
+    return False
+
+
+def _record_approval(event: str, command: str) -> None:
+    entry = {
+        "event": event,
+        "command": command,
+        "approved_at": _utc_now_iso(),
+        "script_mtime_at_approval": script_mtime_iso(command),
+    }
+    with _locked_update_approvals() as data:
+        data["approvals"] = [
+            e for e in data.get("approvals", [])
+            if not (
+                isinstance(e, dict)
+                and e.get("event") == event
+                and e.get("command") == command
+            )
+        ] + [entry]
+
+
+def _utc_now_iso() -> str:
+    return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
+
+
+def revoke(command: str) -> int:
+    """Remove every allowlist entry matching ``command``.
+
+    Returns the number of entries removed.  Does not unregister any
+    callbacks that are already live on the plugin manager in the current
+    process — restart the CLI / gateway to drop them.
+    """
+    with _locked_update_approvals() as data:
+        before = len(data.get("approvals", []))
+        data["approvals"] = [
+            e for e in data.get("approvals", [])
+            if not (isinstance(e, dict) and e.get("command") == command)
+        ]
+        after = len(data["approvals"])
+    return before - after
+
+
+_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
+    ".sh", ".bash", ".zsh", ".fish",
+    ".py", ".pyw",
+    ".rb", ".pl", ".lua",
+    ".js", ".mjs", ".cjs", ".ts",
+)
+
+
+def _command_script_path(command: str) -> str:
+    """Return the script path from ``command`` for doctor / drift checks.
+
+    Prefers a token ending in a known script extension, then a token
+    containing ``/`` or leading ``~``, then the first token.  Handles
+    ``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
+    common bare-path form.
+    """
+    try:
+        parts = shlex.split(command)
+    except ValueError:
+        return command
+    if not parts:
+        return command
+    for part in parts:
+        if part.lower().endswith(_SCRIPT_EXTENSIONS):
+            return part
+    for part in parts:
+        if "/" in part or part.startswith("~"):
+            return part
+    return parts[0]
+
+
+# ---------------------------------------------------------------------------
+# Helpers for accept-hooks resolution
+# ---------------------------------------------------------------------------
+
+def _resolve_effective_accept(
+    cfg: Dict[str, Any], accept_hooks_arg: bool,
+) -> bool:
+    """Combine all three opt-in channels into a single boolean.
+
+    Precedence (any truthy source flips us on):
+      1. ``--accept-hooks`` flag (CLI) / explicit argument
+      2. ``HERMES_ACCEPT_HOOKS`` env var
+      3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
+    """
+    if accept_hooks_arg:
+        return True
+    env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
+    if env in ("1", "true", "yes", "on"):
+        return True
+    cfg_val = cfg.get("hooks_auto_accept", False)
+    return bool(cfg_val)
+
+
+# ---------------------------------------------------------------------------
+# Introspection (used by `hermes hooks` CLI)
+# ---------------------------------------------------------------------------
+
+def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
+    """Return the allowlist record for this pair, if any."""
+    for e in load_allowlist().get("approvals", []):
+        if (
+            isinstance(e, dict)
+            and e.get("event") == event
+            and e.get("command") == command
+        ):
+            return e
+    return None
+
+
+def script_mtime_iso(command: str) -> Optional[str]:
+    """ISO-8601 mtime of the resolved script path, or ``None`` if the
+    script is missing."""
+    path = _command_script_path(command)
+    if not path:
+        return None
+    try:
+        expanded = os.path.expanduser(path)
+        return datetime.fromtimestamp(
+            os.path.getmtime(expanded), tz=timezone.utc,
+        ).isoformat().replace("+00:00", "Z")
+    except OSError:
+        return None
+
+
+def script_is_executable(command: str) -> bool:
+    """Return ``True`` iff ``command`` is runnable as configured.
+
+    For a bare invocation (``/path/hook.sh``) the script itself must be
+    executable.  For interpreter-prefixed commands (``python3
+    /path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
+    to be readable — the interpreter doesn't care about the ``X_OK``
+    bit.  Mirrors what ``_spawn`` would actually do at runtime."""
+    path = _command_script_path(command)
+    if not path:
+        return False
+    expanded = os.path.expanduser(path)
+    if not os.path.isfile(expanded):
+        return False
+    try:
+        argv = shlex.split(command)
+    except ValueError:
+        return False
+    is_bare_invocation = bool(argv) and argv[0] == path
+    required = os.X_OK if is_bare_invocation else os.R_OK
+    return os.access(expanded, required)
+
+
+def run_once(
+    spec: ShellHookSpec, kwargs: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Fire a single shell-hook invocation with a synthetic payload.
+    Used by ``hermes hooks test`` and ``hermes hooks doctor``.
+
+    ``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
+    would pass at runtime.  It is routed through :func:`_serialize_payload`
+    so the synthetic stdin exactly matches what a real hook firing would
+    produce — otherwise scripts tested via ``hermes hooks test`` could
+    diverge silently from production behaviour.
+
+    Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
+    holding the canonical Hermes-wire-shape response."""
+    stdin_json = _serialize_payload(spec.event, kwargs)
+    result = _spawn(spec, stdin_json)
+    result["parsed"] = _parse_response(spec.event, result["stdout"])
+    return result
@@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
 import json
 import logging
 import re
+import subprocess
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

+# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
+# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
+# left as-is so the user can debug them.
+_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
+
+# Matches inline shell snippets like:  !`date +%Y-%m-%d`
+# Non-greedy, single-line only — no newlines inside the backticks.
+_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
+
+# Cap inline-shell output so a runaway command can't blow out the context.
+_INLINE_SHELL_MAX_OUTPUT = 4000
+
+
+def _load_skills_config() -> dict:
+    """Load the ``skills`` section of config.yaml (best-effort)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config() or {}
+        skills_cfg = cfg.get("skills")
+        if isinstance(skills_cfg, dict):
+            return skills_cfg
+    except Exception:
+        logger.debug("Could not read skills config", exc_info=True)
+    return {}
+
+
+def _substitute_template_vars(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None,
+) -> str:
+    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
+
+    Only substitutes tokens for which a concrete value is available —
+    unresolved tokens are left in place so the author can spot them.
+    """
+    if not content:
+        return content
+
+    skill_dir_str = str(skill_dir) if skill_dir else None
+
+    def _replace(match: re.Match) -> str:
+        token = match.group(1)
+        if token == "HERMES_SKILL_DIR" and skill_dir_str:
+            return skill_dir_str
+        if token == "HERMES_SESSION_ID" and session_id:
+            return str(session_id)
+        return match.group(0)
+
+    return _SKILL_TEMPLATE_RE.sub(_replace, content)
+
+
+def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
+    """Execute a single inline-shell snippet and return its stdout (trimmed).
+
+    Failures return a short ``[inline-shell error: ...]`` marker instead of
+    raising, so one bad snippet can't wreck the whole skill message.
+    """
+    try:
+        completed = subprocess.run(
+            ["bash", "-c", command],
+            cwd=str(cwd) if cwd else None,
+            capture_output=True,
+            text=True,
+            timeout=max(1, int(timeout)),
+            check=False,
+        )
+    except subprocess.TimeoutExpired:
+        return f"[inline-shell timeout after {timeout}s: {command}]"
+    except FileNotFoundError:
+        return f"[inline-shell error: bash not found]"
+    except Exception as exc:
+        return f"[inline-shell error: {exc}]"
+
+    output = (completed.stdout or "").rstrip("\n")
+    if not output and completed.stderr:
+        output = completed.stderr.rstrip("\n")
+    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
+        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
+    return output
+
+
+def _expand_inline_shell(
+    content: str,
+    skill_dir: Path | None,
+    timeout: int,
+) -> str:
+    """Replace every !`cmd` snippet in ``content`` with its stdout.
+
+    Runs each snippet with the skill directory as CWD so relative paths in
+    the snippet work the way the author expects.
+    """
+    if "!`" not in content:
+        return content
+
+    def _replace(match: re.Match) -> str:
+        cmd = match.group(1).strip()
+        if not cmd:
+            return ""
+        return _run_inline_shell(cmd, skill_dir, timeout)
+
+    return _INLINE_SHELL_RE.sub(_replace, content)
+

 def build_plan_path(
    user_instruction: str = "",
@@ -133,14 +238,36 @@ def _build_skill_message(
    activation_note: str,
    user_instruction: str = "",
    runtime_note: str = "",
+    session_id: str | None = None,
 ) -> str:
    """Format a loaded skill into a user/system message payload."""
    from tools.skills_tool import SKILLS_DIR

    content = str(loaded_skill.get("content") or "")

+    # ── Template substitution and inline-shell expansion ──
+    # Done before anything else so downstream blocks (setup notes,
+    # supporting-file hints) see the expanded content.
+    skills_cfg = _load_skills_config()
+    if skills_cfg.get("template_vars", True):
+        content = _substitute_template_vars(content, skill_dir, session_id)
+    if skills_cfg.get("inline_shell", False):
+        timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
+        content = _expand_inline_shell(content, skill_dir, timeout)
+
    parts = [activation_note, "", content.strip()]

+    # ── Inject the absolute skill directory so the agent can reference
+    #    bundled scripts without an extra skill_view() round-trip. ──
+    if skill_dir:
+        parts.append("")
+        parts.append(f"[Skill directory: {skill_dir}]")
+        parts.append(
+            "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
+            "`templates/config.yaml`) against that directory, then run them "
+            "with the terminal tool using the absolute path."
+        )
+
    # ── Inject resolved skill config values ──
    _inject_skill_config(loaded_skill, parts)

@@ -188,11 +315,13 @@ def _build_skill_message(
            # Skill is from an external dir — use the skill name instead
            skill_view_target = skill_dir.name
        parts.append("")
-        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
+        parts.append("[This skill has supporting files:]")
        for sf in supporting:
-            parts.append(f"- {sf}")
+            parts.append(f"- {sf}  ->  {skill_dir / sf}")
        parts.append(
-            f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
+            f'\nLoad any of these with skill_view(name="{skill_view_target}", '
+            f'file_path="<path>"), or run scripts directly by absolute path '
+            f"(e.g. `node {skill_dir}/scripts/foo.js`)."
        )

    if user_instruction:
@@ -216,7 +345,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    _skill_commands = {}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
-        from agent.skill_utils import get_external_skills_dirs
+        from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
        disabled = _get_disabled_skill_names()
        seen_names: set = set()

@@ -227,7 +356,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
        dirs_to_scan.extend(get_external_skills_dirs())

        for scan_dir in dirs_to_scan:
-            for skill_md in scan_dir.rglob("SKILL.md"):
+            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                    continue
                try:
@@ -332,6 +461,7 @@ def build_skill_invocation_message(
        activation_note,
        user_instruction=user_instruction,
        runtime_note=runtime_note,
+        session_id=task_id,
    )


@@ -370,6 +500,7 @@ def build_preloaded_skills_prompt(
                loaded_skill,
                skill_dir,
                activation_note,
+                session_id=task_id,
            )
        )
        loaded_names.append(skill_name)
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
    Excludes ``.git``, ``.github``, ``.hub`` directories.
    """
    matches = []
-    for root, dirs, files in os.walk(skills_dir):
+    for root, dirs, files in os.walk(skills_dir, followlinks=True):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
@@ -1,195 +0,0 @@
-"""Helpers for optional cheap-vs-strong model routing."""
-
-from __future__ import annotations
-
-import os
-import re
-from typing import Any, Dict, Optional
-
-from utils import is_truthy_value
-
-_COMPLEX_KEYWORDS = {
-    "debug",
-    "debugging",
-    "implement",
-    "implementation",
-    "refactor",
-    "patch",
-    "traceback",
-    "stacktrace",
-    "exception",
-    "error",
-    "analyze",
-    "analysis",
-    "investigate",
-    "architecture",
-    "design",
-    "compare",
-    "benchmark",
-    "optimize",
-    "optimise",
-    "review",
-    "terminal",
-    "shell",
-    "tool",
-    "tools",
-    "pytest",
-    "test",
-    "tests",
-    "plan",
-    "planning",
-    "delegate",
-    "subagent",
-    "cron",
-    "docker",
-    "kubernetes",
-}
-
-_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
-
-
-def _coerce_bool(value: Any, default: bool = False) -> bool:
-    return is_truthy_value(value, default=default)
-
-
-def _coerce_int(value: Any, default: int) -> int:
-    try:
-        return int(value)
-    except (TypeError, ValueError):
-        return default
-
-
-def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
-    """Return the configured cheap-model route when a message looks simple.
-
-    Conservative by design: if the message has signs of code/tool/debugging/
-    long-form work, keep the primary model.
-    """
-    cfg = routing_config or {}
-    if not _coerce_bool(cfg.get("enabled"), False):
-        return None
-
-    cheap_model = cfg.get("cheap_model") or {}
-    if not isinstance(cheap_model, dict):
-        return None
-    provider = str(cheap_model.get("provider") or "").strip().lower()
-    model = str(cheap_model.get("model") or "").strip()
-    if not provider or not model:
-        return None
-
-    text = (user_message or "").strip()
-    if not text:
-        return None
-
-    max_chars = _coerce_int(cfg.get("max_simple_chars"), 160)
-    max_words = _coerce_int(cfg.get("max_simple_words"), 28)
-
-    if len(text) > max_chars:
-        return None
-    if len(text.split()) > max_words:
-        return None
-    if text.count("\n") > 1:
-        return None
-    if "```" in text or "`" in text:
-        return None
-    if _URL_RE.search(text):
-        return None
-
-    lowered = text.lower()
-    words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()}
-    if words & _COMPLEX_KEYWORDS:
-        return None
-
-    route = dict(cheap_model)
-    route["provider"] = provider
-    route["model"] = model
-    route["routing_reason"] = "simple_turn"
-    return route
-
-
-def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
-    """Resolve the effective model/runtime for one turn.
-
-    Returns a dict with model/runtime/signature/label fields.
-    """
-    route = choose_cheap_model_route(user_message, routing_config)
-    if not route:
-        return {
-            "model": primary.get("model"),
-            "runtime": {
-                "api_key": primary.get("api_key"),
-                "base_url": primary.get("base_url"),
-                "provider": primary.get("provider"),
-                "api_mode": primary.get("api_mode"),
-                "command": primary.get("command"),
-                "args": list(primary.get("args") or []),
-                "credential_pool": primary.get("credential_pool"),
-            },
-            "label": None,
-            "signature": (
-                primary.get("model"),
-                primary.get("provider"),
-                primary.get("base_url"),
-                primary.get("api_mode"),
-                primary.get("command"),
-                tuple(primary.get("args") or ()),
-            ),
-        }
-
-    from hermes_cli.runtime_provider import resolve_runtime_provider
-
-    explicit_api_key = None
-    api_key_env = str(route.get("api_key_env") or "").strip()
-    if api_key_env:
-        explicit_api_key = os.getenv(api_key_env) or None
-
-    try:
-        runtime = resolve_runtime_provider(
-            requested=route.get("provider"),
-            explicit_api_key=explicit_api_key,
-            explicit_base_url=route.get("base_url"),
-        )
-    except Exception:
-        return {
-            "model": primary.get("model"),
-            "runtime": {
-                "api_key": primary.get("api_key"),
-                "base_url": primary.get("base_url"),
-                "provider": primary.get("provider"),
-                "api_mode": primary.get("api_mode"),
-                "command": primary.get("command"),
-                "args": list(primary.get("args") or []),
-                "credential_pool": primary.get("credential_pool"),
-            },
-            "label": None,
-            "signature": (
-                primary.get("model"),
-                primary.get("provider"),
-                primary.get("base_url"),
-                primary.get("api_mode"),
-                primary.get("command"),
-                tuple(primary.get("args") or ()),
-            ),
-        }
-
-    return {
-        "model": route.get("model"),
-        "runtime": {
-            "api_key": runtime.get("api_key"),
-            "base_url": runtime.get("base_url"),
-            "provider": runtime.get("provider"),
-            "api_mode": runtime.get("api_mode"),
-            "command": runtime.get("command"),
-            "args": list(runtime.get("args") or []),
-            "credential_pool": runtime.get("credential_pool"),
-        },
-        "label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
-        "signature": (
-            route.get("model"),
-            runtime.get("provider"),
-            runtime.get("base_url"),
-            runtime.get("api_mode"),
-            runtime.get("command"),
-            tuple(runtime.get("args") or ()),
-        ),
-    }
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
        response = call_llm(
            task="title_generation",
            messages=messages,
-            max_tokens=30,
+            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
        )
@@ -0,0 +1,51 @@
+"""Transport layer types and registry for provider response normalization.
+
+Usage:
+    from agent.transports import get_transport
+    transport = get_transport("anthropic_messages")
+    result = transport.normalize_response(raw_response)
+"""
+
+from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401
+
+_REGISTRY: dict = {}
+
+
+def register_transport(api_mode: str, transport_cls: type) -> None:
+    """Register a transport class for an api_mode string."""
+    _REGISTRY[api_mode] = transport_cls
+
+
+def get_transport(api_mode: str):
+    """Get a transport instance for the given api_mode.
+
+    Returns None if no transport is registered for this api_mode.
+    This allows gradual migration — call sites can check for None
+    and fall back to the legacy code path.
+    """
+    if not _REGISTRY:
+        _discover_transports()
+    cls = _REGISTRY.get(api_mode)
+    if cls is None:
+        return None
+    return cls()
+
+
+def _discover_transports() -> None:
+    """Import all transport modules to trigger auto-registration."""
+    try:
+        import agent.transports.anthropic  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.codex  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.chat_completions  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.bedrock  # noqa: F401
+    except ImportError:
+        pass
@@ -0,0 +1,177 @@
+"""Anthropic Messages API transport.
+
+Delegates to the existing adapter functions in agent/anthropic_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse
+
+
+class AnthropicTransport(ProviderTransport):
+    """Transport for api_mode='anthropic_messages'.
+
+    Wraps the existing functions in anthropic_adapter.py behind the
+    ProviderTransport ABC.  Each method delegates — no logic is duplicated.
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "anthropic_messages"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI messages to Anthropic (system, messages) tuple.
+
+        kwargs:
+            base_url: Optional[str] — affects thinking signature handling.
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        base_url = kwargs.get("base_url")
+        return convert_messages_to_anthropic(messages, base_url=base_url)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Anthropic input_schema format."""
+        from agent.anthropic_adapter import convert_tools_to_anthropic
+
+        return convert_tools_to_anthropic(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Anthropic messages.create() kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params (all optional):
+            max_tokens: int
+            reasoning_config: dict | None
+            tool_choice: str | None
+            is_oauth: bool
+            preserve_dots: bool
+            context_length: int | None
+            base_url: str | None
+            fast_mode: bool
+        """
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        return build_anthropic_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=params.get("max_tokens", 16384),
+            reasoning_config=params.get("reasoning_config"),
+            tool_choice=params.get("tool_choice"),
+            is_oauth=params.get("is_oauth", False),
+            preserve_dots=params.get("preserve_dots", False),
+            context_length=params.get("context_length"),
+            base_url=params.get("base_url"),
+            fast_mode=params.get("fast_mode", False),
+        )
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Anthropic response to NormalizedResponse.
+
+        Parses content blocks (text, thinking, tool_use), maps stop_reason
+        to OpenAI finish_reason, and collects reasoning_details in provider_data.
+        """
+        import json
+        from agent.anthropic_adapter import _to_plain_data
+        from agent.transports.types import ToolCall
+
+        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
+        _MCP_PREFIX = "mcp_"
+
+        text_parts = []
+        reasoning_parts = []
+        reasoning_details = []
+        tool_calls = []
+
+        for block in response.content:
+            if block.type == "text":
+                text_parts.append(block.text)
+            elif block.type == "thinking":
+                reasoning_parts.append(block.thinking)
+                block_dict = _to_plain_data(block)
+                if isinstance(block_dict, dict):
+                    reasoning_details.append(block_dict)
+            elif block.type == "tool_use":
+                name = block.name
+                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
+                    name = name[len(_MCP_PREFIX):]
+                tool_calls.append(
+                    ToolCall(
+                        id=block.id,
+                        name=name,
+                        arguments=json.dumps(block.input),
+                    )
+                )
+
+        finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
+
+        provider_data = {}
+        if reasoning_details:
+            provider_data["reasoning_details"] = reasoning_details
+
+        return NormalizedResponse(
+            content="\n".join(text_parts) if text_parts else None,
+            tool_calls=tool_calls or None,
+            finish_reason=finish_reason,
+            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
+            usage=None,
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Anthropic response structure is valid.
+
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
+        """
+        if response is None:
+            return False
+        content_blocks = getattr(response, "content", None)
+        if not isinstance(content_blocks, list):
+            return False
+        if not content_blocks:
+            return getattr(response, "stop_reason", None) == "end_turn"
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Extract Anthropic cache_read and cache_creation token counts."""
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        cached = getattr(usage, "cache_read_input_tokens", 0) or 0
+        written = getattr(usage, "cache_creation_input_tokens", 0) or 0
+        if cached or written:
+            return {"cached_tokens": cached, "creation_tokens": written}
+        return None
+
+    # Promote the adapter's canonical mapping to module level so it's shared
+    _STOP_REASON_MAP = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Anthropic stop_reason to OpenAI finish_reason."""
+        return self._STOP_REASON_MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("anthropic_messages", AnthropicTransport)
@@ -0,0 +1,89 @@
+"""Abstract base for provider transports.
+
+A transport owns the data path for one api_mode:
+  convert_messages → convert_tools → build_kwargs → normalize_response
+
+It does NOT own: client construction, streaming, credential refresh,
+prompt caching, interrupt handling, or retry logic.  Those stay on AIAgent.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+from agent.transports.types import NormalizedResponse
+
+
+class ProviderTransport(ABC):
+    """Base class for provider-specific format conversion and normalization."""
+
+    @property
+    @abstractmethod
+    def api_mode(self) -> str:
+        """The api_mode string this transport handles (e.g. 'anthropic_messages')."""
+        ...
+
+    @abstractmethod
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI-format messages to provider-native format.
+
+        Returns provider-specific structure (e.g. (system, messages) for Anthropic,
+        or the messages list unchanged for chat_completions).
+        """
+        ...
+
+    @abstractmethod
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI-format tool definitions to provider-native format.
+
+        Returns provider-specific tool list (e.g. Anthropic input_schema format).
+        """
+        ...
+
+    @abstractmethod
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build the complete API call kwargs dict.
+
+        This is the primary entry point — it typically calls convert_messages()
+        and convert_tools() internally, then adds model-specific config.
+
+        Returns a dict ready to be passed to the provider's SDK client.
+        """
+        ...
+
+    @abstractmethod
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize a raw provider response to the shared NormalizedResponse type.
+
+        This is the only method that returns a transport-layer type.
+        """
+        ...
+
+    def validate_response(self, response: Any) -> bool:
+        """Optional: check if the raw response is structurally valid.
+
+        Returns True if valid, False if the response should be treated as invalid.
+        Default implementation always returns True.
+        """
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Optional: extract provider-specific cache hit/creation stats.
+
+        Returns dict with 'cached_tokens' and 'creation_tokens', or None.
+        Default returns None.
+        """
+        return None
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Optional: map provider-specific stop reason to OpenAI equivalent.
+
+        Default returns the raw reason unchanged.  Override for providers
+        with different stop reason vocabularies.
+        """
+        return raw_reason
@@ -0,0 +1,154 @@
+"""AWS Bedrock Converse API transport.
+
+Delegates to the existing adapter functions in agent/bedrock_adapter.py.
+Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
+owns format conversion and normalization, while client construction and
+boto3 calls stay on AIAgent.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class BedrockTransport(ProviderTransport):
+    """Transport for api_mode='bedrock_converse'."""
+
+    @property
+    def api_mode(self) -> str:
+        return "bedrock_converse"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI messages to Bedrock Converse format."""
+        from agent.bedrock_adapter import convert_messages_to_converse
+        return convert_messages_to_converse(messages)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
+        from agent.bedrock_adapter import convert_tools_to_converse
+        return convert_tools_to_converse(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Bedrock converse() kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params:
+            max_tokens: int — output token limit (default 4096)
+            temperature: float | None
+            guardrail_config: dict | None — Bedrock guardrails
+            region: str — AWS region (default 'us-east-1')
+        """
+        from agent.bedrock_adapter import build_converse_kwargs
+
+        region = params.get("region", "us-east-1")
+        guardrail = params.get("guardrail_config")
+
+        kwargs = build_converse_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=params.get("max_tokens", 4096),
+            temperature=params.get("temperature"),
+            guardrail_config=guardrail,
+        )
+        # Sentinel keys for dispatch — agent pops these before the boto3 call
+        kwargs["__bedrock_converse__"] = True
+        kwargs["__bedrock_region__"] = region
+        return kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Bedrock response to NormalizedResponse.
+
+        Handles two shapes:
+        1. Raw boto3 dict (from direct converse() calls)
+        2. Already-normalized SimpleNamespace with .choices (from dispatch site)
+        """
+        from agent.bedrock_adapter import normalize_converse_response
+
+        # Normalize to OpenAI-compatible SimpleNamespace
+        if hasattr(response, "choices") and response.choices:
+            # Already normalized at dispatch site
+            ns = response
+        else:
+            # Raw boto3 dict
+            ns = normalize_converse_response(response)
+
+        choice = ns.choices[0]
+        msg = choice.message
+        finish_reason = choice.finish_reason or "stop"
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = [
+                ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                )
+                for tc in msg.tool_calls
+            ]
+
+        usage = None
+        if hasattr(ns, "usage") and ns.usage:
+            u = ns.usage
+            usage = Usage(
+                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
+                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
+                total_tokens=getattr(u, "total_tokens", 0) or 0,
+            )
+
+        reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
+
+        return NormalizedResponse(
+            content=msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=reasoning,
+            usage=usage,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Bedrock response structure.
+
+        After normalize_converse_response, the response has OpenAI-compatible
+        .choices — same check as chat_completions.
+        """
+        if response is None:
+            return False
+        # Raw Bedrock dict response — check for 'output' key
+        if isinstance(response, dict):
+            return "output" in response
+        # Already-normalized SimpleNamespace
+        if hasattr(response, "choices"):
+            return bool(response.choices)
+        return False
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Bedrock stop reason to OpenAI finish_reason.
+
+        The adapter already does this mapping inside normalize_converse_response,
+        so this is only used for direct access to raw responses.
+        """
+        _MAP = {
+            "end_turn": "stop",
+            "tool_use": "tool_calls",
+            "max_tokens": "length",
+            "stop_sequence": "stop",
+            "guardrail_intervened": "content_filter",
+            "content_filtered": "content_filter",
+        }
+        return _MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("bedrock_converse", BedrockTransport)
@@ -0,0 +1,393 @@
+"""OpenAI Chat Completions transport.
+
+Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
+providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
+
+Messages and tools are already in OpenAI format — convert_messages and
+convert_tools are near-identity.  The complexity lives in build_kwargs
+which has provider-specific conditionals for max_tokens defaults,
+reasoning configuration, temperature handling, and extra_body assembly.
+"""
+
+import copy
+from typing import Any, Dict, List, Optional
+
+from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
+from agent.prompt_builder import DEVELOPER_ROLE_MODELS
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class ChatCompletionsTransport(ProviderTransport):
+    """Transport for api_mode='chat_completions'.
+
+    The default path for OpenAI-compatible providers.
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "chat_completions"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
+        """Messages are already in OpenAI format — sanitize Codex leaks only.
+
+        Strips Codex Responses API fields (``codex_reasoning_items`` on the
+        message, ``call_id``/``response_item_id`` on tool_calls) that strict
+        chat-completions providers reject with 400/422.
+        """
+        needs_sanitize = False
+        for msg in messages:
+            if not isinstance(msg, dict):
+                continue
+            if "codex_reasoning_items" in msg:
+                needs_sanitize = True
+                break
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tc in tool_calls:
+                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
+                        needs_sanitize = True
+                        break
+                if needs_sanitize:
+                    break
+
+        if not needs_sanitize:
+            return messages
+
+        sanitized = copy.deepcopy(messages)
+        for msg in sanitized:
+            if not isinstance(msg, dict):
+                continue
+            msg.pop("codex_reasoning_items", None)
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tc in tool_calls:
+                    if isinstance(tc, dict):
+                        tc.pop("call_id", None)
+                        tc.pop("response_item_id", None)
+        return sanitized
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Tools are already in OpenAI format — identity."""
+        return tools
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build chat.completions.create() kwargs.
+
+        This is the most complex transport method — it handles ~16 providers
+        via params rather than subclasses.
+
+        params:
+            timeout: float — API call timeout
+            max_tokens: int | None — user-configured max tokens
+            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
+            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
+            reasoning_config: dict | None
+            request_overrides: dict | None
+            session_id: str | None
+            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
+            model_lower: str — lowercase model name for pattern matching
+            # Provider detection flags (all optional, default False)
+            is_openrouter: bool
+            is_nous: bool
+            is_qwen_portal: bool
+            is_github_models: bool
+            is_nvidia_nim: bool
+            is_kimi: bool
+            is_custom_provider: bool
+            ollama_num_ctx: int | None
+            # Provider routing
+            provider_preferences: dict | None
+            # Qwen-specific
+            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
+            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
+            # Temperature
+            fixed_temperature: Any — from _fixed_temperature_for_model()
+            omit_temperature: bool
+            # Reasoning
+            supports_reasoning: bool
+            github_reasoning_extra: dict | None
+            # Claude on OpenRouter/Nous max output
+            anthropic_max_output: int | None
+            # Extra
+            extra_body_additions: dict | None — pre-built extra_body entries
+        """
+        # Codex sanitization: drop reasoning_items / call_id / response_item_id
+        sanitized = self.convert_messages(messages)
+
+        # Qwen portal prep AFTER codex sanitization.  If sanitize already
+        # deepcopied, reuse that copy via the in-place variant to avoid a
+        # second deepcopy.
+        is_qwen = params.get("is_qwen_portal", False)
+        if is_qwen:
+            qwen_prep = params.get("qwen_prepare_fn")
+            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
+            if sanitized is messages:
+                if qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+            else:
+                # Already deepcopied — transform in place
+                if qwen_prep_inplace is not None:
+                    qwen_prep_inplace(sanitized)
+                elif qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+
+        # Developer role swap for GPT-5/Codex models
+        model_lower = params.get("model_lower", (model or "").lower())
+        if (
+            sanitized
+            and isinstance(sanitized[0], dict)
+            and sanitized[0].get("role") == "system"
+            and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            sanitized = list(sanitized)
+            sanitized[0] = {**sanitized[0], "role": "developer"}
+
+        api_kwargs: Dict[str, Any] = {
+            "model": model,
+            "messages": sanitized,
+        }
+
+        timeout = params.get("timeout")
+        if timeout is not None:
+            api_kwargs["timeout"] = timeout
+
+        # Temperature
+        fixed_temp = params.get("fixed_temperature")
+        omit_temp = params.get("omit_temperature", False)
+        if omit_temp:
+            api_kwargs.pop("temperature", None)
+        elif fixed_temp is not None:
+            api_kwargs["temperature"] = fixed_temp
+
+        # Qwen metadata (caller precomputes {sessionId, promptId})
+        qwen_meta = params.get("qwen_session_metadata")
+        if qwen_meta and is_qwen:
+            api_kwargs["metadata"] = qwen_meta
+
+        # Tools
+        if tools:
+            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
+            # tool parameters here keeps aggregator routes (Nous, OpenRouter,
+            # etc.) compatible, in addition to direct moonshot.ai endpoints.
+            if is_moonshot_model(model):
+                tools = sanitize_moonshot_tools(tools)
+            api_kwargs["tools"] = tools
+
+        # max_tokens resolution — priority: ephemeral > user > provider default
+        max_tokens_fn = params.get("max_tokens_param_fn")
+        ephemeral = params.get("ephemeral_max_output_tokens")
+        max_tokens = params.get("max_tokens")
+        anthropic_max_out = params.get("anthropic_max_output")
+        is_nvidia_nim = params.get("is_nvidia_nim", False)
+        is_kimi = params.get("is_kimi", False)
+        reasoning_config = params.get("reasoning_config")
+
+        if ephemeral is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(ephemeral))
+        elif max_tokens is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(max_tokens))
+        elif is_nvidia_nim and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(16384))
+        elif is_qwen and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(65536))
+        elif is_kimi and max_tokens_fn:
+            # Kimi/Moonshot: 32000 matches Kimi CLI's default
+            api_kwargs.update(max_tokens_fn(32000))
+        elif anthropic_max_out is not None:
+            api_kwargs["max_tokens"] = anthropic_max_out
+
+        # Kimi: top-level reasoning_effort (unless thinking disabled)
+        if is_kimi:
+            _kimi_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _kimi_thinking_off:
+                _kimi_effort = "medium"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _kimi_effort = _e
+                api_kwargs["reasoning_effort"] = _kimi_effort
+
+        # extra_body assembly
+        extra_body: Dict[str, Any] = {}
+
+        is_openrouter = params.get("is_openrouter", False)
+        is_nous = params.get("is_nous", False)
+        is_github_models = params.get("is_github_models", False)
+
+        provider_prefs = params.get("provider_preferences")
+        if provider_prefs and is_openrouter:
+            extra_body["provider"] = provider_prefs
+
+        # Kimi extra_body.thinking
+        if is_kimi:
+            _kimi_thinking_enabled = True
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _kimi_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _kimi_thinking_enabled else "disabled",
+            }
+
+        # Reasoning
+        if params.get("supports_reasoning", False):
+            if is_github_models:
+                gh_reasoning = params.get("github_reasoning_extra")
+                if gh_reasoning is not None:
+                    extra_body["reasoning"] = gh_reasoning
+            else:
+                if reasoning_config is not None:
+                    rc = dict(reasoning_config)
+                    if is_nous and rc.get("enabled") is False:
+                        pass  # omit for Nous when disabled
+                    else:
+                        extra_body["reasoning"] = rc
+                else:
+                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+
+        if is_nous:
+            extra_body["tags"] = ["product=hermes-agent"]
+
+        # Ollama num_ctx
+        ollama_ctx = params.get("ollama_num_ctx")
+        if ollama_ctx:
+            options = extra_body.get("options", {})
+            options["num_ctx"] = ollama_ctx
+            extra_body["options"] = options
+
+        # Ollama/custom think=false
+        if params.get("is_custom_provider", False):
+            if reasoning_config and isinstance(reasoning_config, dict):
+                _effort = (reasoning_config.get("effort") or "").strip().lower()
+                _enabled = reasoning_config.get("enabled", True)
+                if _effort == "none" or _enabled is False:
+                    extra_body["think"] = False
+
+        if is_qwen:
+            extra_body["vl_high_resolution_images"] = True
+
+        # Merge any pre-built extra_body additions
+        additions = params.get("extra_body_additions")
+        if additions:
+            extra_body.update(additions)
+
+        if extra_body:
+            api_kwargs["extra_body"] = extra_body
+
+        # Request overrides last (service_tier etc.)
+        overrides = params.get("request_overrides")
+        if overrides:
+            api_kwargs.update(overrides)
+
+        return api_kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize OpenAI ChatCompletion to NormalizedResponse.
+
+        For chat_completions, this is near-identity — the response is already
+        in OpenAI format.  extra_content on tool_calls (Gemini thought_signature)
+        is preserved via ToolCall.provider_data.  reasoning_details (OpenRouter
+        unified format) and reasoning_content (DeepSeek/Moonshot) are also
+        preserved for downstream replay.
+        """
+        choice = response.choices[0]
+        msg = choice.message
+        finish_reason = choice.finish_reason or "stop"
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = []
+            for tc in msg.tool_calls:
+                # Preserve provider-specific extras on the tool call.
+                # Gemini 3 thinking models attach extra_content with
+                # thought_signature — without replay on the next turn the API
+                # rejects the request with 400.
+                tc_provider_data: Dict[str, Any] = {}
+                extra = getattr(tc, "extra_content", None)
+                if extra is None and hasattr(tc, "model_extra"):
+                    extra = (tc.model_extra or {}).get("extra_content")
+                if extra is not None:
+                    if hasattr(extra, "model_dump"):
+                        try:
+                            extra = extra.model_dump()
+                        except Exception:
+                            pass
+                    tc_provider_data["extra_content"] = extra
+                tool_calls.append(ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                    provider_data=tc_provider_data or None,
+                ))
+
+        usage = None
+        if hasattr(response, "usage") and response.usage:
+            u = response.usage
+            usage = Usage(
+                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
+                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
+                total_tokens=getattr(u, "total_tokens", 0) or 0,
+            )
+
+        # Preserve reasoning fields separately.  DeepSeek/Moonshot use
+        # ``reasoning_content``; others use ``reasoning``.  Downstream code
+        # (_extract_reasoning, thinking-prefill retry) reads both distinctly,
+        # so keep them apart in provider_data rather than merging.
+        reasoning = getattr(msg, "reasoning", None)
+        reasoning_content = getattr(msg, "reasoning_content", None)
+
+        provider_data: Dict[str, Any] = {}
+        if reasoning_content:
+            provider_data["reasoning_content"] = reasoning_content
+        rd = getattr(msg, "reasoning_details", None)
+        if rd:
+            provider_data["reasoning_details"] = rd
+
+        return NormalizedResponse(
+            content=msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=reasoning,
+            usage=usage,
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check that response has valid choices."""
+        if response is None:
+            return False
+        if not hasattr(response, "choices") or response.choices is None:
+            return False
+        if not response.choices:
+            return False
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        details = getattr(usage, "prompt_tokens_details", None)
+        if details is None:
+            return None
+        cached = getattr(details, "cached_tokens", 0) or 0
+        written = getattr(details, "cache_write_tokens", 0) or 0
+        if cached or written:
+            return {"cached_tokens": cached, "creation_tokens": written}
+        return None
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("chat_completions", ChatCompletionsTransport)
@@ -0,0 +1,217 @@
+"""OpenAI Responses API (Codex) transport.
+
+Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle,
+streaming, or the _run_codex_stream() call path.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class ResponsesApiTransport(ProviderTransport):
+    """Transport for api_mode='codex_responses'.
+
+    Wraps the functions extracted into codex_responses_adapter.py (PR 1).
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "codex_responses"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI chat messages to Responses API input items."""
+        from agent.codex_responses_adapter import _chat_messages_to_responses_input
+        return _chat_messages_to_responses_input(messages)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Responses API function definitions."""
+        from agent.codex_responses_adapter import _responses_tools
+        return _responses_tools(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Responses API kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params:
+            instructions: str — system prompt (extracted from messages[0] if not given)
+            reasoning_config: dict | None — {effort, enabled}
+            session_id: str | None — used for prompt_cache_key + xAI conv header
+            max_tokens: int | None — max_output_tokens
+            request_overrides: dict | None — extra kwargs merged in
+            provider: str | None — provider name for backend-specific logic
+            base_url: str | None — endpoint URL
+            base_url_hostname: str | None — hostname for backend detection
+            is_github_responses: bool — Copilot/GitHub models backend
+            is_codex_backend: bool — chatgpt.com/backend-api/codex
+            is_xai_responses: bool — xAI/Grok backend
+            github_reasoning_extra: dict | None — Copilot reasoning params
+        """
+        from agent.codex_responses_adapter import (
+            _chat_messages_to_responses_input,
+            _responses_tools,
+        )
+
+        from run_agent import DEFAULT_AGENT_IDENTITY
+
+        instructions = params.get("instructions", "")
+        payload_messages = messages
+        if not instructions:
+            if messages and messages[0].get("role") == "system":
+                instructions = str(messages[0].get("content") or "").strip()
+                payload_messages = messages[1:]
+        if not instructions:
+            instructions = DEFAULT_AGENT_IDENTITY
+
+        is_github_responses = params.get("is_github_responses", False)
+        is_codex_backend = params.get("is_codex_backend", False)
+        is_xai_responses = params.get("is_xai_responses", False)
+
+        # Resolve reasoning effort
+        reasoning_effort = "medium"
+        reasoning_enabled = True
+        reasoning_config = params.get("reasoning_config")
+        if reasoning_config and isinstance(reasoning_config, dict):
+            if reasoning_config.get("enabled") is False:
+                reasoning_enabled = False
+            elif reasoning_config.get("effort"):
+                reasoning_effort = reasoning_config["effort"]
+
+        _effort_clamp = {"minimal": "low"}
+        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
+
+        kwargs = {
+            "model": model,
+            "instructions": instructions,
+            "input": _chat_messages_to_responses_input(payload_messages),
+            "tools": _responses_tools(tools),
+            "tool_choice": "auto",
+            "parallel_tool_calls": True,
+            "store": False,
+        }
+
+        session_id = params.get("session_id")
+        if not is_github_responses and session_id:
+            kwargs["prompt_cache_key"] = session_id
+
+        if reasoning_enabled and is_xai_responses:
+            kwargs["include"] = ["reasoning.encrypted_content"]
+        elif reasoning_enabled:
+            if is_github_responses:
+                github_reasoning = params.get("github_reasoning_extra")
+                if github_reasoning is not None:
+                    kwargs["reasoning"] = github_reasoning
+            else:
+                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+                kwargs["include"] = ["reasoning.encrypted_content"]
+        elif not is_github_responses and not is_xai_responses:
+            kwargs["include"] = []
+
+        request_overrides = params.get("request_overrides")
+        if request_overrides:
+            kwargs.update(request_overrides)
+
+        max_tokens = params.get("max_tokens")
+        if max_tokens is not None and not is_codex_backend:
+            kwargs["max_output_tokens"] = max_tokens
+
+        if is_xai_responses and session_id:
+            kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
+
+        return kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Codex Responses API response to NormalizedResponse."""
+        from agent.codex_responses_adapter import (
+            _normalize_codex_response,
+            _extract_responses_message_text,
+            _extract_responses_reasoning_text,
+        )
+
+        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
+        msg, finish_reason = _normalize_codex_response(response)
+
+        tool_calls = None
+        if msg and msg.tool_calls:
+            tool_calls = []
+            for tc in msg.tool_calls:
+                provider_data = {}
+                if hasattr(tc, "call_id") and tc.call_id:
+                    provider_data["call_id"] = tc.call_id
+                if hasattr(tc, "response_item_id") and tc.response_item_id:
+                    provider_data["response_item_id"] = tc.response_item_id
+                tool_calls.append(ToolCall(
+                    id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
+                    name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
+                    arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
+                    provider_data=provider_data or None,
+                ))
+
+        # Extract reasoning items for provider_data
+        provider_data = {}
+        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
+            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
+        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
+            provider_data["reasoning_details"] = msg.reasoning_details
+
+        return NormalizedResponse(
+            content=msg.content if msg else None,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason or "stop",
+            reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
+            usage=None,  # Codex usage is extracted separately in normalize_usage()
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Codex Responses API response has valid output structure.
+
+        Returns True only if response.output is a non-empty list.
+        Does NOT check output_text fallback — the caller handles that
+        with diagnostic logging for stream backfill recovery.
+        """
+        if response is None:
+            return False
+        output = getattr(response, "output", None)
+        if not isinstance(output, list) or not output:
+            return False
+        return True
+
+    def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
+        """Validate and sanitize Codex API kwargs before the call.
+
+        Normalizes input items, strips unsupported fields, validates structure.
+        """
+        from agent.codex_responses_adapter import _preflight_codex_api_kwargs
+        return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Codex response.status to OpenAI finish_reason.
+
+        Codex uses response.status ('completed', 'incomplete') +
+        response.incomplete_details.reason for granular mapping.
+        This method handles the simple status string; the caller
+        should check incomplete_details separately for 'max_output_tokens'.
+        """
+        _MAP = {
+            "completed": "stop",
+            "incomplete": "length",
+            "failed": "stop",
+            "cancelled": "stop",
+        }
+        return _MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("codex_responses", ResponsesApiTransport)
@@ -0,0 +1,156 @@
+"""Shared types for normalized provider responses.
+
+These dataclasses define the canonical shape that all provider adapters
+normalize responses to.  The shared surface is intentionally minimal —
+only fields that every downstream consumer reads are top-level.
+Protocol-specific state goes in ``provider_data`` dicts (response-level
+and per-tool-call) so that protocol-aware code paths can access it
+without polluting the shared type.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+
+@dataclass
+class ToolCall:
+    """A normalized tool call from any provider.
+
+    ``id`` is the protocol's canonical identifier — what gets used in
+    ``tool_call_id`` / ``tool_use_id`` when constructing tool result
+    messages.  May be ``None`` when the provider omits it; the agent
+    fills it via ``_deterministic_call_id()`` before storing in history.
+
+    ``provider_data`` carries per-tool-call protocol metadata that only
+    protocol-aware code reads:
+
+    * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
+    * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
+    * Others: ``None``
+    """
+
+    id: Optional[str]
+    name: str
+    arguments: str  # JSON string
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+    # ── Backward compatibility ──────────────────────────────────
+    # The agent loop reads tc.function.name / tc.function.arguments
+    # throughout run_agent.py (45+ sites).  These properties let
+    # NormalizedResponse pass through without the _nr_to_assistant_message
+    # shim, while keeping ToolCall's canonical fields flat.
+    @property
+    def type(self) -> str:
+        return "function"
+
+    @property
+    def function(self) -> "ToolCall":
+        """Return self so tc.function.name / tc.function.arguments work."""
+        return self
+
+    @property
+    def call_id(self) -> Optional[str]:
+        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
+        return (self.provider_data or {}).get("call_id")
+
+    @property
+    def response_item_id(self) -> Optional[str]:
+        """Codex response_item_id from provider_data."""
+        return (self.provider_data or {}).get("response_item_id")
+
+    @property
+    def extra_content(self) -> Optional[Dict[str, Any]]:
+        """Gemini extra_content (thought_signature) from provider_data.
+
+        Gemini 3 thinking models attach ``extra_content`` with a
+        ``thought_signature`` to each tool call.  This signature must be
+        replayed on subsequent API calls — without it the API rejects the
+        request with HTTP 400.  The chat_completions transport stores this
+        in ``provider_data["extra_content"]``; this property exposes it so
+        ``_build_assistant_message`` can ``getattr(tc, "extra_content")``
+        uniformly.
+        """
+        return (self.provider_data or {}).get("extra_content")
+
+
+@dataclass
+class Usage:
+    """Token usage from an API response."""
+
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    cached_tokens: int = 0
+
+
+@dataclass
+class NormalizedResponse:
+    """Normalized API response from any provider.
+
+    Shared fields are truly cross-provider — every caller can rely on
+    them without branching on api_mode.  Protocol-specific state goes in
+    ``provider_data`` so that only protocol-aware code paths read it.
+
+    Response-level ``provider_data`` examples:
+
+    * Anthropic: ``{"reasoning_details": [...]}``
+    * Codex: ``{"codex_reasoning_items": [...]}``
+    * Others: ``None``
+    """
+
+    content: Optional[str]
+    tool_calls: Optional[List[ToolCall]]
+    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
+    reasoning: Optional[str] = None
+    usage: Optional[Usage] = None
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+    # ── Backward compatibility ──────────────────────────────────
+    # The shim _nr_to_assistant_message() mapped these from provider_data.
+    # These properties let NormalizedResponse pass through directly.
+    @property
+    def reasoning_content(self) -> Optional[str]:
+        pd = self.provider_data or {}
+        return pd.get("reasoning_content")
+
+    @property
+    def reasoning_details(self):
+        pd = self.provider_data or {}
+        return pd.get("reasoning_details")
+
+    @property
+    def codex_reasoning_items(self):
+        pd = self.provider_data or {}
+        return pd.get("codex_reasoning_items")
+
+
+# ---------------------------------------------------------------------------
+# Factory helpers
+# ---------------------------------------------------------------------------
+
+def build_tool_call(
+    id: Optional[str],
+    name: str,
+    arguments: Any,
+    **provider_fields: Any,
+) -> ToolCall:
+    """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
+
+    Any extra keyword arguments are collected into ``provider_data``.
+    """
+    args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
+    pd = dict(provider_fields) if provider_fields else None
+    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
+
+
+def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
+    """Translate a provider-specific stop reason to the normalised set.
+
+    Falls back to ``"stop"`` for unknown or ``None`` reasons.
+    """
+    if reason is None:
+        return "stop"
+    return mapping.get(reason, "stop")
@@ -6,6 +6,7 @@ from decimal import Decimal
 from typing import Any, Dict, Literal, Optional

 from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
+from utils import base_url_host_matches

 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

@@ -393,7 +394,7 @@ def resolve_billing_route(

    if provider_name == "openai-codex":
        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
-    if provider_name == "openrouter" or "openrouter.ai" in base:
+    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
    if provider_name == "anthropic":
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
@@ -532,10 +533,22 @@ def normalize_usage(
        prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
        output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
        details = getattr(response_usage, "prompt_tokens_details", None)
+        # Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
+        # top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
+        # AI Gateway, Cline) expose when routing Claude models — without this
+        # fallback, cache writes are undercounted as 0 and cache reads can be
+        # missed when the proxy only surfaces them at the top level.
+        # Port of cline/cline#10266.
        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
+        if not cache_read_tokens:
+            cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
        cache_write_tokens = _to_int(
            getattr(details, "cache_write_tokens", 0) if details else 0
        )
+        if not cache_write_tokens:
+            cache_write_tokens = _to_int(
+                getattr(response_usage, "cache_creation_input_tokens", 0)
+            )
        input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)

    reasoning_tokens = 0
@@ -444,6 +444,7 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
            if not reasoning.get("has_any_reasoning", True):
                print(f"   🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
                discarded_no_reasoning += 1
+                completed_in_batch.append(prompt_index)
                continue
            
            # Get and normalize tool stats for consistent schema across all entries
@@ -1189,12 +1190,12 @@ def main(
    """
    # Handle list distributions
    if list_distributions:
-        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
-        
+        from toolset_distributions import print_distribution_info
+
        print("📊 Available Toolset Distributions")
        print("=" * 70)
-        
-        all_dists = get_all_dists()
+
+        all_dists = list_distributions()
        for dist_name in sorted(all_dists.keys()):
            print_distribution_info(dist_name)
        
@@ -66,15 +66,18 @@ model:
 # max_tokens: 8192

 # Named provider overrides (optional)
-# Use this for per-provider request timeouts and per-model exceptions.
+# Use this for per-provider request timeouts, non-stream stale timeouts,
+# and per-model exceptions.
 # Applies to the primary turn client on every api_mode (OpenAI-wire, native
 # Anthropic, and Anthropic-compatible providers), the fallback chain, and
 # client rebuilds during credential rotation.  For OpenAI-wire chat
 # completions (streaming and non-streaming) the configured value is also
 # used as the per-request ``timeout=`` kwarg so it wins over the legacy
 # HERMES_API_TIMEOUT env var (which still applies when no config is set).
-# Leaving these unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
-# native Anthropic 900s).
+# ``stale_timeout_seconds`` controls the non-streaming stale-call detector and
+# wins over the legacy HERMES_API_CALL_STALE_TIMEOUT env var. Leaving these
+# unset keeps the legacy defaults (HERMES_API_TIMEOUT=1800s,
+# HERMES_API_CALL_STALE_TIMEOUT=300s, native Anthropic 900s).
 #
 # Not currently wired for AWS Bedrock (bedrock_converse + AnthropicBedrock
 # SDK paths) — those use boto3 with its own timeout configuration.
@@ -82,11 +85,16 @@ model:
 # providers:
 #   ollama-local:
 #     request_timeout_seconds: 300   # Longer timeout for local cold-starts
+#     stale_timeout_seconds: 900     # Explicitly re-enable stale detection on local endpoints
 #   anthropic:
 #     request_timeout_seconds: 30    # Fast-fail cloud requests
 #     models:
 #       claude-opus-4.6:
 #         timeout_seconds: 600       # Longer timeout for extended-thinking Opus calls
+#   openai-codex:
+#     models:
+#       gpt-5.4:
+#         stale_timeout_seconds: 1800  # Longer non-stream stale timeout for slow large-context turns

 # =============================================================================
 # OpenRouter Provider Routing (only applies when using OpenRouter)
@@ -114,20 +122,6 @@ model:
 #   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 #   # data_collection: "deny"

-# =============================================================================
-# Smart Model Routing (optional)
-# =============================================================================
-# Use a cheaper model for short/simple turns while keeping your main model for
-# more complex requests. Disabled by default.
-#
-# smart_model_routing:
-#   enabled: true
-#   max_simple_chars: 160
-#   max_simple_words: 28
-#   cheap_model:
-#     provider: openrouter
-#     model: google/gemini-2.5-flash
-
 # =============================================================================
 # Git Worktree Isolation
 # =============================================================================
@@ -380,6 +374,18 @@ compression:
 #   web_extract:
 #     provider: "auto"
 #     model: ""
+#
+#   # Session search — summarizes matching past sessions
+#   session_search:
+#     provider: "auto"
+#     model: ""
+#     timeout: 30
+#     max_concurrency: 3    # Limit parallel summaries to reduce request-burst 429s
+#     extra_body: {}        # Provider-specific OpenAI-compatible request fields
+#                           # Example for providers that support request-body
+#                           # reasoning controls:
+#                           # extra_body:
+#                           #   enable_thinking: false

 # =============================================================================
 # Persistent Memory
@@ -501,6 +507,13 @@ agent:
  # finish, then interrupts anything still running after this timeout.
  # 0 = no drain, interrupt immediately.
  # restart_drain_timeout: 60
+
+  # Max app-level retry attempts for API errors (connection drops, provider
+  # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
+  # to 1 if you use fallback providers and want fast failover on flaky
+  # primaries (default 3). The OpenAI SDK does its own low-level retries
+  # underneath this wrapper — this is the Hermes-level loop.
+  # api_max_retries: 3
  
  # Enable verbose logging
  verbose: false
@@ -764,10 +777,13 @@ code_execution:
 # Subagent Delegation
 # =============================================================================
 # The delegate_task tool spawns child agents with isolated context.
-# Supports single tasks and batch mode (up to 3 parallel).
+# Supports single tasks and batch mode (default 3 parallel, configurable).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
+  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
+  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
+  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -911,3 +927,39 @@ display:
 #   # Names and usernames are NOT affected (user-chosen, publicly visible).
 #   # Routing/delivery still uses the original values internally.
 #   redact_pii: false
+
+# =============================================================================
+# Shell-script hooks
+# =============================================================================
+# Register shell scripts as plugin-hook callbacks.  Each entry is executed as
+# a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
+# stdout the script may return JSON that either blocks the tool call or
+# injects context into the next LLM call.
+#
+# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
+#   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
+#   pre_api_request, post_api_request, on_session_start, on_session_end,
+#   on_session_finalize, on_session_reset, subagent_stop
+#
+# First-use consent: each (event, command) pair prompts once on a TTY, then
+# is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
+# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
+# hooks_auto_accept key below.
+#
+# See website/docs/user-guide/features/hooks.md for the full JSON wire
+# protocol and worked examples.
+#
+# hooks:
+#   pre_tool_call:
+#     - matcher: "terminal"
+#       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
+#       timeout: 10
+#   post_tool_call:
+#     - matcher: "write_file|patch"
+#       command: "~/.hermes/agent-hooks/auto-format.sh"
+#   pre_llm_call:
+#     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
+#   subagent_stop:
+#     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
+#
+# hooks_auto_accept: false
@@ -9,6 +9,7 @@ import copy
 import json
 import logging
 import tempfile
+import threading
 import os
 import re
 import uuid
@@ -34,6 +35,11 @@ except ImportError:
 HERMES_DIR = get_hermes_home().resolve()
 CRON_DIR = HERMES_DIR / "cron"
 JOBS_FILE = CRON_DIR / "jobs.json"
+
+# In-process lock protecting load_jobs→modify→save_jobs cycles.
+# Required when tick() runs jobs in parallel threads — without this,
+# concurrent mark_job_run / advance_next_run calls can clobber each other.
+_jobs_file_lock = threading.Lock()
 OUTPUT_DIR = CRON_DIR / "output"
 ONESHOT_GRACE_SECONDS = 120

@@ -378,6 +384,7 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
+    enabled_toolsets: Optional[List[str]] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
@@ -397,6 +404,9 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
+        enabled_toolsets: Optional list of toolset names to restrict the agent to.
+                          When set, only tools from these toolsets are loaded, reducing
+                          token overhead. When omitted, all default tools are loaded.

    Returns:
        The created job dict
@@ -427,6 +437,8 @@ def create_job(
    normalized_base_url = normalized_base_url or None
    normalized_script = str(script).strip() if isinstance(script, str) else None
    normalized_script = normalized_script or None
+    normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
+    normalized_toolsets = normalized_toolsets or None

    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
@@ -458,6 +470,7 @@ def create_job(
        # Delivery configuration
        "deliver": deliver,
        "origin": origin,  # Tracks where job was created for "origin" delivery
+        "enabled_toolsets": normalized_toolsets,
    }

    jobs = load_jobs()
@@ -594,43 +607,44 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
    ``delivery_error`` is tracked separately from the agent error — a job
    can succeed (agent produced output) but fail delivery (platform down).
    """
-    jobs = load_jobs()
-    for i, job in enumerate(jobs):
-        if job["id"] == job_id:
-            now = _hermes_now().isoformat()
-            job["last_run_at"] = now
-            job["last_status"] = "ok" if success else "error"
-            job["last_error"] = error if not success else None
-            # Track delivery failures separately — cleared on successful delivery
-            job["last_delivery_error"] = delivery_error
-            
-            # Increment completed count
-            if job.get("repeat"):
-                job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
+    with _jobs_file_lock:
+        jobs = load_jobs()
+        for i, job in enumerate(jobs):
+            if job["id"] == job_id:
+                now = _hermes_now().isoformat()
+                job["last_run_at"] = now
+                job["last_status"] = "ok" if success else "error"
+                job["last_error"] = error if not success else None
+                # Track delivery failures separately — cleared on successful delivery
+                job["last_delivery_error"] = delivery_error
                
-                # Check if we've hit the repeat limit
-                times = job["repeat"].get("times")
-                completed = job["repeat"]["completed"]
-                if times is not None and times > 0 and completed >= times:
-                    # Remove the job (limit reached)
-                    jobs.pop(i)
-                    save_jobs(jobs)
-                    return
-            
-            # Compute next run
-            job["next_run_at"] = compute_next_run(job["schedule"], now)
+                # Increment completed count
+                if job.get("repeat"):
+                    job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
+                    
+                    # Check if we've hit the repeat limit
+                    times = job["repeat"].get("times")
+                    completed = job["repeat"]["completed"]
+                    if times is not None and times > 0 and completed >= times:
+                        # Remove the job (limit reached)
+                        jobs.pop(i)
+                        save_jobs(jobs)
+                        return
+                
+                # Compute next run
+                job["next_run_at"] = compute_next_run(job["schedule"], now)

-            # If no next run (one-shot completed), disable
-            if job["next_run_at"] is None:
-                job["enabled"] = False
-                job["state"] = "completed"
-            elif job.get("state") != "paused":
-                job["state"] = "scheduled"
+                # If no next run (one-shot completed), disable
+                if job["next_run_at"] is None:
+                    job["enabled"] = False
+                    job["state"] = "completed"
+                elif job.get("state") != "paused":
+                    job["state"] = "scheduled"

-            save_jobs(jobs)
-            return
+                save_jobs(jobs)
+                return

-    logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
+        logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)


 def advance_next_run(job_id: str) -> bool:
@@ -645,20 +659,21 @@ def advance_next_run(job_id: str) -> bool:

    Returns True if next_run_at was advanced, False otherwise.
    """
-    jobs = load_jobs()
-    for job in jobs:
-        if job["id"] == job_id:
-            kind = job.get("schedule", {}).get("kind")
-            if kind not in ("cron", "interval"):
+    with _jobs_file_lock:
+        jobs = load_jobs()
+        for job in jobs:
+            if job["id"] == job_id:
+                kind = job.get("schedule", {}).get("kind")
+                if kind not in ("cron", "interval"):
+                    return False
+                now = _hermes_now().isoformat()
+                new_next = compute_next_run(job["schedule"], now)
+                if new_next and new_next != job.get("next_run_at"):
+                    job["next_run_at"] = new_next
+                    save_jobs(jobs)
+                    return True
                return False
-            now = _hermes_now().isoformat()
-            new_next = compute_next_run(job["schedule"], now)
-            if new_next and new_next != job.get("next_run_at"):
-                job["next_run_at"] = new_next
-                save_jobs(jobs)
-                return True
-            return False
-    return False
+        return False


 def get_due_jobs() -> List[Dict[str, Any]]:
@@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)

+
+def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
+    """Resolve the toolset list for a cron job.
+
+    Precedence:
+    1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
+       Keeps the agent's job-scoped toolset override intact — #6130.
+    2. Per-platform ``hermes tools`` config for the ``cron`` platform.
+       Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
+       so users can gate cron toolsets globally without recreating every job.
+    3. ``None`` on any lookup failure — AIAgent loads the full default set
+       (legacy behavior before this change, preserved as the safety net).
+
+    _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
+    ``_get_platform_tools`` for unconfigured platforms, so fresh installs
+    get cron WITHOUT ``moa`` by default (issue reported by Norbert —
+    surprise $4.63 run).
+    """
+    per_job = job.get("enabled_toolsets")
+    if per_job:
+        return per_job
+    try:
+        from hermes_cli.tools_config import _get_platform_tools  # lazy: avoid heavy import at cron module load
+        return sorted(_get_platform_tools(cfg or {}, "cron"))
+    except Exception as exc:
+        logger.warning(
+            "Cron toolset resolution failed, falling back to full default toolset: %s",
+            exc,
+        )
+        return None
+
 # Valid delivery platforms — used to validate user-supplied platform names
 # in cron delivery targets, preventing env var enumeration via crafted names.
 _KNOWN_DELIVERY_PLATFORMS = frozenset({
@@ -252,7 +283,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
                coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)

            future = asyncio.run_coroutine_threadsafe(coro, loop)
-            result = future.result(timeout=30)
+            try:
+                result = future.result(timeout=30)
+            except TimeoutError:
+                future.cancel()
+                raise
            if result and not getattr(result, "success", True):
                logger.warning(
                    "Job '%s': media send failed for %s: %s",
@@ -382,7 +417,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                        runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
                        loop,
                    )
-                    send_result = future.result(timeout=60)
+                    try:
+                        send_result = future.result(timeout=60)
+                    except TimeoutError:
+                        future.cancel()
+                        raise
                    if send_result and not getattr(send_result, "success", True):
                        err = getattr(send_result, "error", "unknown")
                        logger.warning(
@@ -422,7 +461,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
                # fresh thread that has no running loop.
                coro.close()
-                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                    future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
                    result = future.result(timeout=30)
@@ -747,14 +785,17 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    # scheduler process — every job this process runs is a cron job.
    os.environ["HERMES_CRON_SESSION"] = "1"

+    # Use ContextVars for per-job session/delivery state so parallel jobs
+    # don't clobber each other's targets (os.environ is process-global).
+    from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
+
+    _ctx_tokens = set_session_vars(
+        platform=origin["platform"] if origin else "",
+        chat_id=str(origin["chat_id"]) if origin else "",
+        chat_name=origin.get("chat_name", "") if origin else "",
+    )
+
    try:
-        # Inject origin context so the agent's send_message tool knows the chat.
-        # Must be INSIDE the try block so the finally cleanup always runs.
-        if origin:
-            os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
-            os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
-            if origin.get("chat_name"):
-                os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
        from dotenv import load_dotenv
@@ -765,10 +806,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        delivery_target = _resolve_delivery_target(job)
        if delivery_target:
-            os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
-            os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
+            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
+            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
            if delivery_target.get("thread_id") is not None:
-                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
+                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -807,14 +848,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        prefill_messages = None
        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
        if prefill_file:
-            import json as _json
            pfpath = Path(prefill_file).expanduser()
            if not pfpath.is_absolute():
                pfpath = _hermes_home / pfpath
            if pfpath.exists():
                try:
                    with open(pfpath, "r", encoding="utf-8") as _pf:
-                        prefill_messages = _json.load(_pf)
+                        prefill_messages = json.load(_pf)
                    if not isinstance(prefill_messages, list):
                        prefill_messages = None
                except Exception as e:
@@ -826,7 +866,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        # Provider routing
        pr = _cfg.get("provider_routing", {})
-        smart_routing = _cfg.get("smart_model_routing", {}) or {}

        from hermes_cli.runtime_provider import (
            resolve_runtime_provider,
@@ -843,24 +882,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            message = format_runtime_provider_error(exc)
            raise RuntimeError(message) from exc

-        from agent.smart_model_routing import resolve_turn_route
-        turn_route = resolve_turn_route(
-            prompt,
-            smart_routing,
-            {
-                "model": model,
-                "api_key": runtime.get("api_key"),
-                "base_url": runtime.get("base_url"),
-                "provider": runtime.get("provider"),
-                "api_mode": runtime.get("api_mode"),
-                "command": runtime.get("command"),
-                "args": list(runtime.get("args") or []),
-            },
-        )
-
        fallback_model = _cfg.get("fallback_providers") or _cfg.get("fallback_model") or None
        credential_pool = None
-        runtime_provider = str(turn_route["runtime"].get("provider") or "").strip().lower()
+        runtime_provider = str(runtime.get("provider") or "").strip().lower()
        if runtime_provider:
            try:
                from agent.credential_pool import load_pool
@@ -877,13 +901,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                logger.debug("Job '%s': failed to load credential pool for %s: %s", job_id, runtime_provider, e)

        agent = AIAgent(
-            model=turn_route["model"],
-            api_key=turn_route["runtime"].get("api_key"),
-            base_url=turn_route["runtime"].get("base_url"),
-            provider=turn_route["runtime"].get("provider"),
-            api_mode=turn_route["runtime"].get("api_mode"),
-            acp_command=turn_route["runtime"].get("command"),
-            acp_args=turn_route["runtime"].get("args"),
+            model=model,
+            api_key=runtime.get("api_key"),
+            base_url=runtime.get("base_url"),
+            provider=runtime.get("provider"),
+            api_mode=runtime.get("api_mode"),
+            acp_command=runtime.get("command"),
+            acp_args=runtime.get("args"),
            max_iterations=max_iterations,
            reasoning_config=reasoning_config,
            prefill_messages=prefill_messages,
@@ -893,6 +917,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            providers_ignored=pr.get("ignore"),
            providers_order=pr.get("order"),
            provider_sort=pr.get("sort"),
+            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
            skip_context_files=True,  # Don't inject SOUL.md/AGENTS.md from scheduler cwd
@@ -979,6 +1004,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"— last activity: {_last_desc}"
            )

+        # Guard against non-dict returns from run_conversation under error conditions
+        if not isinstance(result, dict):
+            raise RuntimeError(
+                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
+            )
+
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -1028,16 +1059,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        return False, output, "", error_msg

    finally:
-        # Clean up injected env vars so they don't leak to other jobs
-        for key in (
-            "HERMES_SESSION_PLATFORM",
-            "HERMES_SESSION_CHAT_ID",
-            "HERMES_SESSION_CHAT_NAME",
-            "HERMES_CRON_AUTO_DELIVER_PLATFORM",
-            "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
-            "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
-        ):
-            os.environ.pop(key, None)
+        # Clean up ContextVar session/delivery state for this job.
+        clear_session_vars(_ctx_tokens)
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -1090,15 +1113,41 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
        if verbose:
            logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))

-        executed = 0
+        # Advance next_run_at for all recurring jobs FIRST, under the file lock,
+        # before any execution begins.  This preserves at-most-once semantics.
        for job in due_jobs:
-            try:
-                # For recurring jobs (cron/interval), advance next_run_at to the
-                # next future occurrence BEFORE execution.  This way, if the
-                # process crashes mid-run, the job won't re-fire on restart.
-                # One-shot jobs are left alone so they can retry on restart.
-                advance_next_run(job["id"])
+            advance_next_run(job["id"])

+        # Resolve max parallel workers: env var > config.yaml > unbounded.
+        # Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
+        _max_workers: Optional[int] = None
+        try:
+            _env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
+            if _env_par:
+                _max_workers = int(_env_par) or None
+        except (ValueError, TypeError):
+            logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
+        if _max_workers is None:
+            try:
+                _ucfg = load_config() or {}
+                _cfg_par = (
+                    _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
+                ).get("max_parallel_jobs")
+                if _cfg_par is not None:
+                    _max_workers = int(_cfg_par) or None
+            except Exception:
+                pass
+
+        if verbose:
+            logger.info(
+                "Running %d job(s) in parallel (max_workers=%s)",
+                len(due_jobs),
+                _max_workers if _max_workers else "unbounded",
+            )
+
+        def _process_job(job: dict) -> bool:
+            """Run one due job end-to-end: execute, save, deliver, mark."""
+            try:
                success, output, final_response, error = run_job(job)

                output_file = save_job_output(job["id"], output)
@@ -1130,13 +1179,23 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                    error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"

                mark_job_run(job["id"], success, error, delivery_error=delivery_error)
-                executed += 1
+                return True

            except Exception as e:
                logger.error("Error processing job %s: %s", job['id'], e)
                mark_job_run(job["id"], False, str(e))
+                return False

-        return executed
+        # Run all due jobs concurrently, each in its own ContextVar copy
+        # so session/delivery state stays isolated per-thread.
+        with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
+            _futures = []
+            for job in due_jobs:
+                _ctx = contextvars.copy_context()
+                _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
+            _results = [f.result() for f in _futures]
+
+        return sum(_results)
    finally:
        if fcntl:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
@@ -58,6 +58,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

+# Ensure the main config file remains accessible to the hermes runtime user
+# even if it was edited on the host after initial ownership setup.
+if [ -f "$HERMES_HOME/config.yaml" ]; then
+    chown hermes:hermes "$HERMES_HOME/config.yaml"
+    chmod 640 "$HERMES_HOME/config.yaml"
+fi
+
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -68,4 +75,19 @@ if [ -d "$INSTALL_DIR/skills" ]; then
    python3 "$INSTALL_DIR/tools/skills_sync.py"
 fi

+# Final exec: two supported invocation patterns.
+#
+#   docker run <image>                 -> exec `hermes` with no args (legacy default)
+#   docker run <image> chat -q "..."   -> exec `hermes chat -q "..."` (legacy wrap)
+#   docker run <image> sleep infinity  -> exec `sleep infinity` directly
+#   docker run <image> bash            -> exec `bash` directly
+#
+# If the first positional arg resolves to an executable on PATH, we assume the
+# caller wants to run it directly (needed by the launcher which runs long-lived
+# `sleep infinity` sandbox containers — see tools/environments/docker.py).
+# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
+# preserving the documented `docker run <image> <subcommand>` behavior.
+if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
+    exec "$@"
+fi
 exec hermes "$@"
@@ -1,228 +0,0 @@
-# Hermes Agent — ACP (Agent Client Protocol) Setup Guide
-
-Hermes Agent supports the **Agent Client Protocol (ACP)**, allowing it to run as
-a coding agent inside your editor. ACP lets your IDE send tasks to Hermes, and
-Hermes responds with file edits, terminal commands, and explanations — all shown
-natively in the editor UI.
-
---
-
-## Prerequisites
-
- Hermes Agent installed and configured (`hermes setup` completed)
- An API key / provider set up in `~/.hermes/.env` or via `hermes login`
- Python 3.11+
-
-Install the ACP extra:
-
-```bash
-pip install -e ".[acp]"
-```
-
---
-
-## VS Code Setup
-
-### 1. Install the ACP Client extension
-
-Open VS Code and install **ACP Client** from the marketplace:
-
- Press `Ctrl+Shift+X` (or `Cmd+Shift+X` on macOS)
- Search for **"ACP Client"**
- Click **Install**
-
-Or install from the command line:
-
-```bash
-code --install-extension anysphere.acp-client
-```
-
-### 2. Configure settings.json
-
-Open your VS Code settings (`Ctrl+,` → click the `{}` icon for JSON) and add:
-
-```json
-{
-  "acpClient.agents": [
-    {
-      "name": "hermes-agent",
-      "registryDir": "/path/to/hermes-agent/acp_registry"
-    }
-  ]
-}
-```
-
-Replace `/path/to/hermes-agent` with the actual path to your Hermes Agent
-installation (e.g. `~/.hermes/hermes-agent`).
-
-Alternatively, if `hermes` is on your PATH, the ACP Client can discover it
-automatically via the registry directory.
-
-### 3. Restart VS Code
-
-After configuring, restart VS Code. You should see **Hermes Agent** appear in
-the ACP agent picker in the chat/agent panel.
-
---
-
-## Zed Setup
-
-Zed has built-in ACP support.
-
-### 1. Configure Zed settings
-
-Open Zed settings (`Cmd+,` on macOS or `Ctrl+,` on Linux) and add to your
-`settings.json`:
-
-```json
-{
-  "agent_servers": {
-    "hermes-agent": {
-      "type": "custom",
-      "command": "hermes",
-      "args": ["acp"],
-    },
-  },
-}
-```
-
-### 2. Restart Zed
-
-Hermes Agent will appear in the agent panel. Select it and start a conversation.
-
---
-
-## JetBrains Setup (IntelliJ, PyCharm, WebStorm, etc.)
-
-### 1. Install the ACP plugin
-
- Open **Settings** → **Plugins** → **Marketplace**
- Search for **"ACP"** or **"Agent Client Protocol"**
- Install and restart the IDE
-
-### 2. Configure the agent
-
- Open **Settings** → **Tools** → **ACP Agents**
- Click **+** to add a new agent
- Set the registry directory to your `acp_registry/` folder:
-  `/path/to/hermes-agent/acp_registry`
- Click **OK**
-
-### 3. Use the agent
-
-Open the ACP panel (usually in the right sidebar) and select **Hermes Agent**.
-
---
-
-## What You Will See
-
-Once connected, your editor provides a native interface to Hermes Agent:
-
-### Chat Panel
-A conversational interface where you can describe tasks, ask questions, and
-give instructions. Hermes responds with explanations and actions.
-
-### File Diffs
-When Hermes edits files, you see standard diffs in the editor. You can:
- **Accept** individual changes
- **Reject** changes you don't want
- **Review** the full diff before applying
-
-### Terminal Commands
-When Hermes needs to run shell commands (builds, tests, installs), the editor
-shows them in an integrated terminal. Depending on your settings:
- Commands may run automatically
- Or you may be prompted to **approve** each command
-
-### Approval Flow
-For potentially destructive operations, the editor will prompt you for
-approval before Hermes proceeds. This includes:
- File deletions
- Shell commands
- Git operations
-
---
-
-## Configuration
-
-Hermes Agent under ACP uses the **same configuration** as the CLI:
-
- **API keys / providers**: `~/.hermes/.env`
- **Agent config**: `~/.hermes/config.yaml`
- **Skills**: `~/.hermes/skills/`
- **Sessions**: `~/.hermes/state.db`
-
-You can run `hermes setup` to configure providers, or edit `~/.hermes/.env`
-directly.
-
-### Changing the model
-
-Edit `~/.hermes/config.yaml`:
-
-```yaml
-model: openrouter/nous/hermes-3-llama-3.1-70b
-```
-
-Or set the `HERMES_MODEL` environment variable.
-
-### Toolsets
-
-ACP sessions use the curated `hermes-acp` toolset by default. It is designed for editor workflows and intentionally excludes things like messaging delivery, cronjob management, and audio-first UX features.
-
---
-
-## Troubleshooting
-
-### Agent doesn't appear in the editor
-
-1. **Check the registry path** — make sure the `acp_registry/` directory path
-   in your editor settings is correct and contains `agent.json`.
-2. **Check `hermes` is on PATH** — run `which hermes` in a terminal. If not
-   found, you may need to activate your virtualenv or add it to PATH.
-3. **Restart the editor** after changing settings.
-
-### Agent starts but errors immediately
-
-1. Run `hermes doctor` to check your configuration.
-2. Check that you have a valid API key: `hermes status`
-3. Try running `hermes acp` directly in a terminal to see error output.
-
-### "Module not found" errors
-
-Make sure you installed the ACP extra:
-
-```bash
-pip install -e ".[acp]"
-```
-
-### Slow responses
-
- ACP streams responses, so you should see incremental output. If the agent
-  appears stuck, check your network connection and API provider status.
- Some providers have rate limits. Try switching to a different model/provider.
-
-### Permission denied for terminal commands
-
-If the editor blocks terminal commands, check your ACP Client extension
-settings for auto-approval or manual-approval preferences.
-
-### Logs
-
-Hermes logs are written to stderr when running in ACP mode. Check:
- VS Code: **Output** panel → select **ACP Client** or **Hermes Agent**
- Zed: **View** → **Toggle Terminal** and check the process output
- JetBrains: **Event Log** or the ACP tool window
-
-You can also enable verbose logging:
-
-```bash
-HERMES_LOG_LEVEL=DEBUG hermes acp
-```
-
---
-
-## Further Reading
-
- [ACP Specification](https://github.com/anysphere/acp)
- [Hermes Agent Documentation](https://github.com/NousResearch/hermes-agent)
- Run `hermes --help` for all CLI options
@@ -1,698 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="UTF-8">
-<meta name="viewport" content="width=device-width, initial-scale=1.0">
-<title>honcho-integration-spec</title>
-<style>
-  :root {
-    --bg:             #0b0e14;
-    --bg-surface:     #11151c;
-    --bg-elevated:    #181d27;
-    --bg-code:        #0d1018;
-    --fg:             #c9d1d9;
-    --fg-bright:      #e6edf3;
-    --fg-muted:       #6e7681;
-    --fg-subtle:      #484f58;
-    --accent:         #7eb8f6;
-    --accent-dim:     #3d6ea5;
-    --accent-glow:    rgba(126, 184, 246, 0.08);
-    --green:          #7ee6a8;
-    --green-dim:      #2ea04f;
-    --orange:         #e6a855;
-    --red:            #f47067;
-    --purple:         #bc8cff;
-    --cyan:           #56d4dd;
-    --border:         #21262d;
-    --border-subtle:  #161b22;
-    --radius:         6px;
-    --font-sans:      'New York', ui-serif, 'Iowan Old Style', 'Apple Garamond', Baskerville, 'Times New Roman', 'Noto Emoji', serif;
-    --font-mono:      'Departure Mono', 'Noto Emoji', monospace;
-  }
-
-  *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
-  html { scroll-behavior: smooth; scroll-padding-top: 2rem; }
-  body {
-    font-family: var(--font-sans);
-    background: var(--bg);
-    color: var(--fg);
-    line-height: 1.7;
-    font-size: 15px;
-    -webkit-font-smoothing: antialiased;
-  }
-
-  .container { max-width: 860px; margin: 0 auto; padding: 3rem 2rem 6rem; }
-
-  .hero {
-    text-align: center;
-    padding: 4rem 0 3rem;
-    border-bottom: 1px solid var(--border);
-    margin-bottom: 3rem;
-  }
-  .hero h1 { font-family: var(--font-mono); font-size: 2.2rem; font-weight: 700; color: var(--fg-bright); letter-spacing: -0.03em; margin-bottom: 0.5rem; }
-  .hero h1 span { color: var(--accent); }
-  .hero .subtitle { font-family: var(--font-sans); color: var(--fg-muted); font-size: 0.92rem; max-width: 560px; margin: 0 auto; line-height: 1.6; }
-  .hero .meta { margin-top: 1.5rem; display: flex; justify-content: center; gap: 1.5rem; flex-wrap: wrap; }
-  .hero .meta span { font-size: 0.8rem; color: var(--fg-subtle); font-family: var(--font-mono); }
-
-  .toc { background: var(--bg-surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 1.5rem 2rem; margin-bottom: 3rem; }
-  .toc h2 { font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--fg-muted); margin-bottom: 1rem; }
-  .toc ol { list-style: none; counter-reset: toc; columns: 2; column-gap: 2rem; }
-  .toc li { counter-increment: toc; break-inside: avoid; margin-bottom: 0.35rem; }
-  .toc li::before { content: counter(toc, decimal-leading-zero) " "; color: var(--fg-subtle); font-family: var(--font-mono); font-size: 0.75rem; margin-right: 0.25rem; }
-  .toc a { font-family: var(--font-mono); color: var(--fg); text-decoration: none; font-size: 0.82rem; transition: color 0.15s; }
-  .toc a:hover { color: var(--accent); }
-
-  section { margin-bottom: 4rem; }
-  section + section { padding-top: 1rem; }
-
-  h2 { font-family: var(--font-mono); font-size: 1.3rem; font-weight: 700; color: var(--fg-bright); letter-spacing: -0.01em; margin-bottom: 1.25rem; padding-bottom: 0.5rem; border-bottom: 1px solid var(--border); }
-  h3 { font-family: var(--font-mono); font-size: 1rem; font-weight: 600; color: var(--fg-bright); margin-top: 2rem; margin-bottom: 0.75rem; }
-  h4 { font-family: var(--font-mono); font-size: 0.9rem; font-weight: 600; color: var(--accent); margin-top: 1.5rem; margin-bottom: 0.5rem; }
-
-  p { margin-bottom: 1rem; font-size: 0.95rem; line-height: 1.75; }
-  strong { color: var(--fg-bright); font-weight: 600; }
-  a { color: var(--accent); text-decoration: none; }
-  a:hover { text-decoration: underline; }
-
-  ul, ol { margin-bottom: 1rem; padding-left: 1.5rem; font-size: 0.93rem; line-height: 1.7; }
-  li { margin-bottom: 0.35rem; }
-  li::marker { color: var(--fg-subtle); }
-
-  .table-wrap { overflow-x: auto; margin-bottom: 1.5rem; }
-  table { width: 100%; border-collapse: collapse; font-size: 0.88rem; }
-  th, td { text-align: left; padding: 0.6rem 1rem; border-bottom: 1px solid var(--border-subtle); }
-  th { font-family: var(--font-mono); font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.06em; color: var(--fg-muted); background: var(--bg-surface); border-bottom-color: var(--border); white-space: nowrap; }
-  td { font-family: var(--font-sans); font-size: 0.88rem; color: var(--fg); }
-  tr:hover td { background: var(--accent-glow); }
-  td code { background: var(--bg-elevated); padding: 0.15em 0.4em; border-radius: 3px; font-family: var(--font-mono); font-size: 0.82em; color: var(--cyan); }
-
-  pre { background: var(--bg-code); border: 1px solid var(--border); border-radius: var(--radius); padding: 1.25rem 1.5rem; overflow-x: auto; margin-bottom: 1.5rem; font-family: var(--font-mono); font-size: 0.82rem; line-height: 1.65; color: var(--fg); }
-  pre code { background: none; padding: 0; color: inherit; font-size: inherit; }
-  code { font-family: var(--font-mono); font-size: 0.85em; }
-  p code, li code { background: var(--bg-elevated); padding: 0.15em 0.4em; border-radius: 3px; color: var(--cyan); font-size: 0.85em; }
-
-  .kw { color: var(--purple); }
-  .str { color: var(--green); }
-  .cm { color: var(--fg-subtle); font-style: italic; }
-  .num { color: var(--orange); }
-  .key { color: var(--accent); }
-
-  .mermaid { margin: 1.5rem 0 2rem; text-align: center; }
-  .mermaid svg { max-width: 100%; height: auto; }
-
-  .callout { font-family: var(--font-sans); background: var(--bg-surface); border-left: 3px solid var(--accent-dim); border-radius: 0 var(--radius) var(--radius) 0; padding: 1rem 1.25rem; margin-bottom: 1.5rem; font-size: 0.88rem; color: var(--fg-muted); line-height: 1.6; }
-  .callout strong { font-family: var(--font-mono); color: var(--fg-bright); }
-  .callout.success { border-left-color: var(--green-dim); }
-  .callout.warn { border-left-color: var(--orange); }
-
-  .badge { display: inline-block; font-family: var(--font-mono); font-size: 0.65rem; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; padding: 0.2em 0.6em; border-radius: 3px; vertical-align: middle; margin-left: 0.4rem; }
-  .badge-done { background: var(--green-dim); color: #fff; }
-  .badge-wip { background: var(--orange); color: #0b0e14; }
-  .badge-todo { background: var(--fg-subtle); color: var(--fg); }
-
-  .checklist { list-style: none; padding-left: 0; }
-  .checklist li { padding-left: 1.5rem; position: relative; margin-bottom: 0.5rem; }
-  .checklist li::before { position: absolute; left: 0; font-family: var(--font-mono); font-size: 0.85rem; }
-  .checklist li.done { color: var(--fg-muted); }
-  .checklist li.done::before { content: "\2713"; color: var(--green); }
-  .checklist li.todo::before { content: "\25CB"; color: var(--fg-subtle); }
-  .checklist li.wip::before { content: "\25D4"; color: var(--orange); }
-
-  .compare { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-bottom: 2rem; }
-  .compare-card { background: var(--bg-surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 1.25rem; }
-  .compare-card h4 { margin-top: 0; font-size: 0.82rem; }
-  .compare-card.after { border-color: var(--accent-dim); }
-  .compare-card ul { font-family: var(--font-mono); padding-left: 1.25rem; font-size: 0.8rem; }
-
-  hr { border: none; border-top: 1px solid var(--border); margin: 3rem 0; }
-
-  .progress-bar { position: fixed; top: 0; left: 0; height: 2px; background: var(--accent); z-index: 999; transition: width 0.1s linear; }
-
-  @media (max-width: 640px) {
-    .container { padding: 2rem 1rem 4rem; }
-    .hero h1 { font-size: 1.6rem; }
-    .toc ol { columns: 1; }
-    .compare { grid-template-columns: 1fr; }
-    table { font-size: 0.8rem; }
-    th, td { padding: 0.4rem 0.6rem; }
-  }
-</style>
-<link rel="preconnect" href="https://fonts.googleapis.com">
-<link href="https://fonts.googleapis.com/css2?family=Noto+Emoji&display=swap" rel="stylesheet">
-<style>
-  @font-face {
-    font-family: 'Departure Mono';
-    src: url('https://cdn.jsdelivr.net/gh/rektdeckard/departure-mono@latest/fonts/DepartureMono-Regular.woff2') format('woff2');
-    font-weight: normal;
-    font-style: normal;
-    font-display: swap;
-  }
-</style>
-</head>
-<body>
-
-<div class="progress-bar" id="progress"></div>
-
-<div class="container">
-
-<header class="hero">
-  <h1>honcho<span>-integration-spec</span></h1>
-  <p class="subtitle">Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations.</p>
-  <div class="meta">
-    <span>hermes-agent / openclaw-honcho</span>
-    <span>Python + TypeScript</span>
-    <span>2026-03-09</span>
-  </div>
-</header>
-
-<nav class="toc">
-  <h2>Contents</h2>
-  <ol>
-    <li><a href="#overview">Overview</a></li>
-    <li><a href="#architecture">Architecture comparison</a></li>
-    <li><a href="#diff-table">Diff table</a></li>
-    <li><a href="#patterns">Hermes patterns to port</a></li>
-    <li><a href="#spec-async">Spec: async prefetch</a></li>
-    <li><a href="#spec-reasoning">Spec: dynamic reasoning level</a></li>
-    <li><a href="#spec-modes">Spec: per-peer memory modes</a></li>
-    <li><a href="#spec-identity">Spec: AI peer identity formation</a></li>
-    <li><a href="#spec-sessions">Spec: session naming strategies</a></li>
-    <li><a href="#spec-cli">Spec: CLI surface injection</a></li>
-    <li><a href="#openclaw-checklist">openclaw-honcho checklist</a></li>
-    <li><a href="#nanobot-checklist">nanobot-honcho checklist</a></li>
-  </ol>
-</nav>
-
-<!-- OVERVIEW -->
-<section id="overview">
-  <h2>Overview</h2>
-
-  <p>Two independent Honcho integrations have been built for two different agent runtimes: <strong>Hermes Agent</strong> (Python, baked into the runner) and <strong>openclaw-honcho</strong> (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, <code>session.context()</code>, <code>peer.chat()</code> — but they made different tradeoffs at every layer.</p>
-
-  <p>This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language.</p>
-
-  <div class="callout">
-    <strong>Scope</strong> Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive.
-  </div>
-</section>
-
-<!-- ARCHITECTURE -->
-<section id="architecture">
-  <h2>Architecture comparison</h2>
-
-  <h3>Hermes: baked-in runner</h3>
-  <p>Honcho is initialised directly inside <code>AIAgent.__init__</code>. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into <code>_cached_system_prompt</code>) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider.</p>
-
-  <div class="mermaid">
-%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%%
-flowchart TD
-    U["user message"] --> P["_honcho_prefetch()<br/>(reads cache — no HTTP)"]
-    P --> SP["_build_system_prompt()<br/>(first turn only, cached)"]
-    SP --> LLM["LLM call"]
-    LLM --> R["response"]
-    R --> FP["_honcho_fire_prefetch()<br/>(daemon threads, turn end)"]
-    FP --> C1["prefetch_context() thread"]
-    FP --> C2["prefetch_dialectic() thread"]
-    C1 --> CACHE["_context_cache / _dialectic_cache"]
-    C2 --> CACHE
-
-    style U fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style P fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style SP fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style LLM fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style R fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style FP fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style C1 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style C2 fill:#2a1a40,stroke:#bc8cff,color:#c9d1d9
-    style CACHE fill:#11151c,stroke:#484f58,color:#6e7681
-  </div>
-
-  <h3>openclaw-honcho: hook-based plugin</h3>
-  <p>The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside <code>before_prompt_build</code> on every turn. Message capture happens in <code>agent_end</code>. The multi-agent hierarchy is tracked via <code>subagent_spawned</code>. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin.</p>
-
-  <div class="mermaid">
-%%{init: {'theme': 'dark', 'themeVariables': { 'primaryColor': '#1f3150', 'primaryTextColor': '#c9d1d9', 'primaryBorderColor': '#3d6ea5', 'lineColor': '#3d6ea5', 'secondaryColor': '#162030', 'tertiaryColor': '#11151c' }}}%%
-flowchart TD
-    U2["user message"] --> BPB["before_prompt_build<br/>(BLOCKING HTTP — every turn)"]
-    BPB --> CTX["session.context()"]
-    CTX --> SP2["system prompt assembled"]
-    SP2 --> LLM2["LLM call"]
-    LLM2 --> R2["response"]
-    R2 --> AE["agent_end hook"]
-    AE --> SAVE["session.addMessages()<br/>session.setMetadata()"]
-
-    style U2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style BPB fill:#3a1515,stroke:#f47067,color:#c9d1d9
-    style CTX fill:#3a1515,stroke:#f47067,color:#c9d1d9
-    style SP2 fill:#1f3150,stroke:#3d6ea5,color:#c9d1d9
-    style LLM2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style R2 fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style AE fill:#162030,stroke:#3d6ea5,color:#c9d1d9
-    style SAVE fill:#11151c,stroke:#484f58,color:#6e7681
-  </div>
-</section>
-
-<!-- DIFF TABLE -->
-<section id="diff-table">
-  <h2>Diff table</h2>
-
-  <div class="table-wrap">
-    <table>
-      <thead>
-        <tr>
-          <th>Dimension</th>
-          <th>Hermes Agent</th>
-          <th>openclaw-honcho</th>
-        </tr>
-      </thead>
-      <tbody>
-        <tr>
-          <td><strong>Context injection timing</strong></td>
-          <td>Once per session (cached). Zero HTTP on response path after turn 1.</td>
-          <td>Every turn, blocking. Fresh context per turn but adds latency.</td>
-        </tr>
-        <tr>
-          <td><strong>Prefetch strategy</strong></td>
-          <td>Daemon threads fire at turn end; consumed next turn from cache.</td>
-          <td>None. Blocking call at prompt-build time.</td>
-        </tr>
-        <tr>
-          <td><strong>Dialectic (peer.chat)</strong></td>
-          <td>Prefetched async; result injected into system prompt next turn.</td>
-          <td>On-demand via <code>honcho_recall</code> / <code>honcho_analyze</code> tools.</td>
-        </tr>
-        <tr>
-          <td><strong>Reasoning level</strong></td>
-          <td>Dynamic: scales with message length. Floor = config default. Cap = "high".</td>
-          <td>Fixed per tool: recall=minimal, analyze=medium.</td>
-        </tr>
-        <tr>
-          <td><strong>Memory modes</strong></td>
-          <td><code>user_memory_mode</code> / <code>agent_memory_mode</code>: hybrid / honcho / local.</td>
-          <td>None. Always writes to Honcho.</td>
-        </tr>
-        <tr>
-          <td><strong>Write frequency</strong></td>
-          <td>async (background queue), turn, session, N turns.</td>
-          <td>After every agent_end (no control).</td>
-        </tr>
-        <tr>
-          <td><strong>AI peer identity</strong></td>
-          <td><code>observe_me=True</code>, <code>seed_ai_identity()</code>, <code>get_ai_representation()</code>, SOUL.md → AI peer.</td>
-          <td>Agent files uploaded to agent peer at setup. No ongoing self-observation seeding.</td>
-        </tr>
-        <tr>
-          <td><strong>Context scope</strong></td>
-          <td>User peer + AI peer representation, both injected.</td>
-          <td>User peer (owner) representation + conversation summary. <code>peerPerspective</code> on context call.</td>
-        </tr>
-        <tr>
-          <td><strong>Session naming</strong></td>
-          <td>per-directory / global / manual map / title-based.</td>
-          <td>Derived from platform session key.</td>
-        </tr>
-        <tr>
-          <td><strong>Multi-agent</strong></td>
-          <td>Single-agent only.</td>
-          <td>Parent observer hierarchy via <code>subagent_spawned</code>.</td>
-        </tr>
-        <tr>
-          <td><strong>Tool surface</strong></td>
-          <td>Single <code>query_user_context</code> tool (on-demand dialectic).</td>
-          <td>6 tools: session, profile, search, context (fast) + recall, analyze (LLM).</td>
-        </tr>
-        <tr>
-          <td><strong>Platform metadata</strong></td>
-          <td>Not stripped.</td>
-          <td>Explicitly stripped before Honcho storage.</td>
-        </tr>
-        <tr>
-          <td><strong>Message dedup</strong></td>
-          <td>None (sends on every save cycle).</td>
-          <td><code>lastSavedIndex</code> in session metadata prevents re-sending.</td>
-        </tr>
-        <tr>
-          <td><strong>CLI surface in prompt</strong></td>
-          <td>Management commands injected into system prompt. Agent knows its own CLI.</td>
-          <td>Not injected.</td>
-        </tr>
-        <tr>
-          <td><strong>AI peer name in identity</strong></td>
-          <td>Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured.</td>
-          <td>Not implemented.</td>
-        </tr>
-        <tr>
-          <td><strong>QMD / local file search</strong></td>
-          <td>Not implemented.</td>
-          <td>Passthrough tools when QMD backend configured.</td>
-        </tr>
-        <tr>
-          <td><strong>Workspace metadata</strong></td>
-          <td>Not implemented.</td>
-          <td><code>agentPeerMap</code> in workspace metadata tracks agent&#8594;peer ID.</td>
-        </tr>
-      </tbody>
-    </table>
-  </div>
-</section>
-
-<!-- PATTERNS -->
-<section id="patterns">
-  <h2>Hermes patterns to port</h2>
-
-  <p>Six patterns from Hermes are worth adopting in any Honcho integration. They are described below as integration-agnostic interfaces — the implementation will differ per runtime, but the contract is the same.</p>
-
-  <div class="compare">
-    <div class="compare-card">
-      <h4>Patterns Hermes contributes</h4>
-      <ul>
-        <li>Async prefetch (zero-latency)</li>
-        <li>Dynamic reasoning level</li>
-        <li>Per-peer memory modes</li>
-        <li>AI peer identity formation</li>
-        <li>Session naming strategies</li>
-        <li>CLI surface injection</li>
-      </ul>
-    </div>
-    <div class="compare-card after">
-      <h4>Patterns openclaw contributes back</h4>
-      <ul>
-        <li>lastSavedIndex dedup</li>
-        <li>Platform metadata stripping</li>
-        <li>Multi-agent observer hierarchy</li>
-        <li>peerPerspective on context()</li>
-        <li>Tiered tool surface (fast/LLM)</li>
-        <li>Workspace agentPeerMap</li>
-      </ul>
-    </div>
-  </div>
-</section>
-
-<!-- SPEC: ASYNC PREFETCH -->
-<section id="spec-async">
-  <h2>Spec: async prefetch</h2>
-
-  <h3>Problem</h3>
-  <p>Calling <code>session.context()</code> and <code>peer.chat()</code> synchronously before each LLM call adds 200–800ms of Honcho round-trip latency to every turn. Users experience this as the agent "thinking slowly."</p>
-
-  <h3>Pattern</h3>
-  <p>Fire both calls as non-blocking background work at the <strong>end</strong> of each turn. Store results in a per-session cache keyed by session ID. At the <strong>start</strong> of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path.</p>
-
-  <h3>Interface contract</h3>
-  <pre><code><span class="cm">// TypeScript (openclaw / nanobot plugin shape)</span>
-
-<span class="kw">interface</span> <span class="key">AsyncPrefetch</span> {
-  <span class="cm">// Fire context + dialectic fetches at turn end. Non-blocking.</span>
-  firePrefetch(sessionId: <span class="str">string</span>, userMessage: <span class="str">string</span>): <span class="kw">void</span>;
-
-  <span class="cm">// Pop cached results at turn start. Returns empty if cache is cold.</span>
-  popContextResult(sessionId: <span class="str">string</span>): ContextResult | <span class="kw">null</span>;
-  popDialecticResult(sessionId: <span class="str">string</span>): <span class="str">string</span> | <span class="kw">null</span>;
-}
-
-<span class="kw">type</span> <span class="key">ContextResult</span> = {
-  representation: <span class="str">string</span>;
-  card: <span class="str">string</span>[];
-  aiRepresentation?: <span class="str">string</span>;  <span class="cm">// AI peer context if enabled</span>
-  summary?: <span class="str">string</span>;            <span class="cm">// conversation summary if fetched</span>
-};</code></pre>
-
-  <h3>Implementation notes</h3>
-  <ul>
-    <li>Python: <code>threading.Thread(daemon=True)</code>. Write to <code>dict[session_id, result]</code> — GIL makes this safe for simple writes.</li>
-    <li>TypeScript: <code>Promise</code> stored in <code>Map&lt;string, Promise&lt;ContextResult&gt;&gt;</code>. Await at pop time. If not resolved yet, skip (return null) — do not block.</li>
-    <li>The pop is destructive: clears the cache entry after reading so stale data never accumulates.</li>
-    <li>Prefetch should also fire on first turn (even though it won't be consumed until turn 2) — this ensures turn 2 is never cold.</li>
-  </ul>
-
-  <h3>openclaw-honcho adoption</h3>
-  <p>Move <code>session.context()</code> from <code>before_prompt_build</code> to a post-<code>agent_end</code> background task. Store result in <code>state.contextCache</code>. In <code>before_prompt_build</code>, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn.</p>
-</section>
-
-<!-- SPEC: DYNAMIC REASONING LEVEL -->
-<section id="spec-reasoning">
-  <h2>Spec: dynamic reasoning level</h2>
-
-  <h3>Problem</h3>
-  <p>Honcho's dialectic endpoint supports reasoning levels from <code>minimal</code> to <code>max</code>. A fixed level per tool wastes budget on simple queries and under-serves complex ones.</p>
-
-  <h3>Pattern</h3>
-  <p>Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at <code>high</code> — never select <code>max</code> automatically.</p>
-
-  <h3>Interface contract</h3>
-  <pre><code><span class="cm">// Shared helper — identical logic in any language</span>
-
-<span class="kw">const</span> LEVELS = [<span class="str">"minimal"</span>, <span class="str">"low"</span>, <span class="str">"medium"</span>, <span class="str">"high"</span>, <span class="str">"max"</span>];
-
-<span class="kw">function</span> <span class="key">dynamicReasoningLevel</span>(
-  query: <span class="str">string</span>,
-  configDefault: <span class="str">string</span> = <span class="str">"low"</span>
-): <span class="str">string</span> {
-  <span class="kw">const</span> baseIdx = Math.max(<span class="num">0</span>, LEVELS.indexOf(configDefault));
-  <span class="kw">const</span> n = query.length;
-  <span class="kw">const</span> bump = n &lt; <span class="num">120</span> ? <span class="num">0</span> : n &lt; <span class="num">400</span> ? <span class="num">1</span> : <span class="num">2</span>;
-  <span class="kw">return</span> LEVELS[Math.min(baseIdx + bump, <span class="num">3</span>)]; <span class="cm">// cap at "high" (idx 3)</span>
-}</code></pre>
-
-  <h3>Config key</h3>
-  <p>Add a <code>dialecticReasoningLevel</code> config field (string, default <code>"low"</code>). This sets the floor. Users can raise or lower it. The dynamic bump always applies on top.</p>
-
-  <h3>openclaw-honcho adoption</h3>
-  <p>Apply in <code>honcho_recall</code> and <code>honcho_analyze</code>: replace the fixed <code>reasoningLevel</code> with the dynamic selector. <code>honcho_recall</code> should use floor <code>"minimal"</code> and <code>honcho_analyze</code> floor <code>"medium"</code> — both still bump with message length.</p>
-</section>
-
-<!-- SPEC: PER-PEER MEMORY MODES -->
-<section id="spec-modes">
-  <h2>Spec: per-peer memory modes</h2>
-
-  <h3>Problem</h3>
-  <p>Users want independent control over whether user context and agent context are written locally, to Honcho, or both. A single <code>memoryMode</code> shorthand is not granular enough.</p>
-
-  <h3>Pattern</h3>
-  <p>Three modes per peer: <code>hybrid</code> (write both local + Honcho), <code>honcho</code> (Honcho only, disable local files), <code>local</code> (local files only, skip Honcho sync for this peer). Two orthogonal axes: user peer and agent peer.</p>
-
-  <h3>Config schema</h3>
-  <pre><code><span class="cm">// ~/.openclaw/openclaw.json  (or ~/.nanobot/config.json)</span>
-{
-  <span class="str">"plugins"</span>: {
-    <span class="str">"openclaw-honcho"</span>: {
-      <span class="str">"config"</span>: {
-        <span class="str">"apiKey"</span>: <span class="str">"..."</span>,
-        <span class="str">"memoryMode"</span>: <span class="str">"hybrid"</span>,          <span class="cm">// shorthand: both peers</span>
-        <span class="str">"userMemoryMode"</span>: <span class="str">"honcho"</span>,       <span class="cm">// override for user peer</span>
-        <span class="str">"agentMemoryMode"</span>: <span class="str">"hybrid"</span>       <span class="cm">// override for agent peer</span>
-      }
-    }
-  }
-}</code></pre>
-
-  <h3>Resolution order</h3>
-  <ol>
-    <li>Per-peer field (<code>userMemoryMode</code> / <code>agentMemoryMode</code>) — wins if present.</li>
-    <li>Shorthand <code>memoryMode</code> — applies to both peers as default.</li>
-    <li>Hardcoded default: <code>"hybrid"</code>.</li>
-  </ol>
-
-  <h3>Effect on Honcho sync</h3>
-  <ul>
-    <li><code>userMemoryMode=local</code>: skip adding user peer messages to Honcho.</li>
-    <li><code>agentMemoryMode=local</code>: skip adding assistant peer messages to Honcho.</li>
-    <li>Both local: skip <code>session.addMessages()</code> entirely.</li>
-    <li><code>userMemoryMode=honcho</code>: disable local USER.md writes.</li>
-    <li><code>agentMemoryMode=honcho</code>: disable local MEMORY.md / SOUL.md writes.</li>
-  </ul>
-</section>
-
-<!-- SPEC: AI PEER IDENTITY -->
-<section id="spec-identity">
-  <h2>Spec: AI peer identity formation</h2>
-
-  <h3>Problem</h3>
-  <p>Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if <code>observe_me=True</code> is set for the agent peer. Without it, the agent peer accumulates nothing and Honcho's AI-side model never forms.</p>
-
-  <p>Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation, rather than waiting for it to emerge from scratch.</p>
-
-  <h3>Part A: observe_me=True for agent peer</h3>
-  <pre><code><span class="cm">// TypeScript — in session.addPeers() call</span>
-<span class="kw">await</span> session.addPeers([
-  [ownerPeer.id, { observeMe: <span class="kw">true</span>,  observeOthers: <span class="kw">false</span> }],
-  [agentPeer.id, { observeMe: <span class="kw">true</span>,  observeOthers: <span class="kw">true</span>  }], <span class="cm">// was false</span>
-]);</code></pre>
-
-  <p>This is a one-line change but foundational. Without it, Honcho's AI peer representation stays empty regardless of what the agent says.</p>
-
-  <h3>Part B: seedAiIdentity()</h3>
-  <pre><code><span class="kw">async function</span> <span class="key">seedAiIdentity</span>(
-  session: HonchoSession,
-  agentPeer: Peer,
-  content: <span class="str">string</span>,
-  source: <span class="str">string</span>
-): Promise&lt;<span class="kw">boolean</span>&gt; {
-  <span class="kw">const</span> wrapped = [
-    <span class="str">`&lt;ai_identity_seed&gt;`</span>,
-    <span class="str">`&lt;source&gt;${source}&lt;/source&gt;`</span>,
-    <span class="str">``</span>,
-    content.trim(),
-    <span class="str">`&lt;/ai_identity_seed&gt;`</span>,
-  ].join(<span class="str">"\n"</span>);
-
-  <span class="kw">await</span> agentPeer.addMessage(<span class="str">"assistant"</span>, wrapped);
-  <span class="kw">return true</span>;
-}</code></pre>
-
-  <h3>Part C: migrate agent files at setup</h3>
-  <p>During <code>openclaw honcho setup</code>, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md, BOOTSTRAP.md) to the agent peer using <code>seedAiIdentity()</code> instead of <code>session.uploadFile()</code>. This routes the content through Honcho's observation pipeline rather than the file store.</p>
-
-  <h3>Part D: AI peer name in identity</h3>
-  <p>When the agent has a configured name (non-default), inject it into the agent's self-identity prefix. In OpenClaw this means adding to the injected system prompt section:</p>
-  <pre><code><span class="cm">// In context hook return value</span>
-<span class="kw">return</span> {
-  systemPrompt: [
-    agentName ? <span class="str">`You are ${agentName}.`</span> : <span class="str">""</span>,
-    <span class="str">"## User Memory Context"</span>,
-    ...sections,
-  ].filter(Boolean).join(<span class="str">"\n\n"</span>)
-};</code></pre>
-
-  <h3>CLI surface: honcho identity subcommand</h3>
-  <pre><code>openclaw honcho identity &lt;file&gt;    <span class="cm"># seed from file</span>
-openclaw honcho identity --show    <span class="cm"># show current AI peer representation</span></code></pre>
-</section>
-
-<!-- SPEC: SESSION NAMING -->
-<section id="spec-sessions">
-  <h2>Spec: session naming strategies</h2>
-
-  <h3>Problem</h3>
-  <p>When Honcho is used across multiple projects or directories, a single global session means every project shares the same context. Per-directory sessions provide isolation without requiring users to name sessions manually.</p>
-
-  <h3>Strategies</h3>
-  <div class="table-wrap">
-    <table>
-      <thead><tr><th>Strategy</th><th>Session key</th><th>When to use</th></tr></thead>
-      <tbody>
-        <tr><td><code>per-directory</code></td><td>basename of CWD</td><td>Default. Each project gets its own session.</td></tr>
-        <tr><td><code>global</code></td><td>fixed string <code>"global"</code></td><td>Single cross-project session.</td></tr>
-        <tr><td>manual map</td><td>user-configured per path</td><td><code>sessions</code> config map overrides directory basename.</td></tr>
-        <tr><td>title-based</td><td>sanitized session title</td><td>When agent supports named sessions; title set mid-conversation.</td></tr>
-      </tbody>
-    </table>
-  </div>
-
-  <h3>Config schema</h3>
-  <pre><code>{
-  <span class="str">"sessionStrategy"</span>: <span class="str">"per-directory"</span>,   <span class="cm">// "per-directory" | "global"</span>
-  <span class="str">"sessionPeerPrefix"</span>: <span class="kw">false</span>,            <span class="cm">// prepend peer name to session key</span>
-  <span class="str">"sessions"</span>: {                            <span class="cm">// manual overrides</span>
-    <span class="str">"/home/user/projects/foo"</span>: <span class="str">"foo-project"</span>
-  }
-}</code></pre>
-
-  <h3>CLI surface</h3>
-  <pre><code>openclaw honcho sessions              <span class="cm"># list all mappings</span>
-openclaw honcho map &lt;name&gt;           <span class="cm"># map cwd to session name</span>
-openclaw honcho map                   <span class="cm"># no-arg = list mappings</span></code></pre>
-
-  <p>Resolution order: manual map wins &rarr; session title &rarr; directory basename &rarr; platform key.</p>
-</section>
-
-<!-- SPEC: CLI SURFACE INJECTION -->
-<section id="spec-cli">
-  <h2>Spec: CLI surface injection</h2>
-
-  <h3>Problem</h3>
-  <p>When a user asks "how do I change my memory settings?" or "what Honcho commands are available?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface.</p>
-
-  <h3>Pattern</h3>
-  <p>When Honcho is active, append a compact command reference to the system prompt. The agent can cite these commands directly instead of guessing.</p>
-
-  <pre><code><span class="cm">// In context hook, append to systemPrompt</span>
-<span class="kw">const</span> honchoSection = [
-  <span class="str">"# Honcho memory integration"</span>,
-  <span class="str">`Active. Session: ${sessionKey}. Mode: ${mode}.`</span>,
-  <span class="str">"Management commands:"</span>,
-  <span class="str">"  openclaw honcho status                    — show config + connection"</span>,
-  <span class="str">"  openclaw honcho mode [hybrid|honcho|local] — show or set memory mode"</span>,
-  <span class="str">"  openclaw honcho sessions                  — list session mappings"</span>,
-  <span class="str">"  openclaw honcho map &lt;name&gt;                — map directory to session"</span>,
-  <span class="str">"  openclaw honcho identity [file] [--show]  — seed or show AI identity"</span>,
-  <span class="str">"  openclaw honcho setup                     — full interactive wizard"</span>,
-].join(<span class="str">"\n"</span>);</code></pre>
-
-  <div class="callout warn">
-    <strong>Keep it compact.</strong> This section is injected every turn. Keep it under 300 chars of context. List commands, not explanations — the agent can explain them on request.
-  </div>
-</section>
-
-<!-- OPENCLAW CHECKLIST -->
-<section id="openclaw-checklist">
-  <h2>openclaw-honcho checklist</h2>
-
-  <p>Ordered by impact. Each item maps to a spec section above.</p>
-
-  <ul class="checklist">
-    <li class="todo"><strong>Async prefetch</strong> — move <code>session.context()</code> out of <code>before_prompt_build</code> into post-<code>agent_end</code> background Promise. Pop from cache at prompt build. (<a href="#spec-async">spec</a>)</li>
-    <li class="todo"><strong>observe_me=True for agent peer</strong> — one-line change in <code>session.addPeers()</code> config for agent peer. (<a href="#spec-identity">spec</a>)</li>
-    <li class="todo"><strong>Dynamic reasoning level</strong> — add <code>dynamicReasoningLevel()</code> helper; apply in <code>honcho_recall</code> and <code>honcho_analyze</code>. Add <code>dialecticReasoningLevel</code> to config schema. (<a href="#spec-reasoning">spec</a>)</li>
-    <li class="todo"><strong>Per-peer memory modes</strong> — add <code>userMemoryMode</code> / <code>agentMemoryMode</code> to config; gate Honcho sync and local writes accordingly. (<a href="#spec-modes">spec</a>)</li>
-    <li class="todo"><strong>seedAiIdentity()</strong> — add helper; apply during setup migration for SOUL.md / IDENTITY.md instead of <code>session.uploadFile()</code>. (<a href="#spec-identity">spec</a>)</li>
-    <li class="todo"><strong>Session naming strategies</strong> — add <code>sessionStrategy</code>, <code>sessions</code> map, <code>sessionPeerPrefix</code> to config; implement resolution function. (<a href="#spec-sessions">spec</a>)</li>
-    <li class="todo"><strong>CLI surface injection</strong> — append command reference to <code>before_prompt_build</code> return value when Honcho is active. (<a href="#spec-cli">spec</a>)</li>
-    <li class="todo"><strong>honcho identity subcommand</strong> — add <code>openclaw honcho identity</code> CLI command. (<a href="#spec-identity">spec</a>)</li>
-    <li class="todo"><strong>AI peer name injection</strong> — if <code>aiPeer</code> name configured, prepend to injected system prompt. (<a href="#spec-identity">spec</a>)</li>
-    <li class="todo"><strong>honcho mode / honcho sessions / honcho map</strong> — CLI parity with Hermes. (<a href="#spec-sessions">spec</a>)</li>
-  </ul>
-
-  <div class="callout success">
-    <strong>Already done in openclaw-honcho (do not re-implement):</strong> lastSavedIndex dedup, platform metadata stripping, multi-agent parent observer hierarchy, peerPerspective on context(), tiered tool surface (fast/LLM), workspace agentPeerMap, QMD passthrough, self-hosted Honcho support.
-  </div>
-</section>
-
-<!-- NANOBOT CHECKLIST -->
-<section id="nanobot-checklist">
-  <h2>nanobot-honcho checklist</h2>
-
-  <p>nanobot-honcho is a greenfield integration. Start from openclaw-honcho's architecture (hook-based, dual peer) and apply all Hermes patterns from day one rather than retrofitting. Priority order:</p>
-
-  <h3>Phase 1 — core correctness</h3>
-  <ul class="checklist">
-    <li class="todo">Dual peer model (owner + agent peer), both with <code>observe_me=True</code></li>
-    <li class="todo">Message capture at turn end with <code>lastSavedIndex</code> dedup</li>
-    <li class="todo">Platform metadata stripping before Honcho storage</li>
-    <li class="todo">Async prefetch from day one — do not implement blocking context injection</li>
-    <li class="todo">Legacy file migration at first activation (USER.md → owner peer, SOUL.md → <code>seedAiIdentity()</code>)</li>
-  </ul>
-
-  <h3>Phase 2 — configuration</h3>
-  <ul class="checklist">
-    <li class="todo">Config schema: <code>apiKey</code>, <code>workspaceId</code>, <code>baseUrl</code>, <code>memoryMode</code>, <code>userMemoryMode</code>, <code>agentMemoryMode</code>, <code>dialecticReasoningLevel</code>, <code>sessionStrategy</code>, <code>sessions</code></li>
-    <li class="todo">Per-peer memory mode gating</li>
-    <li class="todo">Dynamic reasoning level</li>
-    <li class="todo">Session naming strategies</li>
-  </ul>
-
-  <h3>Phase 3 — tools and CLI</h3>
-  <ul class="checklist">
-    <li class="todo">Tool surface: <code>honcho_profile</code>, <code>honcho_recall</code>, <code>honcho_analyze</code>, <code>honcho_search</code>, <code>honcho_context</code></li>
-    <li class="todo">CLI: <code>setup</code>, <code>status</code>, <code>sessions</code>, <code>map</code>, <code>mode</code>, <code>identity</code></li>
-    <li class="todo">CLI surface injection into system prompt</li>
-    <li class="todo">AI peer name wired into agent identity</li>
-  </ul>
-</section>
-
-</div>
-
-<script type="module">
-  import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
-  mermaid.initialize({ startOnLoad: true, securityLevel: 'loose', fontFamily: 'Departure Mono, Noto Emoji, monospace' });
-</script>
-<script>
-  window.addEventListener('scroll', () => {
-    const bar = document.getElementById('progress');
-    const max = document.documentElement.scrollHeight - window.innerHeight;
-    bar.style.width = (max > 0 ? (window.scrollY / max) * 100 : 0) + '%';
-  });
-</script>
-</body>
-</html>
@@ -1,377 +0,0 @@
-# honcho-integration-spec
-
-Comparison of Hermes Agent vs. openclaw-honcho — and a porting spec for bringing Hermes patterns into other Honcho integrations.
-
---
-
-## Overview
-
-Two independent Honcho integrations have been built for two different agent runtimes: **Hermes Agent** (Python, baked into the runner) and **openclaw-honcho** (TypeScript plugin via hook/tool API). Both use the same Honcho peer paradigm — dual peer model, `session.context()`, `peer.chat()` — but they made different tradeoffs at every layer.
-
-This document maps those tradeoffs and defines a porting spec: a set of Hermes-originated patterns, each stated as an integration-agnostic interface, that any Honcho integration can adopt regardless of runtime or language.
-
-> **Scope** Both integrations work correctly today. This spec is about the delta — patterns in Hermes that are worth propagating and patterns in openclaw-honcho that Hermes should eventually adopt. The spec is additive, not prescriptive.
-
---
-
-## Architecture comparison
-
-### Hermes: baked-in runner
-
-Honcho is initialised directly inside `AIAgent.__init__`. There is no plugin boundary. Session management, context injection, async prefetch, and CLI surface are all first-class concerns of the runner. Context is injected once per session (baked into `_cached_system_prompt`) and never re-fetched mid-session — this maximises prefix cache hits at the LLM provider.
-
-Turn flow:
-
-```
-user message
-  → _honcho_prefetch()       (reads cache — no HTTP)
-  → _build_system_prompt()   (first turn only, cached)
-  → LLM call
-  → response
-  → _honcho_fire_prefetch()  (daemon threads, turn end)
-       → prefetch_context() thread  ──┐
-       → prefetch_dialectic() thread ─┴→ _context_cache / _dialectic_cache
-```
-
-### openclaw-honcho: hook-based plugin
-
-The plugin registers hooks against OpenClaw's event bus. Context is fetched synchronously inside `before_prompt_build` on every turn. Message capture happens in `agent_end`. The multi-agent hierarchy is tracked via `subagent_spawned`. This model is correct but every turn pays a blocking Honcho round-trip before the LLM call can begin.
-
-Turn flow:
-
-```
-user message
-  → before_prompt_build (BLOCKING HTTP — every turn)
-       → session.context()
-  → system prompt assembled
-  → LLM call
-  → response
-  → agent_end hook
-       → session.addMessages()
-       → session.setMetadata()
-```
-
---
-
-## Diff table
-
-| Dimension | Hermes Agent | openclaw-honcho |
-|---|---|---|
-| **Context injection timing** | Once per session (cached). Zero HTTP on response path after turn 1. | Every turn, blocking. Fresh context per turn but adds latency. |
-| **Prefetch strategy** | Daemon threads fire at turn end; consumed next turn from cache. | None. Blocking call at prompt-build time. |
-| **Dialectic (peer.chat)** | Prefetched async; result injected into system prompt next turn. | On-demand via `honcho_recall` / `honcho_analyze` tools. |
-| **Reasoning level** | Dynamic: scales with message length. Floor = config default. Cap = "high". | Fixed per tool: recall=minimal, analyze=medium. |
-| **Memory modes** | `user_memory_mode` / `agent_memory_mode`: hybrid / honcho / local. | None. Always writes to Honcho. |
-| **Write frequency** | async (background queue), turn, session, N turns. | After every agent_end (no control). |
-| **AI peer identity** | `observe_me=True`, `seed_ai_identity()`, `get_ai_representation()`, SOUL.md → AI peer. | Agent files uploaded to agent peer at setup. No ongoing self-observation. |
-| **Context scope** | User peer + AI peer representation, both injected. | User peer (owner) representation + conversation summary. `peerPerspective` on context call. |
-| **Session naming** | per-directory / global / manual map / title-based. | Derived from platform session key. |
-| **Multi-agent** | Single-agent only. | Parent observer hierarchy via `subagent_spawned`. |
-| **Tool surface** | Single `query_user_context` tool (on-demand dialectic). | 6 tools: session, profile, search, context (fast) + recall, analyze (LLM). |
-| **Platform metadata** | Not stripped. | Explicitly stripped before Honcho storage. |
-| **Message dedup** | None. | `lastSavedIndex` in session metadata prevents re-sending. |
-| **CLI surface in prompt** | Management commands injected into system prompt. Agent knows its own CLI. | Not injected. |
-| **AI peer name in identity** | Replaces "Hermes Agent" in DEFAULT_AGENT_IDENTITY when configured. | Not implemented. |
-| **QMD / local file search** | Not implemented. | Passthrough tools when QMD backend configured. |
-| **Workspace metadata** | Not implemented. | `agentPeerMap` in workspace metadata tracks agent→peer ID. |
-
---
-
-## Patterns
-
-Six patterns from Hermes are worth adopting in any Honcho integration. Each is described as an integration-agnostic interface.
-
-**Hermes contributes:**
- Async prefetch (zero-latency)
- Dynamic reasoning level
- Per-peer memory modes
- AI peer identity formation
- Session naming strategies
- CLI surface injection
-
-**openclaw-honcho contributes back (Hermes should adopt):**
- `lastSavedIndex` dedup
- Platform metadata stripping
- Multi-agent observer hierarchy
- `peerPerspective` on `context()`
- Tiered tool surface (fast/LLM)
- Workspace `agentPeerMap`
-
---
-
-## Spec: async prefetch
-
-### Problem
-
-Calling `session.context()` and `peer.chat()` synchronously before each LLM call adds 200–800ms of Honcho round-trip latency to every turn.
-
-### Pattern
-
-Fire both calls as non-blocking background work at the **end** of each turn. Store results in a per-session cache keyed by session ID. At the **start** of the next turn, pop from cache — the HTTP is already done. First turn is cold (empty cache); all subsequent turns are zero-latency on the response path.
-
-### Interface contract
-
-```typescript
-interface AsyncPrefetch {
-  // Fire context + dialectic fetches at turn end. Non-blocking.
-  firePrefetch(sessionId: string, userMessage: string): void;
-
-  // Pop cached results at turn start. Returns empty if cache is cold.
-  popContextResult(sessionId: string): ContextResult | null;
-  popDialecticResult(sessionId: string): string | null;
-}
-
-type ContextResult = {
-  representation: string;
-  card: string[];
-  aiRepresentation?: string;  // AI peer context if enabled
-  summary?: string;           // conversation summary if fetched
-};
-```
-
-### Implementation notes
-
- **Python:** `threading.Thread(daemon=True)`. Write to `dict[session_id, result]` — GIL makes this safe for simple writes.
- **TypeScript:** `Promise` stored in `Map<string, Promise<ContextResult>>`. Await at pop time. If not resolved yet, return null — do not block.
- The pop is destructive: clears the cache entry after reading so stale data never accumulates.
- Prefetch should also fire on first turn (even though it won't be consumed until turn 2).
-
-### openclaw-honcho adoption
-
-Move `session.context()` from `before_prompt_build` to a post-`agent_end` background task. Store result in `state.contextCache`. In `before_prompt_build`, read from cache instead of calling Honcho. If cache is empty (turn 1), inject nothing — the prompt is still valid without Honcho context on the first turn.
-
---
-
-## Spec: dynamic reasoning level
-
-### Problem
-
-Honcho's dialectic endpoint supports reasoning levels from `minimal` to `max`. A fixed level per tool wastes budget on simple queries and under-serves complex ones.
-
-### Pattern
-
-Select the reasoning level dynamically based on the user's message. Use the configured default as a floor. Bump by message length. Cap auto-selection at `high` — never select `max` automatically.
-
-### Logic
-
-```
-< 120 chars  → default (typically "low")
-120–400 chars → one level above default (cap at "high")
-> 400 chars  → two levels above default (cap at "high")
-```
-
-### Config key
-
-Add `dialecticReasoningLevel` (string, default `"low"`). This sets the floor. The dynamic bump always applies on top.
-
-### openclaw-honcho adoption
-
-Apply in `honcho_recall` and `honcho_analyze`: replace fixed `reasoningLevel` with the dynamic selector. `honcho_recall` uses floor `"minimal"`, `honcho_analyze` uses floor `"medium"` — both still bump with message length.
-
---
-
-## Spec: per-peer memory modes
-
-### Problem
-
-Users want independent control over whether user context and agent context are written locally, to Honcho, or both.
-
-### Modes
-
-| Mode | Effect |
-|---|---|
-| `hybrid` | Write to both local files and Honcho (default) |
-| `honcho` | Honcho only — disable corresponding local file writes |
-| `local` | Local files only — skip Honcho sync for this peer |
-
-### Config schema
-
-```json
-{
-  "memoryMode": "hybrid",
-  "userMemoryMode": "honcho",
-  "agentMemoryMode": "hybrid"
-}
-```
-
-Resolution order: per-peer field wins → shorthand `memoryMode` → default `"hybrid"`.
-
-### Effect on Honcho sync
-
- `userMemoryMode=local`: skip adding user peer messages to Honcho
- `agentMemoryMode=local`: skip adding assistant peer messages to Honcho
- Both local: skip `session.addMessages()` entirely
- `userMemoryMode=honcho`: disable local USER.md writes
- `agentMemoryMode=honcho`: disable local MEMORY.md / SOUL.md writes
-
---
-
-## Spec: AI peer identity formation
-
-### Problem
-
-Honcho builds the user's representation organically by observing what the user says. The same mechanism exists for the AI peer — but only if `observe_me=True` is set for the agent peer. Without it, the agent peer accumulates nothing.
-
-Additionally, existing persona files (SOUL.md, IDENTITY.md) should seed the AI peer's Honcho representation at first activation.
-
-### Part A: observe_me=True for agent peer
-
-```typescript
-await session.addPeers([
-  [ownerPeer.id, { observeMe: true,  observeOthers: false }],
-  [agentPeer.id, { observeMe: true,  observeOthers: true  }], // was false
-]);
-```
-
-One-line change. Foundational. Without it, the AI peer representation stays empty regardless of what the agent says.
-
-### Part B: seedAiIdentity()
-
-```typescript
-async function seedAiIdentity(
-  agentPeer: Peer,
-  content: string,
-  source: string
-): Promise<boolean> {
-  const wrapped = [
-    `<ai_identity_seed>`,
-    `<source>${source}</source>`,
-    ``,
-    content.trim(),
-    `</ai_identity_seed>`,
-  ].join("\n");
-
-  await agentPeer.addMessage("assistant", wrapped);
-  return true;
-}
-```
-
-### Part C: migrate agent files at setup
-
-During `honcho setup`, upload agent-self files (SOUL.md, IDENTITY.md, AGENTS.md) to the agent peer via `seedAiIdentity()` instead of `session.uploadFile()`. This routes content through Honcho's observation pipeline.
-
-### Part D: AI peer name in identity
-
-When the agent has a configured name, prepend it to the injected system prompt:
-
-```typescript
-const namePrefix = agentName ? `You are ${agentName}.\n\n` : "";
-return { systemPrompt: namePrefix + "## User Memory Context\n\n" + sections };
-```
-
-### CLI surface
-
-```
-honcho identity <file>    # seed from file
-honcho identity --show    # show current AI peer representation
-```
-
---
-
-## Spec: session naming strategies
-
-### Problem
-
-A single global session means every project shares the same Honcho context. Per-directory sessions provide isolation without requiring users to name sessions manually.
-
-### Strategies
-
-| Strategy | Session key | When to use |
-|---|---|---|
-| `per-directory` | basename of CWD | Default. Each project gets its own session. |
-| `global` | fixed string `"global"` | Single cross-project session. |
-| manual map | user-configured per path | `sessions` config map overrides directory basename. |
-| title-based | sanitized session title | When agent supports named sessions set mid-conversation. |
-
-### Config schema
-
-```json
-{
-  "sessionStrategy": "per-directory",
-  "sessionPeerPrefix": false,
-  "sessions": {
-    "/home/user/projects/foo": "foo-project"
-  }
-}
-```
-
-### CLI surface
-
-```
-honcho sessions              # list all mappings
-honcho map <name>            # map cwd to session name
-honcho map                   # no-arg = list mappings
-```
-
-Resolution order: manual map → session title → directory basename → platform key.
-
---
-
-## Spec: CLI surface injection
-
-### Problem
-
-When a user asks "how do I change my memory settings?" the agent either hallucinates or says it doesn't know. The agent should know its own management interface.
-
-### Pattern
-
-When Honcho is active, append a compact command reference to the system prompt. Keep it under 300 chars.
-
-```
-# Honcho memory integration
-Active. Session: {sessionKey}. Mode: {mode}.
-Management commands:
-  honcho status                    — show config + connection
-  honcho mode [hybrid|honcho|local] — show or set memory mode
-  honcho sessions                  — list session mappings
-  honcho map <name>                — map directory to session
-  honcho identity [file] [--show]  — seed or show AI identity
-  honcho setup                     — full interactive wizard
-```
-
---
-
-## openclaw-honcho checklist
-
-Ordered by impact:
-
- [ ] **Async prefetch** — move `session.context()` out of `before_prompt_build` into post-`agent_end` background Promise
- [ ] **observe_me=True for agent peer** — one-line change in `session.addPeers()`
- [ ] **Dynamic reasoning level** — add helper; apply in `honcho_recall` and `honcho_analyze`; add `dialecticReasoningLevel` to config
- [ ] **Per-peer memory modes** — add `userMemoryMode` / `agentMemoryMode` to config; gate Honcho sync and local writes
- [ ] **seedAiIdentity()** — add helper; use during setup migration for SOUL.md / IDENTITY.md
- [ ] **Session naming strategies** — add `sessionStrategy`, `sessions` map, `sessionPeerPrefix`
- [ ] **CLI surface injection** — append command reference to `before_prompt_build` return value
- [ ] **honcho identity subcommand** — seed from file or `--show` current representation
- [ ] **AI peer name injection** — if `aiPeer` name configured, prepend to injected system prompt
- [ ] **honcho mode / sessions / map** — CLI parity with Hermes
-
-Already done in openclaw-honcho (do not re-implement): `lastSavedIndex` dedup, platform metadata stripping, multi-agent parent observer, `peerPerspective` on `context()`, tiered tool surface, workspace `agentPeerMap`, QMD passthrough, self-hosted Honcho.
-
---
-
-## nanobot-honcho checklist
-
-Greenfield integration. Start from openclaw-honcho's architecture and apply all Hermes patterns from day one.
-
-### Phase 1 — core correctness
-
- [ ] Dual peer model (owner + agent peer), both with `observe_me=True`
- [ ] Message capture at turn end with `lastSavedIndex` dedup
- [ ] Platform metadata stripping before Honcho storage
- [ ] Async prefetch from day one — do not implement blocking context injection
- [ ] Legacy file migration at first activation (USER.md → owner peer, SOUL.md → `seedAiIdentity()`)
-
-### Phase 2 — configuration
-
- [ ] Config schema: `apiKey`, `workspaceId`, `baseUrl`, `memoryMode`, `userMemoryMode`, `agentMemoryMode`, `dialecticReasoningLevel`, `sessionStrategy`, `sessions`
- [ ] Per-peer memory mode gating
- [ ] Dynamic reasoning level
- [ ] Session naming strategies
-
-### Phase 3 — tools and CLI
-
- [ ] Tool surface: `honcho_profile`, `honcho_recall`, `honcho_analyze`, `honcho_search`, `honcho_context`
- [ ] CLI: `setup`, `status`, `sessions`, `map`, `mode`, `identity`
- [ ] CLI surface injection into system prompt
- [ ] AI peer name wired into agent identity
@@ -1,142 +0,0 @@
-# Migrating from OpenClaw to Hermes Agent
-
-This guide covers how to import your OpenClaw settings, memories, skills, and API keys into Hermes Agent.
-
-## Three Ways to Migrate
-
-### 1. Automatic (during first-time setup)
-
-When you run `hermes setup` for the first time and Hermes detects `~/.openclaw`, it automatically offers to import your OpenClaw data before configuration begins. Just accept the prompt and everything is handled for you.
-
-### 2. CLI Command (quick, scriptable)
-
-```bash
-hermes claw migrate                      # Preview then migrate (always shows preview first)
-hermes claw migrate --dry-run            # Preview only, no changes
-hermes claw migrate --preset user-data   # Migrate without API keys/secrets
-hermes claw migrate --yes                # Skip confirmation prompt
-```
-
-The migration always shows a full preview of what will be imported before making any changes. You review the preview and confirm before anything is written.
-
-**All options:**
-
-| Flag | Description |
-|------|-------------|
-| `--source PATH` | Path to OpenClaw directory (default: `~/.openclaw`) |
-| `--dry-run` | Preview only — no files are modified |
-| `--preset {user-data,full}` | Migration preset (default: `full`). `user-data` excludes secrets |
-| `--overwrite` | Overwrite existing files (default: skip conflicts) |
-| `--migrate-secrets` | Include allowlisted secrets (auto-enabled with `full` preset) |
-| `--workspace-target PATH` | Copy workspace instructions (AGENTS.md) to this absolute path |
-| `--skill-conflict {skip,overwrite,rename}` | How to handle skill name conflicts (default: `skip`) |
-| `--yes`, `-y` | Skip confirmation prompts |
-
-### 3. Agent-Guided (interactive, with previews)
-
-Ask the agent to run the migration for you:
-
-```
-> Migrate my OpenClaw setup to Hermes
-```
-
-The agent will use the `openclaw-migration` skill to:
-1. Run a preview first to show what would change
-2. Ask about conflict resolution (SOUL.md, skills, etc.)
-3. Let you choose between `user-data` and `full` presets
-4. Execute the migration with your choices
-5. Print a detailed summary of what was migrated
-
-## What Gets Migrated
-
-### `user-data` preset
-| Item | Source | Destination |
-|------|--------|-------------|
-| SOUL.md | `~/.openclaw/workspace/SOUL.md` | `~/.hermes/SOUL.md` |
-| Memory entries | `~/.openclaw/workspace/MEMORY.md` | `~/.hermes/memories/MEMORY.md` |
-| User profile | `~/.openclaw/workspace/USER.md` | `~/.hermes/memories/USER.md` |
-| Skills | `~/.openclaw/workspace/skills/` | `~/.hermes/skills/openclaw-imports/` |
-| Command allowlist | `~/.openclaw/workspace/exec_approval_patterns.yaml` | Merged into `~/.hermes/config.yaml` |
-| Messaging settings | `~/.openclaw/config.yaml` (TELEGRAM_ALLOWED_USERS, MESSAGING_CWD) | `~/.hermes/.env` |
-| TTS assets | `~/.openclaw/workspace/tts/` | `~/.hermes/tts/` |
-
-Workspace files are also checked at `workspace.default/` and `workspace-main/` as fallback paths (OpenClaw renamed `workspace/` to `workspace-main/` in recent versions).
-
-### `full` preset (adds to `user-data`)
-| Item | Source | Destination |
-|------|--------|-------------|
-| Telegram bot token | `openclaw.json` channels config | `~/.hermes/.env` |
-| OpenRouter API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` |
-| OpenAI API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` |
-| Anthropic API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` |
-| ElevenLabs API key | `.env`, `openclaw.json`, or `openclaw.json["env"]` | `~/.hermes/.env` |
-
-API keys are searched across four sources: inline config values, `~/.openclaw/.env`, the `openclaw.json` `"env"` sub-object, and per-agent auth profiles.
-
-Only allowlisted secrets are ever imported. Other credentials are skipped and reported.
-
-## OpenClaw Schema Compatibility
-
-The migration handles both old and current OpenClaw config layouts:
-
- **Channel tokens**: Reads from flat paths (`channels.telegram.botToken`) and the newer `accounts.default` layout (`channels.telegram.accounts.default.botToken`)
- **TTS provider**: OpenClaw renamed "edge" to "microsoft" — both are recognized and mapped to Hermes' "edge"
- **Provider API types**: Both short (`openai`, `anthropic`) and hyphenated (`openai-completions`, `anthropic-messages`, `google-generative-ai`) values are mapped correctly
- **thinkingDefault**: All enum values are handled including newer ones (`minimal`, `xhigh`, `adaptive`)
- **Matrix**: Uses `accessToken` field (not `botToken`)
- **SecretRef formats**: Plain strings, env templates (`${VAR}`), and `source: "env"` SecretRefs are resolved. `source: "file"` and `source: "exec"` SecretRefs produce a warning — add those keys manually after migration.
-
-## Conflict Handling
-
-By default, the migration **will not overwrite** existing Hermes data:
-
- **SOUL.md** — skipped if one already exists in `~/.hermes/`
- **Memory entries** — skipped if memories already exist (to avoid duplicates)
- **Skills** — skipped if a skill with the same name already exists
- **API keys** — skipped if the key is already set in `~/.hermes/.env`
-
-To overwrite conflicts, use `--overwrite`. The migration creates backups before overwriting.
-
-For skills, you can also use `--skill-conflict rename` to import conflicting skills under a new name (e.g., `skill-name-imported`).
-
-## Migration Report
-
-Every migration produces a report showing:
- **Migrated items** — what was successfully imported
- **Conflicts** — items skipped because they already exist
- **Skipped items** — items not found in the source
- **Errors** — items that failed to import
-
-For executed migrations, the full report is saved to `~/.hermes/migration/openclaw/<timestamp>/`.
-
-## Post-Migration Notes
-
- **Skills require a new session** — imported skills take effect after restarting your agent or starting a new chat.
- **WhatsApp requires re-pairing** — WhatsApp uses QR-code pairing, not token-based auth. Run `hermes whatsapp` to pair.
- **Archive cleanup** — after migration, you'll be offered to rename `~/.openclaw/` to `.openclaw.pre-migration/` to prevent state confusion. You can also run `hermes claw cleanup` later.
-
-## Troubleshooting
-
-### "OpenClaw directory not found"
-The migration looks for `~/.openclaw` by default, then tries `~/.clawdbot` and `~/.moltbot`. If your OpenClaw is installed elsewhere, use `--source`:
-```bash
-hermes claw migrate --source /path/to/.openclaw
-```
-
-### "Migration script not found"
-The migration script ships with Hermes Agent. If you installed via pip (not git clone), the `optional-skills/` directory may not be present. Install the skill from the Skills Hub:
-```bash
-hermes skills install openclaw-migration
-```
-
-### Memory overflow
-If your OpenClaw MEMORY.md or USER.md exceeds Hermes' character limits, excess entries are exported to an overflow file in the migration report directory. You can manually review and add the most important ones.
-
-### API keys not found
-Keys might be stored in different places depending on your OpenClaw setup:
- `~/.openclaw/.env` file
- Inline in `openclaw.json` under `models.providers.*.apiKey`
- In `openclaw.json` under the `"env"` or `"env.vars"` sub-objects
- In `~/.openclaw/agents/main/agent/auth-profiles.json`
-
-The migration checks all four. If keys use `source: "file"` or `source: "exec"` SecretRefs, they can't be resolved automatically — add them via `hermes config set`.
@@ -1,608 +0,0 @@
-# Pricing Accuracy Architecture
-
-Date: 2026-03-16
-
-## Goal
-
-Hermes should only show dollar costs when they are backed by an official source for the user's actual billing path.
-
-This design replaces the current static, heuristic pricing flow in:
-
- `run_agent.py`
- `agent/usage_pricing.py`
- `agent/insights.py`
- `cli.py`
-
-with a provider-aware pricing system that:
-
- handles cache billing correctly
- distinguishes `actual` vs `estimated` vs `included` vs `unknown`
- reconciles post-hoc costs when providers expose authoritative billing data
- supports direct providers, OpenRouter, subscriptions, enterprise pricing, and custom endpoints
-
-## Problems In The Current Design
-
-Current Hermes behavior has four structural issues:
-
-1. It stores only `prompt_tokens` and `completion_tokens`, which is insufficient for providers that bill cache reads and cache writes separately.
-2. It uses a static model price table and fuzzy heuristics, which can drift from current official pricing.
-3. It assumes public API list pricing matches the user's real billing path.
-4. It has no distinction between live estimates and reconciled billed cost.
-
-## Design Principles
-
-1. Normalize usage before pricing.
-2. Never fold cached tokens into plain input cost.
-3. Track certainty explicitly.
-4. Treat the billing path as part of the model identity.
-5. Prefer official machine-readable sources over scraped docs.
-6. Use post-hoc provider cost APIs when available.
-7. Show `n/a` rather than inventing precision.
-
-## High-Level Architecture
-
-The new system has four layers:
-
-1. `usage_normalization`
-   Converts raw provider usage into a canonical usage record.
-2. `pricing_source_resolution`
-   Determines the billing path, source of truth, and applicable pricing source.
-3. `cost_estimation_and_reconciliation`
-   Produces an immediate estimate when possible, then replaces or annotates it with actual billed cost later.
-4. `presentation`
-   `/usage`, `/insights`, and the status bar display cost with certainty metadata.
-
-## Canonical Usage Record
-
-Add a canonical usage model that every provider path maps into before any pricing math happens.
-
-Suggested structure:
-
-```python
-@dataclass
-class CanonicalUsage:
-    provider: str
-    billing_provider: str
-    model: str
-    billing_route: str
-
-    input_tokens: int = 0
-    output_tokens: int = 0
-    cache_read_tokens: int = 0
-    cache_write_tokens: int = 0
-    reasoning_tokens: int = 0
-    request_count: int = 1
-
-    raw_usage: dict[str, Any] | None = None
-    raw_usage_fields: dict[str, str] | None = None
-    computed_fields: set[str] | None = None
-
-    provider_request_id: str | None = None
-    provider_generation_id: str | None = None
-    provider_response_id: str | None = None
-```
-
-Rules:
-
- `input_tokens` means non-cached input only.
- `cache_read_tokens` and `cache_write_tokens` are never merged into `input_tokens`.
- `output_tokens` excludes cache metrics.
- `reasoning_tokens` is telemetry unless a provider officially bills it separately.
-
-This is the same normalization pattern used by `opencode`, extended with provenance and reconciliation ids.
-
-## Provider Normalization Rules
-
-### OpenAI Direct
-
-Source usage fields:
-
- `prompt_tokens`
- `completion_tokens`
- `prompt_tokens_details.cached_tokens`
-
-Normalization:
-
- `cache_read_tokens = cached_tokens`
- `input_tokens = prompt_tokens - cached_tokens`
- `cache_write_tokens = 0` unless OpenAI exposes it in the relevant route
- `output_tokens = completion_tokens`
-
-### Anthropic Direct
-
-Source usage fields:
-
- `input_tokens`
- `output_tokens`
- `cache_read_input_tokens`
- `cache_creation_input_tokens`
-
-Normalization:
-
- `input_tokens = input_tokens`
- `output_tokens = output_tokens`
- `cache_read_tokens = cache_read_input_tokens`
- `cache_write_tokens = cache_creation_input_tokens`
-
-### OpenRouter
-
-Estimate-time usage normalization should use the response usage payload with the same rules as the underlying provider when possible.
-
-Reconciliation-time records should also store:
-
- OpenRouter generation id
- native token fields when available
- `total_cost`
- `cache_discount`
- `upstream_inference_cost`
- `is_byok`
-
-### Gemini / Vertex
-
-Use official Gemini or Vertex usage fields where available.
-
-If cached content tokens are exposed:
-
- map them to `cache_read_tokens`
-
-If a route exposes no cache creation metric:
-
- store `cache_write_tokens = 0`
- preserve the raw usage payload for later extension
-
-### DeepSeek And Other Direct Providers
-
-Normalize only the fields that are officially exposed.
-
-If a provider does not expose cache buckets:
-
- do not infer them unless the provider explicitly documents how to derive them
-
-### Subscription / Included-Cost Routes
-
-These still use the canonical usage model.
-
-Tokens are tracked normally. Cost depends on billing mode, not on whether usage exists.
-
-## Billing Route Model
-
-Hermes must stop keying pricing solely by `model`.
-
-Introduce a billing route descriptor:
-
-```python
-@dataclass
-class BillingRoute:
-    provider: str
-    base_url: str | None
-    model: str
-    billing_mode: str
-    organization_hint: str | None = None
-```
-
-`billing_mode` values:
-
- `official_cost_api`
- `official_generation_api`
- `official_models_api`
- `official_docs_snapshot`
- `subscription_included`
- `user_override`
- `custom_contract`
- `unknown`
-
-Examples:
-
- OpenAI direct API with Costs API access: `official_cost_api`
- Anthropic direct API with Usage & Cost API access: `official_cost_api`
- OpenRouter request before reconciliation: `official_models_api`
- OpenRouter request after generation lookup: `official_generation_api`
- GitHub Copilot style subscription route: `subscription_included`
- local OpenAI-compatible server: `unknown`
- enterprise contract with configured rates: `custom_contract`
-
-## Cost Status Model
-
-Every displayed cost should have:
-
-```python
-@dataclass
-class CostResult:
-    amount_usd: Decimal | None
-    status: Literal["actual", "estimated", "included", "unknown"]
-    source: Literal[
-        "provider_cost_api",
-        "provider_generation_api",
-        "provider_models_api",
-        "official_docs_snapshot",
-        "user_override",
-        "custom_contract",
-        "none",
-    ]
-    label: str
-    fetched_at: datetime | None
-    pricing_version: str | None
-    notes: list[str]
-```
-
-Presentation rules:
-
- `actual`: show dollar amount as final
- `estimated`: show dollar amount with estimate labeling
- `included`: show `included` or `$0.00 (included)` depending on UX choice
- `unknown`: show `n/a`
-
-## Official Source Hierarchy
-
-Resolve cost using this order:
-
-1. Request-level or account-level official billed cost
-2. Official machine-readable model pricing
-3. Official docs snapshot
-4. User override or custom contract
-5. Unknown
-
-The system must never skip to a lower level if a higher-confidence source exists for the current billing route.
-
-## Provider-Specific Truth Rules
-
-### OpenAI Direct
-
-Preferred truth:
-
-1. Costs API for reconciled spend
-2. Official pricing page for live estimate
-
-### Anthropic Direct
-
-Preferred truth:
-
-1. Usage & Cost API for reconciled spend
-2. Official pricing docs for live estimate
-
-### OpenRouter
-
-Preferred truth:
-
-1. `GET /api/v1/generation` for reconciled `total_cost`
-2. `GET /api/v1/models` pricing for live estimate
-
-Do not use underlying provider public pricing as the source of truth for OpenRouter billing.
-
-### Gemini / Vertex
-
-Preferred truth:
-
-1. official billing export or billing API for reconciled spend when available for the route
-2. official pricing docs for estimate
-
-### DeepSeek
-
-Preferred truth:
-
-1. official machine-readable cost source if available in the future
-2. official pricing docs snapshot today
-
-### Subscription-Included Routes
-
-Preferred truth:
-
-1. explicit route config marking the model as included in subscription
-
-These should display `included`, not an API list-price estimate.
-
-### Custom Endpoint / Local Model
-
-Preferred truth:
-
-1. user override
-2. custom contract config
-3. unknown
-
-These should default to `unknown`.
-
-## Pricing Catalog
-
-Replace the current `MODEL_PRICING` dict with a richer pricing catalog.
-
-Suggested record:
-
-```python
-@dataclass
-class PricingEntry:
-    provider: str
-    route_pattern: str
-    model_pattern: str
-
-    input_cost_per_million: Decimal | None = None
-    output_cost_per_million: Decimal | None = None
-    cache_read_cost_per_million: Decimal | None = None
-    cache_write_cost_per_million: Decimal | None = None
-    request_cost: Decimal | None = None
-    image_cost: Decimal | None = None
-
-    source: str = "official_docs_snapshot"
-    source_url: str | None = None
-    fetched_at: datetime | None = None
-    pricing_version: str | None = None
-```
-
-The catalog should be route-aware:
-
- `openai:gpt-5`
- `anthropic:claude-opus-4-6`
- `openrouter:anthropic/claude-opus-4.6`
- `copilot:gpt-4o`
-
-This avoids conflating direct-provider billing with aggregator billing.
-
-## Pricing Sync Architecture
-
-Introduce a pricing sync subsystem instead of manually maintaining a single hardcoded table.
-
-Suggested modules:
-
- `agent/pricing/catalog.py`
- `agent/pricing/sources.py`
- `agent/pricing/sync.py`
- `agent/pricing/reconcile.py`
- `agent/pricing/types.py`
-
-### Sync Sources
-
- OpenRouter models API
- official provider docs snapshots where no API exists
- user overrides from config
-
-### Sync Output
-
-Cache pricing entries locally with:
-
- source URL
- fetch timestamp
- version/hash
- confidence/source type
-
-### Sync Frequency
-
- startup warm cache
- background refresh every 6 to 24 hours depending on source
- manual `hermes pricing sync`
-
-## Reconciliation Architecture
-
-Live requests may produce only an estimate initially. Hermes should reconcile them later when a provider exposes actual billed cost.
-
-Suggested flow:
-
-1. Agent call completes.
-2. Hermes stores canonical usage plus reconciliation ids.
-3. Hermes computes an immediate estimate if a pricing source exists.
-4. A reconciliation worker fetches actual cost when supported.
-5. Session and message records are updated with `actual` cost.
-
-This can run:
-
- inline for cheap lookups
- asynchronously for delayed provider accounting
-
-## Persistence Changes
-
-Session storage should stop storing only aggregate prompt/completion totals.
-
-Add fields for both usage and cost certainty:
-
- `input_tokens`
- `output_tokens`
- `cache_read_tokens`
- `cache_write_tokens`
- `reasoning_tokens`
- `estimated_cost_usd`
- `actual_cost_usd`
- `cost_status`
- `cost_source`
- `pricing_version`
- `billing_provider`
- `billing_mode`
-
-If schema expansion is too large for one PR, add a new pricing events table:
-
-```text
-session_cost_events
-  id
-  session_id
-  request_id
-  provider
-  model
-  billing_mode
-  input_tokens
-  output_tokens
-  cache_read_tokens
-  cache_write_tokens
-  estimated_cost_usd
-  actual_cost_usd
-  cost_status
-  cost_source
-  pricing_version
-  created_at
-  updated_at
-```
-
-## Hermes Touchpoints
-
-### `run_agent.py`
-
-Current responsibility:
-
- parse raw provider usage
- update session token counters
-
-New responsibility:
-
- build `CanonicalUsage`
- update canonical counters
- store reconciliation ids
- emit usage event to pricing subsystem
-
-### `agent/usage_pricing.py`
-
-Current responsibility:
-
- static lookup table
- direct cost arithmetic
-
-New responsibility:
-
- move or replace with pricing catalog facade
- no fuzzy model-family heuristics
- no direct pricing without billing-route context
-
-### `cli.py`
-
-Current responsibility:
-
- compute session cost directly from prompt/completion totals
-
-New responsibility:
-
- display `CostResult`
- show status badges:
-  - `actual`
-  - `estimated`
-  - `included`
-  - `n/a`
-
-### `agent/insights.py`
-
-Current responsibility:
-
- recompute historical estimates from static pricing
-
-New responsibility:
-
- aggregate stored pricing events
- prefer actual cost over estimate
- surface estimates only when reconciliation is unavailable
-
-## UX Rules
-
-### Status Bar
-
-Show one of:
-
- `$1.42`
- `~$1.42`
- `included`
- `cost n/a`
-
-Where:
-
- `$1.42` means `actual`
- `~$1.42` means `estimated`
- `included` means subscription-backed or explicitly zero-cost route
- `cost n/a` means unknown
-
-### `/usage`
-
-Show:
-
- token buckets
- estimated cost
- actual cost if available
- cost status
- pricing source
-
-### `/insights`
-
-Aggregate:
-
- actual cost totals
- estimated-only totals
- unknown-cost sessions count
- included-cost sessions count
-
-## Config And Overrides
-
-Add user-configurable pricing overrides in config:
-
-```yaml
-pricing:
-  mode: hybrid
-  sync_on_startup: true
-  sync_interval_hours: 12
-  overrides:
-    - provider: openrouter
-      model: anthropic/claude-opus-4.6
-      billing_mode: custom_contract
-      input_cost_per_million: 4.25
-      output_cost_per_million: 22.0
-      cache_read_cost_per_million: 0.5
-      cache_write_cost_per_million: 6.0
-  included_routes:
-    - provider: copilot
-      model: "*"
-    - provider: codex-subscription
-      model: "*"
-```
-
-Overrides must win over catalog defaults for the matching billing route.
-
-## Rollout Plan
-
-### Phase 1
-
- add canonical usage model
- split cache token buckets in `run_agent.py`
- stop pricing cache-inflated prompt totals
- preserve current UI with improved backend math
-
-### Phase 2
-
- add route-aware pricing catalog
- integrate OpenRouter models API sync
- add `estimated` vs `included` vs `unknown`
-
-### Phase 3
-
- add reconciliation for OpenRouter generation cost
- add actual cost persistence
- update `/insights` to prefer actual cost
-
-### Phase 4
-
- add direct OpenAI and Anthropic reconciliation paths
- add user overrides and contract pricing
- add pricing sync CLI command
-
-## Testing Strategy
-
-Add tests for:
-
- OpenAI cached token subtraction
- Anthropic cache read/write separation
- OpenRouter estimated vs actual reconciliation
- subscription-backed models showing `included`
- custom endpoints showing `n/a`
- override precedence
- stale catalog fallback behavior
-
-Current tests that assume heuristic pricing should be replaced with route-aware expectations.
-
-## Non-Goals
-
- exact enterprise billing reconstruction without an official source or user override
- backfilling perfect historical cost for old sessions that lack cache bucket data
- scraping arbitrary provider web pages at request time
-
-## Recommendation
-
-Do not expand the existing `MODEL_PRICING` dict.
-
-That path cannot satisfy the product requirement. Hermes should instead migrate to:
-
- canonical usage normalization
- route-aware pricing sources
- estimate-then-reconcile cost lifecycle
- explicit certainty states in the UI
-
-This is the minimum architecture that makes the statement "Hermes pricing is backed by official sources where possible, and otherwise clearly labeled" defensible.
@@ -1,108 +0,0 @@
-# Ink Gateway TUI Migration — Post-mortem
-
-Planned: 2026-04-01 · Delivered: 2026-04 · Status: shipped, classic (prompt_toolkit) CLI still present
-
-## What Shipped
-
-Three layers, same repo, Python runtime unchanged.
-
-```
-ui-tui (Node/TS)  ──stdio JSON-RPC──▶  tui_gateway (Py)  ──▶  AIAgent (run_agent.py)
-```
-
-### Backend — `tui_gateway/`
-
-```
-tui_gateway/
-├── entry.py          # subprocess entrypoint, stdio read/write loop
-├── server.py         # everything: sessions dict, @method handlers, _emit
-├── render.py         # stream renderer, diff rendering, message rendering
-├── slash_worker.py   # subprocess that runs hermes_cli slash commands
-└── __init__.py
-```
-
-`server.py` owns the full runtime-control surface: session store (`_sessions: dict[str, dict]`), method registry (`@method("…")` decorator), event emitter (`_emit`), agent lifecycle (`_make_agent`, `_init_session`, `_wire_callbacks`), approval/sudo/clarify round-trips, and JSON-RPC dispatch.
-
-Protocol methods (`@method(...)` in `server.py`):
-
- session: `session.{create, resume, list, close, interrupt, usage, history, compress, branch, title, save, undo}`
- prompt: `prompt.{submit, background, btw}`
- tools: `tools.{list, show, configure}`
- slash: `slash.exec`, `command.{dispatch, resolve}`, `commands.catalog`, `complete.{path, slash}`
- approvals: `approval.respond`, `sudo.respond`, `clarify.respond`, `secret.respond`
- config/state: `config.{get, set, show}`, `model.options`, `reload.mcp`
- ops: `shell.exec`, `cli.exec`, `terminal.resize`, `input.detect_drop`, `clipboard.paste`, `paste.collapse`, `image.attach`, `process.stop`
- misc: `agents.list`, `skills.manage`, `plugins.list`, `cron.manage`, `insights.get`, `rollback.{list, diff, restore}`, `browser.manage`
-
-Protocol events (`_emit(…)` → handled in `ui-tui/src/app/createGatewayEventHandler.ts`):
-
- lifecycle: `gateway.{ready, stderr}`, `session.info`, `skin.changed`
- stream: `message.{start, delta, complete}`, `thinking.delta`, `reasoning.{delta, available}`, `status.update`
- tools: `tool.{start, progress, complete, generating}`, `subagent.{start, thinking, tool, progress, complete}`
- interactive: `approval.request`, `sudo.request`, `clarify.request`, `secret.request`
- async: `background.complete`, `btw.complete`, `error`
-
-### Frontend — `ui-tui/src/`
-
-```
-src/
-├── entry.tsx            # node bootstrap: bootBanner → spawn python → dynamic-import Ink → render(<App/>)
-├── app.tsx              # <GatewayProvider> wraps <AppLayout>
-├── bootBanner.ts        # raw-ANSI banner to stdout in ~2ms, pre-React
-├── gatewayClient.ts     # JSON-RPC client over child_process stdio
-├── gatewayTypes.ts      # typed RPC responses + GatewayEvent union
-├── theme.ts             # DEFAULT_THEME + fromSkin
-│
-├── app/                 # hooks + stores — the orchestration layer
-│   ├── uiStore.ts             # nanostore: sid, info, busy, usage, theme, status…
-│   ├── turnStore.ts           # nanostore: per-turn activity / reasoning / tools
-│   ├── turnController.ts      # imperative singleton for stream-time operations
-│   ├── overlayStore.ts        # nanostore: modal/overlay state
-│   ├── useMainApp.ts          # top-level composition hook
-│   ├── useSessionLifecycle.ts # session.create/resume/close/reset
-│   ├── useSubmission.ts       # shell/slash/prompt dispatch + interpolation
-│   ├── useConfigSync.ts       # config.get + mtime poll
-│   ├── useComposerState.ts    # input buffer, paste snippets, editor mode
-│   ├── useInputHandlers.ts    # key bindings
-│   ├── createGatewayEventHandler.ts  # event-stream dispatcher
-│   ├── createSlashHandler.ts         # slash command router (registry + python fallback)
-│   └── slash/commands/        # core.ts, ops.ts, session.ts — TS-owned slash commands
-│
-├── components/          # AppLayout, AppChrome, AppOverlays, MessageLine, Thinking, Markdown, pickers, prompts, Banner, SessionPanel
-├── config/              # env, limits, timing constants
-├── content/             # charms, faces, fortunes, hotkeys, placeholders, verbs
-├── domain/              # details, messages, paths, roles, slash, usage, viewport
-├── protocol/            # interpolation, paste regex
-├── hooks/               # useCompletion, useInputHistory, useQueue, useVirtualHistory
-└── lib/                 # history, messages, osc52, rpc, text
-```
-
-### CLI entry points — `hermes_cli/main.py`
-
- `hermes --tui`      → `node dist/entry.js` (auto-builds when `.ts`/`.tsx` newer than `dist/entry.js`)
- `hermes --tui --dev` → `tsx src/entry.tsx` (skip build)
- `HERMES_TUI_DIR=…`  → external prebuilt dist (nix, distro packaging)
-
-## Diverged From Original Plan
-
-| Plan | Reality | Why |
-|---|---|---|
-| `tui_gateway/{controller,session_state,events,protocol}.py` | all collapsed into `server.py` | no second consumer ever emerged, keeping one file cheaper than four |
-| `ui-tui/src/main.tsx` | split into `entry.tsx` (bootstrap) + `app.tsx` (shell) | boot banner + early python spawn wanted a pre-React moment |
-| `ui-tui/src/state/store.ts` | three nanostores (`uiStore`, `turnStore`, `overlayStore`) | separate lifetimes: ui persists, turn resets per reply, overlay is modal |
-| `approval.requested` / `sudo.requested` / `clarify.requested` | `*.request` (no `-ed`) | cosmetic |
-| `session.cancel` | dropped | `session.interrupt` covers it |
-| `HERMES_EXPERIMENTAL_TUI=1`, `display.experimental_tui: true`, `/tui on/off/status` | none shipped | `--tui` went from opt-in to first-class without an experimental phase |
-
-## Post-migration Additions (not in original plan)
-
- **Async `session.create`** — returns sid in ~1ms, agent builds on a background thread, `session.info` broadcasts when ready; `_wait_agent()` gates every agent-touching handler via `_sess`
- **`bootBanner`** — raw-ANSI logo painted to stdout at T≈2ms, before Ink loads; `<AlternateScreen>` wipes it seamlessly when React mounts
- **Selection uniform bg** — `theme.color.selectionBg` wired via `useSelection().setSelectionBgColor`; replaces SGR-inverse per-cell swap that fragmented over amber/gold fg
- **Slash command registry** — TS-owned commands in `app/slash/commands/{core,ops,session}.ts`, everything else falls through to `slash.exec` (python worker)
- **Turn store + controller split** — imperative singleton (`turnController`) holds refs/timers, nanostore (`turnStore`) holds render-visible state
-
-## What's Still Open
-
- **Classic CLI not deleted.** `cli.py` still has ~80 `prompt_toolkit` references; classic REPL is still the default when `--tui` is absent. The original plan's "Cut 4 · prompt_toolkit removal later" hasn't happened.
- **No config-file opt-in.** `HERMES_EXPERIMENTAL_TUI` and `display.experimental_tui` were never built; only the CLI flag exists. Fine for now — if we want "default to TUI", a single line in `main.py` flips it.
@@ -1,106 +0,0 @@
-# ============================================================================
-# Hermes Agent — Example Skin Template
-# ============================================================================
-#
-# Copy this file to ~/.hermes/skins/<name>.yaml to create a custom skin.
-# All fields are optional — missing values inherit from the default skin.
-# Activate with: /skin <name>  or  display.skin: <name> in config.yaml
-#
-# Keys are marked:
-#   (both)    — applies to both the classic CLI and the TUI
-#   (classic) — classic CLI only (see hermes --tui in user-guide/tui.md)
-#   (tui)     — TUI only
-#
-# See hermes_cli/skin_engine.py for the full schema reference.
-# ============================================================================
-
-# Required: unique skin name (used in /skin command and config)
-name: example
-description: An example custom skin — copy and modify this template
-
-# ── Colors ──────────────────────────────────────────────────────────────────
-# Hex color values. These control the visual palette.
-colors:
-  # Banner panel (the startup welcome box) — (both)
-  banner_border: "#CD7F32"        # Panel border
-  banner_title: "#FFD700"         # Panel title text
-  banner_accent: "#FFBF00"        # Section headers (Available Tools, Skills, etc.)
-  banner_dim: "#B8860B"           # Dim/muted text (separators, model info)
-  banner_text: "#FFF8DC"          # Body text (tool names, skill names)
-
-  # UI elements — (both)
-  ui_accent: "#FFBF00"            # General accent (falls back to banner_accent)
-  ui_label: "#4dd0e1"             # Labels
-  ui_ok: "#4caf50"                # Success indicators
-  ui_error: "#ef5350"             # Error indicators
-  ui_warn: "#ffa726"              # Warning indicators
-
-  # Input area
-  prompt: "#FFF8DC"               # Prompt text / `❯` glyph color (both)
-  input_rule: "#CD7F32"           # Horizontal rule above input (classic)
-
-  # Response box — (classic)
-  response_border: "#FFD700"      # Response box border
-
-  # Session display — (both)
-  session_label: "#DAA520"        # "Session: " label
-  session_border: "#8B8682"       # Session ID text
-
-  # TUI / CLI surfaces — (classic: status bar, voice badge, completion meta)
-  status_bar_bg: "#1a1a2e"              # Status / usage bar background (classic)
-  voice_status_bg: "#1a1a2e"            # Voice-mode badge background (classic)
-  completion_menu_bg: "#1a1a2e"         # Completion list background (both)
-  completion_menu_current_bg: "#333355" # Active completion row background (both)
-  completion_menu_meta_bg: "#1a1a2e"    # Completion meta column bg (classic)
-  completion_menu_meta_current_bg: "#333355"  # Active meta bg (classic)
-
-  # Drag-to-select background — (tui)
-  selection_bg: "#3a3a55"               # Uniform selection highlight in the TUI
-
-# ── Spinner ─────────────────────────────────────────────────────────────────
-# (classic) — the TUI uses its own animated indicators; spinner config here
-# is only read by the classic prompt_toolkit CLI.
-spinner:
-  # Faces shown while waiting for the API response
-  waiting_faces:
-    - "(｡◕‿◕｡)"
-    - "(◕‿◕✿)"
-    - "٩(◕‿◕｡)۶"
-
-  # Faces shown during extended thinking/reasoning
-  thinking_faces:
-    - "(｡•́︿•̀｡)"
-    - "(◔_◔)"
-    - "(¬‿¬)"
-
-  # Verbs used in spinner messages (e.g., "pondering your request...")
-  thinking_verbs:
-    - "pondering"
-    - "contemplating"
-    - "musing"
-    - "ruminating"
-
-  # Optional: left/right decorations around the spinner
-  # Each entry is a [left, right] pair. Omit entirely for no wings.
-  # wings:
-  #   - ["⟪⚔", "⚔⟫"]
-  #   - ["⟪▲", "▲⟫"]
-
-# ── Branding ────────────────────────────────────────────────────────────────
-# Text strings used throughout the interface.
-branding:
-  agent_name: "Hermes Agent"                  # (both) Banner title, about display
-  welcome: "Welcome! Type your message or /help for commands."  # (both)
-  goodbye: "Goodbye! ⚕"                       # (both) Exit message
-  response_label: " ⚕ Hermes "                # (classic) Response box header label
-  prompt_symbol: "❯ "                          # (both) Input prompt glyph
-  help_header: "(^_^)? Available Commands"     # (both) /help overlay title
-
-# ── Tool Output ─────────────────────────────────────────────────────────────
-# Character used as the prefix for tool output lines. (both)
-# Default is "┊" (thin dotted vertical line). Some alternatives:
-#   "╎" (light triple dash vertical)
-#   "▏" (left one-eighth block)
-#   "│" (box drawing light vertical)
-#   "┃" (box drawing heavy vertical)
-tool_prefix: "┊"
@@ -1,329 +0,0 @@
-# Container-Aware CLI Review Fixes Spec
-
-**PR:** NousResearch/hermes-agent#7543
-**Review:** cursor[bot] bugbot review (4094049442) + two prior rounds
-**Date:** 2026-04-12
-**Branch:** `feat/container-aware-cli-clean`
-
-## Review Issues Summary
-
-Six issues were raised across three bugbot review rounds. Three were fixed in intermediate commits (38277a6a, 726cf90f). This spec addresses remaining design concerns surfaced by those reviews and simplifies the implementation based on interview decisions.
-
-| # | Issue | Severity | Status |
-|---|-------|----------|--------|
-| 1 | `os.execvp` retry loop unreachable | Medium | Fixed in 79e8cd12 (switched to subprocess.run) |
-| 2 | Redundant `shutil.which("sudo")` | Medium | Fixed in 38277a6a (reuses `sudo` var) |
-| 3 | Missing `chown -h` on symlink update | Low | Fixed in 38277a6a |
-| 4 | Container routing after `parse_args()` | High | Fixed in 726cf90f |
-| 5 | Hardcoded `/home/${user}` | Medium | Fixed in 726cf90f |
-| 6 | Group membership not gated on `container.enable` | Low | Fixed in 726cf90f |
-
-The mechanical fixes are in place but the overall design needs revision. The retry loop, error swallowing, and process model have deeper issues than what the bugbot flagged.
-
---
-
-## Spec: Revised `_exec_in_container`
-
-### Design Principles
-
-1. **Let it crash.** No silent fallbacks. If `.container-mode` exists but something goes wrong, the error propagates naturally (Python traceback). The only case where container routing is skipped is when `.container-mode` doesn't exist or `HERMES_DEV=1`.
-2. **No retries.** Probe once for sudo, exec once. If it fails, docker/podman's stderr reaches the user verbatim.
-3. **Completely transparent.** No error wrapping, no prefixes, no spinners. Docker's output goes straight through.
-4. **`os.execvp` on the happy path.** Replace the Python process entirely so there's no idle parent during interactive sessions. Note: `execvp` never returns on success (process is replaced) and raises `OSError` on failure (it does not return a value). The container process's exit code becomes the process exit code by definition — no explicit propagation needed.
-5. **One human-readable exception to "let it crash".** `subprocess.TimeoutExpired` from the sudo probe gets a specific catch with a readable message, since a raw traceback for "your Docker daemon is slow" is confusing. All other exceptions propagate naturally.
-
-### Execution Flow
-
-```
-1. get_container_exec_info()
-   - HERMES_DEV=1 → return None (skip routing)
-   - Inside container → return None (skip routing)
-   - .container-mode doesn't exist → return None (skip routing)
-   - .container-mode exists → parse and return dict
-   - .container-mode exists but malformed/unreadable → LET IT CRASH (no try/except)
-
-2. _exec_in_container(container_info, sys.argv[1:])
-   a. shutil.which(backend) → if None, print "{backend} not found on PATH" and sys.exit(1)
-   b. Sudo probe: subprocess.run([runtime, "inspect", "--format", "ok", container_name], timeout=15)
-      - If succeeds → needs_sudo = False
-      - If fails → try subprocess.run([sudo, "-n", runtime, "inspect", ...], timeout=15)
-        - If succeeds → needs_sudo = True
-        - If fails → print error with sudoers hint (including why -n is required) and sys.exit(1)
-      - If TimeoutExpired → catch specifically, print human-readable message about slow daemon
-   c. Build exec_cmd: [sudo? + runtime, "exec", tty_flags, "-u", exec_user, env_flags, container, hermes_bin, *cli_args]
-   d. os.execvp(exec_cmd[0], exec_cmd)
-      - On success: process is replaced — Python is gone, container exit code IS the process exit code
-      - On OSError: let it crash (natural traceback)
-```
-
-### Changes to `hermes_cli/main.py`
-
-#### `_exec_in_container` — rewrite
-
-Remove:
- The entire retry loop (`max_retries`, `for attempt in range(...)`)
- Spinner logic (`"Waiting for container..."`, dots)
- Exit code classification (125/126/127 handling)
- `subprocess.run` for the exec call (keep it only for the sudo probe)
- Special TTY vs non-TTY retry counts
- The `time` import (no longer needed)
-
-Change:
- Use `os.execvp(exec_cmd[0], exec_cmd)` as the final call
- Keep the `subprocess` import only for the sudo probe
- Keep TTY detection for the `-it` vs `-i` flag
- Keep env var forwarding (TERM, COLORTERM, LANG, LC_ALL)
- Keep the sudo probe as-is (it's the one "smart" part)
- Bump probe `timeout` from 5s to 15s — cold podman on a loaded machine needs headroom
- Catch `subprocess.TimeoutExpired` specifically on both probe calls — print a readable message about the daemon being unresponsive instead of a raw traceback
- Expand the sudoers hint error message to explain *why* `-n` (non-interactive) is required: a password prompt would hang the CLI or break piped commands
-
-The function becomes roughly:
-
-```python
-def _exec_in_container(container_info: dict, cli_args: list):
-    """Replace the current process with a command inside the managed container.
-
-    Probes whether sudo is needed (rootful containers), then os.execvp
-    into the container. If exec fails, the OS error propagates naturally.
-    """
-    import shutil
-    import subprocess
-
-    backend = container_info["backend"]
-    container_name = container_info["container_name"]
-    exec_user = container_info["exec_user"]
-    hermes_bin = container_info["hermes_bin"]
-
-    runtime = shutil.which(backend)
-    if not runtime:
-        print(f"Error: {backend} not found on PATH. Cannot route to container.",
-              file=sys.stderr)
-        sys.exit(1)
-
-    # Probe whether we need sudo to see the rootful container.
-    # Timeout is 15s — cold podman on a loaded machine can take a while.
-    # TimeoutExpired is caught specifically for a human-readable message;
-    # all other exceptions propagate naturally.
-    needs_sudo = False
-    sudo = None
-    try:
-        probe = subprocess.run(
-            [runtime, "inspect", "--format", "ok", container_name],
-            capture_output=True, text=True, timeout=15,
-        )
-    except subprocess.TimeoutExpired:
-        print(
-            f"Error: timed out waiting for {backend} to respond.\n"
-            f"The {backend} daemon may be unresponsive or starting up.",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-
-    if probe.returncode != 0:
-        sudo = shutil.which("sudo")
-        if sudo:
-            try:
-                probe2 = subprocess.run(
-                    [sudo, "-n", runtime, "inspect", "--format", "ok", container_name],
-                    capture_output=True, text=True, timeout=15,
-                )
-            except subprocess.TimeoutExpired:
-                print(
-                    f"Error: timed out waiting for sudo {backend} to respond.",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-
-            if probe2.returncode == 0:
-                needs_sudo = True
-            else:
-                print(
-                    f"Error: container '{container_name}' not found via {backend}.\n"
-                    f"\n"
-                    f"The NixOS service runs the container as root. Your user cannot\n"
-                    f"see it because {backend} uses per-user namespaces.\n"
-                    f"\n"
-                    f"Fix: grant passwordless sudo for {backend}. The -n (non-interactive)\n"
-                    f"flag is required because the CLI calls sudo non-interactively —\n"
-                    f"a password prompt would hang or break piped commands:\n"
-                    f"\n"
-                    f'  security.sudo.extraRules = [{{\n'
-                    f'    users = [ "{os.getenv("USER", "your-user")}" ];\n'
-                    f'    commands = [{{ command = "{runtime}"; options = [ "NOPASSWD" ]; }}];\n'
-                    f'  }}];\n'
-                    f"\n"
-                    f"Or run: sudo hermes {' '.join(cli_args)}",
-                    file=sys.stderr,
-                )
-                sys.exit(1)
-        else:
-            print(
-                f"Error: container '{container_name}' not found via {backend}.\n"
-                f"The container may be running under root. Try: sudo hermes {' '.join(cli_args)}",
-                file=sys.stderr,
-            )
-            sys.exit(1)
-
-    is_tty = sys.stdin.isatty()
-    tty_flags = ["-it"] if is_tty else ["-i"]
-
-    env_flags = []
-    for var in ("TERM", "COLORTERM", "LANG", "LC_ALL"):
-        val = os.environ.get(var)
-        if val:
-            env_flags.extend(["-e", f"{var}={val}"])
-
-    cmd_prefix = [sudo, "-n", runtime] if needs_sudo else [runtime]
-    exec_cmd = (
-        cmd_prefix + ["exec"]
-        + tty_flags
-        + ["-u", exec_user]
-        + env_flags
-        + [container_name, hermes_bin]
-        + cli_args
-    )
-
-    # execvp replaces this process entirely — it never returns on success.
-    # On failure it raises OSError, which propagates naturally.
-    os.execvp(exec_cmd[0], exec_cmd)
-```
-
-#### Container routing call site in `main()` — remove try/except
-
-Current:
-```python
-try:
-    from hermes_cli.config import get_container_exec_info
-    container_info = get_container_exec_info()
-    if container_info:
-        _exec_in_container(container_info, sys.argv[1:])
-        sys.exit(1)  # exec failed if we reach here
-except SystemExit:
-    raise
-except Exception:
-    pass  # Container routing unavailable, proceed locally
-```
-
-Revised:
-```python
-from hermes_cli.config import get_container_exec_info
-container_info = get_container_exec_info()
-if container_info:
-    _exec_in_container(container_info, sys.argv[1:])
-    # Unreachable: os.execvp never returns on success (process is replaced)
-    # and raises OSError on failure (which propagates as a traceback).
-    # This line exists only as a defensive assertion.
-    sys.exit(1)
-```
-
-No try/except. If `.container-mode` doesn't exist, `get_container_exec_info()` returns `None` and we skip routing. If it exists but is broken, the exception propagates with a natural traceback.
-
-Note: `sys.exit(1)` after `_exec_in_container` is dead code in all paths — `os.execvp` either replaces the process or raises. It's kept as a belt-and-suspenders assertion with a comment marking it unreachable, not as actual error handling.
-
-### Changes to `hermes_cli/config.py`
-
-#### `get_container_exec_info` — remove inner try/except
-
-Current code catches `(OSError, IOError)` and returns `None`. This silently hides permission errors, corrupt files, etc.
-
-Change: Remove the try/except around file reading. Keep the early returns for `HERMES_DEV=1` and `_is_inside_container()`. The `FileNotFoundError` from `open()` when `.container-mode` doesn't exist should still return `None` (this is the "container mode not enabled" case). All other exceptions propagate.
-
-```python
-def get_container_exec_info() -> Optional[dict]:
-    if os.environ.get("HERMES_DEV") == "1":
-        return None
-    if _is_inside_container():
-        return None
-
-    container_mode_file = get_hermes_home() / ".container-mode"
-
-    try:
-        with open(container_mode_file, "r") as f:
-            # ... parse key=value lines ...
-    except FileNotFoundError:
-        return None
-    # All other exceptions (PermissionError, malformed data, etc.) propagate
-
-    return { ... }
-```
-
---
-
-## Spec: NixOS Module Changes
-
-### Symlink creation — simplify to two branches
-
-Current: 4 branches (symlink exists, directory exists, other file, doesn't exist).
-
-Revised: 2 branches.
-
-```bash
-if [ -d "${symlinkPath}" ] && [ ! -L "${symlinkPath}" ]; then
-  # Real directory — back it up, then create symlink
-  _backup="${symlinkPath}.bak.$(date +%s)"
-  echo "hermes-agent: backing up existing ${symlinkPath} to $_backup"
-  mv "${symlinkPath}" "$_backup"
-fi
-# For everything else (symlink, doesn't exist, etc.) — just force-create
-ln -sfn "${target}" "${symlinkPath}"
-chown -h ${user}:${cfg.group} "${symlinkPath}"
-```
-
-`ln -sfn` handles: existing symlink (replaces), doesn't exist (creates), and after the `mv` above (creates). The only case that needs special handling is a real directory, because `ln -sfn` cannot atomically replace a directory.
-
-Note: there is a theoretical race between the `[ -d ... ]` check and the `mv` (something could create/remove the directory in between). In practice this is a NixOS activation script running as root during `nixos-rebuild switch` — no other process should be touching `~/.hermes` at that moment. Not worth adding locking for.
-
-### Sudoers — document, don't auto-configure
-
-Do NOT add `security.sudo.extraRules` to the module. Document the sudoers requirement in the module's description/comments and in the error message the CLI prints when sudo probe fails.
-
-### Group membership gating — keep as-is
-
-The fix in 726cf90f (`cfg.container.enable && cfg.container.hostUsers != []`) is correct. Leftover group membership when container mode is disabled is harmless. No cleanup needed.
-
---
-
-## Spec: Test Rewrite
-
-The existing test file (`tests/hermes_cli/test_container_aware_cli.py`) has 16 tests. With the simplified exec model, several are obsolete.
-
-### Tests to keep (update as needed)
-
- `test_is_inside_container_dockerenv` — unchanged
- `test_is_inside_container_containerenv` — unchanged
- `test_is_inside_container_cgroup_docker` — unchanged
- `test_is_inside_container_false_on_host` — unchanged
- `test_get_container_exec_info_returns_metadata` — unchanged
- `test_get_container_exec_info_none_inside_container` — unchanged
- `test_get_container_exec_info_none_without_file` — unchanged
- `test_get_container_exec_info_skipped_when_hermes_dev` — unchanged
- `test_get_container_exec_info_not_skipped_when_hermes_dev_zero` — unchanged
- `test_get_container_exec_info_defaults` — unchanged
- `test_get_container_exec_info_docker_backend` — unchanged
-
-### Tests to add
-
- `test_get_container_exec_info_crashes_on_permission_error` — verify that `PermissionError` propagates (no silent `None` return)
- `test_exec_in_container_calls_execvp` — verify `os.execvp` is called with correct args (runtime, tty flags, user, env, container, binary, cli args)
- `test_exec_in_container_sudo_probe_sets_prefix` — verify that when first probe fails and sudo probe succeeds, `os.execvp` is called with `sudo -n` prefix
- `test_exec_in_container_no_runtime_hard_fails` — keep existing, verify `sys.exit(1)` when `shutil.which` returns None
- `test_exec_in_container_non_tty_uses_i_only` — update to check `os.execvp` args instead of `subprocess.run` args
- `test_exec_in_container_probe_timeout_prints_message` — verify that `subprocess.TimeoutExpired` from the probe produces a human-readable error and `sys.exit(1)`, not a raw traceback
- `test_exec_in_container_container_not_running_no_sudo` — verify the path where runtime exists (`shutil.which` returns a path) but probe returns non-zero and no sudo is available. Should print the "container may be running under root" error. This is distinct from `no_runtime_hard_fails` which covers `shutil.which` returning None.
-
-### Tests to delete
-
- `test_exec_in_container_tty_retries_on_container_failure` — retry loop removed
- `test_exec_in_container_non_tty_retries_silently_exits_126` — retry loop removed
- `test_exec_in_container_propagates_hermes_exit_code` — no subprocess.run to check exit codes; execvp replaces the process. Note: exit code propagation still works correctly — when `os.execvp` succeeds, the container's process *becomes* this process, so its exit code is the process exit code by OS semantics. No application code needed, no test needed. A comment in the function docstring documents this intent for future readers.
-
---
-
-## Out of Scope
-
- Auto-configuring sudoers rules in the NixOS module
- Any changes to `get_container_exec_info` parsing logic beyond the try/except narrowing
- Changes to `.container-mode` file format
- Changes to the `HERMES_DEV=1` bypass
- Changes to container detection logic (`_is_inside_container`)
@@ -53,7 +53,6 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
    try:
        loop = asyncio.get_running_loop()
        # We're in an async context -- need to run in thread
-        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(
                handle_function_call, tool_name, arguments, task_id
@@ -576,6 +576,14 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["free_response_channels"] = platform_cfg["free_response_channels"]
                if "mention_patterns" in platform_cfg:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
+                if "dm_policy" in platform_cfg:
+                    bridged["dm_policy"] = platform_cfg["dm_policy"]
+                if "allow_from" in platform_cfg:
+                    bridged["allow_from"] = platform_cfg["allow_from"]
+                if "group_policy" in platform_cfg:
+                    bridged["group_policy"] = platform_cfg["group_policy"]
+                if "group_allow_from" in platform_cfg:
+                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
@@ -608,6 +616,8 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
+                if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
+                    os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()

            # Discord settings → env vars (env vars take precedence)
            discord_cfg = yaml_cfg.get("discord", {})
@@ -662,8 +672,7 @@ def load_gateway_config() -> GatewayConfig:
                if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
                    os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
                if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
-                    import json as _json
-                    os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
+                    os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
                frc = telegram_cfg.get("free_response_chats")
                if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
                    if isinstance(frc, list):
@@ -700,6 +709,20 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
+                if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
+                    os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
+                af = whatsapp_cfg.get("allow_from")
+                if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
+                    if isinstance(af, list):
+                        af = ",".join(str(v) for v in af)
+                    os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
+                if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
+                    os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
+                gaf = whatsapp_cfg.get("group_allow_from")
+                if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
+                    if isinstance(gaf, list):
+                        gaf = ",".join(str(v) for v in gaf)
+                    os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)

            # DingTalk settings → env vars (env vars take precedence)
            dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -1237,7 +1260,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            if legacy_home:
                qq_home = legacy_home
                qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
-                import logging
                logging.getLogger(__name__).warning(
                    "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
                    "in your .env for consistency with the platform key."
@@ -135,9 +135,22 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

+    def _resolve_handlers(self, event_type: str) -> List[Callable]:
+        """Return all handlers that should fire for ``event_type``.
+
+        Exact matches fire first, followed by wildcard matches (e.g.
+        ``command:*`` matches ``command:reset``).
+        """
+        handlers = list(self._handlers.get(event_type, []))
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+        return handlers
+
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event.
+        Fire all handlers registered for an event, discarding return values.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -151,16 +164,7 @@ class HookRegistry:
        if context is None:
            context = {}

-        # Collect handlers: exact match + wildcard match
-        handlers = list(self._handlers.get(event_type, []))
-
-        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-
-        for fn in handlers:
+        for fn in self._resolve_handlers(event_type):
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -168,3 +172,32 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+
+    async def emit_collect(
+        self,
+        event_type: str,
+        context: Optional[Dict[str, Any]] = None,
+    ) -> List[Any]:
+        """Fire handlers and return their non-None return values in order.
+
+        Like :meth:`emit` but captures each handler's return value. Used for
+        decision-style hooks (e.g. ``command:<name>`` policies that want to
+        allow/deny/rewrite the command before normal dispatch).
+
+        Exceptions from individual handlers are logged but do not abort the
+        remaining handlers.
+        """
+        if context is None:
+            context = {}
+
+        results: List[Any] = []
+        for fn in self._resolve_handlers(event_type):
+            try:
+                result = fn(event_type, context)
+                if asyncio.iscoroutine(result):
+                    result = await result
+                if result is not None:
+                    results.append(result)
+            except Exception as e:
+                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+        return results
@@ -117,6 +117,160 @@ def _normalize_chat_content(
        return ""


+# Content part type aliases used by the OpenAI Chat Completions and Responses
+# APIs.  We accept both spellings on input and emit a single canonical internal
+# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the
+# rest of the agent pipeline already understands.
+_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"})
+_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"})
+_FILE_PART_TYPES = frozenset({"file", "input_file"})
+
+
+def _normalize_multimodal_content(content: Any) -> Any:
+    """Validate and normalize multimodal content for the API server.
+
+    Returns a plain string when the content is text-only, or a list of
+    ``{"type": "text"|"image_url", ...}`` parts when images are present.
+    The output shape is the native OpenAI Chat Completions vision format,
+    which the agent pipeline accepts verbatim (OpenAI-wire providers) or
+    converts (``_preprocess_anthropic_content`` for Anthropic).
+
+    Raises ``ValueError`` with an OpenAI-style code on invalid input:
+      * ``unsupported_content_type`` — file/input_file/file_id parts, or
+        non-image ``data:`` URLs.
+      * ``invalid_image_url`` — missing URL or unsupported scheme.
+      * ``invalid_content_part`` — malformed text/image objects.
+
+    Callers translate the ValueError into a 400 response.
+    """
+    # Scalar passthrough mirrors ``_normalize_chat_content``.
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
+    if not isinstance(content, list):
+        # Mirror the legacy text-normalizer's fallback so callers that
+        # pre-existed image support still get a string back.
+        return _normalize_chat_content(content)
+
+    items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
+    normalized_parts: List[Dict[str, Any]] = []
+    text_accum_len = 0
+
+    for part in items:
+        if isinstance(part, str):
+            if part:
+                trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH]
+                normalized_parts.append({"type": "text", "text": trimmed})
+                text_accum_len += len(trimmed)
+            continue
+
+        if not isinstance(part, dict):
+            # Ignore unknown scalars for forward compatibility with future
+            # Responses API additions (e.g. ``refusal``).  The same policy
+            # the text normalizer applies.
+            continue
+
+        raw_type = part.get("type")
+        part_type = str(raw_type or "").strip().lower()
+
+        if part_type in _TEXT_PART_TYPES:
+            text = part.get("text")
+            if text is None:
+                continue
+            if not isinstance(text, str):
+                text = str(text)
+            if text:
+                trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH]
+                normalized_parts.append({"type": "text", "text": trimmed})
+                text_accum_len += len(trimmed)
+            continue
+
+        if part_type in _IMAGE_PART_TYPES:
+            detail = part.get("detail")
+            image_ref = part.get("image_url")
+            # OpenAI Responses sends ``input_image`` with a top-level
+            # ``image_url`` string; Chat Completions sends ``image_url`` as
+            # ``{"url": "...", "detail": "..."}``.  Support both.
+            if isinstance(image_ref, dict):
+                url_value = image_ref.get("url")
+                detail = image_ref.get("detail", detail)
+            else:
+                url_value = image_ref
+            if not isinstance(url_value, str) or not url_value.strip():
+                raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.")
+            url_value = url_value.strip()
+            lowered = url_value.lower()
+            if lowered.startswith("data:"):
+                if not lowered.startswith("data:image/") or "," not in url_value:
+                    raise ValueError(
+                        "unsupported_content_type:Only image data URLs are supported. "
+                        "Non-image data payloads are not supported."
+                    )
+            elif not (lowered.startswith("http://") or lowered.startswith("https://")):
+                raise ValueError(
+                    "invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs."
+                )
+            image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}}
+            if detail is not None:
+                if not isinstance(detail, str) or not detail.strip():
+                    raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.")
+                image_part["image_url"]["detail"] = detail.strip()
+            normalized_parts.append(image_part)
+            continue
+
+        if part_type in _FILE_PART_TYPES:
+            raise ValueError(
+                "unsupported_content_type:Inline image inputs are supported, "
+                "but uploaded files and document inputs are not supported on this endpoint."
+            )
+
+        # Unknown part type — reject explicitly so clients get a clear error
+        # instead of a silently dropped turn.
+        raise ValueError(
+            f"unsupported_content_type:Unsupported content part type {raw_type!r}. "
+            "Only text and image_url/input_image parts are supported."
+        )
+
+    if not normalized_parts:
+        return ""
+
+    # Text-only: collapse to a plain string so downstream logging/trajectory
+    # code sees the native shape and prompt caching on text-only turns is
+    # unaffected.
+    if all(p.get("type") == "text" for p in normalized_parts):
+        return "\n".join(p["text"] for p in normalized_parts if p.get("text"))
+
+    return normalized_parts
+
+
+def _content_has_visible_payload(content: Any) -> bool:
+    """True when content has any text or image attachment.  Used to reject empty turns."""
+    if isinstance(content, str):
+        return bool(content.strip())
+    if isinstance(content, list):
+        for part in content:
+            if isinstance(part, dict):
+                ptype = str(part.get("type") or "").strip().lower()
+                if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip():
+                    return True
+                if ptype in _IMAGE_PART_TYPES:
+                    return True
+    return False
+
+
+def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response":
+    """Translate a ``_normalize_multimodal_content`` ValueError into a 400 response."""
+    raw = str(exc)
+    code, _, message = raw.partition(":")
+    if not message:
+        code, message = "invalid_content_part", raw
+    return web.json_response(
+        _openai_error(message, code=code, param=param),
+        status=400,
+    )
+
+
 def check_api_server_requirements() -> bool:
    """Check if API server dependencies are available."""
    return AIOHTTP_AVAILABLE
@@ -169,7 +323,6 @@ class ResponseStore:
        ).fetchone()
        if row is None:
            return None
-        import time
        self._conn.execute(
            "UPDATE responses SET accessed_at = ? WHERE response_id = ?",
            (time.time(), response_id),
@@ -179,7 +332,6 @@ class ResponseStore:

    def put(self, response_id: str, data: Dict[str, Any]) -> None:
        """Store a response, evicting the oldest if at capacity."""
-        import time
        self._conn.execute(
            "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
            (response_id, json.dumps(data, default=str), time.time()),
@@ -315,12 +467,12 @@ class _IdempotencyCache:
    def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
        from collections import OrderedDict
        self._store = OrderedDict()
+        self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
        self._ttl = ttl_seconds
        self._max = max_items

    def _purge(self):
-        import time as _t
-        now = _t.time()
+        now = time.time()
        expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
        for k in expired:
            self._store.pop(k, None)
@@ -332,11 +484,27 @@ class _IdempotencyCache:
        item = self._store.get(key)
        if item and item["fp"] == fingerprint:
            return item["resp"]
-        resp = await compute_coro()
-        import time as _t
-        self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
-        self._purge()
-        return resp
+
+        inflight_key = (key, fingerprint)
+        task = self._inflight.get(inflight_key)
+        if task is None:
+            async def _compute_and_store():
+                resp = await compute_coro()
+                import time as _t
+                self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
+                self._purge()
+                return resp
+
+            task = asyncio.create_task(_compute_and_store())
+            self._inflight[inflight_key] = task
+
+            def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
+                if self._inflight.get(inflight_key) is done_task:
+                    self._inflight.pop(inflight_key, None)
+
+            task.add_done_callback(_clear_inflight)
+
+        return await asyncio.shield(task)


 _idem_cache = _IdempotencyCache()
@@ -366,6 +534,30 @@ def _derive_chat_session_id(
    return f"api-{digest}"


+_CRON_AVAILABLE = False
+try:
+    from cron.jobs import (
+        list_jobs as _cron_list,
+        get_job as _cron_get,
+        create_job as _cron_create,
+        update_job as _cron_update,
+        remove_job as _cron_remove,
+        pause_job as _cron_pause,
+        resume_job as _cron_resume,
+        trigger_job as _cron_trigger,
+    )
+    _CRON_AVAILABLE = True
+except ImportError:
+    _cron_list = None
+    _cron_get = None
+    _cron_create = None
+    _cron_update = None
+    _cron_remove = None
+    _cron_pause = None
+    _cron_resume = None
+    _cron_trigger = None
+
+
 class APIServerAdapter(BasePlatformAdapter):
    """
    OpenAI-compatible HTTP API server adapter.
@@ -637,26 +829,32 @@ class APIServerAdapter(BasePlatformAdapter):
        system_prompt = None
        conversation_messages: List[Dict[str, str]] = []

-        for msg in messages:
+        for idx, msg in enumerate(messages):
            role = msg.get("role", "")
-            content = _normalize_chat_content(msg.get("content", ""))
+            raw_content = msg.get("content", "")
            if role == "system":
-                # Accumulate system messages
+                # System messages don't support images (Anthropic rejects, OpenAI
+                # text-model systems don't render them).  Flatten to text.
+                content = _normalize_chat_content(raw_content)
                if system_prompt is None:
                    system_prompt = content
                else:
                    system_prompt = system_prompt + "\n" + content
            elif role in ("user", "assistant"):
+                try:
+                    content = _normalize_multimodal_content(raw_content)
+                except ValueError as exc:
+                    return _multimodal_validation_error(exc, param=f"messages[{idx}].content")
                conversation_messages.append({"role": role, "content": content})

        # Extract the last user message as the primary input
-        user_message = ""
+        user_message: Any = ""
        history = []
        if conversation_messages:
            user_message = conversation_messages[-1].get("content", "")
            history = conversation_messages[:-1]

-        if not user_message:
+        if not _content_has_visible_payload(user_message):
            return web.json_response(
                {"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
                status=400,
@@ -1424,16 +1622,19 @@ class APIServerAdapter(BasePlatformAdapter):
            # No error if conversation doesn't exist yet — it's a new conversation

        # Normalize input to message list
-        input_messages: List[Dict[str, str]] = []
+        input_messages: List[Dict[str, Any]] = []
        if isinstance(raw_input, str):
            input_messages = [{"role": "user", "content": raw_input}]
        elif isinstance(raw_input, list):
-            for item in raw_input:
+            for idx, item in enumerate(raw_input):
                if isinstance(item, str):
                    input_messages.append({"role": "user", "content": item})
                elif isinstance(item, dict):
                    role = item.get("role", "user")
-                    content = _normalize_chat_content(item.get("content", ""))
+                    try:
+                        content = _normalize_multimodal_content(item.get("content", ""))
+                    except ValueError as exc:
+                        return _multimodal_validation_error(exc, param=f"input[{idx}].content")
                    input_messages.append({"role": role, "content": content})
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1442,7 +1643,7 @@ class APIServerAdapter(BasePlatformAdapter):
        # This lets stateless clients supply their own history instead of
        # relying on server-side response chaining via previous_response_id.
        # Precedence: explicit conversation_history > previous_response_id.
-        conversation_history: List[Dict[str, str]] = []
+        conversation_history: List[Dict[str, Any]] = []
        raw_history = body.get("conversation_history")
        if raw_history:
            if not isinstance(raw_history, list):
@@ -1456,7 +1657,11 @@ class APIServerAdapter(BasePlatformAdapter):
                        _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
                        status=400,
                    )
-                conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
+                try:
+                    entry_content = _normalize_multimodal_content(entry["content"])
+                except ValueError as exc:
+                    return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content")
+                conversation_history.append({"role": str(entry["role"]), "content": entry_content})
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

@@ -1476,8 +1681,8 @@ class APIServerAdapter(BasePlatformAdapter):
            conversation_history.append(msg)

        # Last input message is the user_message
-        user_message = input_messages[-1].get("content", "") if input_messages else ""
-        if not user_message:
+        user_message: Any = input_messages[-1].get("content", "") if input_messages else ""
+        if not _content_has_visible_payload(user_message):
            return web.json_response(_openai_error("No user message found in input"), status=400)

        # Truncation support
@@ -1682,44 +1887,16 @@ class APIServerAdapter(BasePlatformAdapter):
    # Cron jobs API
    # ------------------------------------------------------------------

-    # Check cron module availability once (not per-request)
-    _CRON_AVAILABLE = False
-    try:
-        from cron.jobs import (
-            list_jobs as _cron_list,
-            get_job as _cron_get,
-            create_job as _cron_create,
-            update_job as _cron_update,
-            remove_job as _cron_remove,
-            pause_job as _cron_pause,
-            resume_job as _cron_resume,
-            trigger_job as _cron_trigger,
-        )
-        # Wrap as staticmethod to prevent descriptor binding — these are plain
-        # module functions, not instance methods.  Without this, self._cron_*()
-        # injects ``self`` as the first positional argument and every call
-        # raises TypeError.
-        _cron_list = staticmethod(_cron_list)
-        _cron_get = staticmethod(_cron_get)
-        _cron_create = staticmethod(_cron_create)
-        _cron_update = staticmethod(_cron_update)
-        _cron_remove = staticmethod(_cron_remove)
-        _cron_pause = staticmethod(_cron_pause)
-        _cron_resume = staticmethod(_cron_resume)
-        _cron_trigger = staticmethod(_cron_trigger)
-        _CRON_AVAILABLE = True
-    except ImportError:
-        pass
-
    _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
    # Allowed fields for update — prevents clients injecting arbitrary keys
    _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
    _MAX_NAME_LENGTH = 200
    _MAX_PROMPT_LENGTH = 5000

-    def _check_jobs_available(self) -> Optional["web.Response"]:
+    @staticmethod
+    def _check_jobs_available() -> Optional["web.Response"]:
        """Return error response if cron module isn't available."""
-        if not self._CRON_AVAILABLE:
+        if not _CRON_AVAILABLE:
            return web.json_response(
                {"error": "Cron module not available"}, status=501,
            )
@@ -1744,7 +1921,7 @@ class APIServerAdapter(BasePlatformAdapter):
            return cron_err
        try:
            include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
-            jobs = self._cron_list(include_disabled=include_disabled)
+            jobs = _cron_list(include_disabled=include_disabled)
            return web.json_response({"jobs": jobs})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1792,7 +1969,7 @@ class APIServerAdapter(BasePlatformAdapter):
            if repeat is not None:
                kwargs["repeat"] = repeat

-            job = self._cron_create(**kwargs)
+            job = _cron_create(**kwargs)
            return web.json_response({"job": job})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1809,7 +1986,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_get(job_id)
+            job = _cron_get(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1842,7 +2019,7 @@ class APIServerAdapter(BasePlatformAdapter):
                return web.json_response(
                    {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
                )
-            job = self._cron_update(job_id, sanitized)
+            job = _cron_update(job_id, sanitized)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1861,7 +2038,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            success = self._cron_remove(job_id)
+            success = _cron_remove(job_id)
            if not success:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"ok": True})
@@ -1880,7 +2057,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_pause(job_id)
+            job = _cron_pause(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1899,7 +2076,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_resume(job_id)
+            job = _cron_resume(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1918,7 +2095,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_trigger(job_id)
+            job = _cron_trigger(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -19,6 +19,8 @@ import uuid
 from abc import ABC, abstractmethod
 from urllib.parse import urlsplit

+from utils import normalize_proxy_url
+
 logger = logging.getLogger(__name__)


@@ -159,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
-            return value
+            return normalize_proxy_url(value)
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
-            return value
-    return _detect_macos_system_proxy()
+            return normalize_proxy_url(value)
+    return normalize_proxy_url(_detect_macos_system_proxy())


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -391,12 +393,9 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

-    import asyncio
    import httpx
-    import logging as _logging
-    _log = _logging.getLogger(__name__)
+    _log = logging.getLogger(__name__)

-    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -414,7 +413,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                response.raise_for_status()
                return cache_image_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -430,7 +428,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise last_exc


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -510,12 +507,9 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

-    import asyncio
    import httpx
-    import logging as _logging
-    _log = _logging.getLogger(__name__)
+    _log = logging.getLogger(__name__)

-    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -533,7 +527,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                response.raise_for_status()
                return cache_audio_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -549,7 +542,39 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise last_exc
+
+
+# ---------------------------------------------------------------------------
+# Video cache utilities
+#
+# Same pattern as image/audio cache -- videos from platforms are downloaded
+# here so the agent can reference them by local file path.
+# ---------------------------------------------------------------------------
+
+VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")
+
+SUPPORTED_VIDEO_TYPES = {
+    ".mp4": "video/mp4",
+    ".mov": "video/quicktime",
+    ".webm": "video/webm",
+    ".mkv": "video/x-matroska",
+    ".avi": "video/x-msvideo",
+}
+
+
+def get_video_cache_dir() -> Path:
+    """Return the video cache directory, creating it if it doesn't exist."""
+    VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    return VIDEO_CACHE_DIR
+
+
+def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
+    """Save raw video bytes to the cache and return the absolute file path."""
+    cache_dir = get_video_cache_dir()
+    filename = f"video_{uuid.uuid4().hex[:12]}{ext}"
+    filepath = cache_dir / filename
+    filepath.write_bytes(data)
+    return str(filepath)


 # ---------------------------------------------------------------------------
@@ -727,7 +752,10 @@ class MessageEvent:
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
-        return parts[1] if len(parts) > 1 else ""
+        args = parts[1] if len(parts) > 1 else ""
+        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
+        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
+        return args


@dataclass 
@@ -872,10 +900,16 @@ class BasePlatformAdapter(ABC):
        self._fatal_error_retryable = True
        self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
        
-        # Track active message handlers per session for interrupt support
-        # Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
+        # Track active message handlers per session for interrupt support.
+        # _active_sessions stores the per-session interrupt Event; _session_tasks
+        # maps session → the specific Task currently processing it so that
+        # session-terminating commands (/stop, /new, /reset) can cancel the
+        # right task and release the adapter-level guard deterministically.
+        # Without the owner-task map, an old task's finally block could delete
+        # a newer task's guard, leaving stale busy state.
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
+        self._session_tasks: Dict[str, asyncio.Task] = {}
        # Background message-processing tasks spawned by handle_message().
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
@@ -1318,7 +1352,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1652,6 +1686,222 @@ class BasePlatformAdapter(ABC):
            return f"{existing_text}\n\n{new_text}".strip()
        return existing_text

+    # ------------------------------------------------------------------
+    # Session task + guard ownership helpers
+    # ------------------------------------------------------------------
+    # These were introduced together with the _session_tasks owner map to
+    # make session lifecycle reconciliation deterministic across (a) the
+    # normal completion path, (b) /stop/ /new/ /reset bypass commands,
+    # and (c) stale-lock self-heal on the next inbound message.
+
+    def _release_session_guard(
+        self,
+        session_key: str,
+        *,
+        guard: Optional[asyncio.Event] = None,
+    ) -> None:
+        """Release the adapter-level guard for a session.
+
+        When ``guard`` is provided, only release the entry if it still points
+        at that exact Event.  This lets reset-like commands swap in a temporary
+        guard while the old processing task unwinds, without having the old
+        task's cleanup accidentally clear the replacement guard.
+        """
+        current_guard = self._active_sessions.get(session_key)
+        if current_guard is None:
+            return
+        if guard is not None and current_guard is not guard:
+            return
+        del self._active_sessions[session_key]
+
+    def _session_task_is_stale(self, session_key: str) -> bool:
+        """Return True if the owner task for ``session_key`` is done/cancelled.
+
+        A lock is "stale" when the adapter still has ``_active_sessions[key]``
+        AND a known owner task in ``_session_tasks`` that has already exited.
+        When there is no owner task at all, that usually means the guard was
+        installed by some path other than handle_message() (tests sometimes
+        install guards directly) — don't treat that as stale.  The on-entry
+        self-heal only needs to handle the production split-brain case where
+        an owner task was recorded, then exited without clearing its guard.
+        """
+        task = self._session_tasks.get(session_key)
+        if task is None:
+            return False
+        done = getattr(task, "done", None)
+        return bool(done and done())
+
+    def _heal_stale_session_lock(self, session_key: str) -> bool:
+        """Clear a stale session lock if the owner task is already gone.
+
+        Returns True if a stale lock was healed.  Returns False if there is
+        no lock, or the owner task is still alive (the normal busy case).
+
+        This is the on-entry safety net sidbin's issue #11016 analysis calls
+        for: without it, a split-brain — adapter still thinks the session is
+        active, but nothing is actually processing — traps the chat in
+        infinite "Interrupting current task..." until the gateway is
+        restarted.
+        """
+        if session_key not in self._active_sessions:
+            return False
+        if not self._session_task_is_stale(session_key):
+            return False
+        logger.warning(
+            "[%s] Healing stale session lock for %s (owner task is done/absent)",
+            self.name,
+            session_key,
+        )
+        self._active_sessions.pop(session_key, None)
+        self._pending_messages.pop(session_key, None)
+        self._session_tasks.pop(session_key, None)
+        return True
+
+    def _start_session_processing(
+        self,
+        event: MessageEvent,
+        session_key: str,
+        *,
+        interrupt_event: Optional[asyncio.Event] = None,
+    ) -> bool:
+        """Spawn a background processing task under the given session guard.
+
+        Returns True on success.  If the runtime stubs ``create_task`` with a
+        non-Task sentinel (some tests do this), the guard is rolled back and
+        False is returned so the caller isn't left holding a half-installed
+        session lock.
+        """
+        guard = interrupt_event or asyncio.Event()
+        self._active_sessions[session_key] = guard
+
+        task = asyncio.create_task(self._process_message_background(event, session_key))
+        self._session_tasks[session_key] = task
+        try:
+            self._background_tasks.add(task)
+        except TypeError:
+            # Tests stub create_task() with lightweight sentinels that are not
+            # hashable and do not support lifecycle callbacks.
+            self._session_tasks.pop(session_key, None)
+            self._release_session_guard(session_key, guard=guard)
+            return False
+        if hasattr(task, "add_done_callback"):
+            task.add_done_callback(self._background_tasks.discard)
+            task.add_done_callback(self._expected_cancelled_tasks.discard)
+        return True
+
+    async def cancel_session_processing(
+        self,
+        session_key: str,
+        *,
+        release_guard: bool = True,
+        discard_pending: bool = True,
+    ) -> None:
+        """Cancel in-flight processing for a single session.
+
+        ``release_guard=False`` keeps the adapter-level session guard in place
+        so reset-like commands can finish atomically before follow-up messages
+        are allowed to start a fresh background task.
+        """
+        task = self._session_tasks.pop(session_key, None)
+        if task is not None and not task.done():
+            logger.debug(
+                "[%s] Cancelling active processing for session %s",
+                self.name,
+                session_key,
+            )
+            self._expected_cancelled_tasks.add(task)
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
+            except Exception:
+                logger.debug(
+                    "[%s] Session cancellation raised while unwinding %s",
+                    self.name,
+                    session_key,
+                    exc_info=True,
+                )
+        if discard_pending:
+            self._pending_messages.pop(session_key, None)
+        if release_guard:
+            self._release_session_guard(session_key)
+
+    async def _drain_pending_after_session_command(
+        self,
+        session_key: str,
+        command_guard: asyncio.Event,
+    ) -> None:
+        """Resume the latest queued follow-up once a session command completes.
+
+        Called at the tail of /stop, /new, and /reset dispatch.  Releases the
+        command-scoped guard, then — if a follow-up message landed while the
+        command was running — spawns a fresh processing task for it.
+        """
+        pending_event = self._pending_messages.pop(session_key, None)
+        self._release_session_guard(session_key, guard=command_guard)
+        if pending_event is None:
+            return
+        self._start_session_processing(pending_event, session_key)
+
+    async def _dispatch_active_session_command(
+        self,
+        event: MessageEvent,
+        session_key: str,
+        cmd: str,
+    ) -> None:
+        """Dispatch a reset-like bypass command while preserving guard ordering.
+
+        /stop, /new, and /reset must:
+          1. Keep the session guard installed while the runner processes the
+             command (so a racing follow-up message stays queued, not
+             dispatched as a second parallel run).
+          2. Cancel the old in-flight adapter task only AFTER the runner has
+             finished handling the command (so the runner sees consistent
+             state and its response is sent in order).
+          3. Release the command-scoped guard and drain the latest queued
+             follow-up exactly once, after 1 and 2 complete.
+        """
+        logger.debug(
+            "[%s] Command '/%s' bypassing active-session guard for %s",
+            self.name,
+            cmd,
+            session_key,
+        )
+
+        current_guard = self._active_sessions.get(session_key)
+        command_guard = asyncio.Event()
+        self._active_sessions[session_key] = command_guard
+        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+
+        try:
+            response = await self._message_handler(event)
+            # Old adapter task (if any) is cancelled AFTER the runner has
+            # fully handled the command — keeps ordering deterministic.
+            await self.cancel_session_processing(
+                session_key,
+                release_guard=False,
+                discard_pending=False,
+            )
+            if response:
+                await self._send_with_retry(
+                    chat_id=event.source.chat_id,
+                    content=response,
+                    reply_to=event.message_id,
+                    metadata=thread_meta,
+                )
+        except Exception:
+            # On failure, restore the original guard if one still exists so
+            # we don't leave the session in a half-reset state.
+            if self._active_sessions.get(session_key) is command_guard:
+                if session_key in self._session_tasks and current_guard is not None:
+                    self._active_sessions[session_key] = current_guard
+                else:
+                    self._release_session_guard(session_key, guard=command_guard)
+            raise
+
+        await self._drain_pending_after_session_command(session_key, command_guard)
+
    async def handle_message(self, event: MessageEvent) -> None:
        """
        Process an incoming message.
@@ -1668,7 +1918,15 @@ class BasePlatformAdapter(ABC):
            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
        )
-        
+
+        # On-entry self-heal: if the adapter still has an _active_sessions
+        # entry for this key but the owner task has already exited (done or
+        # cancelled), the lock is stale.  Clear it and fall through to
+        # normal dispatch so the user isn't trapped behind a dead guard —
+        # this is the split-brain tail described in issue #11016.
+        if session_key in self._active_sessions:
+            self._heal_stale_session_lock(session_key)
+
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
            # Certain commands must bypass the active-session guard and be
@@ -1685,6 +1943,23 @@ class BasePlatformAdapter(ABC):
            from hermes_cli.commands import should_bypass_active_session

            if should_bypass_active_session(cmd):
+                # /stop, /new, /reset must cancel the in-flight adapter task
+                # and preserve ordering of queued follow-ups.  Route those
+                # through the dedicated handoff path that serializes
+                # cancellation + runner response + pending drain.
+                if cmd in ("stop", "new", "reset"):
+                    try:
+                        await self._dispatch_active_session_command(event, session_key, cmd)
+                    except Exception as e:
+                        logger.error(
+                            "[%s] Command '/%s' dispatch failed: %s",
+                            self.name, cmd, e, exc_info=True,
+                        )
+                    return
+
+                # Other bypass commands (/approve, /deny, /status,
+                # /background, /restart) just need direct dispatch — they
+                # don't cancel the running task.
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
@@ -1730,19 +2005,9 @@ class BasePlatformAdapter(ABC):
        # starts would also pass the _active_sessions check and spawn a
        # duplicate task.  (grammY sequentialize / aiogram EventIsolation
        # pattern — set the guard synchronously, not inside the task.)
-        self._active_sessions[session_key] = asyncio.Event()
-
-        # Spawn background task to process this message
-        task = asyncio.create_task(self._process_message_background(event, session_key))
-        try:
-            self._background_tasks.add(task)
-        except TypeError:
-            # Some tests stub create_task() with lightweight sentinels that are not
-            # hashable and do not support lifecycle callbacks.
-            return
-        if hasattr(task, "add_done_callback"):
-            task.add_done_callback(self._background_tasks.discard)
-            task.add_done_callback(self._expected_cancelled_tasks.discard)
+        # _start_session_processing installs the guard AND the owner-task
+        # mapping atomically so stale-lock detection works.
+        self._start_session_processing(event, session_key)
    
    @staticmethod
    def _get_human_delay() -> float:
@@ -1754,8 +2019,6 @@ class BasePlatformAdapter(ABC):
          HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
          HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
        """
-        import random
-
        mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
        if mode == "off":
            return 0.0
@@ -2104,6 +2367,9 @@ class BasePlatformAdapter(ABC):
                drain_task = asyncio.create_task(
                    self._process_message_background(late_pending, session_key)
                )
+                # Hand ownership of the session to the drain task so stale-lock
+                # detection keeps working while it runs.
+                self._session_tasks[session_key] = drain_task
                try:
                    self._background_tasks.add(drain_task)
                    drain_task.add_done_callback(self._background_tasks.discard)
@@ -2113,9 +2379,14 @@ class BasePlatformAdapter(ABC):
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
-                # Clean up session tracking
-                if session_key in self._active_sessions:
-                    del self._active_sessions[session_key]
+                # Clean up session tracking.  Guard-match both deletes so a
+                # reset-like command that already swapped in its own
+                # command_guard (and cancelled us) can't be accidentally
+                # cleared by our unwind.  The command owns the session now.
+                current_task = asyncio.current_task()
+                if current_task is not None and self._session_tasks.get(session_key) is current_task:
+                    del self._session_tasks[session_key]
+                self._release_session_guard(session_key, guard=interrupt_event)
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.
@@ -2145,6 +2416,7 @@ class BasePlatformAdapter(ABC):
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
        self._expected_cancelled_tasks.clear()
+        self._session_tasks.clear()
        self._pending_messages.clear()
        self._active_sessions.clear()

@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
 def check_bluebubbles_requirements() -> bool:
    try:
        import aiohttp  # noqa: F401
-        import httpx as _httpx  # noqa: F401
+        import httpx  # noqa: F401
    except ImportError:
        return False
    return True
@@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any
 logger = logging.getLogger(__name__)

 VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
+_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}

 try:
    import discord
@@ -527,6 +528,7 @@ class DiscordAdapter(BasePlatformAdapter):
        # Reply threading mode: "off" (no replies), "first" (reply on first
        # chunk only, default), "all" (reply-reference on every chunk).
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
+        self._slash_commands: bool = self.config.extra.get("slash_commands", True)

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -541,7 +543,6 @@ class DiscordAdapter(BasePlatformAdapter):
            # ctypes.util.find_library fails on macOS with Homebrew-installed libs,
            # so fall back to known Homebrew paths if needed.
            if not opus_path:
-                import sys
                _homebrew_paths = (
                    "/opt/homebrew/lib/libopus.dylib",  # Apple Silicon
                    "/usr/local/lib/libopus.dylib",     # Intel Mac
@@ -745,7 +746,8 @@ class DiscordAdapter(BasePlatformAdapter):
                    )

            # Register slash commands
-            self._register_slash_commands()
+            if self._slash_commands:
+                self._register_slash_commands()

            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -801,8 +803,27 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client:
            return
        try:
-            synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
-            logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
+            sync_policy = self._get_discord_command_sync_policy()
+            if sync_policy == "off":
+                logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
+                return
+
+            if sync_policy == "bulk":
+                synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
+                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
+                return
+
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
+            logger.info(
+                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
+                self.name,
+                summary["total"],
+                summary["unchanged"],
+                summary["updated"],
+                summary["recreated"],
+                summary["created"],
+                summary["deleted"],
+            )
        except asyncio.TimeoutError:
            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
        except asyncio.CancelledError:
@@ -810,6 +831,183 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)

+    def _get_discord_command_sync_policy(self) -> str:
+        raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
+        if raw in _DISCORD_COMMAND_SYNC_POLICIES:
+            return raw
+        if raw:
+            logger.warning(
+                "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
+                self.name,
+                raw,
+            )
+        return "safe"
+
+    def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Reduce command payloads to the semantic fields Hermes manages."""
+        contexts = payload.get("contexts")
+        integration_types = payload.get("integration_types")
+        return {
+            "type": int(payload.get("type", 1) or 1),
+            "name": str(payload.get("name", "") or ""),
+            "description": str(payload.get("description", "") or ""),
+            "default_member_permissions": self._normalize_permissions(
+                payload.get("default_member_permissions")
+            ),
+            "dm_permission": bool(payload.get("dm_permission", True)),
+            "nsfw": bool(payload.get("nsfw", False)),
+            "contexts": sorted(int(c) for c in contexts) if contexts else None,
+            "integration_types": (
+                sorted(int(i) for i in integration_types) if integration_types else None
+            ),
+            "options": [
+                self._canonicalize_app_command_option(item)
+                for item in payload.get("options", []) or []
+                if isinstance(item, dict)
+            ],
+        }
+
+    @staticmethod
+    def _normalize_permissions(value: Any) -> Optional[str]:
+        """Discord emits default_member_permissions as str server-side but discord.py
+        sets it as int locally. Normalize to str-or-None so the comparison is stable."""
+        if value is None:
+            return None
+        return str(value)
+
+    def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
+        """Build a canonical-ready dict from an AppCommand.
+
+        discord.py's AppCommand.to_dict() does NOT include nsfw,
+        dm_permission, or default_member_permissions (they live only on the
+        attributes). Pull them from the attributes so the canonicalizer sees
+        the real server-side values instead of defaults — otherwise any
+        command using non-default permissions would diff on every startup.
+        """
+        payload = dict(command.to_dict())
+        nsfw = getattr(command, "nsfw", None)
+        if nsfw is not None:
+            payload["nsfw"] = bool(nsfw)
+        guild_only = getattr(command, "guild_only", None)
+        if guild_only is not None:
+            payload["dm_permission"] = not bool(guild_only)
+        default_permissions = getattr(command, "default_member_permissions", None)
+        if default_permissions is not None:
+            payload["default_member_permissions"] = getattr(
+                default_permissions, "value", default_permissions
+            )
+        return payload
+
+    def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        return {
+            "type": int(payload.get("type", 0) or 0),
+            "name": str(payload.get("name", "") or ""),
+            "description": str(payload.get("description", "") or ""),
+            "required": bool(payload.get("required", False)),
+            "autocomplete": bool(payload.get("autocomplete", False)),
+            "choices": [
+                {
+                    "name": str(choice.get("name", "") or ""),
+                    "value": choice.get("value"),
+                }
+                for choice in payload.get("choices", []) or []
+                if isinstance(choice, dict)
+            ],
+            "channel_types": list(payload.get("channel_types", []) or []),
+            "min_value": payload.get("min_value"),
+            "max_value": payload.get("max_value"),
+            "min_length": payload.get("min_length"),
+            "max_length": payload.get("max_length"),
+            "options": [
+                self._canonicalize_app_command_option(item)
+                for item in payload.get("options", []) or []
+                if isinstance(item, dict)
+            ],
+        }
+
+    def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Fields supported by discord.py's edit_global_command route."""
+        canonical = self._canonicalize_app_command_payload(payload)
+        return {
+            "name": canonical["name"],
+            "description": canonical["description"],
+            "options": canonical["options"],
+        }
+
+    async def _safe_sync_slash_commands(self) -> Dict[str, int]:
+        """Diff existing global commands and only mutate the commands that changed."""
+        if not self._client:
+            return {
+                "total": 0,
+                "unchanged": 0,
+                "updated": 0,
+                "recreated": 0,
+                "created": 0,
+                "deleted": 0,
+            }
+
+        tree = self._client.tree
+        app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
+        if not app_id:
+            raise RuntimeError("Discord application ID is unavailable for slash command sync")
+
+        desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
+        desired_by_key = {
+            (int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
+            for payload in desired_payloads
+        }
+        existing_commands = await tree.fetch_commands()
+        existing_by_key = {
+            (
+                int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
+                str(command.name or "").lower(),
+            ): command
+            for command in existing_commands
+        }
+
+        unchanged = 0
+        updated = 0
+        recreated = 0
+        created = 0
+        deleted = 0
+        http = self._client.http
+
+        for key, desired in desired_by_key.items():
+            current = existing_by_key.pop(key, None)
+            if current is None:
+                await http.upsert_global_command(app_id, desired)
+                created += 1
+                continue
+
+            current_existing_payload = self._existing_command_to_payload(current)
+            current_payload = self._canonicalize_app_command_payload(current_existing_payload)
+            desired_payload = self._canonicalize_app_command_payload(desired)
+            if current_payload == desired_payload:
+                unchanged += 1
+                continue
+
+            if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
+                await http.delete_global_command(app_id, current.id)
+                await http.upsert_global_command(app_id, desired)
+                recreated += 1
+                continue
+
+            await http.edit_global_command(app_id, current.id, desired)
+            updated += 1
+
+        for current in existing_by_key.values():
+            await http.delete_global_command(app_id, current.id)
+            deleted += 1
+
+        return {
+            "total": len(desired_payloads),
+            "unchanged": unchanged,
+            "updated": updated,
+            "recreated": recreated,
+            "created": created,
+            "deleted": deleted,
+        }
+
    async def _add_reaction(self, message: Any, emoji: str) -> bool:
        """Add an emoji reaction to a Discord message."""
        if not message or not hasattr(message, "add_reaction"):
@@ -1081,6 +1279,8 @@ class DiscordAdapter(BasePlatformAdapter):
        chat_id: str,
        message_id: str,
        content: str,
+        *,
+        finalize: bool = False,
    ) -> SendResult:
        """Edit a previously sent Discord message."""
        if not self._client:
@@ -1420,8 +1620,7 @@ class DiscordAdapter(BasePlatformAdapter):
        speaking_user_ids: set = set()
        receiver = self._voice_receivers.get(guild_id)
        if receiver:
-            import time as _time
-            now = _time.monotonic()
+            now = time.monotonic()
            with receiver._lock:
                for ssrc, last_t in receiver._last_packet_time.items():
                    # Consider "speaking" if audio received within last 2 seconds
@@ -2129,10 +2328,42 @@ class DiscordAdapter(BasePlatformAdapter):
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
+        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
+            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
+            discord_name = _name.lower()[:32]
+            desc = (_description or f"Run /{_name}")[:100]
+            has_args = bool(_args_hint)
+
+            if has_args:
+                def _make_args_handler(__name: str, __hint: str):
+                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
+                    async def _handler(interaction: discord.Interaction, args: str = ""):
+                        await self._run_simple_slash(
+                            interaction, f"/{__name} {args}".strip()
+                        )
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_args_handler(_name, _args_hint)
+            else:
+                def _make_simple_handler(__name: str):
+                    async def _handler(interaction: discord.Interaction):
+                        await self._run_simple_slash(interaction, f"/{__name}")
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_simple_handler(_name)
+
+            return discord.app_commands.Command(
+                name=discord_name,
+                description=desc,
+                callback=handler,
+            )
+
+        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

-            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2147,38 +2378,10 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                # Skip aliases that overlap with already-registered names
-                # (aliases for explicitly registered commands are handled above).
-                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
-                has_args = bool(cmd_def.args_hint)
-
-                if has_args:
-                    # Command takes optional arguments — create handler with
-                    # an optional ``args`` string parameter.
-                    def _make_args_handler(_name: str, _hint: str):
-                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
-                        async def _handler(interaction: discord.Interaction, args: str = ""):
-                            await self._run_simple_slash(
-                                interaction, f"/{_name} {args}".strip()
-                            )
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
-                else:
-                    # Parameterless command.
-                    def _make_simple_handler(_name: str):
-                        async def _handler(interaction: discord.Interaction):
-                            await self._run_simple_slash(interaction, f"/{_name}")
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_simple_handler(cmd_def.name)
-
-                auto_cmd = discord.app_commands.Command(
-                    name=discord_name,
-                    description=desc,
-                    callback=handler,
+                auto_cmd = _build_auto_slash_command(
+                    cmd_def.name,
+                    cmd_def.description,
+                    cmd_def.args_hint,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2195,6 +2398,35 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

+        # ── Plugin-registered slash commands ──
+        # Plugins register via PluginContext.register_command(); we mirror
+        # those into Discord's native slash picker so users get the same
+        # autocomplete UX as for built-in commands. No per-platform plugin
+        # API needed — plugin commands are platform-agnostic.
+        try:
+            from hermes_cli.commands import _iter_plugin_command_entries
+
+            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
+                discord_name = plugin_name.lower()[:32]
+                if discord_name in already_registered:
+                    continue
+                auto_cmd = _build_auto_slash_command(
+                    plugin_name,
+                    plugin_desc,
+                    plugin_args_hint,
+                )
+                try:
+                    tree.add_command(auto_cmd)
+                    already_registered.add(discord_name)
+                except Exception:
+                    # Silently skip commands that fail registration (e.g.
+                    # name conflict with a subcommand group).
+                    pass
+        except Exception as e:
+            logger.warning(
+                "Discord auto-register from plugin commands failed: %s", e
+            )
+
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -2960,6 +3192,17 @@ class DiscordAdapter(BasePlatformAdapter):
            parent_channel_id = self._get_parent_channel_id(message.channel)

        is_voice_linked_channel = False
+
+        # Save mention-stripped text before auto-threading since create_thread()
+        # can clobber message.content, breaking /command detection in channels.
+        raw_content = message.content.strip()
+        normalized_content = raw_content
+        mention_prefix = False
+        if self._client.user and self._client.user in message.mentions:
+            mention_prefix = True
+            normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
+            normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
+            message.content = normalized_content
        if not isinstance(message.channel, discord.DMChannel):
            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
@@ -2997,13 +3240,8 @@ class DiscordAdapter(BasePlatformAdapter):
            in_bot_thread = is_thread and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
-                if self._client.user not in message.mentions:
+                if self._client.user not in message.mentions and not mention_prefix:
                    return
-
-            if self._client.user and self._client.user in message.mentions:
-                message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
-                message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
-
        # Auto-thread: when enabled, automatically create a thread for every
        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
@@ -3025,7 +3263,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Determine message type
        msg_type = MessageType.TEXT
-        if message.content.startswith("/"):
+        if normalized_content.startswith("/"):
            msg_type = MessageType.COMMAND
        elif message.attachments:
            # Check attachment types
@@ -3165,7 +3403,9 @@ class DiscordAdapter(BasePlatformAdapter):
                                att.filename, e, exc_info=True,
                            )

-        event_text = message.content
+        # Use normalized_content (saved before auto-threading) instead of message.content,
+        # to detect /slash commands in channel messages.
+        event_text = normalized_content
        if pending_text_injection:
            event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection

@@ -545,6 +545,7 @@ class EmailAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
+        **kwargs,
    ) -> SendResult:
        """Send a file as an email attachment."""
        try:
@@ -8,11 +8,41 @@ Supports:
 - Gateway allowlist integration via FEISHU_ALLOWED_USERS
 - Persistent dedup state across restarts
 - Per-chat serial message processing (matches openclaw createChatQueue)
- Persistent ACK emoji reaction on inbound messages
+- Processing status reactions: Typing while working, removed on success,
+  swapped for CrossMark on failure
 - Reaction events routed as synthetic text events (matches openclaw)
 - Interactive card button-click events routed as synthetic COMMAND events
 - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
 - Verification token validation as second auth layer (matches openclaw)
+
+Feishu identity model
+---------------------
+Feishu uses three user-ID tiers (official docs:
+https://open.feishu.cn/document/home/user-identity-introduction/introduction):
+
+  open_id  (ou_xxx)  — **App-scoped**.  The same person gets a different
+                        open_id under each Feishu app.  Always available in
+                        event payloads without extra permissions.
+  user_id  (u_xxx)   — **Tenant-scoped**.  Stable within a company but
+                        requires the ``contact:user.employee_id:readonly``
+                        scope.  May not be present.
+  union_id (on_xxx)  — **Developer-scoped**.  Same across all apps owned by
+                        one developer/ISV.  Best cross-app stable ID.
+
+For bots specifically:
+
+  app_id              — The application's canonical credential identifier.
+  bot open_id         — Returned by ``/bot/v3/info``.  This is the bot's own
+                        open_id *within its app context* and is what Feishu
+                        puts in ``mentions[].id.open_id`` when someone
+                        @-mentions the bot.  Used for mention gating only.
+
+In single-bot mode (what Hermes currently supports), open_id works as a
+de-facto unique user identifier since there is only one app context.
+
+Session-key participant isolation prefers ``union_id`` (via user_id_alt)
+over ``open_id`` (via user_id) so that sessions stay stable if the same
+user is seen through different apps in the future.
 """

 from __future__ import annotations
@@ -29,11 +59,12 @@ import re
 import threading
 import time
 import uuid
+from collections import OrderedDict
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Sequence
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -71,7 +102,9 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
+    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
+    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -98,6 +131,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    SUPPORTED_DOCUMENT_TYPES,
    cache_document_from_bytes,
@@ -190,7 +224,17 @@ _APPROVAL_LABEL_MAP: Dict[str, str] = {
 }
 _FEISHU_BOT_MSG_TRACK_SIZE = 512                   # LRU size for tracking sent message IDs
 _FEISHU_REPLY_FALLBACK_CODES = frozenset({230011, 231003})  # reply target withdrawn/missing → create fallback
-_FEISHU_ACK_EMOJI = "OK"
+
+# Feishu reactions render as prominent badges, unlike Discord/Telegram's
+# small footer emoji — a success badge on every message would add noise, so
+# we only mark start (Typing) and failure (CrossMark); the reply itself is
+# the success signal.
+_FEISHU_REACTION_IN_PROGRESS = "Typing"
+_FEISHU_REACTION_FAILURE = "CrossMark"
+# Bound on the (message_id → reaction_id) handle cache. Happy-path entries
+# drain on completion; the cap is a safeguard against unbounded growth from
+# delete-failures, not a capacity plan.
+_FEISHU_PROCESSING_REACTION_CACHE_SIZE = 1024

 # QR onboarding constants
 _ONBOARD_ACCOUNTS_URLS = {
@@ -221,6 +265,8 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
+_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
+_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -264,12 +310,36 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


+@dataclass(frozen=True)
+class FeishuMentionRef:
+    name: str = ""
+    open_id: str = ""
+    is_all: bool = False
+    is_self: bool = False
+
+
+@dataclass(frozen=True)
+class _FeishuBotIdentity:
+    open_id: str = ""
+    user_id: str = ""
+    name: str = ""
+
+    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
+        # Precedence: open_id > user_id > name. IDs are authoritative when both
+        # sides have them; the next tier is only considered when either side
+        # lacks the current one.
+        if open_id and self.open_id:
+            return open_id == self.open_id
+        if user_id and self.user_id:
+            return user_id == self.user_id
+        return bool(self.name) and name == self.name
+
+
@dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)


@dataclass(frozen=True)
@@ -279,14 +349,14 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)
+    mentions: List[FeishuMentionRef] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass(frozen=True)
 class FeishuAdapterSettings:
-    app_id: str
+    app_id: str  # Canonical bot/app identifier (credential, not from event payloads)
    app_secret: str
    domain_name: str
    connection_mode: str
@@ -294,7 +364,11 @@ class FeishuAdapterSettings:
    verification_token: str
    group_policy: str
    allowed_group_users: frozenset[str]
+    # Bot's own open_id (app-scoped) — returned by /bot/v3/info.  Used only for
+    # @mention matching: Feishu puts this value in mentions[].id.open_id when
+    # a user @-mentions the bot in a group chat.
    bot_open_id: str
+    # Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
    bot_user_id: str
    bot_name: str
    dedup_cache_size: int
@@ -492,14 +566,17 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
+def parse_feishu_post_payload(
+    payload: Any,
+    *,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
-    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -510,7 +587,10 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
+            "".join(
+                _render_post_element(item, image_keys, media_refs, mentions_map)
+                for item in row
+            )
        )
        if row_text:
            parts.append(row_text)
@@ -519,7 +599,6 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
-        mentioned_ids=mentioned_ids,
    )


@@ -571,7 +650,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(element, str):
        return element
@@ -589,19 +668,21 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        mentioned_id = (
-            str(element.get("open_id", "")).strip()
-            or str(element.get("user_id", "")).strip()
-        )
-        if mentioned_id and mentioned_id not in mentioned_ids:
-            mentioned_ids.append(mentioned_id)
-        display_name = (
-            str(element.get("user_name", "")).strip()
-            or str(element.get("name", "")).strip()
-            or str(element.get("text", "")).strip()
-            or mentioned_id
-        )
-        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
+        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
+        # the real ref in mentions_map for the display name.
+        placeholder = str(element.get("user_id", "")).strip()
+        if placeholder == "@_all":
+            # Feishu SDK sometimes omits @_all from the top-level mentions
+            # payload; record it here so the caller's mention list stays complete.
+            if mentions_map is not None and "@_all" not in mentions_map:
+                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
+            return "@all"
+        ref = (mentions_map or {}).get(placeholder)
+        if ref is not None:
+            display_name = ref.name or ref.open_id or "user"
+        else:
+            display_name = str(element.get("user_name", "")).strip() or "user"
+        return f"@{_escape_markdown_text(display_name)}"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -639,8 +720,7 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        value = element.get(key)
-        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
+        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -650,7 +730,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -658,17 +738,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
+        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    return ""
@@ -679,31 +759,48 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
+def normalize_feishu_message(
+    *,
+    message_type: str,
+    raw_content: str,
+    mentions: Optional[Sequence[Any]] = None,
+    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
+) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
+    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
+        text = str(payload.get("text", "") or "")
+        # Feishu SDK sometimes omits @_all from the mentions payload even when
+        # the text literal contains it (confirmed via im.v1.message.get).
+        if "@_all" in text and "@_all" not in mentions_map:
+            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
+            text_content=_normalize_feishu_text(text, mentions_map),
+            mentions=list(mentions_map.values()),
        )
    if normalized_type == "post":
-        parsed_post = parse_feishu_post_payload(payload)
+        # The walker writes back to mentions_map if it encounters
+        # <at user_id="@_all">, so reading .values() after parsing is enough.
+        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentioned_ids=list(parsed_post.mentioned_ids),
+            mentions=list(mentions_map.values()),
            relation_kind="post",
        )
+    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT
+            or FALLBACK_IMAGE_TEXT,
+            mentions_map,
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -711,6 +808,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
+            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -722,6 +820,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
+            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -996,8 +1095,20 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(text: str) -> str:
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
+def _normalize_feishu_text(
+    text: str,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> str:
+    def _sub(match: "re.Match[str]") -> str:
+        key = match.group(0)
+        ref = (mentions_map or {}).get(key)
+        if ref is None:
+            return " "
+        name = ref.name or ref.open_id or "user"
+        return f"@{name}"
+
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
+    cleaned = cleaned.replace("@_all", "@all")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1016,6 +1127,117 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


+# ---------------------------------------------------------------------------
+# Mention helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_mention_ids(mention: Any) -> tuple[str, str]:
+    # Returns (open_id, user_id). im.v1.message.get hands back id as a string
+    # plus id_type discriminator; event payloads hand back a nested UserId
+    # object carrying both fields.
+    mention_id = getattr(mention, "id", None)
+    if isinstance(mention_id, str):
+        id_type = str(getattr(mention, "id_type", "") or "").lower()
+        if id_type == "open_id":
+            return mention_id, ""
+        if id_type == "user_id":
+            return "", mention_id
+        return "", ""
+    if mention_id is None:
+        return "", ""
+    return (
+        str(getattr(mention_id, "open_id", "") or ""),
+        str(getattr(mention_id, "user_id", "") or ""),
+    )
+
+
+def _build_mentions_map(
+    mentions: Optional[Sequence[Any]],
+    bot: _FeishuBotIdentity,
+) -> Dict[str, FeishuMentionRef]:
+    result: Dict[str, FeishuMentionRef] = {}
+    for mention in mentions or []:
+        key = str(getattr(mention, "key", "") or "")
+        if not key:
+            continue
+        if key == "@_all":
+            result[key] = FeishuMentionRef(is_all=True)
+            continue
+        open_id, user_id = _extract_mention_ids(mention)
+        name = str(getattr(mention, "name", "") or "").strip()
+        result[key] = FeishuMentionRef(
+            name=name,
+            open_id=open_id,
+            is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
+        )
+    return result
+
+
+def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
+    parts: List[str] = []
+    seen: set = set()
+    for ref in mentions:
+        if ref.is_self:
+            continue
+        signature = (ref.is_all, ref.open_id, ref.name)
+        if signature in seen:
+            continue
+        seen.add(signature)
+        if ref.is_all:
+            parts.append("@all")
+        elif ref.open_id:
+            parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
+        else:
+            parts.append(ref.name or "unknown")
+    return f"[Mentioned: {', '.join(parts)}]" if parts else ""
+
+
+def _strip_edge_self_mentions(
+    text: str,
+    mentions: Sequence[FeishuMentionRef],
+) -> str:
+    # Leading: strip consecutive self-mentions unconditionally.
+    # Trailing: strip only when followed by whitespace/terminal punct, so
+    # mid-sentence references ("don't @Bot again") stay intact.
+    # Leading word-boundary prevents @Al from eating @Alice.
+    if not text:
+        return text
+    self_names = [
+        f"@{ref.name or ref.open_id or 'user'}"
+        for ref in mentions
+        if ref.is_self
+    ]
+    if not self_names:
+        return text
+
+    remaining = text.lstrip()
+    while True:
+        for nm in self_names:
+            if not remaining.startswith(nm):
+                continue
+            after = remaining[len(nm):]
+            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
+                continue
+            remaining = after.lstrip()
+            break
+        else:
+            break
+
+    while True:
+        i = len(remaining)
+        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
+            i -= 1
+        body = remaining[:i]
+        tail = remaining[i:]
+        for nm in self_names:
+            if body.endswith(nm):
+                remaining = body[: -len(nm)].rstrip() + tail
+                break
+        else:
+            return remaining
+
+
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -1141,6 +1363,9 @@ class FeishuAdapter(BasePlatformAdapter):
        # Exec approval button state (approval_id → {session_key, message_id, chat_id})
        self._approval_state: Dict[int, Dict[str, str]] = {}
        self._approval_counter = itertools.count(1)
+        # Feishu reaction deletion requires the opaque reaction_id returned
+        # by create, so we cache it per message_id.
+        self._pending_processing_reactions: "OrderedDict[str, str]" = OrderedDict()
        self._load_seen_message_ids()

    @staticmethod
@@ -1468,11 +1693,14 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_id: str,
        message_id: str,
        content: str,
+        *,
+        finalize: bool = False,
    ) -> SendResult:
        """Edit a previously sent Feishu text/post message."""
        if not self._client:
            return SendResult(success=False, error="Not connected")

+        content = self.format_message(content)
        try:
            msg_type, payload = self._build_outbound_payload(content)
            body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2048,12 +2276,12 @@ class FeishuAdapter(BasePlatformAdapter):
            operator_type,
            emoji_type,
        )
-        # Only process reactions from real users. Ignore app/bot-generated reactions
-        # and Hermes' own ACK emoji to avoid feedback loops.
+        # Drop bot/app-origin reactions to break the feedback loop from our
+        # own lifecycle reactions. A human reacting with the same emoji (e.g.
+        # clicking Typing on a bot message) is still routed through.
        loop = self._loop
        if (
            operator_type in {"bot", "app"}
-            or emoji_type == _FEISHU_ACK_EMOJI
            or not message_id
            or loop is None
            or bool(getattr(loop, "is_closed", lambda: False)())
@@ -2277,33 +2505,35 @@ class FeishuAdapter(BasePlatformAdapter):

    async def _handle_message_with_guards(self, event: MessageEvent) -> None:
        """Dispatch a single event through the agent pipeline with per-chat serialization
-        and a persistent ACK emoji reaction before processing starts.
+        before handing the event off to the agent.

-        - Per-chat lock: ensures messages in the same chat are processed one at a time
-          (matches openclaw's createChatQueue serial queue behaviour).
-        - ACK indicator: adds a CHECK reaction to the triggering message before handing
-          off to the agent and leaves it in place as a receipt marker.
+        Per-chat lock ensures messages in the same chat are processed one at a
+        time (matches openclaw's createChatQueue serial queue behaviour).
        """
        chat_id = getattr(event.source, "chat_id", "") or "" if event.source else ""
        chat_lock = self._get_chat_lock(chat_id)
        async with chat_lock:
-            message_id = event.message_id
-            if message_id:
-                await self._add_ack_reaction(message_id)
            await self.handle_message(event)

-    async def _add_ack_reaction(self, message_id: str) -> Optional[str]:
-        """Add a persistent ACK emoji reaction to signal the message was received."""
-        if not self._client or not message_id:
+    # =========================================================================
+    # Processing status reactions
+    # =========================================================================
+
+    def _reactions_enabled(self) -> bool:
+        return os.getenv("FEISHU_REACTIONS", "true").strip().lower() not in ("false", "0", "no")
+
+    async def _add_reaction(self, message_id: str, emoji_type: str) -> Optional[str]:
+        """Return the reaction_id on success, else None. The id is needed later for deletion."""
+        if not self._client or not message_id or not emoji_type:
            return None
        try:
-            from lark_oapi.api.im.v1 import (  # lazy import — keeps optional dep optional
+            from lark_oapi.api.im.v1 import (
                CreateMessageReactionRequest,
                CreateMessageReactionRequestBody,
            )
            body = (
                CreateMessageReactionRequestBody.builder()
-                .reaction_type({"emoji_type": _FEISHU_ACK_EMOJI})
+                .reaction_type({"emoji_type": emoji_type})
                .build()
            )
            request = (
@@ -2316,16 +2546,93 @@ class FeishuAdapter(BasePlatformAdapter):
            if response and getattr(response, "success", lambda: False)():
                data = getattr(response, "data", None)
                return getattr(data, "reaction_id", None)
-            logger.warning(
-                "[Feishu] Failed to add ack reaction to %s: code=%s msg=%s",
+            logger.debug(
+                "[Feishu] Add reaction %s on %s rejected: code=%s msg=%s",
+                emoji_type,
                message_id,
                getattr(response, "code", None),
                getattr(response, "msg", None),
            )
        except Exception:
-            logger.warning("[Feishu] Failed to add ack reaction to %s", message_id, exc_info=True)
+            logger.warning(
+                "[Feishu] Add reaction %s on %s raised",
+                emoji_type,
+                message_id,
+                exc_info=True,
+            )
        return None

+    async def _remove_reaction(self, message_id: str, reaction_id: str) -> bool:
+        if not self._client or not message_id or not reaction_id:
+            return False
+        try:
+            from lark_oapi.api.im.v1 import DeleteMessageReactionRequest
+            request = (
+                DeleteMessageReactionRequest.builder()
+                .message_id(message_id)
+                .reaction_id(reaction_id)
+                .build()
+            )
+            response = await asyncio.to_thread(self._client.im.v1.message_reaction.delete, request)
+            if response and getattr(response, "success", lambda: False)():
+                return True
+            logger.debug(
+                "[Feishu] Remove reaction %s on %s rejected: code=%s msg=%s",
+                reaction_id,
+                message_id,
+                getattr(response, "code", None),
+                getattr(response, "msg", None),
+            )
+        except Exception:
+            logger.warning(
+                "[Feishu] Remove reaction %s on %s raised",
+                reaction_id,
+                message_id,
+                exc_info=True,
+            )
+        return False
+
+    def _remember_processing_reaction(self, message_id: str, reaction_id: str) -> None:
+        cache = self._pending_processing_reactions
+        cache[message_id] = reaction_id
+        cache.move_to_end(message_id)
+        while len(cache) > _FEISHU_PROCESSING_REACTION_CACHE_SIZE:
+            cache.popitem(last=False)
+
+    def _pop_processing_reaction(self, message_id: str) -> Optional[str]:
+        return self._pending_processing_reactions.pop(message_id, None)
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        if not self._reactions_enabled():
+            return
+        message_id = event.message_id
+        if not message_id or message_id in self._pending_processing_reactions:
+            return
+        reaction_id = await self._add_reaction(message_id, _FEISHU_REACTION_IN_PROGRESS)
+        if reaction_id:
+            self._remember_processing_reaction(message_id, reaction_id)
+
+    async def on_processing_complete(
+        self, event: MessageEvent, outcome: ProcessingOutcome
+    ) -> None:
+        if not self._reactions_enabled():
+            return
+        message_id = event.message_id
+        if not message_id:
+            return
+
+        start_reaction_id = self._pending_processing_reactions.get(message_id)
+        if start_reaction_id:
+            if not await self._remove_reaction(message_id, start_reaction_id):
+                # Don't stack a second badge on top of a Typing we couldn't
+                # remove — UI would read as both "working" and "done/failed"
+                # simultaneously. Keep the handle so LRU eventually evicts it.
+                return
+            self._pop_processing_reaction(message_id)
+
+        if outcome is ProcessingOutcome.FAILURE:
+            await self._add_reaction(message_id, _FEISHU_REACTION_FAILURE)
+
    # =========================================================================
    # Webhook server and security
    # =========================================================================
@@ -2373,13 +2680,22 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
+        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
+
+        if inbound_type == MessageType.TEXT:
+            text = _strip_edge_self_mentions(text, mentions)
+            if text.startswith("/"):
+                inbound_type = MessageType.COMMAND
+
+        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
+            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
            return

-        if inbound_type == MessageType.TEXT and text.startswith("/"):
-            inbound_type = MessageType.COMMAND
+        if inbound_type != MessageType.COMMAND:
+            hint = _build_mention_hint(mentions)
+            if hint:
+                text = f"{hint}\n\n{text}" if text else hint

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -2838,14 +3154,20 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
-        """Extract text and cached media from a normalized Feishu message."""
+    async def _extract_message_content(
+        self, message: Any
+    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
+        normalized = normalize_feishu_message(
+            message_type=raw_type,
+            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
+        )
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -2862,7 +3184,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types
+        return text, inbound_type, media_urls, media_types, list(normalized.mentions)

    async def _download_feishu_message_resources(
        self,
@@ -3126,10 +3448,22 @@ class FeishuAdapter(BasePlatformAdapter):
        return "group"

    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
+        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
+
+        Preference order for the primary ``user_id`` field:
+          1. user_id  (tenant-scoped, most stable — requires permission scope)
+          2. open_id  (app-scoped, always available — different per bot app)
+
+        ``user_id_alt`` carries the union_id (developer-scoped, stable across
+        all apps by the same developer).  Session-key generation prefers
+        user_id_alt when present, so participant isolation stays stable even
+        if the primary ID is the app-scoped open_id.
+        """
        open_id = getattr(sender_id, "open_id", None) or None
        user_id = getattr(sender_id, "user_id", None) or None
        union_id = getattr(sender_id, "union_id", None) or None
-        primary_id = open_id or user_id
+        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
+        primary_id = user_id or open_id
        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
        return {
            "user_id": primary_id,
@@ -3211,15 +3545,31 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
+            parent_mentions = getattr(parent, "mentions", None) if parent else None
+            text = self._extract_text_from_raw_content(
+                msg_type=msg_type,
+                raw_content=raw_content,
+                mentions=parent_mentions,
+            )
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
-        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
+    def _extract_text_from_raw_content(
+        self,
+        *,
+        msg_type: str,
+        raw_content: str,
+        mentions: Optional[Sequence[Any]] = None,
+    ) -> Optional[str]:
+        normalized = normalize_feishu_message(
+            message_type=msg_type,
+            raw_content=raw_content,
+            mentions=mentions,
+            bot=self._bot_identity(),
+        )
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3289,10 +3639,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
        )
-        if normalized.mentioned_ids:
-            return self._post_mentions_bot(normalized.mentioned_ids)
-        return False
+        return self._post_mentions_bot(normalized.mentions)

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3312,30 +3662,37 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        """Check whether any mention targets the configured or inferred bot identity."""
+        # IDs trump names: when both sides have open_id (or both user_id),
+        # match requires equal IDs. Name fallback only when either side
+        # lacks an ID.
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = getattr(mention_id, "open_id", None)
-            mention_user_id = getattr(mention_id, "user_id", None)
+            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
+            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if self._bot_open_id and mention_open_id == self._bot_open_id:
-                return True
-            if self._bot_user_id and mention_user_id == self._bot_user_id:
-                return True
+            if mention_open_id and self._bot_open_id:
+                if mention_open_id == self._bot_open_id:
+                    return True
+                continue  # IDs differ — not the bot; skip name fallback.
+            if mention_user_id and self._bot_user_id:
+                if mention_user_id == self._bot_user_id:
+                    return True
+                continue
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
-        if not mentioned_ids:
-            return False
-        if self._bot_open_id and self._bot_open_id in mentioned_ids:
-            return True
-        if self._bot_user_id and self._bot_user_id in mentioned_ids:
-            return True
-        return False
+    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
+        return any(m.is_self for m in mentions)
+
+    def _bot_identity(self) -> _FeishuBotIdentity:
+        return _FeishuBotIdentity(
+            open_id=self._bot_open_id,
+            user_id=self._bot_user_id,
+            name=self._bot_name,
+        )

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3360,14 +3717,15 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                resp = await asyncio.to_thread(
-                    self._client.request,
-                    method="GET",
-                    url="/open-apis/bot/v3/info",
-                    body=None,
-                    raw_response=True,
+                req = (
+                    BaseRequest.builder()
+                    .http_method(HttpMethod.GET)
+                    .uri("/open-apis/bot/v3/info")
+                    .token_types({AccessTokenType.TENANT})
+                    .build()
                )
-                content = getattr(resp, "content", None)
+                resp = await asyncio.to_thread(self._client.request, req)
+                content = getattr(getattr(resp, "raw", None), "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4115,6 +4473,9 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:

    Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
+
+    Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
+    that Feishu puts in @mention payloads.  It is NOT the app_id.
    """
    if FEISHU_AVAILABLE:
        return _probe_bot_sdk(app_id, app_secret, domain)
@@ -4135,12 +4496,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
+    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("bot_name"),
+        "bot_name": bot.get("app_name") or bot.get("bot_name"),
        "bot_open_id": bot.get("open_id"),
    }

@@ -4149,13 +4510,18 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        resp = client.request(
-            method="GET",
-            url="/open-apis/bot/v3/info",
-            body=None,
-            raw_response=True,
+        req = (
+            BaseRequest.builder()
+            .http_method(HttpMethod.GET)
+            .uri("/open-apis/bot/v3/info")
+            .token_types({AccessTokenType.TENANT})
+            .build()
        )
-        return _parse_bot_response(json.loads(resp.content))
+        resp = client.request(req)
+        content = getattr(getattr(resp, "raw", None), "content", None)
+        if content is None:
+            return None
+        return _parse_bot_response(json.loads(content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -825,7 +825,7 @@ class MatrixAdapter(BasePlatformAdapter):


    async def edit_message(
-        self, chat_id: str, message_id: str, content: str
+        self, chat_id: str, message_id: str, content: str, *, finalize: bool = False
    ) -> SendResult:
        """Edit an existing message (via m.replace)."""

@@ -304,7 +304,7 @@ class MattermostAdapter(BasePlatformAdapter):
        )

    async def edit_message(
-        self, chat_id: str, message_id: str, content: str
+        self, chat_id: str, message_id: str, content: str, *, finalize: bool = False
    ) -> SendResult:
        """Edit an existing post."""
        formatted = self.format_message(content)
@@ -410,7 +410,6 @@ class MattermostAdapter(BasePlatformAdapter):
            logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)

-        import asyncio
        import aiohttp

        last_exc = None
@@ -26,9 +26,8 @@ from .adapter import (  # noqa: F401
 # -- Onboard (QR-code scan-to-configure) -----------------------------------
 from .onboard import (  # noqa: F401
    BindStatus,
-    create_bind_task,
-    poll_bind_result,
    build_connect_url,
+    qr_register,
 )
 from .crypto import decrypt_secret, generate_bind_key  # noqa: F401

@@ -44,9 +43,8 @@ __all__ = [
    "_ssrf_redirect_guard",
    # onboard
    "BindStatus",
-    "create_bind_task",
-    "poll_bind_result",
    "build_connect_url",
+    "qr_register",
    # crypto
    "decrypt_secret",
    "generate_bind_key",
@@ -535,6 +535,9 @@ class QQAdapter(BasePlatformAdapter):
                    quick_disconnect_count = 0
                else:
                    backoff_idx += 1
+                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
+                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
+                        return

            except Exception as exc:
                if not self._running:
@@ -1086,11 +1089,8 @@ class QQAdapter(BasePlatformAdapter):
            return MessageType.VIDEO
        if "image" in first_type or "photo" in first_type:
            return MessageType.PHOTO
-        # Unknown content type with an attachment — don't assume PHOTO
-        # to prevent non-image files from being sent to vision analysis.
        logger.debug(
-            "[%s] Unknown media content_type '%s', defaulting to TEXT",
-            self._log_tag,
+            "Unknown media content_type '%s', defaulting to TEXT",
            first_type,
        )
        return MessageType.TEXT
@@ -1826,14 +1826,12 @@ class QQAdapter(BasePlatformAdapter):
            body["file_name"] = file_name

        # Retry transient upload failures
-        last_exc = None
        for attempt in range(3):
            try:
                return await self._api_request(
                    "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
                )
            except RuntimeError as exc:
-                last_exc = exc
                err_msg = str(exc)
                if any(
                        kw in err_msg
@@ -1842,8 +1840,8 @@ class QQAdapter(BasePlatformAdapter):
                    raise
                if attempt < 2:
                    await asyncio.sleep(1.5 * (attempt + 1))
-
-        raise last_exc  # type: ignore[misc]
+                else:
+                    raise

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
@@ -1,6 +1,10 @@
 """
 QQBot scan-to-configure (QR code onboard) module.

+Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
+entry-point ``qr_register()`` that handles the full flow (create task →
+display QR code → poll → decrypt credentials).
+
 Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
 generate a QR-code URL and poll for scan completion.  On success the caller
 receives the bot's *app_id*, *client_secret* (decrypted locally), and the
@@ -12,18 +16,20 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
 from __future__ import annotations

 import logging
+import time
 from enum import IntEnum
-from typing import Tuple
+from typing import Optional, Tuple
 from urllib.parse import quote

 from .constants import (
    ONBOARD_API_TIMEOUT,
    ONBOARD_CREATE_PATH,
+    ONBOARD_POLL_INTERVAL,
    ONBOARD_POLL_PATH,
    PORTAL_HOST,
    QR_URL_TEMPLATE,
 )
-from .crypto import generate_bind_key
+from .crypto import decrypt_secret, generate_bind_key
 from .utils import get_api_headers

 logger = logging.getLogger(__name__)
@@ -35,7 +41,7 @@ logger = logging.getLogger(__name__)


 class BindStatus(IntEnum):
-    """Status codes returned by ``poll_bind_result``."""
+    """Status codes returned by ``_poll_bind_result``."""

    NONE = 0
    PENDING = 1
@@ -44,18 +50,40 @@ class BindStatus(IntEnum):


 # ---------------------------------------------------------------------------
-# Public API
+# QR rendering
+# ---------------------------------------------------------------------------
+
+try:
+    import qrcode as _qrcode_mod
+except (ImportError, TypeError):
+    _qrcode_mod = None  # type: ignore[assignment]
+
+
+def _render_qr(url: str) -> bool:
+    """Try to render a QR code in the terminal. Returns True if successful."""
+    if _qrcode_mod is None:
+        return False
+    try:
+        qr = _qrcode_mod.QRCode(
+            error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
+            border=2,
+        )
+        qr.add_data(url)
+        qr.make(fit=True)
+        qr.print_ascii(invert=True)
+        return True
+    except Exception:
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
 # ---------------------------------------------------------------------------


-async def create_bind_task(
-    timeout: float = ONBOARD_API_TIMEOUT,
-) -> Tuple[str, str]:
+def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
    """Create a bind task and return *(task_id, aes_key_base64)*.

-    The AES key is generated locally and sent to the server so it can
-    encrypt the bot credentials before returning them.
-
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -64,8 +92,8 @@ async def create_bind_task(
    url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
    key = generate_bind_key()

-    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-        resp = await client.post(url, json={"key": key}, headers=get_api_headers())
+    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+        resp = client.post(url, json={"key": key}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -80,7 +108,7 @@ async def create_bind_task(
    return task_id, key


-async def poll_bind_result(
+def _poll_bind_result(
    task_id: str,
    timeout: float = ONBOARD_API_TIMEOUT,
 ) -> Tuple[BindStatus, str, str, str]:
@@ -89,12 +117,6 @@ async def poll_bind_result(
    Returns:
        A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.

-        * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
-          :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
-          key from :func:`create_bind_task`.
-        * ``user_openid`` is the OpenID of the person who scanned the code
-          (available when ``status == COMPLETED``).
-
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -102,8 +124,8 @@ async def poll_bind_result(

    url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"

-    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-        resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
+    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+        resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -122,3 +144,77 @@ async def poll_bind_result(
 def build_connect_url(task_id: str) -> str:
    """Build the QR-code target URL for a given *task_id*."""
    return QR_URL_TEMPLATE.format(task_id=quote(task_id))
+
+
+# ---------------------------------------------------------------------------
+# Public entry-point
+# ---------------------------------------------------------------------------
+
+_MAX_REFRESHES = 3
+
+
+def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
+    """Run the QQBot scan-to-configure QR registration flow.
+
+    Mirrors ``feishu.qr_register()``: handles create → display → poll →
+    decrypt in one call.  Unexpected errors propagate to the caller.
+
+    :returns:
+        ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
+        success, or ``None`` on failure / expiry / cancellation.
+    """
+    deadline = time.monotonic() + timeout_seconds
+
+    for refresh_count in range(_MAX_REFRESHES + 1):
+        # ── Create bind task ──
+        try:
+            task_id, aes_key = _create_bind_task()
+        except Exception as exc:
+            logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
+            return None
+
+        url = build_connect_url(task_id)
+
+        # ── Display QR code + URL ──
+        print()
+        if _render_qr(url):
+            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
+        else:
+            print(f"  Open this URL in QQ on your phone:\n  {url}")
+            print("  Tip: pip install qrcode  to display a scannable QR code here")
+        print()
+
+        # ── Poll loop ──
+        while time.monotonic() < deadline:
+            try:
+                status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
+            except Exception:
+                time.sleep(ONBOARD_POLL_INTERVAL)
+                continue
+
+            if status == BindStatus.COMPLETED:
+                client_secret = decrypt_secret(encrypted_secret, aes_key)
+                print()
+                print(f"  QR scan complete! (App ID: {app_id})")
+                if user_openid:
+                    print(f"  Scanner's OpenID: {user_openid}")
+                return {
+                    "app_id": app_id,
+                    "client_secret": client_secret,
+                    "user_openid": user_openid,
+                }
+
+            if status == BindStatus.EXPIRED:
+                if refresh_count >= _MAX_REFRESHES:
+                    logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
+                    return None
+                print(f"\n  QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
+                break  # next for-loop iteration creates a new task
+
+            time.sleep(ONBOARD_POLL_INTERVAL)
+        else:
+            # deadline reached without completing
+            logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
+            return None
+
+    return None
@@ -18,6 +18,7 @@ import logging
 import os
 import random
 import time
+import uuid
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Dict, List, Optional, Any
@@ -127,6 +128,27 @@ def _render_mentions(text: str, mentions: list) -> str:
    return text


+def _is_signal_service_id(value: str) -> bool:
+    """Return True if *value* already looks like a Signal service identifier."""
+    if not value:
+        return False
+    if value.startswith("PNI:") or value.startswith("u:"):
+        return True
+    try:
+        uuid.UUID(value)
+        return True
+    except (ValueError, AttributeError, TypeError):
+        return False
+
+
+def _looks_like_e164_number(value: str) -> bool:
+    """Return True for a plausible E.164 phone number."""
+    if not value or not value.startswith("+"):
+        return False
+    digits = value[1:]
+    return digits.isdigit() and 7 <= len(digits) <= 15
+
+
 def check_signal_requirements() -> bool:
    """Check if Signal is configured (has URL and account)."""
    return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT"))
@@ -179,6 +201,12 @@ class SignalAdapter(BasePlatformAdapter):
        # in Note to Self / self-chat mode (mirrors WhatsApp recentlySentIds)
        self._recent_sent_timestamps: set = set()
        self._max_recent_timestamps = 50
+        # Signal increasingly exposes ACI/PNI UUIDs as stable recipient IDs.
+        # Keep a best-effort mapping so outbound sends can upgrade from a
+        # phone number to the corresponding UUID when signal-cli prefers it.
+        self._recipient_uuid_by_number: Dict[str, str] = {}
+        self._recipient_number_by_uuid: Dict[str, str] = {}
+        self._recipient_cache_lock = asyncio.Lock()

        logger.info("Signal adapter initialized: url=%s account=%s groups=%s",
                     self.http_url, redact_phone(self.account),
@@ -195,31 +223,40 @@ class SignalAdapter(BasePlatformAdapter):
            return False

        # Acquire scoped lock to prevent duplicate Signal listeners for the same phone
+        lock_acquired = False
        try:
            if not self._acquire_platform_lock('signal-phone', self.account, 'Signal account'):
                return False
+            lock_acquired = True
        except Exception as e:
            logger.warning("Signal: Could not acquire phone lock (non-fatal): %s", e)

        self.client = httpx.AsyncClient(timeout=30.0)
-
-        # Health check — verify signal-cli daemon is reachable
        try:
-            resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
-            if resp.status_code != 200:
-                logger.error("Signal: health check failed (status %d)", resp.status_code)
+            # Health check — verify signal-cli daemon is reachable
+            try:
+                resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0)
+                if resp.status_code != 200:
+                    logger.error("Signal: health check failed (status %d)", resp.status_code)
+                    return False
+            except Exception as e:
+                logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
                return False
-        except Exception as e:
-            logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e)
-            return False

-        self._running = True
-        self._last_sse_activity = time.time()
-        self._sse_task = asyncio.create_task(self._sse_listener())
-        self._health_monitor_task = asyncio.create_task(self._health_monitor())
+            self._running = True
+            self._last_sse_activity = time.time()
+            self._sse_task = asyncio.create_task(self._sse_listener())
+            self._health_monitor_task = asyncio.create_task(self._health_monitor())

-        logger.info("Signal: connected to %s", self.http_url)
-        return True
+            logger.info("Signal: connected to %s", self.http_url)
+            return True
+        finally:
+            if not self._running:
+                if self.client:
+                    await self.client.aclose()
+                    self.client = None
+                if lock_acquired:
+                    self._release_platform_lock()

    async def disconnect(self) -> None:
        """Stop SSE listener and clean up."""
@@ -400,6 +437,7 @@ class SignalAdapter(BasePlatformAdapter):
        )
        sender_name = envelope_data.get("sourceName", "")
        sender_uuid = envelope_data.get("sourceUuid", "")
+        self._remember_recipient_identifiers(sender, sender_uuid)

        if not sender:
            logger.debug("Signal: ignoring envelope with no sender")
@@ -518,6 +556,64 @@ class SignalAdapter(BasePlatformAdapter):

        await self.handle_message(event)

+    def _remember_recipient_identifiers(self, number: Optional[str], service_id: Optional[str]) -> None:
+        """Cache any number↔UUID mapping observed from Signal envelopes."""
+        if not number or not service_id or not _is_signal_service_id(service_id):
+            return
+        self._recipient_uuid_by_number[number] = service_id
+        self._recipient_number_by_uuid[service_id] = number
+
+    def _extract_contact_uuid(self, contact: Any, phone_number: str) -> Optional[str]:
+        """Best-effort extraction of a Signal service ID from listContacts output."""
+        if not isinstance(contact, dict):
+            return None
+
+        number = contact.get("number")
+        recipient = contact.get("recipient")
+        service_id = contact.get("uuid") or contact.get("serviceId")
+        if not service_id:
+            profile = contact.get("profile")
+            if isinstance(profile, dict):
+                service_id = profile.get("serviceId") or profile.get("uuid")
+
+        if service_id and _is_signal_service_id(service_id):
+            matches_number = number == phone_number or recipient == phone_number
+            if matches_number:
+                return service_id
+        return None
+
+    async def _resolve_recipient(self, chat_id: str) -> str:
+        """Return the preferred Signal recipient identifier for a direct chat."""
+        if (
+            not chat_id
+            or chat_id.startswith("group:")
+            or _is_signal_service_id(chat_id)
+            or not _looks_like_e164_number(chat_id)
+        ):
+            return chat_id
+
+        cached = self._recipient_uuid_by_number.get(chat_id)
+        if cached:
+            return cached
+
+        async with self._recipient_cache_lock:
+            cached = self._recipient_uuid_by_number.get(chat_id)
+            if cached:
+                return cached
+
+            contacts = await self._rpc("listContacts", {
+                "account": self.account,
+                "allRecipients": True,
+            })
+            if isinstance(contacts, list):
+                for contact in contacts:
+                    number = contact.get("number") if isinstance(contact, dict) else None
+                    service_id = self._extract_contact_uuid(contact, chat_id)
+                    if number and service_id:
+                        self._remember_recipient_identifiers(number, service_id)
+
+            return self._recipient_uuid_by_number.get(chat_id, chat_id)
+
    # ------------------------------------------------------------------
    # Attachment Handling
    # ------------------------------------------------------------------
@@ -633,7 +729,7 @@ class SignalAdapter(BasePlatformAdapter):
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
-            params["recipient"] = [chat_id]
+            params["recipient"] = [await self._resolve_recipient(chat_id)]

        result = await self._rpc("send", params)

@@ -684,7 +780,7 @@ class SignalAdapter(BasePlatformAdapter):
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
-            params["recipient"] = [chat_id]
+            params["recipient"] = [await self._resolve_recipient(chat_id)]

        fails = self._typing_failures.get(chat_id, 0)
        result = await self._rpc(
@@ -745,7 +841,7 @@ class SignalAdapter(BasePlatformAdapter):
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
-            params["recipient"] = [chat_id]
+            params["recipient"] = [await self._resolve_recipient(chat_id)]

        result = await self._rpc("send", params)
        if result is not None:
@@ -784,7 +880,7 @@ class SignalAdapter(BasePlatformAdapter):
        if chat_id.startswith("group:"):
            params["groupId"] = chat_id[6:]
        else:
-            params["recipient"] = [chat_id]
+            params["recipient"] = [await self._resolve_recipient(chat_id)]

        result = await self._rpc("send", params)
        if result is not None:
@@ -38,6 +38,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    SUPPORTED_DOCUMENT_TYPES,
    safe_url_for_log,
@@ -113,6 +114,11 @@ class SlackAdapter(BasePlatformAdapter):
        # Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
        self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
        self._THREAD_CACHE_TTL = 60.0
+        # Track message IDs that should get reaction lifecycle (DMs / @mentions).
+        self._reacting_message_ids: set = set()
+        # Track active assistant thread status indicators so stop_typing can
+        # clear them (chat_id → thread_ts).
+        self._active_status_threads: Dict[str, str] = {}

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -150,9 +156,11 @@ class SlackAdapter(BasePlatformAdapter):
            except Exception as e:
                logger.warning("[Slack] Failed to read %s: %s", tokens_file, e)

+        lock_acquired = False
        try:
            if not self._acquire_platform_lock('slack-app-token', app_token, 'Slack app token'):
                return False
+            lock_acquired = True

            # First token is the primary — used for AsyncApp / Socket Mode
            primary_token = bot_tokens[0]
@@ -228,6 +236,9 @@ class SlackAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[Slack] Connection failed: %s", e, exc_info=True)
            return False
+        finally:
+            if lock_acquired and not self._running:
+                self._release_platform_lock()

    async def disconnect(self) -> None:
        """Disconnect from Slack."""
@@ -316,6 +327,8 @@ class SlackAdapter(BasePlatformAdapter):
        chat_id: str,
        message_id: str,
        content: str,
+        *,
+        finalize: bool = False,
    ) -> SendResult:
        """Edit a previously sent Slack message."""
        if not self._app:
@@ -355,6 +368,7 @@ class SlackAdapter(BasePlatformAdapter):
        if not thread_ts:
            return  # Can only set status in a thread context

+        self._active_status_threads[chat_id] = thread_ts
        try:
            await self._get_client(chat_id).assistant_threads_setStatus(
                channel_id=chat_id,
@@ -366,6 +380,22 @@ class SlackAdapter(BasePlatformAdapter):
            # in an assistant-enabled context. Falls back to reactions.
            logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)

+    async def stop_typing(self, chat_id: str) -> None:
+        """Clear the assistant thread status indicator."""
+        if not self._app:
+            return
+        thread_ts = self._active_status_threads.pop(chat_id, None)
+        if not thread_ts:
+            return
+        try:
+            await self._get_client(chat_id).assistant_threads_setStatus(
+                channel_id=chat_id,
+                thread_ts=thread_ts,
+                status="",
+            )
+        except Exception as e:
+            logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e)
+
    def _dm_top_level_threads_as_sessions(self) -> bool:
        """Whether top-level Slack DMs get per-message session threads.

@@ -577,6 +607,38 @@ class SlackAdapter(BasePlatformAdapter):
            logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e)
            return False

+    def _reactions_enabled(self) -> bool:
+        """Check if message reactions are enabled via config/env."""
+        return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """Add an in-progress reaction when message processing begins."""
+        if not self._reactions_enabled():
+            return
+        ts = getattr(event, "message_id", None)
+        if not ts or ts not in self._reacting_message_ids:
+            return
+        channel_id = getattr(event.source, "chat_id", None)
+        if channel_id:
+            await self._add_reaction(channel_id, ts, "eyes")
+
+    async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
+        """Swap the in-progress reaction for a final success/failure reaction."""
+        if not self._reactions_enabled():
+            return
+        ts = getattr(event, "message_id", None)
+        if not ts or ts not in self._reacting_message_ids:
+            return
+        self._reacting_message_ids.discard(ts)
+        channel_id = getattr(event.source, "chat_id", None)
+        if not channel_id:
+            return
+        await self._remove_reaction(channel_id, ts, "eyes")
+        if outcome == ProcessingOutcome.SUCCESS:
+            await self._add_reaction(channel_id, ts, "white_check_mark")
+        elif outcome == ProcessingOutcome.FAILURE:
+            await self._add_reaction(channel_id, ts, "x")
+
    # ----- User identity resolution -----

    async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str:
@@ -1206,17 +1268,12 @@ class SlackAdapter(BasePlatformAdapter):
        # Only react when bot is directly addressed (DM or @mention).
        # In listen-all channels (require_mention=false), reacting to every
        # casual message would be noisy.
-        _should_react = is_dm or is_mentioned
-
+        _should_react = (is_dm or is_mentioned) and self._reactions_enabled()
        if _should_react:
-            await self._add_reaction(channel_id, ts, "eyes")
+            self._reacting_message_ids.add(ts)

        await self.handle_message(msg_event)

-        if _should_react:
-            await self._remove_reaction(channel_id, ts, "eyes")
-            await self._add_reaction(channel_id, ts, "white_check_mark")
-
    # ----- Approval button support (Block Kit) -----

    async def send_exec_approval(
@@ -1593,11 +1650,9 @@ class SlackAdapter(BasePlatformAdapter):

    async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
        """Download a Slack file using the bot token for auth, with retry."""
-        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
-        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1627,7 +1682,6 @@ class SlackAdapter(BasePlatformAdapter):
                        from gateway.platforms.base import cache_image_from_bytes
                        return cache_image_from_bytes(response.content, ext)
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1636,15 +1690,12 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise last_exc

    async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
        """Download a Slack file and return raw bytes, with retry."""
-        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
-        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1656,7 +1707,6 @@ class SlackAdapter(BasePlatformAdapter):
                    response.raise_for_status()
                    return response.content
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1665,7 +1715,6 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise last_exc

    # ── Channel mention gating ─────────────────────────────────────────────

@@ -11,6 +11,7 @@ import asyncio
 import json
 import logging
 import os
+import tempfile
 import html as _html
 import re
 from typing import Dict, List, Optional, Any
@@ -70,8 +71,10 @@ from gateway.platforms.base import (
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
+    cache_video_from_bytes,
    cache_document_from_bytes,
    resolve_proxy_url,
+    SUPPORTED_VIDEO_TYPES,
    SUPPORTED_DOCUMENT_TYPES,
    utf16_len,
    _prefix_within_utf16_limit,
@@ -493,6 +496,13 @@ class TelegramAdapter(BasePlatformAdapter):
                    "[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
                    self.name, name, chat_id,
                )
+            elif "not a forum" in error_text or "forums_disabled" in error_text:
+                logger.warning(
+                    "[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
+                    "The user must open the DM with this bot in Telegram, tap the bot name "
+                    "at the top, and enable 'Topics' in chat settings before topics can be created.",
+                    self.name, name, chat_id,
+                )
            else:
                logger.warning(
                    "[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -534,8 +544,23 @@ class TelegramAdapter(BasePlatformAdapter):
                        break

            if changed:
-                with open(config_path, "w") as f:
-                    _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+                fd, tmp_path = tempfile.mkstemp(
+                    dir=str(config_path.parent),
+                    suffix=".tmp",
+                    prefix=".config_",
+                )
+                try:
+                    with os.fdopen(fd, "w", encoding="utf-8") as f:
+                        _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+                        f.flush()
+                        os.fsync(f.fileno())
+                    os.replace(tmp_path, config_path)
+                except BaseException:
+                    try:
+                        os.unlink(tmp_path)
+                    except OSError:
+                        pass
+                    raise
                logger.info(
                    "[%s] Persisted thread_id=%s for topic '%s' in config.yaml",
                    self.name, thread_id, topic_name,
@@ -769,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter):
                # Telegram pushes updates to our HTTP endpoint.  This
                # enables cloud platforms (Fly.io, Railway) to auto-wake
                # suspended machines on inbound HTTP traffic.
+                #
+                # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
+                # python-telegram-bot passes secret_token=None and the
+                # webhook endpoint accepts any HTTP POST — attackers can
+                # inject forged updates as if from Telegram. Refuse to
+                # start rather than silently run in fail-open mode.
+                # See GHSA-3vpc-7q5r-276h.
                webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
-                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
+                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
+                if not webhook_secret:
+                    raise RuntimeError(
+                        "TELEGRAM_WEBHOOK_SECRET is required when "
+                        "TELEGRAM_WEBHOOK_URL is set. Without it, the "
+                        "webhook endpoint accepts forged updates from "
+                        "anyone who can reach it — see "
+                        "https://github.com/NousResearch/hermes-agent/"
+                        "security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
+                        "Generate a secret and set it in your .env:\n"
+                        "  export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
+                        "Then register it with Telegram when setting the "
+                        "webhook via setWebhook's secret_token parameter."
+                    )
                from urllib.parse import urlparse
                webhook_path = urlparse(webhook_url).path or "/telegram"

@@ -1081,6 +1126,8 @@ class TelegramAdapter(BasePlatformAdapter):
        chat_id: str,
        message_id: str,
        content: str,
+        *,
+        finalize: bool = False,
    ) -> SendResult:
        """Edit a previously sent Telegram message."""
        if not self._bot:
@@ -1686,7 +1733,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")
        
        try:
-            import os
            if not os.path.exists(audio_path):
                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
            
@@ -1735,7 +1781,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")

        try:
-            import os
            if not os.path.exists(image_path):
                return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))

@@ -2048,7 +2093,7 @@ class TelegramAdapter(BasePlatformAdapter):
            url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
            return _ph(f'[{display}]({url})')

-        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
+        text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)

        # 4) Convert markdown headers (## Title) → bold *Title*
        def _convert_header(m):
@@ -2256,22 +2301,27 @@ class TelegramAdapter(BasePlatformAdapter):

        bot_username = (getattr(self._bot, "username", None) or "").lstrip("@").lower()
        bot_id = getattr(self._bot, "id", None)
+        expected = f"@{bot_username}" if bot_username else None

        def _iter_sources():
            yield getattr(message, "text", None) or "", getattr(message, "entities", None) or []
            yield getattr(message, "caption", None) or "", getattr(message, "caption_entities", None) or []

+        # Telegram parses mentions server-side and emits MessageEntity objects
+        # (type=mention for @username, type=text_mention for @FirstName targeting
+        # a user without a public username). Only those entities are authoritative —
+        # raw substring matches like "foo@hermes_bot.example" are not mentions
+        # (bug #12545). Entities also correctly handle @handles inside URLs, code
+        # blocks, and quoted text, where a regex scan would over-match.
        for source_text, entities in _iter_sources():
-            if bot_username and f"@{bot_username}" in source_text.lower():
-                return True
            for entity in entities:
                entity_type = str(getattr(entity, "type", "")).split(".")[-1].lower()
-                if entity_type == "mention" and bot_username:
+                if entity_type == "mention" and expected:
                    offset = int(getattr(entity, "offset", -1))
                    length = int(getattr(entity, "length", 0))
                    if offset < 0 or length <= 0:
                        continue
-                    if source_text[offset:offset + length].strip().lower() == f"@{bot_username}":
+                    if source_text[offset:offset + length].strip().lower() == expected:
                        return True
                elif entity_type == "text_mention":
                    user = getattr(entity, "user", None)
@@ -2303,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter):
        DMs remain unrestricted. Group/supergroup messages are accepted when:
        - the chat is explicitly allowlisted in ``free_response_chats``
        - ``require_mention`` is disabled
-        - the message is a command
        - the message replies to the bot
        - the bot is @mentioned
        - the text/caption matches a configured regex wake-word pattern
+
+        When ``require_mention`` is enabled, slash commands are not given
+        special treatment — they must pass the same mention/reply checks
+        as any other group message.  Users can still trigger commands via
+        the Telegram bot menu (``/command@botname``) or by explicitly
+        mentioning the bot (``@botname /command``), both of which are
+        recognised as mentions by :meth:`_message_mentions_bot`.
        """
        if not self._is_group_chat(message):
            return True
@@ -2321,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return True
        if not self._telegram_require_mention():
            return True
-        if is_command:
-            return True
        if self._is_reply_to_bot(message):
            return True
        if self._message_mentions_bot(message):
@@ -2605,6 +2659,23 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception as e:
                logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)

+        elif msg.video:
+            try:
+                file_obj = await msg.video.get_file()
+                video_bytes = await file_obj.download_as_bytearray()
+                ext = ".mp4"
+                if getattr(file_obj, "file_path", None):
+                    for candidate in SUPPORTED_VIDEO_TYPES:
+                        if file_obj.file_path.lower().endswith(candidate):
+                            ext = candidate
+                            break
+                cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
+                event.media_urls = [cached_path]
+                event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")]
+                logger.info("[Telegram] Cached user video at %s", cached_path)
+            except Exception as e:
+                logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True)
+
        # Download document files to cache for agent processing
        elif msg.document:
            doc = msg.document
@@ -2621,6 +2692,21 @@ class TelegramAdapter(BasePlatformAdapter):
                    mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
                    ext = mime_to_ext.get(doc.mime_type, "")

+                if not ext and doc.mime_type:
+                    video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
+                    ext = video_mime_to_ext.get(doc.mime_type, "")
+
+                if ext in SUPPORTED_VIDEO_TYPES:
+                    file_obj = await doc.get_file()
+                    video_bytes = await file_obj.download_as_bytearray()
+                    cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
+                    event.media_urls = [cached_path]
+                    event.media_types = [SUPPORTED_VIDEO_TYPES[ext]]
+                    event.message_type = MessageType.VIDEO
+                    logger.info("[Telegram] Cached user video document at %s", cached_path)
+                    await self.handle_message(event)
+                    return
+
                # Check if supported
                if ext not in SUPPORTED_DOCUMENT_TYPES:
                    supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
@@ -2759,13 +2845,11 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.info("[Telegram] Analyzing sticker at %s", cached_path)

            from tools.vision_tools import vision_analyze_tool
-            import json as _json
-
            result_json = await vision_analyze_tool(
                image_url=cached_path,
                user_prompt=STICKER_VISION_PROMPT,
            )
-            result = _json.loads(result_json)
+            result = json.loads(result_json)

            if result.get("success"):
                description = result.get("analysis", "a sticker")
@@ -313,24 +313,14 @@ class WebhookAdapter(BasePlatformAdapter):
                {"error": "Payload too large"}, status=413
            )

-        # ── Rate limiting ────────────────────────────────────────
-        now = time.time()
-        window = self._rate_counts.setdefault(route_name, [])
-        window[:] = [t for t in window if now - t < 60]
-        if len(window) >= self._rate_limit:
-            return web.json_response(
-                {"error": "Rate limit exceeded"}, status=429
-            )
-        window.append(now)
-
-        # Read body
+        # Read body (must be done before any validation)
        try:
            raw_body = await request.read()
        except Exception as e:
            logger.error("[webhook] Failed to read body: %s", e)
            return web.json_response({"error": "Bad request"}, status=400)

-        # Validate HMAC signature (skip for INSECURE_NO_AUTH testing mode)
+        # Validate HMAC signature FIRST (skip for INSECURE_NO_AUTH testing mode)
        secret = route_config.get("secret", self._global_secret)
        if secret and secret != _INSECURE_NO_AUTH:
            if not self._validate_signature(request, raw_body, secret):
@@ -341,6 +331,16 @@ class WebhookAdapter(BasePlatformAdapter):
                    {"error": "Invalid signature"}, status=401
                )

+        # ── Rate limiting (after auth) ───────────────────────────
+        now = time.time()
+        window = self._rate_counts.setdefault(route_name, [])
+        window[:] = [t for t in window if now - t < 60]
+        if len(window) >= self._rate_limit:
+            return web.json_response(
+                {"error": "Rate limit exceeded"}, status=429
+            )
+        window.append(now)
+
        # Parse payload
        try:
            payload = json.loads(raw_body)
@@ -508,6 +508,11 @@ class WeComAdapter(BasePlatformAdapter):
        self._remember_chat_req_id(chat_id, self._payload_req_id(payload))

        text, reply_text = self._extract_text(body)
+        # Strip leading @mention in group chats so slash commands like
+        # "@BotName /approve" are correctly recognized as "/approve".
+        # Mirrors what the Telegram adapter does (re.sub @botname).
+        if is_group and text:
+            text = re.sub(r"^@\S+\s*", "", text).strip()
        media_urls, media_types = await self._extract_media(body)
        message_type = self._derive_message_type(body, text, media_types)
        has_reply_context = bool(reply_text and (text or media_urls))
@@ -624,13 +629,16 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
-            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
+            _raw_mixed = body.get("mixed")
+            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
+            _raw_items = mixed.get("msg_item")
+            items = _raw_items if isinstance(_raw_items, list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
                if str(item.get("msgtype") or "").lower() == "text":
-                    text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
+                    _raw_text = item.get("text")
+                    text_block = _raw_text if isinstance(_raw_text, dict) else {}
                    content = str(text_block.get("content") or "").strip()
                    if content:
                        text_parts.append(content)
@@ -672,8 +680,10 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
-            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
+            _raw_mixed = body.get("mixed")
+            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
+            _raw_items = mixed.get("msg_item")
+            items = _raw_items if isinstance(_raw_items, list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
@@ -1459,3 +1469,134 @@ class WeComAdapter(BasePlatformAdapter):
            "name": chat_id,
            "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
        }
+
+
+# ------------------------------------------------------------------
+# QR code scan flow for obtaining bot credentials
+# ------------------------------------------------------------------
+
+_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
+_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
+_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
+_QR_POLL_INTERVAL = 3  # seconds
+_QR_POLL_TIMEOUT = 300  # 5 minutes
+
+
+def qr_scan_for_bot_info(
+    *,
+    timeout_seconds: int = _QR_POLL_TIMEOUT,
+) -> Optional[Dict[str, str]]:
+    """Run the WeCom QR scan flow to obtain bot_id and secret.
+
+    Fetches a QR code from WeCom, renders it in the terminal, and polls
+    until the user scans it or the timeout expires.
+
+    Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
+    failure or timeout.
+
+    Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
+    used here are not part of WeCom's public developer API — they back the
+    admin-console web UI's bot-creation flow and may change without notice.
+    The same pattern is used by the feishu/dingtalk QR setup wizards.
+    """
+    try:
+        import urllib.request
+        import urllib.parse
+    except ImportError:  # pragma: no cover
+        logger.error("urllib is required for WeCom QR scan")
+        return None
+
+    generate_url = f"{_QR_GENERATE_URL}?source=hermes"
+
+    # ── Step 1: Fetch QR code ──
+    print("  Connecting to WeCom...", end="", flush=True)
+    try:
+        req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            raw = json.loads(resp.read().decode("utf-8"))
+    except Exception as exc:
+        logger.error("WeCom QR: failed to fetch QR code: %s", exc)
+        print(f" failed: {exc}")
+        return None
+
+    data = raw.get("data") or {}
+    scode = str(data.get("scode") or "").strip()
+    auth_url = str(data.get("auth_url") or "").strip()
+
+    if not scode or not auth_url:
+        logger.error("WeCom QR: unexpected response format: %s", raw)
+        print(" failed: unexpected response format")
+        return None
+
+    print(" done.")
+
+    # ── Step 2: Render QR code in terminal ──
+    print()
+    qr_rendered = False
+    try:
+        import qrcode as _qrcode
+        qr = _qrcode.QRCode()
+        qr.add_data(auth_url)
+        qr.make(fit=True)
+        qr.print_ascii(invert=True)
+        qr_rendered = True
+    except ImportError:
+        pass
+    except Exception:
+        pass
+
+    page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
+    if qr_rendered:
+        print(f"\n  Scan the QR code above, or open this URL directly:\n  {page_url}")
+    else:
+        print(f"  Open this URL in WeCom on your phone:\n\n  {page_url}\n")
+        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
+    print()
+    print("  Fetching configuration results...", end="", flush=True)
+
+    # ── Step 3: Poll for result ──
+    import time
+    deadline = time.time() + timeout_seconds
+    query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
+    poll_count = 0
+
+    while time.time() < deadline:
+        try:
+            req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                result = json.loads(resp.read().decode("utf-8"))
+        except Exception as exc:
+            logger.debug("WeCom QR poll error: %s", exc)
+            time.sleep(_QR_POLL_INTERVAL)
+            continue
+
+        poll_count += 1
+        # Print a dot on every poll so progress is visible within 3s.
+        print(".", end="", flush=True)
+
+        result_data = result.get("data") or {}
+        status = str(result_data.get("status") or "").lower()
+
+        if status == "success":
+            print()  # newline after "Fetching configuration results..." dots
+            bot_info = result_data.get("bot_info") or {}
+            bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
+            secret = str(bot_info.get("secret") or "").strip()
+            if bot_id and secret:
+                return {"bot_id": bot_id, "secret": secret}
+            logger.warning(
+                "WeCom QR: scan reported success but bot_info missing or incomplete: %s",
+                result_data,
+            )
+            print(
+                "  QR scan reported success but no bot credentials were returned.\n"
+                "  This usually means the bot was not actually created on the WeCom side.\n"
+                "  Falling back to manual credential entry."
+            )
+            return None
+
+        time.sleep(_QR_POLL_INTERVAL)
+
+    print()  # newline after dots
+    print(f"  QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
+    return None
@@ -66,6 +66,37 @@ def _kill_port_process(port: int) -> None:
    except Exception:
        pass

+
+def _terminate_bridge_process(proc, *, force: bool = False) -> None:
+    """Terminate the bridge process using process-tree semantics where possible."""
+    if _IS_WINDOWS:
+        cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
+        if force:
+            cmd.append("/F")
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+        except FileNotFoundError:
+            if force:
+                proc.kill()
+            else:
+                proc.terminate()
+            return
+
+        if result.returncode != 0:
+            details = (result.stderr or result.stdout or "").strip()
+            raise OSError(details or f"taskkill failed for PID {proc.pid}")
+        return
+
+    import signal
+
+    sig = signal.SIGTERM if not force else signal.SIGKILL
+    os.killpg(os.getpgid(proc.pid), sig)
+
 import sys
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

@@ -118,6 +149,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
    - bridge_script: Path to the Node.js bridge script
    - bridge_port: Port for HTTP communication (default: 3000)
    - session_path: Path to store WhatsApp session data
+    - dm_policy: "open" | "allowlist" | "disabled" — how DMs are handled (default: "open")
+    - allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
+    - group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open")
+    - group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
    """
    
    # WhatsApp message limits — practical UX limit, not protocol max.
@@ -140,6 +175,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
            get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
        ))
        self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
+        self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
+        self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
+        self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
+        self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
        self._mention_patterns = self._compile_mention_patterns()
        self._message_queue: asyncio.Queue = asyncio.Queue()
        self._bridge_log_fh = None
@@ -163,6 +202,33 @@ class WhatsAppAdapter(BasePlatformAdapter):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

+    @staticmethod
+    def _coerce_allow_list(raw) -> set[str]:
+        """Parse allow_from / group_allow_from from config or env var."""
+        if raw is None:
+            return set()
+        if isinstance(raw, list):
+            return {str(part).strip() for part in raw if str(part).strip()}
+        return {part.strip() for part in str(raw).split(",") if part.strip()}
+
+    def _is_dm_allowed(self, sender_id: str) -> bool:
+        """Check whether a DM from the given sender should be processed."""
+        if self._dm_policy == "disabled":
+            return False
+        if self._dm_policy == "allowlist":
+            return sender_id in self._allow_from
+        # "open" — all DMs allowed
+        return True
+
+    def _is_group_allowed(self, chat_id: str) -> bool:
+        """Check whether a group chat should be processed."""
+        if self._group_policy == "disabled":
+            return False
+        if self._group_policy == "allowlist":
+            return chat_id in self._group_allow_from
+        # "open" — all groups allowed
+        return True
+
    def _compile_mention_patterns(self):
        patterns = self.config.extra.get("mention_patterns")
        if patterns is None:
@@ -255,8 +321,18 @@ class WhatsAppAdapter(BasePlatformAdapter):
        return cleaned.strip() or text

    def _should_process_message(self, data: Dict[str, Any]) -> bool:
-        if not data.get("isGroup"):
+        is_group = data.get("isGroup", False)
+        if is_group:
+            chat_id = str(data.get("chatId") or "")
+            if not self._is_group_allowed(chat_id):
+                return False
+        else:
+            sender_id = str(data.get("senderId") or data.get("from") or "")
+            if not self._is_dm_allowed(sender_id):
+                return False
+            # DMs that pass the policy gate are always processed
            return True
+        # Group messages: check mention / free-response settings
        chat_id = str(data.get("chatId") or "")
        if chat_id in self._whatsapp_free_response_chats():
            return True
@@ -289,39 +365,40 @@ class WhatsAppAdapter(BasePlatformAdapter):
        logger.info("[%s] Bridge found at %s", self.name, bridge_path)
        
        # Acquire scoped lock to prevent duplicate sessions
+        lock_acquired = False
        try:
            if not self._acquire_platform_lock('whatsapp-session', str(self._session_path), 'WhatsApp session'):
                return False
+            lock_acquired = True
        except Exception as e:
            logger.warning("[%s] Could not acquire session lock (non-fatal): %s", self.name, e)

-        # Auto-install npm dependencies if node_modules doesn't exist
-        bridge_dir = bridge_path.parent
-        if not (bridge_dir / "node_modules").exists():
-            print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
-            try:
-                install_result = subprocess.run(
-                    ["npm", "install", "--silent"],
-                    cwd=str(bridge_dir),
-                    capture_output=True,
-                    text=True,
-                    timeout=60,
-                )
-                if install_result.returncode != 0:
-                    print(f"[{self.name}] npm install failed: {install_result.stderr}")
-                    return False
-                print(f"[{self.name}] Dependencies installed")
-            except Exception as e:
-                print(f"[{self.name}] Failed to install dependencies: {e}")
-                return False
-        
        try:
+            # Auto-install npm dependencies if node_modules doesn't exist
+            bridge_dir = bridge_path.parent
+            if not (bridge_dir / "node_modules").exists():
+                print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
+                try:
+                    install_result = subprocess.run(
+                        ["npm", "install", "--silent"],
+                        cwd=str(bridge_dir),
+                        capture_output=True,
+                        text=True,
+                        timeout=60,
+                    )
+                    if install_result.returncode != 0:
+                        print(f"[{self.name}] npm install failed: {install_result.stderr}")
+                        return False
+                    print(f"[{self.name}] Dependencies installed")
+                except Exception as e:
+                    print(f"[{self.name}] Failed to install dependencies: {e}")
+                    return False
+
            # Ensure session directory exists
            self._session_path.mkdir(parents=True, exist_ok=True)
            
            # Check if bridge is already running and connected
            import aiohttp
-            import asyncio
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(
@@ -452,10 +529,13 @@ class WhatsAppAdapter(BasePlatformAdapter):
            return True
            
        except Exception as e:
-            self._release_platform_lock()
            logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True)
-            self._close_bridge_log()
            return False
+        finally:
+            if not self._running:
+                if lock_acquired:
+                    self._release_platform_lock()
+                self._close_bridge_log()
    
    def _close_bridge_log(self) -> None:
        """Close the bridge log file handle if open."""
@@ -487,22 +567,14 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Stop the WhatsApp bridge and clean up any orphaned processes."""
        if self._bridge_process:
            try:
-                # Kill the entire process group so child node processes die too
-                import signal
                try:
-                    if _IS_WINDOWS:
-                        self._bridge_process.terminate()
-                    else:
-                        os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
+                    _terminate_bridge_process(self._bridge_process, force=False)
                except (ProcessLookupError, PermissionError):
                    self._bridge_process.terminate()
                await asyncio.sleep(1)
                if self._bridge_process.poll() is None:
                    try:
-                        if _IS_WINDOWS:
-                            self._bridge_process.kill()
-                        else:
-                            os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
+                        _terminate_bridge_process(self._bridge_process, force=True)
                    except (ProcessLookupError, PermissionError):
                        self._bridge_process.kill()
            except Exception as e:
@@ -655,6 +727,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
        chat_id: str,
        message_id: str,
        content: str,
+        *,
+        finalize: bool = False,
    ) -> SendResult:
        """Edit a previously sent message via the WhatsApp bridge."""
        if not self._running or not self._http_session:
@@ -766,6 +840,17 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Send a video natively via bridge — plays inline in WhatsApp."""
        return await self._send_media_to_bridge(chat_id, video_path, "video", caption)

+    async def send_voice(
+        self,
+        chat_id: str,
+        audio_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Send an audio file as a WhatsApp voice message via bridge."""
+        return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption)
+
    async def send_document(
        self,
        chat_id: str,
@@ -80,7 +80,7 @@ class SessionSource:
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
-    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
+    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
    
@@ -152,6 +152,7 @@ class SessionContext:
    source: SessionSource
    connected_platforms: List[Platform]
    home_channels: Dict[Platform, HomeChannel]
+    shared_multi_user_session: bool = False
    
    # Session metadata
    session_key: str = ""
@@ -166,6 +167,7 @@ class SessionContext:
            "home_channels": {
                p.value: hc.to_dict() for p, hc in self.home_channels.items()
            },
+            "shared_multi_user_session": self.shared_multi_user_session,
            "session_key": self.session_key,
            "session_id": self.session_id,
            "created_at": self.created_at.isoformat() if self.created_at else None,
@@ -240,18 +242,16 @@ def build_session_context_prompt(
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")

    # User identity.
-    # In shared thread sessions (non-DM with thread_id), multiple users
-    # contribute to the same conversation.  Don't pin a single user name
-    # in the system prompt — it changes per-turn and would bust the prompt
-    # cache.  Instead, note that this is a multi-user thread; individual
-    # sender names are prefixed on each user message by the gateway.
-    _is_shared_thread = (
-        context.source.chat_type != "dm"
-        and context.source.thread_id
-    )
-    if _is_shared_thread:
+    # In shared multi-user sessions (shared threads OR shared non-thread groups
+    # when group_sessions_per_user=False), multiple users contribute to the same
+    # conversation.  Don't pin a single user name in the system prompt — it
+    # changes per-turn and would bust the prompt cache.  Instead, note that
+    # this is a multi-user session; individual sender names are prefixed on
+    # each user message by the gateway.
+    if context.shared_multi_user_session:
+        session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
        lines.append(
-            "**Session type:** Multi-user thread — messages are prefixed "
+            f"**Session type:** {session_label} — messages are prefixed "
            "with [sender name]. Multiple users may participate."
        )
    elif context.source.user_name:
@@ -467,6 +467,27 @@ class SessionEntry:
        )


+def is_shared_multi_user_session(
+    source: SessionSource,
+    *,
+    group_sessions_per_user: bool = True,
+    thread_sessions_per_user: bool = False,
+) -> bool:
+    """Return True when a non-DM session is shared across participants.
+
+    Mirrors the isolation rules in :func:`build_session_key`:
+      - DMs are never shared.
+      - Threads are shared unless ``thread_sessions_per_user`` is True.
+      - Non-thread group/channel sessions are shared unless
+        ``group_sessions_per_user`` is True (default: True = isolated).
+    """
+    if source.chat_type == "dm":
+        return False
+    if source.thread_id:
+        return not thread_sessions_per_user
+    return not group_sessions_per_user
+
+
 def build_session_key(
    source: SessionSource,
    group_sessions_per_user: bool = True,
@@ -926,12 +947,18 @@ class SessionStore:
                    continue
                # Never prune sessions with an active background process
                # attached — the user may still be waiting on output.
+                # The callback is keyed by session_key (see process_registry.
+                # has_active_for_session); passing session_id here used to
+                # never match, so active sessions got pruned anyway.
                if self._has_active_processes_fn is not None:
                    try:
-                        if self._has_active_processes_fn(entry.session_id):
+                        if self._has_active_processes_fn(entry.session_key):
                            continue
-                    except Exception:
-                        pass
+                    except Exception as exc:
+                        logger.debug(
+                            "has_active_processes_fn raised during prune for %s: %s",
+                            entry.session_key, exc,
+                        )
                if entry.updated_at < cutoff:
                    removed_keys.append(key)
            for key in removed_keys:
@@ -1120,6 +1147,10 @@ class SessionStore:
                    tool_name=message.get("tool_name"),
                    tool_calls=message.get("tool_calls"),
                    tool_call_id=message.get("tool_call_id"),
+                    reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
+                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
+                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
+                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1149,6 +1180,7 @@ class SessionStore:
                        tool_calls=msg.get("tool_calls"),
                        tool_call_id=msg.get("tool_call_id"),
                        reasoning=msg.get("reasoning") if role == "assistant" else None,
+                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                    )
@@ -1232,6 +1264,11 @@ def build_session_context(
        source=source,
        connected_platforms=connected,
        home_channels=home_channels,
+        shared_multi_user_session=is_shared_multi_user_session(
+            source,
+            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
+            thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
+        ),
    )
    
    if session_entry:
@@ -56,6 +56,12 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS
 _SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
 _SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)

+# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
+# don't clobber each other's delivery targets.
+_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET)
+_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
+_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)
+
 _VAR_MAP = {
    "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
    "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
@@ -64,6 +70,9 @@ _VAR_MAP = {
    "HERMES_SESSION_USER_ID": _SESSION_USER_ID,
    "HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
    "HERMES_SESSION_KEY": _SESSION_KEY,
+    "HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
+    "HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
+    "HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
 }


@@ -22,11 +22,18 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

+if sys.platform == "win32":
+    import msvcrt
+else:
+    import fcntl
+
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
+_GATEWAY_LOCK_FILENAME = "gateway.lock"
+_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -35,6 +42,14 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


+def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
+    """Return the path to the runtime gateway lock file."""
+    if pid_path is not None:
+        return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
+    home = get_hermes_home()
+    return home / _GATEWAY_LOCK_FILENAME
+
+
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -98,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]:
        return None


+def get_process_start_time(pid: int) -> Optional[int]:
+    """Public wrapper for retrieving a process start time when available."""
+    return _get_process_start_time(pid)
+
+
 def _read_process_cmdline(pid: int) -> Optional[str]:
    """Return the process command line as a space-separated string."""
    cmdline_path = Path(f"/proc/{pid}/cmdline")
@@ -121,6 +141,7 @@ def _looks_like_gateway_process(pid: int) -> bool:
        "hermes_cli.main gateway",
        "hermes_cli/main.py gateway",
        "hermes gateway",
+        "hermes-gateway",
        "gateway/run.py",
    )
    return any(pattern in cmdline for pattern in patterns)
@@ -212,21 +233,160 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


+def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
+    return _read_pid_record(lock_path or _get_gateway_lock_path())
+
+
+def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
+    if not record:
+        return None
+    try:
+        return int(record["pid"])
+    except (KeyError, TypeError, ValueError):
+        return None
+
+
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
+    """Delete a stale gateway PID file (and its sibling lock metadata).
+
+    Called from ``get_running_pid()`` after the runtime lock has already been
+    confirmed inactive, so the on-disk metadata is known to belong to a dead
+    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
+    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
+    ``--replace`` handoffs), this path force-unlinks both files so the next
+    startup sees a clean slate.
+    """
    if not cleanup_stale:
        return
    try:
-        if pid_path == _get_pid_path():
-            remove_pid_file()
-        else:
-            pid_path.unlink(missing_ok=True)
+        pid_path.unlink(missing_ok=True)
+    except Exception:
+        pass
+    try:
+        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
    except Exception:
        pass


+def _write_gateway_lock_record(handle) -> None:
+    handle.seek(0)
+    handle.truncate()
+    json.dump(_build_pid_record(), handle)
+    handle.flush()
+    try:
+        os.fsync(handle.fileno())
+    except OSError:
+        pass
+
+
+def _try_acquire_file_lock(handle) -> bool:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0, os.SEEK_END)
+            if handle.tell() == 0:
+                handle.write("\n")
+                handle.flush()
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        return True
+    except (BlockingIOError, OSError):
+        return False
+
+
+def _release_file_lock(handle) -> None:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+    except OSError:
+        pass
+
+
+def acquire_gateway_runtime_lock() -> bool:
+    """Claim the cross-process runtime lock for the gateway.
+
+    Unlike the PID file, the lock is owned by the live process itself. If the
+    process dies abruptly, the OS releases the lock automatically.
+    """
+    global _gateway_lock_handle
+    if _gateway_lock_handle is not None:
+        return True
+
+    path = _get_gateway_lock_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    handle = open(path, "a+", encoding="utf-8")
+    if not _try_acquire_file_lock(handle):
+        handle.close()
+        return False
+    _write_gateway_lock_record(handle)
+    _gateway_lock_handle = handle
+    return True
+
+
+def release_gateway_runtime_lock() -> None:
+    """Release the gateway runtime lock when owned by this process."""
+    global _gateway_lock_handle
+    handle = _gateway_lock_handle
+    if handle is None:
+        return
+    _gateway_lock_handle = None
+    _release_file_lock(handle)
+    try:
+        handle.close()
+    except OSError:
+        pass
+
+
+def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
+    """Return True when some process currently owns the gateway runtime lock."""
+    global _gateway_lock_handle
+    resolved_lock_path = lock_path or _get_gateway_lock_path()
+    if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
+        return True
+
+    if not resolved_lock_path.exists():
+        return False
+
+    handle = open(resolved_lock_path, "a+", encoding="utf-8")
+    try:
+        if _try_acquire_file_lock(handle):
+            _release_file_lock(handle)
+            return False
+        return True
+    finally:
+        try:
+            handle.close()
+        except OSError:
+            pass
+
+
 def write_pid_file() -> None:
-    """Write the current process PID and metadata to the gateway PID file."""
-    _write_json_file(_get_pid_path(), _build_pid_record())
+    """Write the current process PID and metadata to the gateway PID file.
+
+    Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
+    invocations race: exactly one process wins and the rest get
+    FileExistsError.
+    """
+    path = _get_pid_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    record = json.dumps(_build_pid_record())
+    try:
+        fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
+    except FileExistsError:
+        raise  # Let caller decide: another gateway is racing us
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            f.write(record)
+    except Exception:
+        try:
+            path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise


 def write_runtime_status(
@@ -341,7 +501,8 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
        if not stale:
            try:
                os.kill(existing_pid, 0)
-            except (ProcessLookupError, PermissionError):
+            except (ProcessLookupError, PermissionError, OSError):
+                # Windows raises OSError with WinError 87 for invalid pid check
                stale = True
            else:
                current_start = _get_process_start_time(existing_pid)
@@ -406,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None:
        pass


-def release_all_scoped_locks() -> int:
-    """Remove all scoped lock files in the lock directory.
+def release_all_scoped_locks(
+    *,
+    owner_pid: Optional[int] = None,
+    owner_start_time: Optional[int] = None,
+) -> int:
+    """Remove scoped lock files in the lock directory.

    Called during --replace to clean up stale locks left by stopped/killed
-    gateway processes that did not release their locks gracefully.
+    gateway processes that did not release their locks gracefully. When an
+    ``owner_pid`` is provided, only lock records belonging to that gateway
+    process are removed. ``owner_start_time`` further narrows the match to
+    protect against PID reuse.
+
+    When no owner is provided, preserves the legacy behavior and removes every
+    scoped lock file in the directory.
+
    Returns the number of lock files removed.
    """
    lock_dir = _get_lock_dir()
    removed = 0
    if lock_dir.exists():
        for lock_file in lock_dir.glob("*.lock"):
+            if owner_pid is not None:
+                record = _read_json_file(lock_file)
+                if not isinstance(record, dict):
+                    continue
+                try:
+                    record_pid = int(record.get("pid"))
+                except (TypeError, ValueError):
+                    continue
+                if record_pid != owner_pid:
+                    continue
+                if (
+                    owner_start_time is not None
+                    and record.get("start_time") != owner_start_time
+                ):
+                    continue
            try:
                lock_file.unlink(missing_ok=True)
                removed += 1
@@ -563,35 +750,46 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    record = _read_pid_record(resolved_pid_path)
-    if not record:
+    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
+    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
+    if not lock_active:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    try:
-        pid = int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    primary_record = _read_pid_record(resolved_pid_path)
+    fallback_record = _read_gateway_lock_record(resolved_lock_path)

-    try:
-        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-    except (ProcessLookupError, PermissionError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    for record in (primary_record, fallback_record):
+        pid = _pid_from_record(record)
+        if pid is None:
+            continue

-    recorded_start = record.get("start_time")
-    current_start = _get_process_start_time(pid)
-    if recorded_start is not None and current_start is not None and current_start != recorded_start:
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+        try:
+            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+        except ProcessLookupError:
+            continue
+        except PermissionError:
+            # The process exists but belongs to another user/service scope.
+            # With the runtime lock still held, prefer keeping it visible
+            # rather than deleting the PID file as "stale".
+            if _record_looks_like_gateway(record):
+                return pid
+            continue
+        except OSError:
+            # Windows raises OSError with WinError 87 for an invalid pid
+            # (process is definitely gone). Treat as "process doesn't exist".
+            continue

-    if not _looks_like_gateway_process(pid):
-        if not _record_looks_like_gateway(record):
-            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-            return None
+        recorded_start = record.get("start_time")
+        current_start = _get_process_start_time(pid)
+        if recorded_start is not None and current_start is not None and current_start != recorded_start:
+            continue

-    return pid
+        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
+            return pid
+
+    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+    return None


 def is_gateway_running(
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.10.0"
-__release_date__ = "2026.4.16"
+__version__ = "0.11.0"
+__release_date__ = "2026.4.23"
@@ -20,6 +20,7 @@ import logging
 import os
 import shutil
 import shlex
+import ssl
 import stat
 import base64
 import hashlib
@@ -71,6 +72,8 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
 DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
+STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1"
+STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
 CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
 CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -151,7 +154,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="gemini",
        name="Google AI Studio",
        auth_type="api_key",
-        inference_base_url="https://generativelanguage.googleapis.com/v1beta/openai",
+        inference_base_url="https://generativelanguage.googleapis.com/v1beta",
        api_key_env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
        base_url_env_var="GEMINI_BASE_URL",
    ),
@@ -167,8 +170,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="kimi-coding",
        name="Kimi / Moonshot",
        auth_type="api_key",
+        # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat).
+        # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding
+        # by _resolve_kimi_base_url() below.
        inference_base_url="https://api.moonshot.ai/v1",
-        api_key_env_vars=("KIMI_API_KEY",),
+        api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
        base_url_env_var="KIMI_BASE_URL",
    ),
    "kimi-coding-cn": ProviderConfig(
@@ -178,6 +184,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        inference_base_url="https://api.moonshot.cn/v1",
        api_key_env_vars=("KIMI_CN_API_KEY",),
    ),
+    "stepfun": ProviderConfig(
+        id="stepfun",
+        name="StepFun Step Plan",
+        auth_type="api_key",
+        inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL,
+        api_key_env_vars=("STEPFUN_API_KEY",),
+        base_url_env_var="STEPFUN_BASE_URL",
+    ),
    "arcee": ProviderConfig(
        id="arcee",
        name="Arcee AI",
@@ -200,6 +214,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="api_key",
        inference_base_url="https://api.anthropic.com",
        api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
+        base_url_env_var="ANTHROPIC_BASE_URL",
    ),
    "alibaba": ProviderConfig(
        id="alibaba",
@@ -339,10 +354,16 @@ def get_anthropic_key() -> str:
 # =============================================================================

 # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
-# on api.kimi.com/coding/v1.  Legacy keys from platform.moonshot.ai work on
-# api.moonshot.ai/v1 (the default).  Auto-detect when user hasn't set
+# on api.kimi.com/coding.  Legacy keys from platform.moonshot.ai work on
+# api.moonshot.ai/v1 (the old default).  Auto-detect when user hasn't set
 # KIMI_BASE_URL explicitly.
-KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"
+#
+# Note: the base URL intentionally has NO /v1 suffix.  The /coding endpoint
+# speaks the Anthropic Messages protocol, and the anthropic SDK appends
+# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages"
+# (the correct target). Using "/coding/v1" here would produce
+# "/coding/v1/v1/messages" (a 404).
+KIMI_CODE_BASE_URL = "https://api.kimi.com/coding"


 def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
@@ -353,6 +374,9 @@ def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) ->
    """
    if env_override:
        return env_override
+    # No key → nothing to infer from.  Return default without inspecting.
+    if not api_key:
+        return default_url
    if api_key.startswith("sk-kimi-"):
        return KIMI_CODE_BASE_URL
    return default_url
@@ -480,6 +504,14 @@ def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) ->
    if env_override:
        return env_override

+    # No API key set → don't probe (would fire N×M HTTPS requests with an
+    # empty Bearer token, all returning 401).  This path is hit during
+    # auxiliary-client auto-detection when the user has no Z.AI credentials
+    # at all — the caller discards the result immediately, so the probe is
+    # pure latency for every AIAgent construction.
+    if not api_key:
+        return default_url
+
    # Check provider-state cache for a previously-detected endpoint.
    auth_store = _load_auth_store()
    state = _load_provider_state(auth_store, "zai") or {}
@@ -587,7 +619,25 @@ def _oauth_trace(event: str, *, sequence_id: Optional[str] = None, **fields: Any
 # =============================================================================

 def _auth_file_path() -> Path:
-    return get_hermes_home() / "auth.json"
+    path = get_hermes_home() / "auth.json"
+    # Seat belt: if pytest is running and HERMES_HOME resolves to the real
+    # user's auth store, refuse rather than silently corrupt it. This catches
+    # tests that forgot to monkeypatch HERMES_HOME, tests invoked without the
+    # hermetic conftest, or sandbox escapes via threads/subprocesses. In
+    # production (no PYTEST_CURRENT_TEST) this is a single dict lookup.
+    if os.environ.get("PYTEST_CURRENT_TEST"):
+        real_home_auth = (Path.home() / ".hermes" / "auth.json").resolve(strict=False)
+        try:
+            resolved = path.resolve(strict=False)
+        except Exception:
+            resolved = path
+        if resolved == real_home_auth:
+            raise RuntimeError(
+                f"Refusing to touch real user auth store during test run: {path}. "
+                "Set HERMES_HOME to a tmp_path in your test fixture, or run "
+                "via scripts/run_tests.sh for hermetic CI-parity env."
+            )
+    return path


 def _auth_lock_path() -> Path:
@@ -971,6 +1021,7 @@ def resolve_provider(
        "x-ai": "xai", "x.ai": "xai", "grok": "xai",
        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
+        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -1652,7 +1703,7 @@ def _resolve_verify(
    insecure: Optional[bool] = None,
    ca_bundle: Optional[str] = None,
    auth_state: Optional[Dict[str, Any]] = None,
-) -> bool | str:
+) -> bool | ssl.SSLContext:
    tls_state = auth_state.get("tls") if isinstance(auth_state, dict) else {}
    tls_state = tls_state if isinstance(tls_state, dict) else {}

@@ -1672,13 +1723,12 @@ def _resolve_verify(
    if effective_ca:
        ca_path = str(effective_ca)
        if not os.path.isfile(ca_path):
-            import logging
-            logging.getLogger("hermes.auth").warning(
+            logger.warning(
                "CA bundle path does not exist: %s — falling back to default certificates",
                ca_path,
            )
            return True
-        return ca_path
+        return ssl.create_default_context(cafile=ca_path)
    return True


@@ -2721,6 +2771,17 @@ def _update_config_for_provider(
        # Clear stale base_url to prevent contamination when switching providers
        model_cfg.pop("base_url", None)

+    # Clear stale api_key/api_mode left over from a previous custom provider.
+    # When the user switches from e.g. a MiniMax custom endpoint
+    # (api_mode=anthropic_messages, api_key=mxp-...) to a built-in provider
+    # (e.g. OpenRouter), the stale api_key/api_mode would override the new
+    # provider's credentials and transport choice.  Built-in providers that
+    # need a specific api_mode (copilot, xai) set it at request-resolution
+    # time via `_copilot_runtime_api_mode` / `_detect_api_mode_for_url`, so
+    # removing the persisted value here is safe.
+    model_cfg.pop("api_key", None)
+    model_cfg.pop("api_mode", None)
+
    # When switching to a non-OpenRouter provider, ensure model.default is
    # valid for the new provider.  An OpenRouter-formatted name like
    # "anthropic/claude-opus-4.6" will fail on direct-API providers.
@@ -2760,6 +2821,7 @@ def _prompt_model_selection(
    pricing: Optional[Dict[str, Dict[str, str]]] = None,
    unavailable_models: Optional[List[str]] = None,
    portal_url: str = "",
+    allow_custom = True
 ) -> Optional[str]:
    """Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None.

@@ -2848,8 +2910,16 @@ def _prompt_model_selection(
        from simple_term_menu import TerminalMenu

        choices = [f"  {_label(mid)}" for mid in ordered]
-        choices.append("  Enter custom model name")
-        choices.append("  Skip (keep current)")
+
+        custom_idx = None
+        if allow_custom:
+            custom_idx = len(choices)
+            choices.append("  Enter custom model name")
+
+        skip_idx = None
+        if current_model:
+            skip_idx = len(choices)
+            choices.append("  Skip (keep current)")

        # Print the unavailable block BEFORE the menu via regular print().
        # simple_term_menu pads title lines to terminal width (causes wrapping),
@@ -2886,21 +2956,29 @@ def _prompt_model_selection(
        print()
        if idx < len(ordered):
            return ordered[idx]
-        elif idx == len(ordered):
+        if idx == custom_idx:
            custom = input("Enter model name: ").strip()
            return custom if custom else None
+        if idx == skip_idx:
+            return None
        return None
    except (ImportError, NotImplementedError, OSError, subprocess.SubprocessError):
        pass

    # Fallback: numbered list
    print(menu_title)
-    num_width = len(str(len(ordered) + 2))
+    n = len(ordered)
+    extra = []
+    if allow_custom:
+        extra.append("Enter custom model name")
+    if current_model:
+        extra.append("Skip (keep current)")
+    total = n + len(extra)
+    num_width = len(str(total))
    for i, mid in enumerate(ordered, 1):
        print(f"  {i:>{num_width}}. {_label(mid)}")
-    n = len(ordered)
-    print(f"  {n + 1:>{num_width}}. Enter custom model name")
-    print(f"  {n + 2:>{num_width}}. Skip (keep current)")
+    for j, label in enumerate(extra, n + 1):
+        print(f"  {j:>{num_width}}. {label}")

    if _unavailable:
        _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
@@ -2912,18 +2990,19 @@ def _prompt_model_selection(

    while True:
        try:
-            choice = input(f"Choice [1-{n + 2}] (default: skip): ").strip()
+            choice = input(f"Choice [1-{total}]: ").strip()
            if not choice:
                return None
-            idx = int(choice)
-            if 1 <= idx <= n:
-                return ordered[idx - 1]
-            elif idx == n + 1:
-                custom = input("Enter model name: ").strip()
-                return custom if custom else None
-            elif idx == n + 2:
-                return None
-            print(f"Please enter 1-{n + 2}")
+            val = int(choice)
+            if 1 <= val <= n:
+                return ordered[val - 1]
+            extra_idx = val - n - 1
+            if 0 <= extra_idx < len(extra):
+                if extra[extra_idx] == "Enter custom model name":
+                    custom = input("Enter model name: ").strip()
+                    return custom if custom else None
+                return None  # skip
+            print(f"Please enter 1-{total}")
        except ValueError:
            print("Please enter a number")
        except (KeyboardInterrupt, EOFError):
@@ -3199,7 +3278,6 @@ def _nous_device_code_login(
        open_browser = False

    print(f"Starting Hermes login via {pconfig.name}...")
-    print(f"Portal: {portal_base_url}")
    if insecure:
        print("TLS verification: disabled (--insecure)")
    elif ca_bundle:
@@ -3219,19 +3297,18 @@ def _nous_device_code_login(
        interval = int(device_data["interval"])

        print()
-        print("To continue:")
-        print(f"  1. Open: {verification_url}")
-        print(f"  2. If prompted, enter code: {user_code}")
-
        if open_browser:
            opened = webbrowser.open(verification_url)
            if opened:
-                print("  (Opened browser for verification)")
+                print("If you don't see a browser window open, navigate to this URL:")
            else:
-                print("  Could not open browser automatically — use the URL above.")
+                print("Navigate to this URL to continue:")
+        print(verification_url)
+        print(f"If you're prompted for a code, use {user_code}")
+        print()

        effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
-        print(f"Waiting for approval (polling every {effective_interval}s)...")
+        print(f"Waiting for approval (checking every {effective_interval}s)...")

        token_data = _poll_for_token(
            client=client,
@@ -3296,7 +3373,7 @@ def _nous_device_code_login(
        raise


-def _login_nous(args, pconfig: ProviderConfig) -> None:
+def login_nous(args, pconfig: ProviderConfig) -> None:
    """Nous Portal device authorization flow."""
    timeout_seconds = getattr(args, "timeout", None) or 15.0
    insecure = bool(getattr(args, "insecure", False))
@@ -3353,29 +3430,34 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
+                _PROVIDER_MODELS, get_pricing_for_provider,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
            model_ids = _PROVIDER_MODELS.get("nous", [])

+            _portal = auth_state.get("portal_base_url", "")
+
            print()
+
            unavailable_models: list = []
            if model_ids:
                pricing = get_pricing_for_provider("nous")
-                model_ids = filter_nous_free_models(model_ids, pricing)
                free_tier = check_nous_free_tier()
                if free_tier:
                    model_ids, unavailable_models = partition_nous_models_by_tier(
                        model_ids, pricing, free_tier=True,
                    )
-            _portal = auth_state.get("portal_base_url", "")
-            if model_ids:
-                print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
-                selected_model = _prompt_model_selection(
-                    model_ids, pricing=pricing,
-                    unavailable_models=unavailable_models,
-                    portal_url=_portal,
-                )
+                if not free_tier:
+                    print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.")
+                if len(model_ids) > 1:
+                    selected_model = _prompt_model_selection(
+                        model_ids, pricing=pricing,
+                        unavailable_models=unavailable_models,
+                        portal_url=_portal,
+                        allow_custom=not free_tier
+                    )
+                else:
+                    selected_model = model_ids[0]
            elif unavailable_models:
                _url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/")
                print("No free models currently available.")
@@ -152,6 +152,23 @@ def auth_add_command(args) -> None:

    pool = load_pool(provider)

+    # Clear ALL suppressions for this provider — re-adding a credential is
+    # a strong signal the user wants auth re-enabled.  This covers env:*
+    # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
+    # device_code (codex), etc.  One consistent re-engagement pattern.
+    # Matches the Codex device_code re-link pattern that predates this.
+    if not provider.startswith(CUSTOM_POOL_PREFIX):
+        try:
+            from hermes_cli.auth import (
+                _load_auth_store,
+                unsuppress_credential_source,
+            )
+            suppressed = _load_auth_store().get("suppressed_sources", {})
+            for src in list(suppressed.get(provider, []) or []):
+                unsuppress_credential_source(provider, src)
+        except Exception:
+            pass
+
    if requested_type == AUTH_TYPE_API_KEY:
        token = (getattr(args, "api_key", None) or "").strip()
        if not token:
@@ -338,71 +355,28 @@ def auth_remove_command(args) -> None:
        raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
    print(f"Removed {provider} credential #{index} ({removed.label})")

-    # If this was an env-seeded credential, also clear the env var from .env
-    # so it doesn't get re-seeded on the next load_pool() call.
-    if removed.source.startswith("env:"):
-        env_var = removed.source[len("env:"):]
-        if env_var:
-            from hermes_cli.config import remove_env_value
-            cleared = remove_env_value(env_var)
-            if cleared:
-                print(f"Cleared {env_var} from .env")
+    # Unified removal dispatch.  Every credential source Hermes reads from
+    # (env vars, external OAuth files, auth.json blocks, custom config)
+    # has a RemovalStep registered in agent.credential_sources.  The step
+    # handles its source-specific cleanup and we centralise suppression +
+    # user-facing output here so every source behaves identically from
+    # the user's perspective.
+    from agent.credential_sources import find_removal_step
+    from hermes_cli.auth import suppress_credential_source

-    # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
-    # clear the underlying auth store / credential file so it doesn't get
-    # re-seeded on the next load_pool() call.
-    elif provider == "openai-codex" and (
-        removed.source == "device_code" or removed.source.endswith(":device_code")
-    ):
-        # Codex tokens live in TWO places: the Hermes auth store and
-        # ~/.codex/auth.json (the Codex CLI shared file).  On every refresh,
-        # refresh_codex_oauth_pure() writes to both.  So clearing only the
-        # Hermes auth store is not enough — _seed_from_singletons() will
-        # auto-import from ~/.codex/auth.json on the next load_pool() and
-        # the removal is instantly undone.  Mark the source as suppressed
-        # so auto-import is skipped; leave ~/.codex/auth.json untouched so
-        # the Codex CLI itself keeps working.
-        from hermes_cli.auth import (
-            _load_auth_store, _save_auth_store, _auth_store_lock,
-            suppress_credential_source,
-        )
-        with _auth_store_lock():
-            auth_store = _load_auth_store()
-            providers_dict = auth_store.get("providers")
-            if isinstance(providers_dict, dict) and provider in providers_dict:
-                del providers_dict[provider]
-                _save_auth_store(auth_store)
-                print(f"Cleared {provider} OAuth tokens from auth store")
-        suppress_credential_source(provider, "device_code")
-        print("Suppressed openai-codex device_code source — it will not be re-seeded.")
-        print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
-        print("Run `hermes auth add openai-codex` to re-enable if needed.")
+    step = find_removal_step(provider, removed.source)
+    if step is None:
+        # Unregistered source — e.g. "manual", which has nothing external
+        # to clean up.  The pool entry is already gone; we're done.
+        return

-    elif removed.source == "device_code" and provider == "nous":
-        from hermes_cli.auth import (
-            _load_auth_store, _save_auth_store, _auth_store_lock,
-        )
-        with _auth_store_lock():
-            auth_store = _load_auth_store()
-            providers_dict = auth_store.get("providers")
-            if isinstance(providers_dict, dict) and provider in providers_dict:
-                del providers_dict[provider]
-                _save_auth_store(auth_store)
-                print(f"Cleared {provider} OAuth tokens from auth store")
-
-    elif removed.source == "hermes_pkce" and provider == "anthropic":
-        from hermes_constants import get_hermes_home
-        oauth_file = get_hermes_home() / ".anthropic_oauth.json"
-        if oauth_file.exists():
-            oauth_file.unlink()
-            print("Cleared Hermes Anthropic OAuth credentials")
-
-    elif removed.source == "claude_code" and provider == "anthropic":
-        from hermes_cli.auth import suppress_credential_source
-        suppress_credential_source(provider, "claude_code")
-        print("Suppressed claude_code credential — it will not be re-seeded.")
-        print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
-        print("Run `hermes auth add anthropic` to re-enable if needed.")
+    result = step.remove_fn(provider, removed)
+    for line in result.cleaned:
+        print(line)
+    if result.suppress:
+        suppress_credential_source(provider, removed.source)
+    for line in result.hints:
+        print(line)


 def auth_reset_command(args) -> None:
@@ -201,7 +201,7 @@ def run_backup(args) -> None:
                else:
                    zf.write(abs_path, arcname=str(rel_path))
                    total_bytes += abs_path.stat().st_size
-            except (PermissionError, OSError) as exc:
+            except (PermissionError, OSError, ValueError) as exc:
                errors.append(f"  {rel_path}: {exc}")
                continue

@@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
            state_path = child / state_name
            if state_path.exists():
                kind = "directory" if state_path.is_dir() else "file"
-                rel = state_path.relative_to(source_dir)
+                rel = state_path.relative_to(source_dir).as_posix()
                findings.append((state_path, f"Workspace {kind}: {rel}"))

    return findings
@@ -12,6 +12,7 @@ import os
 logger = logging.getLogger(__name__)

 DEFAULT_CODEX_MODELS: List[str] = [
+    "gpt-5.5",
    "gpt-5.4-mini",
    "gpt-5.4",
    "gpt-5.3-codex",
@@ -21,10 +22,10 @@ DEFAULT_CODEX_MODELS: List[str] = [
 ]

 _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
+    ("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
    ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.3-codex", ("gpt-5.2-codex",)),
-    ("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")),
 ]


@@ -260,6 +260,26 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )


+def is_gateway_known_command(name: str | None) -> bool:
+    """Return True if ``name`` resolves to a gateway-dispatchable slash command.
+
+    This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
+    from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
+    looked up lazily so importing this module never forces plugin
+    discovery. Gateway code uses this to decide whether to emit
+    ``command:<name>`` hooks — plugin commands get the same lifecycle
+    events as built-ins.
+    """
+    if not name:
+        return False
+    if name in GATEWAY_KNOWN_COMMANDS:
+        return True
+    for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
+        if plugin_name == name:
+            return True
+    return False
+
+
 # Commands with explicit Level-2 running-agent handlers in gateway/run.py.
 # Listed here for introspection / tests; semantically a subset of
 # "all resolvable commands" — which is the real bypass set (see
@@ -371,12 +391,47 @@ def gateway_help_lines() -> list[str]:
    return lines


+def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
+    """Yield (name, description, args_hint) tuples for all plugin slash commands.
+
+    Plugin commands are registered via
+    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
+    like ``CommandDef`` entries for gateway surfacing: they appear in the
+    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
+    (via :func:`gateway.platforms.discord._register_slash_commands`) in
+    Discord's native slash command picker.
+
+    Lookup is lazy so importing this module never forces plugin discovery
+    (which can trigger filesystem scans and environment-dependent
+    behavior).
+    """
+    try:
+        from hermes_cli.plugins import get_plugin_commands
+    except Exception:
+        return []
+    try:
+        commands = get_plugin_commands() or {}
+    except Exception:
+        return []
+    entries: list[tuple[str, str, str]] = []
+    for name, meta in commands.items():
+        if not isinstance(name, str) or not isinstance(meta, dict):
+            continue
+        description = str(meta.get("description") or f"Run /{name}")
+        args_hint = str(meta.get("args_hint") or "").strip()
+        entries.append((name, description, args_hint))
+    return entries
+
+
 def telegram_bot_commands() -> list[tuple[str, str]]:
    """Return (command_name, description) pairs for Telegram setMyCommands.

    Telegram command names cannot contain hyphens, so they are replaced with
    underscores.  Aliases are skipped -- Telegram shows one menu entry per
    canonical command.
+
+    Plugin-registered slash commands are included so plugins get native
+    autocomplete in Telegram without touching core code.
    """
    overrides = _resolve_config_gates()
    result: list[tuple[str, str]] = []
@@ -386,6 +441,10 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
        tg_name = _sanitize_telegram_name(cmd.name)
        if tg_name:
            result.append((tg_name, cmd.description))
+    for name, description, _args_hint in _iter_plugin_command_entries():
+        tg_name = _sanitize_telegram_name(name)
+        if tg_name:
+            result.append((tg_name, description))
    return result


@@ -497,9 +556,8 @@ def _collect_gateway_skill_entries(
    # --- Tier 1: Plugin slash commands (never trimmed) ---------------------
    plugin_pairs: list[tuple[str, str]] = []
    try:
-        from hermes_cli.plugins import get_plugin_manager
-        pm = get_plugin_manager()
-        plugin_cmds = getattr(pm, "_plugin_commands", {})
+        from hermes_cli.plugins import get_plugin_commands
+        plugin_cmds = get_plugin_commands()
        for cmd_name in sorted(plugin_cmds):
            name = sanitize_name(cmd_name) if sanitize_name else cmd_name
            if not name:
@@ -751,6 +809,9 @@ def slack_subcommand_map() -> dict[str, str]:

    Maps both canonical names and aliases so /hermes bg do stuff works
    the same as /hermes background do stuff.
+
+    Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
+    routes through the plugin handler.
    """
    overrides = _resolve_config_gates()
    mapping: dict[str, str] = {}
@@ -760,6 +821,9 @@ def slack_subcommand_map() -> dict[str, str]:
        mapping[cmd.name] = f"/{cmd.name}"
        for alias in cmd.aliases:
            mapping[alias] = f"/{alias}"
+    for name, _description, _args_hint in _iter_plugin_command_entries():
+        if name not in mapping:
+            mapping[name] = f"/{name}"
    return mapping


@@ -925,12 +989,22 @@ class SlashCommandCompleter(Completer):
                    display_meta=meta,
                )

-        # If the user typed @file: or @folder:, delegate to path completions
+        # If the user typed @file: / @folder: (or just @file / @folder with
+        # no colon yet), delegate to path completions.  Accepting the bare
+        # form lets the picker surface directories as soon as the user has
+        # typed `@folder`, without requiring them to first accept the static
+        # `@folder:` hint and re-trigger completion.
        for prefix in ("@file:", "@folder:"):
-            if word.startswith(prefix):
-                path_part = word[len(prefix):] or "."
+            bare = prefix[:-1]
+
+            if word == bare or word.startswith(prefix):
+                want_dir = prefix == "@folder:"
+                path_part = '' if word == bare else word[len(prefix):]
                expanded = os.path.expanduser(path_part)
-                if expanded.endswith("/"):
+
+                if not expanded or expanded == ".":
+                    search_dir, match_prefix = ".", ""
+                elif expanded.endswith("/"):
                    search_dir, match_prefix = expanded, ""
                else:
                    search_dir = os.path.dirname(expanded) or "."
@@ -946,15 +1020,21 @@ class SlashCommandCompleter(Completer):
                for entry in sorted(entries):
                    if match_prefix and not entry.lower().startswith(prefix_lower):
                        continue
-                    if count >= limit:
-                        break
                    full_path = os.path.join(search_dir, entry)
                    is_dir = os.path.isdir(full_path)
+                    # `@folder:` must only surface directories; `@file:` only
+                    # regular files.  Without this filter `@folder:` listed
+                    # every .env / .gitignore in the cwd, defeating the
+                    # explicit prefix and confusing users expecting a
+                    # directory picker.
+                    if want_dir != is_dir:
+                        continue
+                    if count >= limit:
+                        break
                    display_path = os.path.relpath(full_path)
                    suffix = "/" if is_dir else ""
-                    kind = "folder" if is_dir else "file"
                    meta = "dir" if is_dir else _file_size_label(full_path)
-                    completion = f"@{kind}:{display_path}{suffix}"
+                    completion = f"{prefix}{display_path}{suffix}"
                    yield Completion(
                        completion,
                        start_position=-len(word),
@@ -13,6 +13,7 @@ This module provides:
 """

 import copy
+import logging
 import os
 import platform
 import re
@@ -24,6 +25,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple

+logger = logging.getLogger(__name__)

 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
@@ -359,6 +361,15 @@ DEFAULT_CONFIG = {
        # to finish, then interrupts any remaining runs after the timeout.
        # 0 = no drain, interrupt immediately.
        "restart_drain_timeout": 60,
+        # Max app-level retry attempts for API errors (connection drops,
+        # provider timeouts, 5xx, etc.) before the agent surfaces the
+        # failure.  The OpenAI SDK already does its own low-level retries
+        # (max_retries=2 default) for transient network errors; this is
+        # the Hermes-level retry loop that wraps the whole call.  Lower
+        # this to 1 if you use fallback providers and want fast failover
+        # on flaky primaries; raise it if you prefer to tolerate longer
+        # provider hiccups on a single provider.
+        "api_max_retries": 3,
        "service_tier": "",
        # Tool-use enforcement: injects system prompt guidance that tells the
        # model to actually call tools instead of describing intended actions.
@@ -373,7 +384,11 @@ DEFAULT_CONFIG = {
        # Periodic "still working" notification interval (seconds).
        # Sends a status message every N seconds so the user knows the
        # agent hasn't died during long tasks.  0 = disable notifications.
-        "gateway_notify_interval": 600,
+        # Lower values mean faster feedback on slow tasks but more chat
+        # noise; 180s is a compromise that catches spinning weak-model runs
+        # (60+ tool iterations with tiny output) before users assume the
+        # bot is dead and /restart.
+        "gateway_notify_interval": 180,
    },
    
    "terminal": {
@@ -385,6 +400,32 @@ DEFAULT_CONFIG = {
        # (terminal and execute_code).  Skill-declared required_environment_variables
        # are passed through automatically; this list is for non-skill use cases.
        "env_passthrough": [],
+        # Extra files to source in the login shell when building the
+        # per-session environment snapshot.  Use this when tools like nvm,
+        # pyenv, asdf, or custom PATH entries are registered by files that
+        # a bash login shell would skip — most commonly ``~/.bashrc``
+        # (bash doesn't source bashrc in non-interactive login mode) or
+        # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
+        # Paths support ``~`` / ``${VAR}``. Missing files are silently
+        # skipped. When empty, Hermes auto-sources ``~/.profile``,
+        # ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
+        # snapshot shell is bash (this is the ``auto_source_bashrc``
+        # behaviour — disable with that key if you want strict login-only
+        # semantics).
+        "shell_init_files": [],
+        # When true (default), Hermes sources the user's shell rc files
+        # (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
+        # login shell used to build the environment snapshot. This
+        # captures PATH additions, shell functions, and aliases — which a
+        # plain ``bash -l -c`` would otherwise miss because bash skips
+        # bashrc in non-interactive login mode, and because a default
+        # Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
+        # sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
+        # because ``n`` / ``nvm`` / ``asdf`` installers typically write
+        # their PATH exports there without an interactivity guard. Turn
+        # this off if your rc files misbehave when sourced
+        # non-interactively (e.g. one that hard-exits on TTY checks).
+        "auto_source_bashrc": True,
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "docker_forward_env": [],
        # Explicit environment variables to set inside Docker containers.
@@ -474,13 +515,6 @@ DEFAULT_CONFIG = {
        },
    },

-    "smart_model_routing": {
-        "enabled": False,
-        "max_simple_chars": 160,
-        "max_simple_words": 28,
-        "cheap_model": {},
-    },
-    
    # Auxiliary model config — provider:model for each side task.
    # Format: provider is the provider name, model is the model slug.
    # "auto" for provider = auto-detect best available provider.
@@ -494,6 +528,7 @@ DEFAULT_CONFIG = {
            "base_url": "",        # direct OpenAI-compatible endpoint (takes precedence over provider)
            "api_key": "",         # API key for base_url (falls back to OPENAI_API_KEY)
            "timeout": 120,        # seconds — LLM API call timeout; vision payloads need generous timeout
+            "extra_body": {},      # OpenAI-compatible provider-specific request fields
            "download_timeout": 30,  # seconds — image HTTP download timeout; increase for slow connections
        },
        "web_extract": {
@@ -502,6 +537,7 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
            "timeout": 360,        # seconds (6min) — per-attempt LLM summarization timeout; increase for slow local models
+            "extra_body": {},
        },
        "compression": {
            "provider": "auto",
@@ -509,6 +545,7 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
            "timeout": 120,        # seconds — compression summarises large contexts; increase for local models
+            "extra_body": {},
        },
        "session_search": {
            "provider": "auto",
@@ -516,6 +553,8 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
            "timeout": 30,
+            "extra_body": {},
+            "max_concurrency": 3,  # Clamp parallel summaries to avoid request-burst 429s on small providers
        },
        "skills_hub": {
            "provider": "auto",
@@ -523,6 +562,7 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
            "timeout": 30,
+            "extra_body": {},
        },
        "approval": {
            "provider": "auto",
@@ -530,6 +570,7 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
            "timeout": 30,
+            "extra_body": {},
        },
        "mcp": {
            "provider": "auto",
@@ -537,6 +578,7 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
            "timeout": 30,
+            "extra_body": {},
        },
        "flush_memories": {
            "provider": "auto",
@@ -544,6 +586,7 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
            "timeout": 30,
+            "extra_body": {},
        },
        "title_generation": {
            "provider": "auto",
@@ -551,6 +594,7 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
            "timeout": 30,
+            "extra_body": {},
        },
    },
    
@@ -562,9 +606,14 @@ DEFAULT_CONFIG = {
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
+        "final_response_markdown": "strip",  # render | strip | raw
        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
+        "user_message_preview": {  # CLI: how many submitted user-message lines to echo back in scrollback
+            "first_lines": 2,
+            "last_lines": 2,
+        },
        "interim_assistant_messages": True,  # Gateway: show natural mid-turn assistant status messages
        "tool_progress_command": False,  # Enable /verbose command in messaging gateway
        "tool_progress_overrides": {},  # DEPRECATED — use display.platforms instead
@@ -583,6 +632,10 @@ DEFAULT_CONFIG = {
    },
    
    # Text-to-speech configuration
+    # Each provider supports an optional `max_text_length:` override for the
+    # per-request input-character cap. Omit it to use the provider's documented
+    # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
+    # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
    "tts": {
        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
        "edge": {
@@ -635,6 +688,7 @@ DEFAULT_CONFIG = {
        "record_key": "ctrl+b",
        "max_recording_seconds": 120,
        "auto_tts": False,
+        "beep_enabled": True,         # Play record start/stop beeps in CLI voice mode
        "silence_threshold": 200,     # RMS below this = silence (0-32767)
        "silence_duration": 3.0,      # Seconds of silence before auto-stop
    },
@@ -677,10 +731,26 @@ DEFAULT_CONFIG = {
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
+        # When delegate_task narrows child toolsets explicitly, preserve any
+        # MCP toolsets the parent already has enabled. On by default so
+        # narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
+        # extras" without silently stripping MCP tools the parent already has.
+        # Set to false for strict intersection.
+        "inherit_mcp_toolsets": True,
        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
                               # independent of the parent's max_iterations)
+        "child_timeout_seconds": 600,  # wall-clock timeout for each child agent (floor 30s,
+                                       # no ceiling). High-reasoning models on large tasks
+                                       # (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
+                                       # raise if children time out before producing output.
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
                                 # "low", "minimal", "none" (empty = inherit parent's level)
+        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
+        # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
+        # and _get_orchestrator_enabled).  Values are clamped to [1, 3] with a
+        # warning log if out of range.
+        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
+        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -693,6 +763,31 @@ DEFAULT_CONFIG = {
    # always goes to ~/.hermes/skills/.
    "skills": {
        "external_dirs": [],   # e.g. ["~/.agents/skills", "/shared/team-skills"]
+        # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
+        # content with the absolute skill directory and the active session id
+        # before the agent sees it.  Lets skill authors reference bundled
+        # scripts without the agent having to join paths.
+        "template_vars": True,
+        # Pre-execute inline shell snippets written as !`cmd` in SKILL.md
+        # body.  Their stdout is inlined into the skill message before the
+        # agent reads it, so skills can inject dynamic context (dates, git
+        # state, detected tool versions, …).  Off by default because any
+        # content from the skill author runs on the host without approval;
+        # only enable for skill sources you trust.
+        "inline_shell": False,
+        # Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
+        "inline_shell_timeout": 10,
+        # Run the keyword/pattern security scanner on skills the agent
+        # writes via skill_manage (create/edit/patch).  Off by default
+        # because the agent can already execute the same code paths via
+        # terminal() with no gate, so the scan adds friction (blocks
+        # skills that mention risky keywords in prose) without meaningful
+        # security.  Turn on if you want the belt-and-suspenders — a
+        # dangerous verdict will then surface as a tool error to the
+        # agent, which can retry with the flagged content removed.
+        # External hub installs (trusted/community sources) are always
+        # scanned regardless of this setting.
+        "guard_agent_created": False,
    },

    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -763,6 +858,21 @@ DEFAULT_CONFIG = {
    "command_allowlist": [],
    # User-defined quick commands that bypass the agent loop (type: exec only)
    "quick_commands": {},
+
+    # Shell-script hooks — declarative bridge that invokes shell scripts
+    # on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call,
+    # subagent_stop, etc.).  Each entry maps an event name to a list of
+    # {matcher, command, timeout} dicts.  First registration of a new
+    # command prompts the user for consent; subsequent runs reuse the
+    # stored approval from ~/.hermes/shell-hooks-allowlist.json.
+    # See `website/docs/user-guide/features/hooks.md` for schema + examples.
+    "hooks": {},
+
+    # Auto-accept shell-hook registrations without a TTY prompt.  Also
+    # toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1.
+    # Gateway / cron / non-interactive runs need this (or one of the other
+    # channels) to pick up newly-added hooks.
+    "hooks_auto_accept": False,
    # Custom personalities — add your own entries here
    # Supports string format: {"name": "system prompt"}
    # Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}}
@@ -770,6 +880,7 @@ DEFAULT_CONFIG = {

    # Pre-exec security scanning via tirith
    "security": {
+        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
        "redact_secrets": True,
        "tirith_enabled": True,
        "tirith_path": "tirith",
@@ -786,6 +897,11 @@ DEFAULT_CONFIG = {
        # Wrap delivered cron responses with a header (task name) and footer
        # ("The agent cannot see this message").  Set to false for clean output.
        "wrap_response": True,
+        # Maximum number of due jobs to run in parallel per tick.
+        # null/0 = unbounded (limited only by thread count).
+        # 1 = serial (pre-v0.9 behaviour).
+        # Also overridable via HERMES_CRON_MAX_PARALLEL env var.
+        "max_parallel_jobs": None,
    },

    # execute_code settings — controls the tool used for programmatic tool calls.
@@ -818,8 +934,36 @@ DEFAULT_CONFIG = {
        "force_ipv4": False,
    },

+    # Session storage — controls automatic cleanup of ~/.hermes/state.db.
+    # state.db accumulates every session, message, tool call, and FTS5 index
+    # entry forever.  Without auto-pruning, a heavy user (gateway + cron)
+    # reports 384MB+ databases with 68K+ messages, which slows down FTS5
+    # inserts, /resume listing, and insights queries.
+    "sessions": {
+        # When true, prune ended sessions older than retention_days once
+        # per (roughly) min_interval_hours at CLI/gateway/cron startup.
+        # Only touches ended sessions — active sessions are always preserved.
+        # Default false: session history is valuable for search recall, and
+        # silently deleting it could surprise users.  Opt in explicitly.
+        "auto_prune": False,
+        # How many days of ended-session history to keep.  Matches the
+        # default of ``hermes sessions prune``.
+        "retention_days": 90,
+        # VACUUM after a prune that actually deleted rows.  SQLite does not
+        # reclaim disk space on DELETE — freed pages are just reused on
+        # subsequent INSERTs — so without VACUUM the file stays bloated
+        # even after pruning.  VACUUM blocks writes for a few seconds per
+        # 100MB, so it only runs at startup, and only when prune deleted
+        # ≥1 session.
+        "vacuum_after_prune": True,
+        # Minimum hours between auto-maintenance runs (avoids repeating
+        # the sweep on every CLI invocation).  Tracked via state_meta in
+        # state.db itself, so it's shared across all processes.
+        "min_interval_hours": 24,
+    },
+
    # Config schema version - bump this when adding new required fields
-    "_config_version": 19,
+    "_config_version": 22,
 }

 # =============================================================================
@@ -975,6 +1119,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "STEPFUN_API_KEY": {
+        "description": "StepFun Step Plan API key",
+        "prompt": "StepFun Step Plan API key",
+        "url": "https://platform.stepfun.com/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "STEPFUN_BASE_URL": {
+        "description": "StepFun Step Plan base URL override",
+        "prompt": "StepFun Step Plan base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "ARCEEAI_API_KEY": {
        "description": "Arcee AI API key",
        "prompt": "Arcee AI API key",
@@ -1148,7 +1308,7 @@ OPTIONAL_ENV_VARS = {
        "advanced": True,
    },
    "XIAOMI_API_KEY": {
-        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
+        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
        "prompt": "Xiaomi MiMo API Key",
        "url": "https://platform.xiaomimimo.com",
        "password": True,
@@ -1842,12 +2002,53 @@ def _normalize_custom_provider_entry(
    if not isinstance(entry, dict):
        return None

+    # Accept camelCase aliases commonly used in hand-written configs.
+    _CAMEL_ALIASES: Dict[str, str] = {
+        "apiKey": "api_key",
+        "baseUrl": "base_url",
+        "apiMode": "api_mode",
+        "keyEnv": "key_env",
+        "defaultModel": "default_model",
+        "contextLength": "context_length",
+        "rateLimitDelay": "rate_limit_delay",
+    }
+    _KNOWN_KEYS = {
+        "name", "api", "url", "base_url", "api_key", "key_env",
+        "api_mode", "transport", "model", "default_model", "models",
+        "context_length", "rate_limit_delay",
+    }
+    for camel, snake in _CAMEL_ALIASES.items():
+        if camel in entry and snake not in entry:
+            logger.warning(
+                "providers.%s: camelCase key '%s' auto-mapped to '%s' "
+                "(use snake_case to avoid this warning)",
+                provider_key or "?", camel, snake,
+            )
+            entry[snake] = entry[camel]
+    unknown = set(entry.keys()) - _KNOWN_KEYS - set(_CAMEL_ALIASES.keys())
+    if unknown:
+        logger.warning(
+            "providers.%s: unknown config keys ignored: %s",
+            provider_key or "?", ", ".join(sorted(unknown)),
+        )
+
+    from urllib.parse import urlparse
+
    base_url = ""
-    for url_key in ("api", "url", "base_url"):
+    for url_key in ("base_url", "url", "api"):
        raw_url = entry.get(url_key)
        if isinstance(raw_url, str) and raw_url.strip():
-            base_url = raw_url.strip()
-            break
+            candidate = raw_url.strip()
+            parsed = urlparse(candidate)
+            if parsed.scheme and parsed.netloc:
+                base_url = candidate
+                break
+            else:
+                logger.warning(
+                    "providers.%s: '%s' value '%s' is not a valid URL "
+                    "(no scheme or host) — skipped",
+                    provider_key or "?", url_key, candidate,
+                )
    if not base_url:
        return None

@@ -1888,6 +2089,14 @@ def _normalize_custom_provider_entry(
    models = entry.get("models")
    if isinstance(models, dict) and models:
        normalized["models"] = models
+    elif isinstance(models, list) and models:
+        # Hand-edited configs (and older Hermes versions) write ``models`` as
+        # a plain list of model ids. Preserve them by converting to the dict
+        # shape downstream code expects; otherwise normalize silently drops
+        # the list and /model shows the provider with (0) models.
+        normalized["models"] = {
+            str(m): {} for m in models if isinstance(m, str) and m.strip()
+        }

    context_length = entry.get("context_length")
    if isinstance(context_length, int) and context_length > 0:
@@ -1986,6 +2195,7 @@ _KNOWN_ROOT_KEYS = {
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
    "auxiliary", "custom_providers", "context", "memory", "gateway",
+    "sessions",
 }

 # Valid fields inside a custom_providers list entry
@@ -2143,7 +2353,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
    if not issues:
        return

-    import sys
    lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
    for ci in issues:
        marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
@@ -2158,7 +2367,6 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
    These env vars are deprecated — the canonical setting is terminal.cwd
    in config.yaml.  Prints a migration hint to stderr.
    """
-    import os, sys
    messaging_cwd = os.environ.get("MESSAGING_CWD")
    terminal_cwd_env = os.environ.get("TERMINAL_CWD")

@@ -2476,6 +2684,71 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                    else:
                        print("  ✓ Removed unused compression.summary_* keys")

+    # ── Version 20 → 21: plugins are now opt-in; grandfather existing user plugins ──
+    # The loader now requires plugins to appear in ``plugins.enabled`` before
+    # loading. Existing installs had all discovered plugins loading by default
+    # (minus anything in ``plugins.disabled``). To avoid silently breaking
+    # those setups on upgrade, populate ``plugins.enabled`` with the set of
+    # currently-installed user plugins that aren't already disabled.
+    #
+    # Bundled plugins (shipped in the repo itself) are NOT grandfathered —
+    # they ship off for everyone, including existing users, so any user who
+    # wants one has to opt in explicitly.
+    if current_ver < 21:
+        config = read_raw_config()
+        plugins_cfg = config.get("plugins")
+        if not isinstance(plugins_cfg, dict):
+            plugins_cfg = {}
+        # Only migrate if the enabled allow-list hasn't been set yet.
+        if "enabled" not in plugins_cfg:
+            disabled = plugins_cfg.get("disabled", []) or []
+            if not isinstance(disabled, list):
+                disabled = []
+            disabled_set = set(disabled)
+
+            # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins.
+            grandfathered: List[str] = []
+            try:
+                user_plugins_dir = get_hermes_home() / "plugins"
+                if user_plugins_dir.is_dir():
+                    for child in sorted(user_plugins_dir.iterdir()):
+                        if not child.is_dir():
+                            continue
+                        manifest_file = child / "plugin.yaml"
+                        if not manifest_file.exists():
+                            manifest_file = child / "plugin.yml"
+                        if not manifest_file.exists():
+                            continue
+                        try:
+                            with open(manifest_file) as _mf:
+                                manifest = yaml.safe_load(_mf) or {}
+                        except Exception:
+                            manifest = {}
+                        name = manifest.get("name") or child.name
+                        if name in disabled_set:
+                            continue
+                        grandfathered.append(name)
+            except Exception:
+                grandfathered = []
+
+            plugins_cfg["enabled"] = grandfathered
+            config["plugins"] = plugins_cfg
+            save_config(config)
+            results["config_added"].append(
+                f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)"
+            )
+            if not quiet:
+                if grandfathered:
+                    print(
+                        f"  ✓ Plugins now opt-in: grandfathered "
+                        f"{len(grandfathered)} existing plugin(s) into plugins.enabled"
+                    )
+                else:
+                    print(
+                        "  ✓ Plugins now opt-in: no existing plugins to grandfather. "
+                        "Use `hermes plugins enable <name>` to activate."
+                    )
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -2878,19 +3151,6 @@ _FALLBACK_COMMENT = """
 # fallback_model:
 #   provider: openrouter
 #   model: anthropic/claude-sonnet-4
-#
-# ── Smart Model Routing ────────────────────────────────────────────────
-# Optional cheap-vs-strong routing for simple turns.
-# Keeps the primary model for complex work, but can route short/simple
-# messages to a cheaper model across providers.
-#
-# smart_model_routing:
-#   enabled: true
-#   max_simple_chars: 160
-#   max_simple_words: 28
-#   cheap_model:
-#     provider: openrouter
-#     model: google/gemini-2.5-flash
 """


@@ -2922,19 +3182,6 @@ _COMMENTED_SECTIONS = """
 # fallback_model:
 #   provider: openrouter
 #   model: anthropic/claude-sonnet-4
-#
-# ── Smart Model Routing ────────────────────────────────────────────────
-# Optional cheap-vs-strong routing for simple turns.
-# Keeps the primary model for complex work, but can route short/simple
-# messages to a cheaper model across providers.
-#
-# smart_model_routing:
-#   enabled: true
-#   max_simple_chars: 160
-#   max_simple_words: 28
-#   cheap_model:
-#     provider: openrouter
-#     model: google/gemini-2.5-flash
 """


@@ -2964,7 +3211,7 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    if not fb or not (fb.get("provider") and fb.get("model")):
+    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -3127,7 +3374,6 @@ def _check_non_ascii_credential(key: str, value: str) -> str:
            bad_chars.append(f"  position {i}: {ch!r} (U+{ord(ch):04X})")
    sanitized = value.encode("ascii", errors="ignore").decode("ascii")

-    import sys
    print(
        f"\n  Warning: {key} contains non-ASCII characters that will break API requests.\n"
        f"  This usually happens when copy-pasting from a PDF, rich-text editor,\n"
@@ -3397,6 +3643,10 @@ def show_config():
    print(f"  Personality:  {display.get('personality', 'kawaii')}")
    print(f"  Reasoning:    {'on' if display.get('show_reasoning', False) else 'off'}")
    print(f"  Bell:         {'on' if display.get('bell_on_complete', False) else 'off'}")
+    ump = display.get('user_message_preview', {}) if isinstance(display.get('user_message_preview', {}), dict) else {}
+    ump_first = ump.get('first_lines', 2)
+    ump_last = ump.get('last_lines', 2)
+    print(f"  User preview: first {ump_first} line(s), last {ump_last} line(s)")

    # Terminal
    print()
@@ -13,6 +13,7 @@ import time
 import urllib.error
 import urllib.parse
 import urllib.request
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional

@@ -147,6 +148,14 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
    return (deleted, len(remaining))


+def _best_effort_sweep_expired_pastes() -> None:
+    """Attempt pending-paste cleanup without letting /debug fail offline."""
+    try:
+        _sweep_expired_pastes()
+    except Exception:
+        pass
+
+
 # ---------------------------------------------------------------------------
 # Privacy / delete helpers
 # ---------------------------------------------------------------------------
@@ -314,72 +323,128 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
 # Log file reading
 # ---------------------------------------------------------------------------

-def _resolve_log_path(log_name: str) -> Optional[Path]:
-    """Find the log file for *log_name*, falling back to the .1 rotation.

-    Returns the path if found, or None.
-    """
+@dataclass
+class LogSnapshot:
+    """Single-read snapshot of a log file used by debug-share."""
+
+    path: Optional[Path]
+    tail_text: str
+    full_text: Optional[str]
+
+
+def _primary_log_path(log_name: str) -> Optional[Path]:
+    """Where *log_name* would live if present. Doesn't check existence."""
    from hermes_cli.logs import LOG_FILES

    filename = LOG_FILES.get(log_name)
-    if not filename:
+    return (get_hermes_home() / "logs" / filename) if filename else None
+
+
+def _resolve_log_path(log_name: str) -> Optional[Path]:
+    """Find the log file for *log_name*, falling back to the .1 rotation.
+
+    Returns the first non-empty candidate (primary, then .1), or None.
+    Callers distinguish 'empty primary' from 'truly missing' via
+    :func:`_primary_log_path`.
+    """
+    primary = _primary_log_path(log_name)
+    if primary is None:
        return None

-    log_dir = get_hermes_home() / "logs"
-    primary = log_dir / filename
    if primary.exists() and primary.stat().st_size > 0:
        return primary

-    # Fall back to the most recent rotated file (.1).
-    rotated = log_dir / f"{filename}.1"
+    rotated = primary.parent / f"{primary.name}.1"
    if rotated.exists() and rotated.stat().st_size > 0:
        return rotated

    return None


-def _read_log_tail(log_name: str, num_lines: int) -> str:
-    """Read the last *num_lines* from a log file, or return a placeholder."""
-    from hermes_cli.logs import _read_last_n_lines
+def _capture_log_snapshot(
+    log_name: str,
+    *,
+    tail_lines: int,
+    max_bytes: int = _MAX_LOG_BYTES,
+) -> LogSnapshot:
+    """Capture a log once and derive summary/full-log views from it.

-    log_path = _resolve_log_path(log_name)
-    if log_path is None:
-        return "(file not found)"
-
-    try:
-        lines = _read_last_n_lines(log_path, num_lines)
-        return "".join(lines).rstrip("\n")
-    except Exception as exc:
-        return f"(error reading: {exc})"
-
-
-def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
-    """Read a log file for standalone upload.
-
-    Returns the file content (last *max_bytes* if truncated), or None if the
-    file doesn't exist or is empty.
+    The report tail and standalone log upload must come from the same file
+    snapshot. Otherwise a rotation/truncate between reads can make the report
+    look newer than the uploaded ``agent.log`` paste.
    """
    log_path = _resolve_log_path(log_name)
    if log_path is None:
-        return None
+        primary = _primary_log_path(log_name)
+        tail = "(file empty)" if primary and primary.exists() else "(file not found)"
+        return LogSnapshot(path=None, tail_text=tail, full_text=None)

    try:
        size = log_path.stat().st_size
        if size == 0:
-            return None
+            # race: file was truncated between _resolve_log_path and stat
+            return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)

-        if size <= max_bytes:
-            return log_path.read_text(encoding="utf-8", errors="replace")
-
-        # File is larger than max_bytes — read the tail.
        with open(log_path, "rb") as f:
-            f.seek(size - max_bytes)
-            # Skip partial line at the seek point.
-            f.readline()
-            content = f.read().decode("utf-8", errors="replace")
-        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
-    except Exception:
-        return None
+            if size <= max_bytes:
+                raw = f.read()
+                truncated = False
+            else:
+                # Read from the end until we have enough bytes for the
+                # standalone upload and enough newline context to render the
+                # summary tail from the same snapshot.
+                chunk_size = 8192
+                pos = size
+                chunks: list[bytes] = []
+                total = 0
+                newline_count = 0
+
+                while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
+                    read_size = min(chunk_size, pos)
+                    pos -= read_size
+                    f.seek(pos)
+                    chunk = f.read(read_size)
+                    chunks.insert(0, chunk)
+                    total += len(chunk)
+                    newline_count += chunk.count(b"\n")
+                    chunk_size = min(chunk_size * 2, 65536)
+
+                raw = b"".join(chunks)
+                truncated = pos > 0
+
+        full_raw = raw
+        if truncated and len(full_raw) > max_bytes:
+            cut = len(full_raw) - max_bytes
+            # Check whether the cut lands exactly on a line boundary.  If the
+            # byte just before the cut position is a newline the first retained
+            # byte starts a complete line and we should keep it.  Only drop a
+            # partial first line when we're genuinely mid-line.
+            on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
+            full_raw = full_raw[cut:]
+            if not on_boundary and b"\n" in full_raw:
+                full_raw = full_raw.split(b"\n", 1)[1]
+
+        all_text = raw.decode("utf-8", errors="replace")
+        tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
+
+        full_text = full_raw.decode("utf-8", errors="replace")
+        if truncated:
+            full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
+
+        return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
+    except Exception as exc:
+        return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
+
+
+def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
+    """Capture all logs used by debug-share exactly once."""
+    errors_lines = min(log_lines, 100)
+    return {
+        "agent": _capture_log_snapshot("agent", tail_lines=log_lines),
+        "errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
+        "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
+    }


 # ---------------------------------------------------------------------------
@@ -405,7 +470,12 @@ def _capture_dump() -> str:
    return capture.getvalue()


-def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
+def collect_debug_report(
+    *,
+    log_lines: int = 200,
+    dump_text: str = "",
+    log_snapshots: Optional[dict[str, LogSnapshot]] = None,
+) -> str:
    """Build the summary debug report: system dump + log tails.

    Parameters
@@ -424,19 +494,22 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
        dump_text = _capture_dump()
    buf.write(dump_text)

+    if log_snapshots is None:
+        log_snapshots = _capture_default_log_snapshots(log_lines)
+
    # ── Recent log tails (summary only) ──────────────────────────────────
    buf.write("\n\n")
    buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
-    buf.write(_read_log_tail("agent", log_lines))
+    buf.write(log_snapshots["agent"].tail_text)
    buf.write("\n\n")

    errors_lines = min(log_lines, 100)
    buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("errors", errors_lines))
+    buf.write(log_snapshots["errors"].tail_text)
    buf.write("\n\n")

    buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("gateway", errors_lines))
+    buf.write(log_snapshots["gateway"].tail_text)
    buf.write("\n")

    return buf.getvalue()
@@ -448,6 +521,8 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:

 def run_debug_share(args):
    """Collect debug report + full logs, upload each, print URLs."""
+    _best_effort_sweep_expired_pastes()
+
    log_lines = getattr(args, "lines", 200)
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)
@@ -459,10 +534,15 @@ def run_debug_share(args):

    # Capture dump once — prepended to every paste for context.
    dump_text = _capture_dump()
+    log_snapshots = _capture_default_log_snapshots(log_lines)

-    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
-    agent_log = _read_full_log("agent")
-    gateway_log = _read_full_log("gateway")
+    report = collect_debug_report(
+        log_lines=log_lines,
+        dump_text=dump_text,
+        log_snapshots=log_snapshots,
+    )
+    agent_log = log_snapshots["agent"].full_text
+    gateway_log = log_snapshots["gateway"].full_text

    # Prepend dump header to each full log so every paste is self-contained.
    if agent_log:
--- a/Show More
+++ b/Show More