Pluginify provider/platform/terminal backends

Move provider adapters (anthropic, bedrock, azure), platform adapters (telegram, slack, discord, feishu, dingtalk, matrix), and terminal backends (modal, daytona) out of core into plugins/ workspace members. Core references them via the plugin registries (get_provider_namespace / get_provider_service / get_tool_provider / get_credential_pool_hook) instead of direct imports.

- Provider/platform/terminal adapters relocated under plugins/; pyproject extras now reference workspace members, not inline dep specs.
- Anthropic credential discovery moved into a credential_pool_hook, including the api_key_path_explicit OAuth-masquerade guard.
- Vercel AI Gateway + Vercel Sandbox removed (upstream deletion).
- Terminal backends resolve ModalEnvironment / DaytonaEnvironment lazily from the plugin registry.
- uv.lock regenerated against the pluginified workspace (233 packages).

Verified: zero dead imports of relocated modules in core (import smoke test + exhaustive rename-map grep); credential_pool test suite green.
Merge pull request #34097 from kshitijk4poor/salvage/memori-trace-messages
2026-05-28 18:01:50 -04:00 · 2026-05-28 13:56:07 -07:00 · 2026-05-29 02:16:43 +05:30 · 2026-05-29 02:16:43 +05:30 · 2026-05-28 13:17:58 -07:00 · 2026-05-28 13:07:20 -07:00
1133 changed files with 88884 additions and 120764 deletions
@@ -417,9 +417,9 @@ IMAGE_TOOLS_DEBUG=false
 # Default STT provider is "local" (faster-whisper) — runs on your machine, no API key needed.
 # Install with: pip install faster-whisper
 # Model downloads automatically on first use (~150 MB for "base").
-# To use cloud providers instead, set GROQ_API_KEY, VOICE_TOOLS_OPENAI_KEY, or ELEVENLABS_API_KEY above.
-# Provider priority: local > groq > openai > mistral > xai > elevenlabs
-# Configure in config.yaml: stt.provider: local | groq | openai | mistral | xai | elevenlabs
+# To use cloud providers instead, set GROQ_API_KEY or VOICE_TOOLS_OPENAI_KEY above.
+# Provider priority: local > groq > openai
+# Configure in config.yaml: stt.provider: local | groq | openai

 # =============================================================================
 # STT ADVANCED OVERRIDES (optional)
@@ -427,12 +427,10 @@ IMAGE_TOOLS_DEBUG=false
 # Override default STT models per provider (normally set via stt.model in config.yaml)
 # STT_GROQ_MODEL=whisper-large-v3-turbo
 # STT_OPENAI_MODEL=whisper-1
-# STT_ELEVENLABS_MODEL=scribe_v2

 # Override STT provider endpoints (for proxies or self-hosted instances)
 # GROQ_BASE_URL=https://api.groq.com/openai/v1
 # STT_OPENAI_BASE_URL=https://api.openai.com/v1
-# ELEVENLABS_STT_BASE_URL=https://api.elevenlabs.io/v1

 # =============================================================================
 # MICROSOFT TEAMS INTEGRATION
@@ -22,7 +22,12 @@ concurrency:

 jobs:
  deploy-vercel:
-    if: github.event_name == 'release'
+    # Triggered automatically on release publish (production cuts) and
+    # manually via `gh workflow run deploy-site.yml` when an out-of-band
+    # main commit needs to ship live before the next release tag — e.g.
+    # a skills-index PR that doesn't touch website/** paths and so
+    # doesn't auto-deploy via the deploy-docs path.
+    if: github.event_name == 'release' || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    steps:
      - name: Trigger Vercel Deploy
@@ -1,342 +0,0 @@
-name: Desktop Release
-
-on:
-  push:
-    branches: [main]
-  release:
-    types: [published]
-  workflow_dispatch:
-    inputs:
-      channel:
-        description: Release channel to build
-        required: true
-        default: nightly
-        type: choice
-        options:
-          - nightly
-          - stable
-      release_tag:
-        description: "Required when channel=stable (example: v2026.5.5)"
-        required: false
-        type: string
-
-permissions:
-  contents: write
-
-concurrency:
-  group: desktop-release-${{ github.ref }}
-  cancel-in-progress: false
-
-jobs:
-  prepare:
-    if: github.repository == 'NousResearch/hermes-agent'
-    runs-on: ubuntu-latest
-    outputs:
-      channel: ${{ steps.meta.outputs.channel }}
-      release_name: ${{ steps.meta.outputs.release_name }}
-      release_tag: ${{ steps.meta.outputs.release_tag }}
-      version: ${{ steps.meta.outputs.version }}
-      is_stable: ${{ steps.meta.outputs.is_stable }}
-    steps:
-      - id: meta
-        env:
-          EVENT_NAME: ${{ github.event_name }}
-          INPUT_CHANNEL: ${{ github.event.inputs.channel }}
-          INPUT_RELEASE_TAG: ${{ github.event.inputs.release_tag }}
-          RELEASE_TAG_FROM_EVENT: ${{ github.event.release.tag_name }}
-          GITHUB_SHA: ${{ github.sha }}
-        run: |
-          set -euo pipefail
-
-          channel="nightly"
-          release_tag="desktop-nightly"
-          is_stable="false"
-
-          if [[ "$EVENT_NAME" == "release" ]]; then
-            channel="stable"
-            release_tag="$RELEASE_TAG_FROM_EVENT"
-            is_stable="true"
-          elif [[ "$EVENT_NAME" == "workflow_dispatch" && "$INPUT_CHANNEL" == "stable" ]]; then
-            channel="stable"
-            release_tag="$INPUT_RELEASE_TAG"
-            is_stable="true"
-          fi
-
-          if [[ "$channel" == "stable" ]]; then
-            if [[ -z "$release_tag" ]]; then
-              echo "Stable desktop releases require a release tag." >&2
-              exit 1
-            fi
-
-            version="${release_tag#v}"
-            release_name="Hermes Desktop ${release_tag}"
-          else
-            stamp="$(date -u +%Y%m%d)"
-            short_sha="${GITHUB_SHA::7}"
-            version="0.0.0-nightly.${stamp}.${short_sha}"
-            release_name="Hermes Desktop Nightly ${stamp}-${short_sha}"
-          fi
-
-          {
-            echo "channel=$channel"
-            echo "release_name=$release_name"
-            echo "release_tag=$release_tag"
-            echo "version=$version"
-            echo "is_stable=$is_stable"
-          } >> "$GITHUB_OUTPUT"
-
-  build:
-    if: github.repository == 'NousResearch/hermes-agent'
-    needs: prepare
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - platform: mac
-            runner: macos-latest
-            build_args: --mac dmg zip
-          - platform: win
-            runner: windows-latest
-            build_args: --win nsis msi
-    runs-on: ${{ matrix.runner }}
-    env:
-      DESKTOP_CHANNEL: ${{ needs.prepare.outputs.channel }}
-      DESKTOP_VERSION: ${{ needs.prepare.outputs.version }}
-      MAC_CSC_LINK: ${{ secrets.CSC_LINK }}
-      MAC_CSC_KEY_PASSWORD: ${{ secrets.CSC_KEY_PASSWORD }}
-      APPLE_API_KEY: ${{ secrets.APPLE_API_KEY }}
-      APPLE_API_KEY_ID: ${{ secrets.APPLE_API_KEY_ID }}
-      APPLE_API_ISSUER: ${{ secrets.APPLE_API_ISSUER }}
-      WIN_CSC_LINK: ${{ secrets.WIN_CSC_LINK }}
-      WIN_CSC_KEY_PASSWORD: ${{ secrets.WIN_CSC_KEY_PASSWORD }}
-    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-
-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
-        with:
-          node-version: 20
-          cache: npm
-          cache-dependency-path: package-lock.json
-
-      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
-        with:
-          python-version: "3.11"
-
-      - name: Enforce signing gates for stable releases
-        if: needs.prepare.outputs.is_stable == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-          missing=()
-
-          if [[ "${{ matrix.platform }}" == "mac" ]]; then
-            [[ -z "${MAC_CSC_LINK:-}" ]] && missing+=("CSC_LINK")
-            [[ -z "${MAC_CSC_KEY_PASSWORD:-}" ]] && missing+=("CSC_KEY_PASSWORD")
-            [[ -z "${APPLE_API_KEY:-}" ]] && missing+=("APPLE_API_KEY")
-            [[ -z "${APPLE_API_KEY_ID:-}" ]] && missing+=("APPLE_API_KEY_ID")
-            [[ -z "${APPLE_API_ISSUER:-}" ]] && missing+=("APPLE_API_ISSUER")
-          else
-            [[ -z "${WIN_CSC_LINK:-}" ]] && missing+=("WIN_CSC_LINK")
-            [[ -z "${WIN_CSC_KEY_PASSWORD:-}" ]] && missing+=("WIN_CSC_KEY_PASSWORD")
-          fi
-
-          if (( ${#missing[@]} > 0 )); then
-            echo "::error::Stable desktop release missing required secrets: ${missing[*]}"
-            exit 1
-          fi
-
-      - name: Install workspace dependencies
-        run: npm ci
-
-      - name: Install TUI dependencies
-        run: npm --prefix ui-tui ci
-
-      - name: Build bundled TUI payload
-        run: npm --prefix ui-tui run build
-
-      - name: Build desktop renderer
-        run: npm --prefix apps/desktop run build
-
-      - name: Map macOS signing credentials
-        if: matrix.platform == 'mac'
-        shell: bash
-        run: |
-          set -euo pipefail
-          has_link=0
-          has_pass=0
-          [[ -n "${MAC_CSC_LINK:-}" ]] && has_link=1
-          [[ -n "${MAC_CSC_KEY_PASSWORD:-}" ]] && has_pass=1
-
-          if [[ $has_link -eq 1 && $has_pass -eq 1 ]]; then
-            echo "CSC_LINK=${MAC_CSC_LINK}" >> "$GITHUB_ENV"
-            echo "CSC_KEY_PASSWORD=${MAC_CSC_KEY_PASSWORD}" >> "$GITHUB_ENV"
-          elif [[ $has_link -eq 1 || $has_pass -eq 1 ]]; then
-            echo "::error::macOS signing secrets are partially configured. Set both CSC_LINK and CSC_KEY_PASSWORD."
-            exit 1
-          fi
-
-      - name: Map Windows signing credentials
-        if: matrix.platform == 'win'
-        shell: bash
-        run: |
-          set -euo pipefail
-          has_link=0
-          has_pass=0
-          [[ -n "${WIN_CSC_LINK:-}" ]] && has_link=1
-          [[ -n "${WIN_CSC_KEY_PASSWORD:-}" ]] && has_pass=1
-
-          if [[ $has_link -eq 1 && $has_pass -eq 1 ]]; then
-            echo "CSC_LINK=${WIN_CSC_LINK}" >> "$GITHUB_ENV"
-            echo "CSC_KEY_PASSWORD=${WIN_CSC_KEY_PASSWORD}" >> "$GITHUB_ENV"
-            echo "CSC_FOR_PULL_REQUEST=true" >> "$GITHUB_ENV"
-          elif [[ $has_link -eq 1 || $has_pass -eq 1 ]]; then
-            echo "::error::Windows signing secrets are partially configured. Set both WIN_CSC_LINK and WIN_CSC_KEY_PASSWORD."
-            exit 1
-          fi
-
-      - name: Build desktop installers
-        shell: bash
-        env:
-          NODE_OPTIONS: --max-old-space-size=16384
-        run: |
-          set -euo pipefail
-          npm --prefix apps/desktop run builder -- \
-            ${{ matrix.build_args }} \
-            --publish never \
-            --config.extraMetadata.version="${DESKTOP_VERSION}" \
-            --config.extraMetadata.desktopChannel="${DESKTOP_CHANNEL}"
-
-      - name: Notarize and staple macOS DMG
-        if: matrix.platform == 'mac' && needs.prepare.outputs.is_stable == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-          dmg_path="$(ls apps/desktop/release/*.dmg | head -n 1)"
-          node apps/desktop/scripts/notarize-artifact.cjs "$dmg_path"
-
-      - name: Validate macOS notarization and Gatekeeper trust
-        if: matrix.platform == 'mac' && needs.prepare.outputs.is_stable == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-          app_path="$(ls -d apps/desktop/release/mac*/Hermes.app | head -n 1)"
-          dmg_path="$(ls apps/desktop/release/*.dmg | head -n 1)"
-          xcrun stapler validate "$app_path"
-          xcrun stapler validate "$dmg_path"
-          spctl --assess --type execute --verbose=4 "$app_path"
-
-      - name: Generate desktop checksums
-        shell: bash
-        run: |
-          set -euo pipefail
-          node <<'EOF'
-          const crypto = require('node:crypto')
-          const fs = require('node:fs')
-          const path = require('node:path')
-
-          const releaseDir = path.resolve('apps/desktop/release')
-          const platform = process.env.PLATFORM
-          const extensions = platform === 'mac' ? ['.dmg', '.zip'] : ['.exe', '.msi']
-          const files = fs
-            .readdirSync(releaseDir)
-            .filter(name => extensions.some(ext => name.endsWith(ext)))
-            .sort()
-
-          if (!files.length) {
-            throw new Error(`No release artifacts were produced for ${platform}`)
-          }
-
-          const lines = files.map(name => {
-            const full = path.join(releaseDir, name)
-            const hash = crypto.createHash('sha256').update(fs.readFileSync(full)).digest('hex')
-            return `${hash}  ${name}`
-          })
-          fs.writeFileSync(path.join(releaseDir, `SHA256SUMS-${platform}.txt`), `${lines.join('\n')}\n`)
-          EOF
-        env:
-          PLATFORM: ${{ matrix.platform }}
-
-      - name: Upload packaged desktop artifacts
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4
-        with:
-          name: desktop-${{ matrix.platform }}
-          path: |
-            apps/desktop/release/*.dmg
-            apps/desktop/release/*.zip
-            apps/desktop/release/*.exe
-            apps/desktop/release/*.msi
-            apps/desktop/release/SHA256SUMS-${{ matrix.platform }}.txt
-          if-no-files-found: error
-
-  publish:
-    if: github.repository == 'NousResearch/hermes-agent'
-    needs: [prepare, build]
-    runs-on: ubuntu-latest
-    env:
-      GH_TOKEN: ${{ github.token }}
-      CHANNEL: ${{ needs.prepare.outputs.channel }}
-      RELEASE_NAME: ${{ needs.prepare.outputs.release_name }}
-      RELEASE_TAG: ${{ needs.prepare.outputs.release_tag }}
-    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-        with:
-          fetch-depth: 0
-
-      - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
-        with:
-          pattern: desktop-*
-          merge-multiple: true
-          path: dist/desktop
-
-      - name: Publish desktop assets to GitHub release
-        shell: bash
-        run: |
-          set -euo pipefail
-          shopt -s globstar nullglob
-
-          files=(
-            dist/desktop/**/*.dmg
-            dist/desktop/**/*.zip
-            dist/desktop/**/*.exe
-            dist/desktop/**/*.msi
-            dist/desktop/**/SHA256SUMS-*.txt
-          )
-
-          if (( ${#files[@]} == 0 )); then
-            echo "No desktop artifacts were downloaded for publishing." >&2
-            exit 1
-          fi
-
-          if [[ "$CHANNEL" == "nightly" ]]; then
-            git tag -f "$RELEASE_TAG" "$GITHUB_SHA"
-            git push origin "refs/tags/$RELEASE_TAG" --force
-
-            notes="Automated nightly desktop build from main. This prerelease is replaced on each new run."
-
-            if gh release view "$RELEASE_TAG" >/dev/null 2>&1; then
-              while IFS= read -r asset_name; do
-                gh release delete-asset "$RELEASE_TAG" "$asset_name" --yes
-              done < <(gh release view "$RELEASE_TAG" --json assets -q '.assets[].name')
-
-              gh release edit "$RELEASE_TAG" \
-                --title "$RELEASE_NAME" \
-                --prerelease \
-                --notes "$notes"
-            else
-              gh release create "$RELEASE_TAG" \
-                --target "$GITHUB_SHA" \
-                --title "$RELEASE_NAME" \
-                --notes "$notes" \
-                --prerelease
-            fi
-          else
-            if ! gh release view "$RELEASE_TAG" >/dev/null 2>&1; then
-              notes="Automated desktop artifacts attached by desktop-release workflow."
-              gh release create "$RELEASE_TAG" \
-                --target "$GITHUB_SHA" \
-                --title "$RELEASE_NAME" \
-                --notes "$notes"
-            fi
-          fi
-
-          gh release upload "$RELEASE_TAG" "${files[@]}" --clobber
@@ -71,6 +71,8 @@ jobs:
          load: true
          platforms: linux/amd64
          tags: ${{ env.IMAGE_NAME }}:test
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
          cache-from: type=gha,scope=docker-amd64
          cache-to: type=gha,mode=max,scope=docker-amd64

@@ -149,6 +151,8 @@ jobs:
          platforms: linux/amd64
          labels: |
            org.opencontainers.image.revision=${{ github.sha }}
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
          outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
          cache-from: type=gha,scope=docker-amd64
          cache-to: type=gha,mode=max,scope=docker-amd64
@@ -203,6 +207,8 @@ jobs:
          load: true
          platforms: linux/arm64
          tags: ${{ env.IMAGE_NAME }}:test
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
          cache-from: type=gha,scope=docker-arm64
          cache-to: type=gha,mode=max,scope=docker-arm64

@@ -228,6 +234,8 @@ jobs:
          platforms: linux/arm64
          labels: |
            org.opencontainers.image.revision=${{ github.sha }}
+          build-args: |
+            HERMES_GIT_SHA=${{ github.sha }}
          outputs: type=image,name=${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
          cache-from: type=gha,scope=docker-arm64
          cache-to: type=gha,mode=max,scope=docker-arm64
@@ -200,3 +200,22 @@ jobs:

      - name: Run footgun checker
        run: python scripts/check-windows-footguns.py --all
+
+  plugin-isolation:
+    # Enforce that core code and core tests never import from plugin packages.
+    # Core must interact with plugins exclusively through the registry layer.
+    # See scripts/check_no_plugin_imports_in_core.py for the rule list.
+    name: Plugin isolation (blocking)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v5
+        with:
+          python-version: "3.11"
+
+      - name: Run plugin isolation checker
+        run: python scripts/check_no_plugin_imports_in_core.py
@@ -6,8 +6,8 @@ on:
    paths:
      - 'ui-tui/package-lock.json'
      - 'ui-tui/package.json'
-      - 'apps/dashboard/package-lock.json'
-      - 'apps/dashboard/package.json'
+      - 'web/package-lock.json'
+      - 'web/package.json'
  workflow_dispatch:
    inputs:
      pr_number:
@@ -28,7 +28,7 @@ concurrency:
 jobs:
  # ── Auto-fix on main ───────────────────────────────────────────────
  # Fires when a push to main touches package.json or package-lock.json
-  # in ui-tui/ or apps/dashboard/. Runs fix-lockfiles and pushes the hash
+  # in ui-tui/ or web/. Runs fix-lockfiles and pushes the hash
  # update commit directly to main so Nix builds never stay broken.
  #
  # Safety invariants:
@@ -110,7 +110,7 @@ jobs:
            # run recompute from the correct package-lock state.
            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
              'ui-tui/package-lock.json' 'ui-tui/package.json' \
-              'apps/dashboard/package-lock.json' 'apps/dashboard/package.json' || true)"
+              'web/package-lock.json' 'web/package.json' || true)"
            if [ -n "$pkg_changed" ]; then
              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
              exit 0
@@ -48,7 +48,7 @@ agent-browser/
 privvy*
 images/
 __pycache__/
-hermes_agent.egg-info/
+*.egg-info
 wandb/
 testlogs

@@ -63,10 +63,6 @@ environments/benchmarks/evals/

 # Web UI build output
 hermes_cli/web_dist/
-apps/desktop/build/
-apps/desktop/dist/
-apps/desktop/release/
-apps/desktop/*.tsbuildinfo

 # Web UI assets — synced from @nous-research/ui at build time via
 # `npm run sync-assets` (see web/package.json).
@@ -82,21 +78,17 @@ mini-swe-agent/
 .nix-stamps/
 result
 website/static/api/skills-index.json
+# skills.json + skills-meta.json are build artifacts emitted by
+# website/scripts/extract-skills.py during prebuild — keep them out of
+# git for the same reason as skills-index.json (large, generated, change
+# every build).
+website/static/api/skills.json
+website/static/api/skills-meta.json
 models-dev-upstream/
-
-# Local editor / agent tooling (machine-specific; keep in global config, not the repo)
-.codex/
-.cursor/
-.gemini/
-.zed/
-.mcp.json
-opencode.json
-config/mcporter.json
-
 hermes_cli/tui_dist/*
 hermes_cli/scripts/
 docs/superpowers/*
 # Working directory for the Hermes Agent's session state (~/.hermes/ at runtime;
 # also created in-repo when an agent operates in this checkout). Plans, audit
 # logs, and per-session caches are never artifacts of the codebase.
 .hermes/
@@ -2,8 +2,6 @@

 Instructions for AI coding assistants and developers working on the hermes-agent codebase.

-**Never give up on the right solution.**
-
 ## Development Environment

 ```bash
@@ -31,7 +29,9 @@ hermes-agent/
 ├── hermes_constants.py   # get_hermes_home(), display_hermes_home() — profile-aware paths
 ├── hermes_logging.py     # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
 ├── batch_runner.py       # Parallel batch processing
+├── _build_backend.py     # Custom PEP 517 build backend — inlines plugin deps at wheel build time
 ├── agent/                # Agent internals (provider adapters, memory, caching, compression, etc.)
+│   └── plugin_registries.py  # Typed capability registries (auth, transport, platform, tool, model_metadata)
 ├── hermes_cli/           # CLI subcommands, setup wizard, plugins loader, skin engine
 ├── tools/                # Tool implementations — auto-discovered via tools/registry.py
 │   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
@@ -41,16 +41,20 @@ hermes-agent/
 │   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
 │   │                     #   yuanbao, webhook, api_server, ...). See ADDING_A_PLATFORM.md.
 │   └── builtin_hooks/    # Extension point for always-registered gateway hooks (none shipped)
-├── plugins/              # Plugin system (see "Plugins" section below)
+├── plugins/              # Plugin packages — uv workspace members (see "Plugins" section)
+│   ├── model-providers/  # anthropic, bedrock, azure-foundry (own pyproject.toml each)
+│   ├── platforms/        # telegram, slack, discord, feishu, dingtalk, matrix
+│   ├── tts/              # Text-to-speech plugin
+│   ├── stt/              # Speech-to-text plugin
+│   ├── image_gen/        # FAL image generation
+│   ├── terminals/        # daytona, modal, vercel
+│   ├── web/              # exa, firecrawl, parallel
 │   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
 │   ├── context_engine/   # Context-engine plugins
-│   ├── model-providers/  # Inference backend plugins (openrouter, anthropic, gmi, ...)
 │   ├── kanban/           # Multi-agent board dispatcher + worker plugin
 │   ├── hermes-achievements/  # Gamified achievement tracking
 │   ├── observability/    # Metrics / traces / logs plugin
-│   ├── image_gen/        # Image-generation providers
-│   └── <others>/         # disk-cleanup, example-dashboard, google_meet, platforms,
-│                         #   spotify, strike-freedom-cockpit, ...
+│   └── <others>/         # dashboard, google_meet, spotify, strike-freedom-cockpit, ...
 ├── optional-skills/      # Heavier/niche skills shipped but NOT active by default
 ├── skills/               # Built-in skills bundled with the repo
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
@@ -68,29 +72,6 @@ hermes-agent/
 `gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
 Browse with `hermes logs [--follow] [--level ...] [--session ...]`.

-## TypeScript Style
-
-Applies to TypeScript across Hermes: desktop, TUI, website, and future TS packages.
-
-- Prefer small nanostores over component state when state is shared, reused, or read by distant UI.
-- Let each feature own its atoms. Chat state belongs near chat, shell state near shell, shared state in `src/store`.
-- Components that render from an atom should use `useStore`. Non-rendering actions should read with `$atom.get()`.
-- Do not pass state through three components when the leaf can subscribe to the atom.
-- Keep persistence beside the atom that owns it.
-- Keep route roots thin. They compose routes and shell; they should not become controllers.
-- No monolithic hooks. A hook should own one narrow job.
-- Prefer colocated action modules over hidden god hooks.
-- If a callback is pure side effect, use the terse void form:
-  `onState={st => void setGatewayState(st)}`.
-- Async UI handlers should make intent explicit:
-  `onClick={() => void save()}`.
-- Prefer interfaces for public props and shared object shapes. Avoid `type X = { ... }` for object props.
-- Extend React primitives for props: `React.ComponentProps<'button'>`, `React.ComponentProps<typeof Dialog>`, `Omit<...>`, `Pick<...>`.
-- Table-driven beats condition ladders when mapping ids, routes, or views.
-- `src/app` owns routes, pages, and page-specific components.
-- `src/store` owns shared atoms.
-- `src/lib` owns shared pure helpers.
-
 ## File Dependency Chain

 ```
@@ -274,7 +255,7 @@ npm test          # vitest

 The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.

-- Browser loads `apps/dashboard/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
+- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
 - `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
 - The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
 - Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
@@ -511,9 +492,102 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.

 ## Plugins

-Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
-repo-shipped plugins can be discovered alongside user-installed ones in
-`~/.hermes/plugins/` and pip-installed entry points.
+Hermes uses a **plugin-first architecture**: every optional capability (model
+providers, platform adapters, TTS/STT, terminal backends, image generation)
+lives in its own installable Python package under `plugins/`. The core
+codebase (`agent/`, `hermes_cli/`, `gateway/`, `tools/`) **never** imports
+from a `hermes_agent_*` plugin package directly. Instead, plugins register
+their capabilities into typed registries during `register()`, and the core
+queries those registries at runtime.
+
+Full architecture doc: `website/docs/developer-guide/plugin-architecture.md`
+
+### Workspace layout
+
+All 21 builtin plugins are uv workspace members — each has its own
+`pyproject.toml` (single source of truth for deps), `plugin.yaml`
+(directory-scanner manifest for dev mode), and `hermes_agent_<name>/` package
+with `register(ctx)`:
+
+```
+plugins/
+├── model-providers/        # anthropic, bedrock, azure-foundry
+├── platforms/              # telegram, slack, discord, feishu, dingtalk, matrix
+├── tts/                    # text-to-speech (Edge TTS + ElevenLabs)
+├── stt/                    # speech-to-text
+├── image_gen/fal_pkg/      # FAL image generation
+├── terminals/              # daytona, modal, vercel
+├── web/                    # exa, firecrawl, parallel
+├── memory/                 # honcho, hindsight
+├── dashboard/              # streamlit dashboard
+└── hermes-achievements/    # gamified achievement tracking
+```
+
+### The hermetic core boundary
+
+Core code MUST NOT import from `hermes_agent_*` packages. Instead it queries
+typed registries in `agent/plugin_registries.py`:
+
+```python
+# ❌ BAD — core directly imports plugin
+from hermes_agent_bedrock import has_aws_credentials
+
+# ✅ GOOD — core queries the registry
+from agent.plugin_registries import registries
+bedrock_auth = registries.get_auth_provider("bedrock")
+```
+
+Registry types: `auth_providers`, `transport_builders`, `platform_adapters`,
+`tool_providers`, `model_metadata`, `credential_pools`.
+
+Each plugin's `register(ctx)` populates the registries via `ctx.register_*()`:
+- `ctx.register_auth_provider(name, provider, ...)`
+- `ctx.register_transport(name, builder, ...)`
+- `ctx.register_platform(name, label, adapter_factory, check_fn, ...)`
+- `ctx.register_tool_provider(entry, ...)`
+- `ctx.register_model_metadata(entry, ...)`
+- `ctx.register_credential_pool(entry, ...)`
+- Plus existing: `register_tool()`, `register_hook()`, `register_cli_command()`,
+  `register_tts_provider()`, `register_transcription_provider()`,
+  `register_image_gen_provider()`, `register_video_gen_provider()`,
+  `register_context_engine()`
+
+### Plugin discovery
+
+Three discovery paths (same as before, now workspace-aware):
+1. **Directory scanner** — `plugins/`, `~/.hermes/plugins/`, `.hermes/plugins/`
+   (looks for `plugin.yaml`)
+2. **Entry points** — `[project.entry-points."hermes_agent.plugins"]`
+3. **uv workspace** — `uv sync --extra <name>` installs the plugin into venv
+
+### Dependency management
+
+- Each plugin's `pyproject.toml` is the **only** place its deps are declared
+- Root `pyproject.toml` maps extras to workspace members:
+  `telegram = ["hermes-agent-telegram"]`
+- `uv.lock` resolves the whole workspace (236 packages)
+- No `LAZY_DEPS`, no `ensure()`, no runtime `pip install`
+- Custom PEP 517 build backend (`_build_backend.py`) inlines plugin deps
+  at wheel build time for PyPI publishing
+
+### NixOS
+
+`loadWorkspace` discovers all workspace members from `uv.lock` automatically.
+`mkVirtualEnv { hermes-agent = ["all"] }` installs all plugins. Select specific
+plugins with `extraDependencyGroups = ["telegram", "anthropic"]`.
+
+### Tests
+
+Plugin tests live in `plugins/<category>/<name>/tests/`. The test runner
+discovers both `tests/` and `plugins/`. Running plugin tests requires the
+plugin to be installed (`uv sync --extra <name>`).
+
+### The rule
+
+**If it can be a plugin, it must be a plugin.** Adding optional capabilities
+to core files is a code review rejection. If the plugin surface doesn't
+support what you need, extend the surface (new registry type, new hook, new
+`ctx` method) — don't inline the capability.

 ### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)

@@ -556,9 +630,14 @@ providers don't clutter `hermes --help`.
 **Rule (Teknium, May 2026):** plugins MUST NOT modify core files
 (`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
 If a plugin needs a capability the framework doesn't expose, expand the
-generic plugin surface (new hook, new ctx method) — never hardcode
-plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
-honcho argparse from `main.py` for exactly this reason.
+generic plugin surface (new hook, new ctx method, new registry type) — never
+hardcode plugin-specific logic into core. PR #5295 removed 95 lines of
+hardcoded honcho argparse from `main.py` for exactly this reason.
+
+**Hermetic core boundary (May 2026):** core code (`agent/`, `hermes_cli/`,
+`gateway/`, `tools/`) MUST NOT import from `hermes_agent_*` plugin packages.
+Use the typed registries in `agent/plugin_registries.py` instead. See "The
+hermetic core boundary" above for the full list of registry types.

 **No new in-tree memory providers (policy, May 2026):** the set of
 built-in memory providers under `plugins/memory/` is closed. New memory
@@ -1036,40 +1115,41 @@ def profile_env(tmp_path, monkeypatch):

 ## Testing

-**ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
-hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
-`-n auto` xdist workers, in-tree subprocess-isolation plugin). Direct `pytest`
-on a 16+ core developer machine with API keys set diverges from CI in ways
-that have caused multiple "works locally, fails in CI" incidents (and the reverse).
+**ALWAYS use `scripts/run_tests.sh`** — do NOT call `pytest` directly on a directory.
+The script enforces hermetic environment parity with CI and provides per-file
+process isolation that prevents registry singleton / module-level state leakage
+between test files.

 ```bash
 scripts/run_tests.sh                                  # full suite, CI-parity
 scripts/run_tests.sh tests/gateway/                   # one directory
+scripts/run_tests.sh tests/agent/test_foo.py          # one file
 scripts/run_tests.sh tests/agent/test_foo.py::test_x  # one test
 scripts/run_tests.sh -v --tb=long                     # pass-through pytest flags
-scripts/run_tests.sh --no-isolate tests/foo/          # disable subprocess isolation (faster, for debugging)
 ```

-### Subprocess-per-test isolation
+For a **single test file or specific test**, bare `pytest` is fine:

-Every test runs in a freshly-spawned Python subprocess via the in-tree plugin
-at `tests/_isolate_plugin.py`. This means module-level dicts/sets and
-ContextVars from one test cannot leak into the next — the historic
-`_reset_module_state` autouse fixture is gone.
+```bash
+nix run nixpkgs#uv -- run python -m pytest tests/agent/test_foo.py -q
+nix run nixpkgs#uv -- run python -m pytest tests/agent/test_foo.py::test_x --tb=short
+```

-Implementation notes:
+Running bare `pytest` on a directory (e.g. `pytest tests/`) will print a warning
+from `conftest.py` telling you to use the script instead.

- The plugin uses `multiprocessing.get_context("spawn")`, which works on
-  Linux, macOS, and Windows alike (POSIX `fork` is not used).
- Per-test overhead is ~0.5–1.0s (Python startup + pytest collection). xdist
-  parallelism amortizes this across cores; on a 20-core box the full suite
-  finishes in roughly the same wall time as before, but flake-free.
- `isolate_timeout` (configured in `pyproject.toml`) caps each test at 30s.
-  Hangs are killed and surfaced as a failure report.
- Pass `--no-isolate` to disable isolation — useful when debugging a single
-  test interactively, or when you specifically want to verify state leakage.
- The plugin disables itself in child processes (sentinel envvar
-  `HERMES_ISOLATE_CHILD=1`), so there's no fork-bomb risk.
+### Per-file process isolation
+
+`scripts/run_tests.sh` calls `scripts/run_tests_parallel.py`, which spawns one
+`python -m pytest <file>` subprocess per test **file** (not per test), giving each
+a fresh Python interpreter. This means module-level dicts/sets, ContextVars, and
+registry singletons from one test file cannot leak into another — no shared state
+between files, no xdist required.
+
+`HERMES_PARALLEL_RUNNER=1` is set in each subprocess so `conftest.py` knows tests
+are running under the managed runner. If you need to suppress the bare-pytest
+directory warning in a special case, set this variable yourself — but prefer the
+script.

 ### Why the wrapper (and why the old "just call pytest" doesn't work)

@@ -1081,31 +1161,13 @@ Five real sources of local-vs-CI drift the script closes:
 | HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
 | Timezone | Local TZ (PDT etc.) | UTC |
 | Locale | Whatever is set | C.UTF-8 |
-| xdist workers | `-n auto` = all cores | `-n auto` (safe — subprocess isolation prevents cross-worker flakes) |
+| File isolation | Shared interpreter — state leaks between files | One subprocess per file |

-`tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
-invocation (including IDE integrations) gets hermetic behavior — but the wrapper
-is belt-and-suspenders.
+`tests/conftest.py` also enforces the credential/HOME/TZ/locale points as an autouse
+fixture so ANY pytest invocation (including IDE integrations) gets hermetic
+behavior — but the wrapper adds per-file process isolation on top.

-### Running without the wrapper (only if you must)
-
-If you can't use the wrapper (e.g. inside an IDE that shells pytest directly),
-at minimum activate the venv. The isolation plugin loads automatically from
-`addopts` in `pyproject.toml`, so you get the same per-test process isolation
-either way.
-
-```bash
-source .venv/bin/activate   # or: source venv/bin/activate
-python -m pytest tests/ -q
-```
-
-If you need to bypass isolation for fast feedback while debugging:
-
-```bash
-python -m pytest tests/agent/test_foo.py -q --no-isolate
-```
-
-Always run the full suite before pushing changes.
+Always run the full suite via `scripts/run_tests.sh` before pushing changes.

 ### Don't write change-detector tests

@@ -121,12 +121,11 @@ hermes chat -q "Hello"
 ### Run tests

 ```bash
-# Preferred — matches CI (hermetic env, 4 xdist workers); see AGENTS.md
+# Preferred — matches CI (hermetic env, per-file process isolation); see AGENTS.md
 scripts/run_tests.sh

-# Alternative (activate the venv first). The wrapper is still recommended
-# for parity with GitHub Actions before you open a PR:
-pytest tests/ -v
+# For a single file or specific test, bare pytest is also fine:
+# python -m pytest tests/agent/test_foo.py -q
 ```

 ---
@@ -857,7 +856,7 @@ refactor/description   # Code restructuring

 ### Before submitting

-1. **Run tests**: `scripts/run_tests.sh` (recommended; same as CI) or `pytest tests/ -v` with the project venv activated
+1. **Run tests**: `scripts/run_tests.sh` (recommended; same as CI — hermetic env + per-file process isolation)
 2. **Test manually**: Run `hermes` and exercise the code path you changed
 3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.
@@ -25,7 +25,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # hermes process, the dashboard, and per-profile gateways.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-    ca-certificates curl python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli xz-utils && \
+    ca-certificates curl python3 python-is-python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli xz-utils && \
    rm -rf /var/lib/apt/lists/*

 # ---------- s6-overlay install ----------
@@ -187,6 +187,29 @@ RUN chmod -R a+rX /opt/hermes && \
 # this a fast (~1s) egg-link creation with no resolution or downloads.
 RUN uv pip install --no-cache-dir --no-deps -e "."

+# ---------- Bake build-time git revision ----------
+# .dockerignore excludes .git, so `git rev-parse HEAD` from inside the
+# container always returns nothing — meaning `hermes dump` reports
+# "(unknown)" and the startup banner drops its `· upstream <sha>` suffix.
+# That makes support triage from container bug reports impossible:
+# we can't tell which commit the user is actually running.
+#
+# Fix: write the commit SHA passed via the HERMES_GIT_SHA build-arg to
+# /opt/hermes/.hermes_build_sha at build time, and have
+# hermes_cli/build_info.py read it at runtime.  Both `hermes dump` and
+# banner.get_git_banner_state() try the baked SHA first, then fall back
+# to live `git rev-parse` for source installs (unchanged behaviour).
+#
+# The arg is optional — local `docker build` without --build-arg simply
+# omits the file, and the runtime falls back to live-git lookup.  CI
+# (.github/workflows/docker-publish.yml) passes ${{ github.sha }} so
+# every published image has it.
+ARG HERMES_GIT_SHA=
+RUN if [ -n "${HERMES_GIT_SHA}" ]; then \
+        printf '%s\n' "${HERMES_GIT_SHA}" > /opt/hermes/.hermes_build_sha && \
+        chown hermes:hermes /opt/hermes/.hermes_build_sha; \
+    fi
+
 # ---------- s6-overlay service wiring ----------
 # Static services declared at build time: main-hermes + dashboard.
 # Per-profile gateway services are registered dynamically at runtime by
@@ -213,13 +236,32 @@ COPY --chmod=0755 docker/cont-init.d/02-reconcile-profiles /etc/cont-init.d/02-r
 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
+
+# `docker exec` privilege-drop shim. When operators run
+# `docker exec <c> hermes ...` they default to root, and any file the
+# command writes under $HERMES_HOME (auth.json, .env, config.yaml) ends
+# up root-owned and unreadable to the supervised gateway (UID 10000).
+# The shim lives at /opt/hermes/bin/hermes, sits earliest on PATH, and
+# transparently re-execs the real venv binary via `s6-setuidgid hermes`
+# when invoked as root. Non-root callers (supervised processes,
+# `--user hermes`, etc.) hit the short-circuit path with no overhead.
+# Recursion is impossible because the shim execs the venv binary by
+# absolute path (/opt/hermes/.venv/bin/hermes). See the shim source for
+# the opt-out env var (HERMES_DOCKER_EXEC_AS_ROOT=1).
+COPY --chmod=0755 docker/hermes-exec-shim.sh /opt/hermes/bin/hermes
+
 # Pre-s6 entrypoint.sh did `source .venv/bin/activate` which exported
 # the venv bin onto PATH; Architecture B's main-wrapper.sh does the
 # same for the container's main process, but `docker exec` and our
 # cont-init.d scripts don't pass through the wrapper. Expose the venv
 # bin globally so `docker exec <container> hermes ...` and any
 # subprocess that doesn't activate the venv first still find hermes.
-ENV PATH="/opt/hermes/.venv/bin:/opt/data/.local/bin:${PATH}"
+#
+# /opt/hermes/bin is prepended ahead of the venv so the privilege-drop
+# shim wins PATH resolution. The shim's last act is to exec the venv
+# binary by absolute path, so this PATH ordering is transparent to
+# every other consumer.
+ENV PATH="/opt/hermes/bin:/opt/hermes/.venv/bin:/opt/data/.local/bin:${PATH}"
 RUN mkdir -p /opt/data
 VOLUME [ "/opt/data" ]

@@ -179,7 +179,7 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
-python -m pytest tests/ -q
+scripts/run_tests.sh
 ```

 ---
@@ -3,73 +3,75 @@
 **Release Date:** May 16, 2026
 **Since v0.13.0:** 808 commits · 633 merged PRs · 1393 files changed · 165,061 insertions · 545 issues closed (12 P0, 50 P1) · 215 community contributors (including co-authors)

-> The Foundation Release — Hermes Agent installs and runs anywhere now. Native Windows ships in early beta with a full PowerShell installer story, a `pip install hermes-agent` wheel lands on PyPI, lazy-deps reshape what `pip install hermes-agent` actually pulls down, the supply-chain checker scans every install/upgrade for unsafe versions, and a new OpenAI-compatible local proxy lets Codex / Aider / Cline talk to OAuth-only providers (Claude Pro, ChatGPT Pro, SuperGrok). The cold-start wave shaves ~19 seconds off `hermes` launch, browser-tool CDP calls run 180x faster, and `hermes tools` All-Platforms drops from 14s to under 1.5s. Two new messaging platforms (LINE and SimpleX Chat) and a Microsoft Graph foundation (Teams pipeline + webhook adapter) land alongside `/handoff` that finally transfers sessions live, `vision_analyze` passing pixels through to vision-capable models, `x_search` as a first-class tool, LSP semantic diagnostics on every `write_file` / `patch`, a unified pluggable `video_generate`, a `computer_use` cua-driver backend, cross-session 1-hour Claude prompt caching, a per-turn file-mutation verifier, plus 9 new optional skills. 50+ P1 closures, 12 P0 closures.
+> The Foundation Release — Hermes installs and runs anywhere, ships with the things you actually want to use, and stops shipping the things you don't. xAI Grok lands as a SuperGrok OAuth provider with grok-4.3 bumped to a 1M context window. A new OpenAI-compatible local proxy turns any OAuth-authed Hermes provider — Claude Pro, ChatGPT Pro, SuperGrok — into an endpoint that Codex / Aider / Cline / Continue can hit. `x_search` lands as a first-class X (Twitter) search tool with OAuth-or-API-key auth. The Microsoft Teams stack is wired end-to-end (Graph auth + webhook listener + pipeline runtime + outbound delivery). A debloating wave makes installs dramatically lighter — heavyweight backends now lazy-install on first use, the `[all]` extras drop everything covered by lazy-deps, and a tiered install falls back when a wheel rejects on your platform. `pip install hermes-agent` works from PyPI. The cold-start wave shaves ~19 seconds off `hermes` launch. Browser CDP calls are 180x faster. Two new messaging platforms (LINE + SimpleX Chat) bring the total to 22. Cross-session 1-hour Claude prompt caching, `/handoff` that actually transfers sessions live, native button UI for `clarify` on Telegram and Discord, Discord channel history backfill, LSP semantic diagnostics on every write, a unified pluggable `video_generate`, a `computer_use` cua-driver backend that finally works with non-Anthropic providers, clickable URLs in any terminal, Zed ACP Registry integration via `uvx`, native Windows beta, 9 new optional skills, OpenRouter Pareto Code router, huggingface/skills as a trusted default tap. 12 P0 + 50 P1 closures.

 ---

 ## ✨ Highlights

- **Native Windows support (early beta)** — full PowerShell installer, native subprocess/PTY paths, taskkill-based process management, MinGit auto-install, Microsoft Store python stub detection, foreground Ctrl+C preservation, taskkill+ps2 fallback, npm prefix handling, and ~40 follow-up Windows-only fixes across CLI / gateway / TUI / curator / tools. Hermes finally runs natively on `cmd.exe` and PowerShell, no WSL required. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561), [#22130](https://github.com/NousResearch/hermes-agent/pull/22130), [#22752](https://github.com/NousResearch/hermes-agent/pull/22752), [#26618](https://github.com/NousResearch/hermes-agent/pull/26618), and many more)
+- **xAI Grok via SuperGrok OAuth — and grok-4.3 jumps to a 1M context window** — If you pay for SuperGrok, you can now use Grok inside Hermes by signing in with your xAI account — no API key, no separate billing. The wire-through also bumps grok-4.3 to a 1M token context window, so you can drop whole codebases or research corpora into a single prompt. Includes proper handling for entitlement errors and an SSH-to-tunnel docs page for when you're SSH'd into a remote box and need to complete the OAuth flow. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534), [#26664](https://github.com/NousResearch/hermes-agent/pull/26664), [#26644](https://github.com/NousResearch/hermes-agent/pull/26644), [#26592](https://github.com/NousResearch/hermes-agent/pull/26592))

- **`pip install hermes-agent && hermes`** — Hermes Agent is now a real PyPI package. One command, no clone, no git, no shell installer. Wheel includes the Ink TUI bundle and shell launcher. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593))
+- **OpenAI-compatible local proxy for OAuth providers** — Run `hermes proxy` and you get a `http://localhost:port` endpoint that speaks the OpenAI API but is backed by whichever OAuth provider you're signed into — Claude Pro, ChatGPT Pro, SuperGrok. Now any tool that expects an OpenAI-compatible endpoint (Codex CLI, Aider, Cline, Continue, your custom scripts) just works with your existing subscription, no API key required. One subscription, every tool. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))

- **Cold-start performance wave — ~19s off `hermes` launch** — skills cache, lazy Feishu import, no Nous HTTP at startup, plus PEP-562 lazy adapter imports (QQ, Yuanbao, Teams, Google Chat), deferred `fal_client` / `google-cloud` / `httpx` loads, models.dev disk-cache-first lookup, parallel doctor API checks, eager-skip plugin discovery on built-in subcommands, `hermes tools` All-Platforms drops from 14s to <1.5s, welcome banner skipped on `chat -q`. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))
+- **`x_search` — first-class X (Twitter) search tool** — The agent can now search X directly without installing a skill or wiring up a custom integration. Search the timeline, find threads, surface specific posts — straight from the chat. Auth with either your X OAuth login or an API key, whichever you have. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))

- **180x faster `browser_console` evaluations** — routed through the supervisor's persistent CDP WebSocket instead of spawning a fresh DevTools session per call. Real-world page interactions feel instant. ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))
+- **Microsoft Teams — end-to-end** — Hermes can now read messages from Teams and post back. The full Microsoft Graph stack lands together: auth + client foundation, a webhook listener that receives Teams events, a pipeline plugin runtime, and outbound delivery. Wire up the bot once, then chat to your agent from any Teams channel, DM, or group. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))

- **Supply-chain advisory checker + lazy-deps framework + tiered install fallback** — every `pip install` / `hermes update` scans dependencies against an advisory list, lazy-deps replace heavy import-time loads with first-use installs, and the installer falls back through extras tiers when a wheel rejects on the target platform. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220))
+- **Debloating wave — lighter installs, less you don't use** — A clean `pip install hermes-agent` used to pull down everything: every messaging adapter SDK, every image-gen SDK, every voice/TTS provider, whether you used them or not. Now those heavy backends (Slack / Matrix / Feishu / DingTalk adapters, hindsight client, codex app-server, Pixverse / Camofox / image-gen SDKs, voice/TTS providers) install automatically the first time you actually use them. The `[all]` extras drop everything covered by lazy-deps, the installer falls back through tiers when a wheel doesn't fit your platform, and a supply-chain advisory checker scans every install for unsafe versions. Faster installs, smaller disk footprint, fewer transitive vulnerabilities. ([#24220](https://github.com/NousResearch/hermes-agent/pull/24220), [#24515](https://github.com/NousResearch/hermes-agent/pull/24515), [#25014](https://github.com/NousResearch/hermes-agent/pull/25014), [#25038](https://github.com/NousResearch/hermes-agent/pull/25038), [#25766](https://github.com/NousResearch/hermes-agent/pull/25766), [#21818](https://github.com/NousResearch/hermes-agent/pull/21818))

- **OpenAI-compatible local proxy** — `hermes proxy` exposes any OAuth-authed provider (Claude Pro, ChatGPT Pro, SuperGrok) as an OpenAI-compatible endpoint that Codex / Aider / Cline / VS Code Continue can hit. Your subscription, your tools. ([#25969](https://github.com/NousResearch/hermes-agent/pull/25969))
+- **`pip install hermes-agent && hermes`** — Hermes Agent is now a real PyPI package. No more cloning the repo or running shell installers — one pip command and you're running. The wheel ships with the Ink TUI bundle and the shell launcher, so the full experience comes out of the box. (salvage of [#26350](https://github.com/NousResearch/hermes-agent/pull/26350)) ([#26593](https://github.com/NousResearch/hermes-agent/pull/26593), [#26148](https://github.com/NousResearch/hermes-agent/pull/26148))

- **Cross-session 1-hour Claude prompt cache** — Anthropic / OpenRouter / Nous Portal now share a 1h prefix cache across sessions for Claude models. Fast resume, fast `/new`, lower cost on repeat work. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828))
+- **Cross-session 1h Claude prompt cache** — When you use Claude through Anthropic, OpenRouter, or Nous Portal, the prompt prefix (system prompt, skills, memory) now caches for an hour across sessions. Start a `/new` session and the first response comes back faster and cheaper because the cache is still warm from your last session. Background memory review hits the cache too, so it's not paying full price every turn. ([#23828](https://github.com/NousResearch/hermes-agent/pull/23828), [#25434](https://github.com/NousResearch/hermes-agent/pull/25434), [#24778](https://github.com/NousResearch/hermes-agent/pull/24778))

- **Two new messaging platforms — LINE + SimpleX Chat** — LINE Messaging API lands as a first-class platform, SimpleX Chat salvages #2558 onto the modern adapter spec. Hermes is now on 22 platforms. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))
+- **180x faster `browser_console` evaluations** — When the agent uses the browser tool to inspect a page or run JavaScript, those calls now share one persistent connection to Chrome instead of spinning up a new DevTools session every time. The difference is huge: things that used to take a couple of seconds per call return in milliseconds. Real-world page interactions feel instant. ([#23226](https://github.com/NousResearch/hermes-agent/pull/23226))

- **Microsoft Graph foundation — Teams pipeline + webhook adapter** — `msgraph` auth/client foundation, webhook listener platform, Teams pipeline plugin runtime, and Teams outbound delivery via the existing adapter — Hermes can now read and post to Teams. (salvages of #21408–#21411) ([#21922](https://github.com/NousResearch/hermes-agent/pull/21922), [#21969](https://github.com/NousResearch/hermes-agent/pull/21969), [#22007](https://github.com/NousResearch/hermes-agent/pull/22007), [#22024](https://github.com/NousResearch/hermes-agent/pull/22024))
+- **Cold-start performance wave — ~19 seconds off `hermes` launch** — Running `hermes` used to make you wait through a chunk of import overhead and network calls before you saw a prompt. Now the launch path is mostly deferred: heavy adapters only load when you use them, model catalogs come from disk cache first, doctor checks run in parallel, and `chat -q` skips the welcome banner entirely. The `hermes tools` All-Platforms screen alone dropped from 14 seconds to under 1.5 seconds. ([#22138](https://github.com/NousResearch/hermes-agent/pull/22138), [#22120](https://github.com/NousResearch/hermes-agent/pull/22120), [#22681](https://github.com/NousResearch/hermes-agent/pull/22681), [#22790](https://github.com/NousResearch/hermes-agent/pull/22790), [#22808](https://github.com/NousResearch/hermes-agent/pull/22808), [#22831](https://github.com/NousResearch/hermes-agent/pull/22831), [#22859](https://github.com/NousResearch/hermes-agent/pull/22859), [#22904](https://github.com/NousResearch/hermes-agent/pull/22904), [#22766](https://github.com/NousResearch/hermes-agent/pull/22766), [#25341](https://github.com/NousResearch/hermes-agent/pull/25341))

- **`/handoff` actually transfers the session live** — the agent's active session moves to a different model / persona / profile mid-conversation, with messages, tool history, and context preserved. ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))
+- **Two new messaging platforms — LINE + SimpleX Chat** — LINE is huge in Japan, Korea, and Taiwan, and now Hermes runs natively on the LINE Messaging API. SimpleX Chat is the privacy-focused decentralized messenger with no user IDs — also wired up as a first-class platform. That brings Hermes to 22 messaging platforms total, so wherever you and your team chat, the agent can be there. ([#23197](https://github.com/NousResearch/hermes-agent/pull/23197), [#26232](https://github.com/NousResearch/hermes-agent/pull/26232))

- **`x_search` — first-class X (Twitter) search tool** — gated tool with OAuth-or-API-key auth, no skill needed to query the timeline. ([#26763](https://github.com/NousResearch/hermes-agent/pull/26763))
+- **`/handoff` actually transfers the session live** — Switching models or personalities mid-conversation used to mean losing context or starting over. Now `/handoff` moves your active session — every message, every tool call, every piece of context — to the target model, persona, or profile, live, without dropping anything. Hand off mid-debugging from a fast model to a deep-reasoning one, or pass a session between profiles for different parts of a task. ([#23395](https://github.com/NousResearch/hermes-agent/pull/23395))

- **`vision_analyze` returns pixels to vision-capable models** — when the active model can see, `vision_analyze` now hands the image straight through instead of falling back to a text description. ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))
+- **Native button UI for `clarify` on Telegram and Discord** — When the agent uses the `clarify` tool to ask you a multiple-choice question, it now shows real platform-native buttons on Telegram and Discord instead of asking you to type back the option number. Tap the button, the agent gets your answer. Especially nice on mobile. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))

- **LSP semantic diagnostics on every write** — `write_file` and `patch` now run real language-server diagnostics on the post-edit file (delta-only) and surface real errors before they ship downstream. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))
+- **Discord channel history backfill (default on)** — When Hermes joins a Discord channel or thread for the first time, it now reads the recent message history so it knows what's been said before it responds. No more "what are we talking about?" — the agent has the context that's already on screen for everyone else. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))

- **Per-turn file-mutation verifier footer** — after every turn that wrote files, the agent gets a verifier footer summarizing what actually changed on disk — catches silent overwrites and "wrote it but it didn't land" bugs. ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))
+- **`vision_analyze` returns pixels to vision-capable models** — When you point the agent at an image with `vision_analyze` and the active model can actually see (GPT-5, Claude, Gemini, Grok-vision), Hermes now passes the raw pixels straight to the model instead of converting them to a text description first. You get the model's actual visual reasoning instead of a degraded text-summary round-trip. ([#22955](https://github.com/NousResearch/hermes-agent/pull/22955))

- **Unified `video_generate` with pluggable provider backends** — single tool, any backend. Drop in a new video provider as a plugin, no core changes. ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))
+- **Per-turn file-mutation verifier footer** — After every turn that wrote or edited files, the agent now gets a short footer summarizing exactly what changed on disk — the file paths, the line counts, the actual delta. That means the agent catches its own mistakes when a write didn't land or got silently overwritten, instead of confidently telling you "I added the function" when the file wasn't actually saved. ([#24498](https://github.com/NousResearch/hermes-agent/pull/24498))

- **`computer_use` cua-driver backend** — proper focus-safe ops, non-Anthropic provider support, refresh on `hermes update`. Computer-use is no longer locked to a single SDK. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))
+- **LSP semantic diagnostics on every write** — When the agent uses `write_file` or `patch`, Hermes now runs a real language server against the edited file and surfaces any new errors back to the agent before the next turn. Type errors, undefined symbols, missing imports — caught immediately. Goes way beyond v0.13.0's basic Python/JSON/YAML/TOML linting because it's actual semantic analysis. ([#24168](https://github.com/NousResearch/hermes-agent/pull/24168), [#25978](https://github.com/NousResearch/hermes-agent/pull/25978))

- **xAI Grok OAuth provider — SuperGrok via subscription** — sign in with your xAI account, talk to Grok models from Hermes. ([#26534](https://github.com/NousResearch/hermes-agent/pull/26534))
+- **Unified `video_generate` with pluggable provider backends** — One tool, any video model. Hermes ships with the obvious backends already, but you can drop in a new video provider as a plugin without touching core. So when a new video model lands next month, it can be a one-file plugin instead of a fork. ([#25126](https://github.com/NousResearch/hermes-agent/pull/25126))

- **Clarify with buttons — native inline keyboards on Telegram + Discord** — the `clarify` tool renders multi-choice prompts as platform-native buttons instead of typed responses. ([#24199](https://github.com/NousResearch/hermes-agent/pull/24199), [#25485](https://github.com/NousResearch/hermes-agent/pull/25485))
+- **`computer_use` cua-driver backend — works with non-Anthropic models now** — Computer-use (the agent controlling your mouse and keyboard to drive GUI apps) used to be locked to Anthropic's SDK. The new cua-driver backend works with non-Anthropic providers too, has proper focus-safe operations, and refreshes itself on `hermes update`. Now any vision-capable model can drive your desktop. (re-salvage of #16936) ([#21967](https://github.com/NousResearch/hermes-agent/pull/21967), [#24063](https://github.com/NousResearch/hermes-agent/pull/24063))

- **Discord channel history backfill (default on)** — Hermes reads recent channel history when joining a thread so it actually knows what's been said. ([#25984](https://github.com/NousResearch/hermes-agent/pull/25984))
+- **Clickable URLs in any terminal** — Links in agent output are now real OSC8 hyperlinks with hover-highlight in any terminal that supports them. Click to open in your browser — no more copy-paste-trim of long URLs from the transcript. Just works in iTerm2, Kitty, Ghostty, modern Windows Terminal, etc. (@OutThisLife) ([#25071](https://github.com/NousResearch/hermes-agent/pull/25071), [#24013](https://github.com/NousResearch/hermes-agent/pull/24013))

- **Watchers skill — RSS / HTTP JSON / GitHub polling via cron `no_agent` mode** — skill recipes that wire change-detection sources directly into cron's script-only watchdog mode. ([#21881](https://github.com/NousResearch/hermes-agent/pull/21881))
+- **Zed ACP Registry — `uvx` install in one click** — Hermes is now listed in Zed's Agent Client Protocol registry, so Zed users can install it with one click. The install path uses `uvx` so there's no npm dependency. `hermes acp --setup-browser` bootstraps the browser tools for registry-driven installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))

- **Zed ACP Registry integration + uvx distribution** — Hermes is in the Zed registry, installable via `uvx` (no npm). Plus `hermes acp --setup-browser` bootstraps browser tools for registry installs. (salvage of [#25908](https://github.com/NousResearch/hermes-agent/pull/25908)) ([#26079](https://github.com/NousResearch/hermes-agent/pull/26079), [#26120](https://github.com/NousResearch/hermes-agent/pull/26120), [#26234](https://github.com/NousResearch/hermes-agent/pull/26234))
+- **OpenRouter Pareto Code router with `min_coding_score` knob** — OpenRouter's "Pareto" router automatically picks the cheapest model that meets a minimum quality bar. The new `min_coding_score` config lets you set that bar for coding tasks specifically — Hermes routes to the most affordable model that's at least that good at code. Stop paying for top-tier models when a mid-tier one would do. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))

- **OpenRouter Pareto Code router** — wire a new OpenRouter router with `min_coding_score` knob. Pick the cheapest model that meets your quality bar. ([#22838](https://github.com/NousResearch/hermes-agent/pull/22838))
+- **NovitaAI as a new model provider** — NovitaAI joins the provider lineup, giving you another option for open-source model hosting (Llama, Qwen, DeepSeek, etc.) with its own pricing and rate limits. (salvage of #7219) (@kshitijk4poor) ([#25507](https://github.com/NousResearch/hermes-agent/pull/25507))

- **Optional codex app-server runtime for OpenAI/Codex models** — drives the OpenAI Codex CLI under the hood for OpenAI/Codex paths, with session reuse, wedge retirement, and OAuth refresh classification. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))
+- **Codex app-server runtime for OpenAI/Codex models** — An optional runtime that drives OpenAI's Codex CLI under the hood when you're using OpenAI or Codex paths. You get session reuse, automatic retirement of wedged sessions, and proper OAuth refresh classification — the kind of plumbing that makes long agentic runs not fall over. ([#24182](https://github.com/NousResearch/hermes-agent/pull/24182), [#25769](https://github.com/NousResearch/hermes-agent/pull/25769))

- **`hermes-skills/huggingface` as a trusted default tap** — community skills index from huggingface.co/skills is available by default in the Skills Hub. ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))
+- **`huggingface/skills` as a trusted default tap** — The community skills index hosted at huggingface.co/skills is now wired into the Skills Hub by default. So when somebody publishes a useful skill there, you can install it from your own `hermes skills` browser without any extra config. (closes #2549) ([#26219](https://github.com/NousResearch/hermes-agent/pull/26219))

- **9 new optional skills** — Hyperliquid (perp/spot trading via SDK + REST) (@kshitijk4poor & Hermes), Yahoo Finance market data, api-testing (REST/GraphQL debug), unified EVM multi-chain skill (folds #25291 + #2010 + base/), darwinian-evolver, osint-investigation (closes #355), pinggy-tunnel, watchers (RSS/HTTP/GitHub via cron), Notion overhaul for the Developer Platform (May 2026). ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))
+- **9 new optional skills** — Hyperliquid (perp + spot trading via the SDK and REST API), Yahoo Finance (live market data, fundamentals, historicals), api-testing (REST + GraphQL debug recipes), unified EVM multi-chain (one skill covers Ethereum + L2s + Base), darwinian-evolver (evolutionary prompt/skill tuning), osint-investigation (OSINT recipes for people / domains / orgs), pinggy-tunnel (expose local services to the public internet), watchers (polls RSS / HTTP JSON / GitHub via cron `no_agent` mode for change detection), and a full Notion overhaul for the May 2026 Developer Platform. ([#23582](https://github.com/NousResearch/hermes-agent/pull/23582), [#23583](https://github.com/NousResearch/hermes-agent/pull/23583), [#23590](https://github.com/NousResearch/hermes-agent/pull/23590), [#25299](https://github.com/NousResearch/hermes-agent/pull/25299), [#26760](https://github.com/NousResearch/hermes-agent/pull/26760), [#26729](https://github.com/NousResearch/hermes-agent/pull/26729), [#26765](https://github.com/NousResearch/hermes-agent/pull/26765), [#21881](https://github.com/NousResearch/hermes-agent/pull/21881), [#26612](https://github.com/NousResearch/hermes-agent/pull/26612))

- **API server exposes run approval events** — long-running runs surface approval requests over the API stream, no more silent stalls. (salvage of [#20311](https://github.com/NousResearch/hermes-agent/pull/20311)) ([#21899](https://github.com/NousResearch/hermes-agent/pull/21899))
+- **API server exposes run approval events** — If you're driving Hermes programmatically through the HTTP API, long-running runs no longer silently hang when the agent hits an approval-required command. The approval request now surfaces on the API stream so your client can prompt the user and reply — no more silent stalls. (salvage of [#20311](https://github.com/NousResearch/hermes-agent/pull/20311)) ([#21899](https://github.com/NousResearch/hermes-agent/pull/21899))

- **`/subgoal` — user-added criteria appended to active `/goal`** — layer extra success criteria onto a running goal loop. The judge sees them in the prompt, no behavior change when subgoals are empty. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))
+- **Plugins can run any LLM call via `ctx.llm` + replace built-in tools via `tool_override`** — If you're writing a Hermes plugin, you now get first-class access to make LLM calls through the active provider and credentials — no manual client wiring. The new `tool_override` flag lets a plugin swap out a built-in tool with its own implementation cleanly. Plugin authors get the same model-routing and auth plumbing the core agent uses. (closes #11049) ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))

- **Plugins can run any LLM call via `ctx.llm`** — plugins get a first-class hook to make their own LLM requests through the active provider/credentials, no manual wiring. Plus `tool_override` flag for replacing built-in tools. ([#23194](https://github.com/NousResearch/hermes-agent/pull/23194), [#26759](https://github.com/NousResearch/hermes-agent/pull/26759))
+- **Brave Search (free tier) + DuckDuckGo (DDGS) as web-search providers** — Two new free web-search backends join Tavily, SearXNG, and Exa. Brave Search has a generous free tier; DDGS is the DuckDuckGo scraper that needs no key at all. Pick whichever fits your budget and rate-limit needs. ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))

- **Brave Search (free tier) + DuckDuckGo (DDGS) as web-search providers** — two new free search backends alongside Tavily / SearXNG / Exa. ([#21337](https://github.com/NousResearch/hermes-agent/pull/21337))
+- **Sudo brute-force block + 3 dangerous-command bypasses closed + tool-error sanitization** — The approval gate now blocks `sudo -S` brute-force attempts and classifies stdin-fed or askpass-stripped sudo invocations as DANGEROUS. Three known bypasses of dangerous-command detection are closed (inspired by Claude Code's command-detection work). And tool error strings are now sanitized before being re-injected into the model context, so a malicious file or remote service can't pass instructions to your agent through error output. ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736), [#26829](https://github.com/NousResearch/hermes-agent/pull/26829), [#26823](https://github.com/NousResearch/hermes-agent/pull/26823))

- **Sudo brute-force block + sudo-stdin/askpass DANGEROUS classification** — closes the `sudo -S` brute-force avenue; approval gates classify stdin-fed and askpass-stripped sudo invocations as dangerous. (salvages of #22194 + #21128) ([#23736](https://github.com/NousResearch/hermes-agent/pull/23736))
+- **`/subgoal` — user-added criteria appended to an active `/goal`** — When you've got a `/goal` running (the persistent Ralph-loop goal where the agent keeps going until criteria are met), you can now use `/subgoal <text>` to layer extra success criteria onto it mid-run. The judge factors your new criteria into the done-or-keep-going decision without restarting the loop. ([#25449](https://github.com/NousResearch/hermes-agent/pull/25449))

- **Provider rename — Alibaba Cloud → Qwen Cloud, picker reorder** — matches what the world calls it. Existing config keys still work. ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
+- **Provider rename — Alibaba Cloud → Qwen Cloud** — The Alibaba Cloud provider is renamed to Qwen Cloud in the picker and config to match what the rest of the world calls it. Existing config keys still work — no breaking changes — but the UI matches the actual brand now. ([#24835](https://github.com/NousResearch/hermes-agent/pull/24835))
+
+- **Native Windows support (early beta)** — Hermes now runs natively on `cmd.exe` and PowerShell without WSL. A full PowerShell installer handles MinGit auto-install, Microsoft Store Python stub detection, and the foreground Ctrl+C dance. There are still rough edges (hence the "early beta" stamp) — ~40 follow-up Windows-only fixes have already landed in this release window — but the basic loop works end-to-end on a clean Windows box. ([#21561](https://github.com/NousResearch/hermes-agent/pull/21561))


 ---
@@ -0,0 +1,651 @@
+# Hermes Agent v0.15.0 (v2026.5.28)
+
+**Release Date:** May 28, 2026
+**Since v0.14.0:** 1,302 commits · 747 merged PRs · 1,746 files changed · 282,712 insertions · 36,699 deletions · 560+ issues closed (15 P0, 65 P1, 19 security-tagged) · 321 community contributors (including co-authors)
+
+> **The Velocity Release.** Hermes gets dramatically faster — to start, to run, to ship work, and to grow. The 16,083-line `run_agent.py` collapses to 3,821 (-76%) across 14 cohesive `agent/*` modules. Kanban grew into a real multi-agent platform across 104 PRs — orchestrator auto-decomposition, swarm topology, scheduled tasks, worktree-per-task, per-task model overrides. The cold-start perf wave keeps going: another second shaved off launch, 47% fewer per-conversation function calls, `hermes --version` flipping the head-to-head benchmark against Codex CLI. `session_search` is 4,500× faster and free now. Promptware defense lands against Brainworm-class attacks. Bitwarden Secrets Manager replaces N per-provider API keys with one bootstrap token. Skill bundles let one slash command load a whole workflow. The Ink TUI gets a multi-session orchestrator. Two new image_gen providers (Krea 2 Medium + Large, FAL ported to plugin), the Nous-approved MCP catalog with an interactive picker, an OpenHands orchestration skill, ntfy as the 23rd messaging platform, and a deep xAI integration round (Web Search plugin, xai-oauth `hermes proxy` upstream, retired-May-15 model detection + `hermes migrate xai`, natural TTS speech-tag pauses, base_url leak guard, OpenAI-style execution guidance for Grok). 15 P0 + 65 P1 closures alongside.
+
+---
+
+## ✨ Highlights
+
+- **The Big Refactor — `run_agent.py` is no longer 16,000 lines** — The file at the heart of Hermes — the agent conversation loop — has been reduced from 16,083 lines to 3,821 (-76%), with the extracted code redistributed across 14 cohesive modules under `agent/`. Behavior is unchanged: every extraction keeps a thin forwarder on `AIAgent`, every test patch path still works, every external caller is compatible. The reason you care: future Hermes development moves faster, plugin authors can finally grep the codebase, and the file that took 90 seconds to load in your editor opens in a blink. ([#27248](https://github.com/NousResearch/hermes-agent/pull/27248))
+
+- **Kanban grew into a real multi-agent platform — 104 PRs end to end** — Triage auto-decomposes one task into a tree of sub-tasks. `hermes kanban swarm` creates a full Swarm v1 graph in one command — root, parallel workers, gated verifier, gated synthesizer, shared blackboard. Tasks support per-task model overrides (cheap models for boilerplate, expensive ones for hard sub-tasks), board-level default workdirs, per-task worktree paths and branches, scheduled start times, configurable claim TTL, retry fingerprinting, stale-task detection, respawn guards, and a drag-to-delete trash zone. Workers report through `/workers/active`, `/runs/{id}`, and `/inspect` endpoints. ([#27572](https://github.com/NousResearch/hermes-agent/pull/27572), [#28443](https://github.com/NousResearch/hermes-agent/pull/28443), [#28364](https://github.com/NousResearch/hermes-agent/pull/28364), [#28394](https://github.com/NousResearch/hermes-agent/pull/28394), [#28462](https://github.com/NousResearch/hermes-agent/pull/28462), [#28384](https://github.com/NousResearch/hermes-agent/pull/28384), [#28467](https://github.com/NousResearch/hermes-agent/pull/28467), [#28455](https://github.com/NousResearch/hermes-agent/pull/28455), [#28452](https://github.com/NousResearch/hermes-agent/pull/28452), [#28432](https://github.com/NousResearch/hermes-agent/pull/28432), [#28468](https://github.com/NousResearch/hermes-agent/pull/28468), [#28420](https://github.com/NousResearch/hermes-agent/pull/28420))
+
+- **Cold-start perf wave keeps going — another second saved, 47% fewer per-conversation function calls** — Four new optimization rounds: defer `openai._base_client` import (-240ms / -17MB on every CLI invocation), hot-path optimizations cut 47% of per-conversation function calls (399k → 213k for 31-turn chat), defer compression-feasibility check (-170 to -290ms on every agent construction), adaptive subprocess polling (-195ms per tool call, 1+ second per turn). Termux cold start drops from 2.9s to 0.8s. `hermes --version` cold drops 63% (701ms → 258ms), flipping the head-to-head benchmark against Codex CLI from 5/11 wins to 6/11. ([#28864](https://github.com/NousResearch/hermes-agent/pull/28864), [#28866](https://github.com/NousResearch/hermes-agent/pull/28866), [#28957](https://github.com/NousResearch/hermes-agent/pull/28957), [#29006](https://github.com/NousResearch/hermes-agent/pull/29006), [#29419](https://github.com/NousResearch/hermes-agent/pull/29419), [#30121](https://github.com/NousResearch/hermes-agent/pull/30121), [#30609](https://github.com/NousResearch/hermes-agent/pull/30609), [#31968](https://github.com/NousResearch/hermes-agent/pull/31968))
+
+- **`session_search` rebuilt — no LLM, no cost, 4,500× faster** — The old `session_search` was an aux-LLM-powered tool that cost ~$0.30/call and took ~30 seconds to summarize three sessions, sometimes confabulating when the right session wasn't even in the FTS5 hit list. The new shape is one tool with three modes (discovery, scroll, browse) inferred from which args are set — no `mode` parameter, no aux-LLM, no config knob, no companion skill. Discovery is ~20ms instead of ~90s; scroll is ~1ms. Searching your past sessions for context is now free and instant. ([#27590](https://github.com/NousResearch/hermes-agent/pull/27590))
+
+- **Promptware defense — Brainworm-class attacks blocked at three chokepoints** — Inspired by recent Brainworm / Promptware Kill Chain research (Origin HQ, arxiv 2601.09625), Hermes now defends the context window against prompt-injection attacks that try to hijack the agent via tool output, recalled memory, or stored skills. Single source of truth (`tools/threat_patterns.py`) with ~15 new Brainworm/C2 patterns; recalled memory is scanned at load time; tool results get delimiter markers so a malicious file or remote service can't impersonate Hermes' own system content. Paired with a new `security-guidance` plugin that pattern-matches dangerous code writes. ([#32269](https://github.com/NousResearch/hermes-agent/pull/32269), [#33131](https://github.com/NousResearch/hermes-agent/pull/33131), [#9151](https://github.com/NousResearch/hermes-agent/pull/9151))
+
+- **Bitwarden Secrets Manager — one bootstrap token replaces every per-provider API key** — Stop keeping plaintext API keys in `~/.hermes/.env`. Install Bitwarden Secrets Manager (`bws` auto-installs lazily on first use), point Hermes at it with one bootstrap token (`BWS_ACCESS_TOKEN`), and every credential you need comes from Bitwarden at startup. Rotate a key in the Bitwarden web app and the rotation actually takes effect — Bitwarden defaults to source-of-truth so its values overwrite matching env vars on startup. Flip `secrets.bitwarden.override_existing: false` to invert. EU Cloud and self-hosted Bitwarden server URLs supported. Detected credentials are now labeled with their source so you can see at a glance which keys came from Bitwarden vs. the local env. ([#30035](https://github.com/NousResearch/hermes-agent/pull/30035), [#31378](https://github.com/NousResearch/hermes-agent/pull/31378), [#30364](https://github.com/NousResearch/hermes-agent/pull/30364))
+
+- **ntfy as the 23rd messaging platform — push notifications without an account** — ntfy is the self-hostable push-notification service with no signup, no API key, just a topic URL. Hermes now adapts to it as a platform plugin (zero edits to core), so your agent can send you push notifications from any cron job, kanban task completion, or chat `send_message` — to your phone, your watch, your desktop, your homelab. (salvages [#30625](https://github.com/NousResearch/hermes-agent/pull/30625) → originally [#4043](https://github.com/NousResearch/hermes-agent/pull/4043)) ([#30867](https://github.com/NousResearch/hermes-agent/pull/30867))
+
+- **Skill bundles — `/<name>` loads multiple skills at once** — A skill bundle is a named group of skills that loads them all together with one slash command. Set up your "writing day" bundle (humanizer + ideation + obsidian + youtube-content) and `/writing-day` activates all four for the session. Skills Hub now has health checks, a freshness badge, and a watchdog cron. Three new optional skills land: `code-wiki` (Karpathy's LLM-Wiki, persistent indexed dev wiki), `openhands` (delegate to OpenHands for parallel coding agents), and `web-pentest` (OWASP-style web pentest recipes). ([#28373](https://github.com/NousResearch/hermes-agent/pull/28373), [#32345](https://github.com/NousResearch/hermes-agent/pull/32345), [#32240](https://github.com/NousResearch/hermes-agent/pull/32240), [#32261](https://github.com/NousResearch/hermes-agent/pull/32261), [#32265](https://github.com/NousResearch/hermes-agent/pull/32265))
+
+- **TUI session orchestrator — multiple live sessions in one TUI window** — The Ink TUI gained an active-session switcher overlay. List, switch between, refresh, and close multiple live process-local sessions without leaving the TUI; dispatch a new session with a session-scoped model picker. Plus a wave of TUI polish — mouse-tracking DEC mode presets, scrollback preservation across branches and termux, slash-dropdown fixes, x.com link rendering, and CJK / IME input rendering improvements. (salvages [#27642](https://github.com/NousResearch/hermes-agent/pull/27642)) ([#32980](https://github.com/NousResearch/hermes-agent/pull/32980), [#30084](https://github.com/NousResearch/hermes-agent/pull/30084))
+
+- **Two new image_gen providers — Krea 2 Medium + Large, FAL ported to plugin** — Krea joins the image_gen lineup as a built-in plugin: `Krea 2 Medium` ($0.03) and `Krea 2 Large` ($0.06), auto-discovered, selectable via `hermes tools` → Image Generation → Krea. Available through both the native Krea plugin and the FAL.ai catalog. The FAL.ai backend got pulled out of the monolithic image-generation tool into `plugins/image_gen/fal/`, completing the four-way architectural parity already established by web, browser, and video_gen — new image providers are now one file, not a fork. ([#33236](https://github.com/NousResearch/hermes-agent/pull/33236), [#30380](https://github.com/NousResearch/hermes-agent/pull/30380), [#33506](https://github.com/NousResearch/hermes-agent/pull/33506))
+
+- **Nous-approved MCP catalog with interactive picker** — A curated catalog of Nous-vetted MCP servers, mirroring the optional-skills shape. Run `hermes mcp` and you get an interactive picker; install with one keystroke, credentials prompted at install time and written to `~/.hermes/.env`. Ships with the n8n manifest first. Closes the discovery gap that left users hunting GitHub for trusted MCP servers. ([#30870](https://github.com/NousResearch/hermes-agent/pull/30870))
+
+- **OpenHands orchestration skill** — A new optional skill under `optional-skills/autonomous-ai-agents/openhands/` lets the agent delegate coding tasks to the OpenHands CLI alongside `claude-code`, `codex`, and `opencode`. OpenHands is the model-agnostic member of that family — any LiteLLM-supported provider works (OpenAI, Anthropic, OpenRouter, your own), so you can route a sub-task to the cheapest model that can finish it. Drop-in worker for kanban swarms and `/delegate` flows. (closes [#477](https://github.com/NousResearch/hermes-agent/issues/477)) ([#32261](https://github.com/NousResearch/hermes-agent/pull/32261))
+
+- **Deep xAI integration round — Web Search plugin, OAuth proxy upstream, May 15 retirement detection, natural TTS, security hardening** — Six interlocking xAI improvements:
+    - **xAI Web Search** lands as a `plugins/web/xai/` provider, slots alongside Brave / Tavily / Exa / SearXNG / DDGS / Firecrawl — reuses your existing Grok OAuth or `XAI_API_KEY` credentials, no new env vars. ([#29042](https://github.com/NousResearch/hermes-agent/pull/29042))
+    - **`hermes proxy` gains an xAI upstream** — your local OpenAI-compatible endpoint can now be backed by SuperGrok OAuth, no PKCE-refresh code to write in your client. ([#28356](https://github.com/NousResearch/hermes-agent/pull/28356))
+    - **May 15 model retirement detection** — `grok-4`, `grok-4-fast{,-reasoning,-non-reasoning}`, `grok-3`, `grok-code-fast-1`, `grok-imagine-image-pro` etc. are detected in doctor and chat startup, with `hermes migrate xai` providing one-shot config migration to the supported model. No more silent 404s after the retirement date. ([#29277](https://github.com/NousResearch/hermes-agent/pull/29277))
+    - **Opt-in `auto_speech_tags`** for xAI TTS — inserts light `[pause]` tags between paragraphs and sentences for more natural-sounding voice replies. Default OFF. ([#29376](https://github.com/NousResearch/hermes-agent/pull/29376))
+    - **`xai-oauth` `base_url` pinned to `x.ai` origin** — closes a silent credential-leak vector where `XAI_BASE_URL` could repoint OAuth-authenticated inference to an attacker-controlled host. ([#28952](https://github.com/NousResearch/hermes-agent/pull/28952))
+    - **OpenAI-style execution guidance applied to Grok models** — Grok and xai-oauth now get the same family-specific execution discipline block GPT/Codex have, so the model stops claiming completion without tool calls and stops suggesting workarounds instead of using existing tools. ([#27797](https://github.com/NousResearch/hermes-agent/pull/27797))
+    - Plus `x_search` degraded-results surfacing, tier-gated 403 with API-key fallback, PKCE `code_challenge` round-trip fix, dead-token quarantine on terminal refresh failure, MiniMax-style per-request short-token refresh, and `WKE=unauthenticated` honored at both classifier sites. ([#29484](https://github.com/NousResearch/hermes-agent/pull/29484), [#28351](https://github.com/NousResearch/hermes-agent/pull/28351), [#27560](https://github.com/NousResearch/hermes-agent/pull/27560), [#28116](https://github.com/NousResearch/hermes-agent/pull/28116), [#30619](https://github.com/NousResearch/hermes-agent/pull/30619), [#30872](https://github.com/NousResearch/hermes-agent/pull/30872))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### The Big Refactor — `run_agent.py` 16k → 3.8k
+
+- `run_agent.py` from 16,083 → 3,821 lines (-76%), extracted into 14 cohesive `agent/*` modules. `run_conversation` alone was 3,877 lines before the refactor. Every extraction keeps a thin forwarder on `AIAgent`, every test-patch path is preserved, every external caller stays compatible. ([#27248](https://github.com/NousResearch/hermes-agent/pull/27248))
+
+### Agent loop & conversation
+
+- Auxiliary task layered fallback (primary → chain → main agent → graceful fail) on capacity errors (402/429/connection). (salvages [#26811](https://github.com/NousResearch/hermes-agent/pull/26811) + [#26998](https://github.com/NousResearch/hermes-agent/pull/26998)) ([#27625](https://github.com/NousResearch/hermes-agent/pull/27625))
+- Buffer retry/fallback status; surface only on terminal failure (no more noisy "retrying..." spam in mid-run output). ([#33816](https://github.com/NousResearch/hermes-agent/pull/33816))
+- Host contract for external context engines — condenses 5 prior PRs into one extension surface. ([#33750](https://github.com/NousResearch/hermes-agent/pull/33750))
+- Fallback immediately on provider content-policy blocks. ([#33883](https://github.com/NousResearch/hermes-agent/pull/33883))
+- Re-pad `reasoning_content` on cross-provider fallback to require-side providers. (salvage [#33784](https://github.com/NousResearch/hermes-agent/pull/33784)) ([#33795](https://github.com/NousResearch/hermes-agent/pull/33795))
+- Per-turn tool-outcome verifier — patch tool gets indent preservation, CRLF preservation, per-file failure escalation. ([#32273](https://github.com/NousResearch/hermes-agent/pull/32273))
+- Single-knob native vision for custom-provider models. ([#29679](https://github.com/NousResearch/hermes-agent/pull/29679))
+- Background review fork isolated from external memory plugins. ([#27190](https://github.com/NousResearch/hermes-agent/pull/27190))
+- Background review inherits parent toolset config for `tools[]` cache parity. ([#29704](https://github.com/NousResearch/hermes-agent/pull/29704))
+- Recover from providers returning list-type tool content. ([#30259](https://github.com/NousResearch/hermes-agent/pull/30259))
+- Treat partial-stream stub responses as length truncation rather than clean stop. ([#30998](https://github.com/NousResearch/hermes-agent/pull/30998))
+- OpenAI execution guidance applied to xAI Grok / xai-oauth. ([#27797](https://github.com/NousResearch/hermes-agent/pull/27797))
+- ContextVars propagate to concurrent tool worker threads.
+- Preload `jiter` native parser. ([#33692](https://github.com/NousResearch/hermes-agent/pull/33692))
+- Expose context engine tools with saved toolsets. (salvage of [#31194](https://github.com/NousResearch/hermes-agent/pull/31194)) ([#33719](https://github.com/NousResearch/hermes-agent/pull/33719))
+
+### Sessions & memory
+
+- `session_search` rebuilt — single-shape (discovery + scroll + browse), no aux-LLM, ~20ms vs. ~90s. ([#27590](https://github.com/NousResearch/hermes-agent/pull/27590))
+- Opt-in JSON snapshot writer for sessions. (salvage of [#29182](https://github.com/NousResearch/hermes-agent/pull/29182)) ([#29278](https://github.com/NousResearch/hermes-agent/pull/29278))
+- Persist `platform_message_id` for recall across gateway restarts. ([#29449](https://github.com/NousResearch/hermes-agent/pull/29449))
+- Inline memory-context mentions stay visible in conversation. ([#28132](https://github.com/NousResearch/hermes-agent/pull/28132))
+- Recalled memory labeled informational, not authoritative. ([#28583](https://github.com/NousResearch/hermes-agent/pull/28583))
+- Memory + context-engine tool injection gated on `enabled_toolsets`. ([#30177](https://github.com/NousResearch/hermes-agent/pull/30177))
+- Guard against external drift in `MEMORY.md` / `USER.md`. ([#30877](https://github.com/NousResearch/hermes-agent/pull/30877))
+- Honcho runtime peer mapping — correctness follow-ups + setup wizard + docs. ([#30077](https://github.com/NousResearch/hermes-agent/pull/30077))
+- Periodic memory logging for leak detection. (salvage of [#17667](https://github.com/NousResearch/hermes-agent/pull/17667)) ([#27102](https://github.com/NousResearch/hermes-agent/pull/27102))
+
+### Codex / Responses-API maturation
+
+- TTFB watchdog for stalled Codex Responses streams. ([#32042](https://github.com/NousResearch/hermes-agent/pull/32042))
+- Actionable hint when stale-call detector fires on known silent-reject pattern. ([#32016](https://github.com/NousResearch/hermes-agent/pull/32016), [#33133](https://github.com/NousResearch/hermes-agent/pull/33133))
+- Drop SDK `responses.stream()` helper; consume events directly. ([#33042](https://github.com/NousResearch/hermes-agent/pull/33042))
+- Gracefully recover from `invalid_encrypted_content`. (salvage of [#10144](https://github.com/NousResearch/hermes-agent/pull/10144)) ([#33035](https://github.com/NousResearch/hermes-agent/pull/33035))
+- Recover Codex Responses streams with null output. ([#32963](https://github.com/NousResearch/hermes-agent/pull/32963), [#33390](https://github.com/NousResearch/hermes-agent/pull/33390))
+- Drop foreign-issuer reasoning and transient `rs_tmp` reasoning replay state. ([#33156](https://github.com/NousResearch/hermes-agent/pull/33156), [#33146](https://github.com/NousResearch/hermes-agent/pull/33146))
+- Codex 429 quota classified as rate-limit, not missing credentials. ([#33168](https://github.com/NousResearch/hermes-agent/pull/33168))
+- Codex chat path falls back to credential_pool when singleton is empty. ([#33189](https://github.com/NousResearch/hermes-agent/pull/33189))
+- Codex re-auth syncs credential_pool. ([#33164](https://github.com/NousResearch/hermes-agent/pull/33164))
+- Omit `tools` key when no tools registered. ([#33409](https://github.com/NousResearch/hermes-agent/pull/33409))
+- Parse Codex image-generation SSE directly. ([#32933](https://github.com/NousResearch/hermes-agent/pull/32933))
+
+---
+
+## 🎛️ Kanban — Multi-Agent Maturation Wave
+
+### Orchestration & dispatch
+
+- Orchestrator-driven auto-decomposition on triage. ([#27572](https://github.com/NousResearch/hermes-agent/pull/27572))
+- Kanban swarm topology helper — `hermes kanban swarm` creates a Swarm v1 graph (root + parallel workers + gated verifier + gated synthesizer + shared blackboard). (salvages [#26791](https://github.com/NousResearch/hermes-agent/pull/26791) by @Niraven) ([#28443](https://github.com/NousResearch/hermes-agent/pull/28443))
+- Dispatcher wires review agents from the review column. ([#28449](https://github.com/NousResearch/hermes-agent/pull/28449))
+- Stale-detection for running tasks in dispatcher. ([#28452](https://github.com/NousResearch/hermes-agent/pull/28452))
+- Respawn guard blocks repeat worker storms. ([#28455](https://github.com/NousResearch/hermes-agent/pull/28455))
+- Respawn guard defers `blocker_auth` instead of auto-blocking. ([#28683](https://github.com/NousResearch/hermes-agent/pull/28683))
+- Cross-profile cron jobs surface in dashboard. ([#28457](https://github.com/NousResearch/hermes-agent/pull/28457))
+- Worker visibility endpoints: `/workers/active`, `/runs/{id}`, `/inspect`. (salvages [#23761](https://github.com/NousResearch/hermes-agent/pull/23761) by @Interstellar-code) ([#28432](https://github.com/NousResearch/hermes-agent/pull/28432))
+
+### Task configuration & scheduling
+
+- Per-task model override. ([#28364](https://github.com/NousResearch/hermes-agent/pull/28364))
+- Board-level default workdir. ([#28394](https://github.com/NousResearch/hermes-agent/pull/28394))
+- Configurable worktree paths and branches. ([#28462](https://github.com/NousResearch/hermes-agent/pull/28462))
+- Scheduled task start times. ([#28384](https://github.com/NousResearch/hermes-agent/pull/28384))
+- Scheduled status for delayed follow-ups. ([#28467](https://github.com/NousResearch/hermes-agent/pull/28467))
+- Trimmed task comments. ([#28399](https://github.com/NousResearch/hermes-agent/pull/28399))
+- Initial-status for human-ops cards. ([#28414](https://github.com/NousResearch/hermes-agent/pull/28414))
+- `max_in_progress` config to cap concurrent running tasks. ([#28420](https://github.com/NousResearch/hermes-agent/pull/28420))
+- Filter tasks by workflow fields. ([#28454](https://github.com/NousResearch/hermes-agent/pull/28454))
+- `--sort` for `hermes kanban list`. ([#28427](https://github.com/NousResearch/hermes-agent/pull/28427))
+- Optional `board` parameter on all MCP tools. ([#28444](https://github.com/NousResearch/hermes-agent/pull/28444))
+- Stamp originating ACP session_id on tasks. ([#28447](https://github.com/NousResearch/hermes-agent/pull/28447))
+- `auto_promote_children` config toggle. ([#28344](https://github.com/NousResearch/hermes-agent/pull/28344))
+- `archive --rm` to hard-delete archived tasks. ([#28355](https://github.com/NousResearch/hermes-agent/pull/28355))
+- Promote dependents when parent is archived. ([#28372](https://github.com/NousResearch/hermes-agent/pull/28372))
+- Promote blocked tasks when parent dependencies complete. ([#28377](https://github.com/NousResearch/hermes-agent/pull/28377))
+- Demote ready children when parent is reopened. ([#28382](https://github.com/NousResearch/hermes-agent/pull/28382))
+- `promote` verb for manual `todo→ready` recovery + bulk `--ids`. (salvage [#29464](https://github.com/NousResearch/hermes-agent/pull/29464)) ([#31334](https://github.com/NousResearch/hermes-agent/pull/31334))
+
+### Dashboard
+
+- Drag-to-delete trash zone + bulk delete. ([#28468](https://github.com/NousResearch/hermes-agent/pull/28468))
+- Surface per-task `model_override` in show + tool output. ([#28442](https://github.com/NousResearch/hermes-agent/pull/28442))
+- Cross-profile notification delivery via `kanban.notification_sources`. ([#28395](https://github.com/NousResearch/hermes-agent/pull/28395))
+- Scratch-workspace deletion warning for users. ([#30949](https://github.com/NousResearch/hermes-agent/pull/30949))
+- Mobile dashboard UX polish. ([#28127](https://github.com/NousResearch/hermes-agent/pull/28127))
+
+### Reliability
+
+- Worker log retention configurable. ([#27867](https://github.com/NousResearch/hermes-agent/pull/27867))
+- Configurable claim TTL. ([#28392](https://github.com/NousResearch/hermes-agent/pull/28392))
+- Fingerprint crash errors to prevent fleet-wide retry exhaustion. ([#28380](https://github.com/NousResearch/hermes-agent/pull/28380))
+- Reset failure counters on `unblock_task`. ([#28379](https://github.com/NousResearch/hermes-agent/pull/28379))
+- Detect cycles in `decompose_triage_task` sibling-link pre-validation. ([#28088](https://github.com/NousResearch/hermes-agent/pull/28088))
+- Surface unusable triage auxiliary model (auto-decompose aware). ([#27871](https://github.com/NousResearch/hermes-agent/pull/27871))
+- Align failure diagnostics with retry limit. ([#27868](https://github.com/NousResearch/hermes-agent/pull/27868))
+- Align worker terminal timeout with task runtime. ([#27864](https://github.com/NousResearch/hermes-agent/pull/27864))
+- Auto-install bundled skills (kanban-worker) on init. ([#28368](https://github.com/NousResearch/hermes-agent/pull/28368))
+- Make legacy task migration idempotent. ([#28397](https://github.com/NousResearch/hermes-agent/pull/28397))
+- Serialize DB initialization. ([#28383](https://github.com/NousResearch/hermes-agent/pull/28383))
+- Persist worker session metadata on completion. ([#28387](https://github.com/NousResearch/hermes-agent/pull/28387))
+- Pass `accept-hooks` to worker chat subprocess. ([#28393](https://github.com/NousResearch/hermes-agent/pull/28393))
+- Preserve worker tools with restricted toolsets. ([#28396](https://github.com/NousResearch/hermes-agent/pull/28396))
+- Avoid unsafe Windows worker Hermes shim resolution. ([#28398](https://github.com/NousResearch/hermes-agent/pull/28398))
+- Sync slash subcommands with live parser. ([#28376](https://github.com/NousResearch/hermes-agent/pull/28376))
+- Show scheduled kanban tasks in dashboard. ([#28400](https://github.com/NousResearch/hermes-agent/pull/28400))
+- Assign single-task kanban decompositions. ([#28401](https://github.com/NousResearch/hermes-agent/pull/28401))
+- Configurable `max_tokens` for kanban specify. ([#28374](https://github.com/NousResearch/hermes-agent/pull/28374))
+- Per-job profile support for cron. ([#28124](https://github.com/NousResearch/hermes-agent/pull/28124))
+- Codex app-server: include every Kanban-pinned path in `writable_roots`. ([#28435](https://github.com/NousResearch/hermes-agent/pull/28435))
+- Cache kanban worker guidance at session init for prompt-cache reuse. ([#28425](https://github.com/NousResearch/hermes-agent/pull/28425))
+
+---
+
+## ⚡ Performance
+
+- `openai._base_client` import deferred — 240ms / 17MB off every CLI cold start. ([#28864](https://github.com/NousResearch/hermes-agent/pull/28864))
+- Agent-loop hot-path optimizations — 47% fewer per-conversation function calls (399k → 213k for 31-turn chat). ([#28866](https://github.com/NousResearch/hermes-agent/pull/28866))
+- Compression-feasibility check deferred — 170-290ms off every agent construction. ([#28957](https://github.com/NousResearch/hermes-agent/pull/28957))
+- Adaptive subprocess poll — ~195ms off every tool call, 1+ second per turn. ([#29006](https://github.com/NousResearch/hermes-agent/pull/29006))
+- Termux TUI cold start speedup. ([#29419](https://github.com/NousResearch/hermes-agent/pull/29419))
+- Termux non-TUI cold start speedup. (salvage [#29438](https://github.com/NousResearch/hermes-agent/pull/29438)) ([#30121](https://github.com/NousResearch/hermes-agent/pull/30121))
+- Termux fast-path version + deferred bare-prompt agent startup. ([#30609](https://github.com/NousResearch/hermes-agent/pull/30609))
+- Cut `hermes --version` wall time 63% — flips head-to-head vs Codex CLI. ([#31968](https://github.com/NousResearch/hermes-agent/pull/31968))
+- Date-only timestamp + loud gateway-DB roundtrip logging — improves prompt-cache hit rate. ([#27675](https://github.com/NousResearch/hermes-agent/pull/27675))
+- Cache kanban worker guidance at session init for prompt-cache reuse. ([#28425](https://github.com/NousResearch/hermes-agent/pull/28425))
+
+---
+
+## 🔧 Tool System
+
+### Tool surface
+
+- `patch`: indent preservation, CRLF preservation, per-file failure escalation. ([#32273](https://github.com/NousResearch/hermes-agent/pull/32273))
+- `terminal`: warn at call time when `background=true` runs silently. ([#31289](https://github.com/NousResearch/hermes-agent/pull/31289))
+- `terminal`: nudge homebrewed CI pollers at the tool surface. ([#33142](https://github.com/NousResearch/hermes-agent/pull/33142))
+- `x_search`: surface degraded results + validate dates. ([#29484](https://github.com/NousResearch/hermes-agent/pull/29484))
+- `x_search`: auto-enable toolset when xAI credentials are configured. ([#27376](https://github.com/NousResearch/hermes-agent/pull/27376))
+- `computer_use`: route SOM/vision captures via auxiliary.vision. ([#30126](https://github.com/NousResearch/hermes-agent/pull/30126))
+- `transcription`: reject symlinked audio inputs. ([#10082](https://github.com/NousResearch/hermes-agent/pull/10082))
+- TTS: prevent double `[pause]` in xAI auto speech tags. ([#32237](https://github.com/NousResearch/hermes-agent/pull/32237))
+- TTS: preserve native audio outside Telegram voice delivery. ([#28512](https://github.com/NousResearch/hermes-agent/pull/28512))
+- TTS: opt-in xAI `auto_speech_tags` speech-tag pauses for natural voice replies. ([#29376](https://github.com/NousResearch/hermes-agent/pull/29376))
+- Voice: chunk oversized CLI recordings. ([#30044](https://github.com/NousResearch/hermes-agent/pull/30044))
+- Voice: honor `PULSE_SERVER` / `PIPEWIRE_REMOTE` inside Docker. ([#22534](https://github.com/NousResearch/hermes-agent/pull/22534))
+
+### Browser
+
+- All cloud browser providers (Browserbase, Anchor, Camofox, Hyperbrowser, etc.) migrated to image_gen-style plugins. (salvages [#25580](https://github.com/NousResearch/hermes-agent/pull/25580)) ([#27403](https://github.com/NousResearch/hermes-agent/pull/27403))
+- Auto-launch Chromium-family browser for CDP. ([#29106](https://github.com/NousResearch/hermes-agent/pull/29106))
+- Docker: discover agent-browser Chromium binary at boot. ([#33184](https://github.com/NousResearch/hermes-agent/pull/33184))
+
+### Image generation
+
+- **Krea** provider plugin (Krea 2 Medium + Large). ([#33236](https://github.com/NousResearch/hermes-agent/pull/33236))
+- FAL backend ported to `plugins/image_gen/fal`. (salvage [#27966](https://github.com/NousResearch/hermes-agent/pull/27966)) ([#30380](https://github.com/NousResearch/hermes-agent/pull/30380))
+- Cache xAI ephemeral URL responses to disk. ([#31759](https://github.com/NousResearch/hermes-agent/pull/31759))
+
+### Web search
+
+- **xAI Web Search** as a provider plugin. ([#29042](https://github.com/NousResearch/hermes-agent/pull/29042))
+
+### MCP
+
+- **Nous-approved MCP catalog** with interactive picker. ([#30870](https://github.com/NousResearch/hermes-agent/pull/30870))
+- **TLS client certificate (mTLS) support** for HTTP and SSE MCP servers. ([#33721](https://github.com/NousResearch/hermes-agent/pull/33721))
+- Stdin paste-back fallback for headless OAuth flow. ([#32053](https://github.com/NousResearch/hermes-agent/pull/32053))
+- `skip` at paste prompt bypasses auth without disabling server. ([#32069](https://github.com/NousResearch/hermes-agent/pull/32069))
+- Registry-aware `mcp_` prefix on both ends of round-trip. ([#31700](https://github.com/NousResearch/hermes-agent/pull/31700))
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skills system
+
+- **Skill bundles** — `/<name>` loads multiple skills. ([#28373](https://github.com/NousResearch/hermes-agent/pull/28373))
+- Skills Hub: health checks, freshness badge, and a watchdog cron. ([#32345](https://github.com/NousResearch/hermes-agent/pull/32345))
+- Opt-in AST deep diagnostics on skill writes. (salvage of [#30918](https://github.com/NousResearch/hermes-agent/pull/30918)) ([#31198](https://github.com/NousResearch/hermes-agent/pull/31198))
+- Bundled/pinned skill protection in background-review prompts. ([#28338](https://github.com/NousResearch/hermes-agent/pull/28338))
+- Show user-modified skill names in bundled skill sync summary. ([#28671](https://github.com/NousResearch/hermes-agent/pull/28671))
+- Load symlinked skill slash commands. ([#27759](https://github.com/NousResearch/hermes-agent/pull/27759))
+- Deduplicate Skills Hub search results by identifier, not name. ([#29490](https://github.com/NousResearch/hermes-agent/pull/29490))
+
+### New skills
+
+- `openhands` — delegate-to-OpenHands orchestration skill (closes [#477](https://github.com/NousResearch/hermes-agent/issues/477)) ([#32261](https://github.com/NousResearch/hermes-agent/pull/32261))
+- `code-wiki` — persistent indexed dev wiki (closes [#486](https://github.com/NousResearch/hermes-agent/issues/486)) ([#32240](https://github.com/NousResearch/hermes-agent/pull/32240))
+- `web-pentest` — OWASP recipes (closes [#400](https://github.com/NousResearch/hermes-agent/issues/400)) ([#32265](https://github.com/NousResearch/hermes-agent/pull/32265))
+- `baoyu-article-illustrator` ([#28287](https://github.com/NousResearch/hermes-agent/pull/28287))
+
+---
+
+## ☁️ Providers
+
+### xAI deep integration
+
+- **xAI Web Search** as a `plugins/web/xai/` provider plugin. ([#29042](https://github.com/NousResearch/hermes-agent/pull/29042))
+- **`hermes proxy` xAI upstream** — OpenAI-compatible local proxy backed by xai-oauth. ([#28356](https://github.com/NousResearch/hermes-agent/pull/28356))
+- **May 15 model retirement detection + `hermes migrate xai`** for grok-4 / grok-3 / grok-code-fast-1 / grok-imagine-image-pro. ([#29277](https://github.com/NousResearch/hermes-agent/pull/29277))
+- **Opt-in `auto_speech_tags`** for natural xAI TTS voice replies. ([#29376](https://github.com/NousResearch/hermes-agent/pull/29376))
+- **xai-oauth base_url pinned to x.ai origin** — closes silent credential-leak vector. ([#28952](https://github.com/NousResearch/hermes-agent/pull/28952))
+- **OpenAI-style execution guidance** applied to Grok / xai-oauth models. ([#27797](https://github.com/NousResearch/hermes-agent/pull/27797))
+- xAI: detect retired May 15 models in doctor/chat startup. ([#29277](https://github.com/NousResearch/hermes-agent/pull/29277))
+- xAI: resolve Grok Build context for OAuth. ([#30579](https://github.com/NousResearch/hermes-agent/pull/30579))
+- xAI OAuth: tier-gated 403 with API-key fallback. ([#28351](https://github.com/NousResearch/hermes-agent/pull/28351))
+- xAI OAuth: PKCE `code_challenge` echo. ([#27560](https://github.com/NousResearch/hermes-agent/pull/27560))
+- xAI OAuth: quarantine dead tokens on terminal refresh failure. ([#28116](https://github.com/NousResearch/hermes-agent/pull/28116))
+- xAI OAuth: honor `WKE=unauthenticated` disambiguator at both classifier sites. ([#30872](https://github.com/NousResearch/hermes-agent/pull/30872))
+- xAI OAuth: accept bare-code manual paste (state=None). (closes [#26923](https://github.com/NousResearch/hermes-agent/issues/26923)) ([#33880](https://github.com/NousResearch/hermes-agent/pull/33880))
+- xAI OAuth: fall back to manual paste on loopback timeout. ([#33231](https://github.com/NousResearch/hermes-agent/pull/33231))
+- xAI proxy: handle 429 rate-limit responses in proxy retry path. ([#33743](https://github.com/NousResearch/hermes-agent/pull/33743))
+
+### Other providers
+
+- **OpenAI API as a first-class provider** (distinct from Codex runtime). ([#31898](https://github.com/NousResearch/hermes-agent/pull/31898))
+- **Microsoft Entra ID** auth for Azure Foundry (with 1M Anthropic-Messages beta preserved on Bearer). (salvages [#27509](https://github.com/NousResearch/hermes-agent/pull/27509), [#27022](https://github.com/NousResearch/hermes-agent/pull/27022)) ([#28101](https://github.com/NousResearch/hermes-agent/pull/28101), [#28084](https://github.com/NousResearch/hermes-agent/pull/28084))
+- **OpenRouter** sticky routing — `session_id` passed via `extra_body` so a long-running session keeps landing on the same upstream provider. (@Cybourgeoisie) ([#33939](https://github.com/NousResearch/hermes-agent/pull/33939))
+- Nous: JWT token for inference; stop replaying invalid Nous refresh tokens. (@rewbs) ([#27663](https://github.com/NousResearch/hermes-agent/pull/27663))
+- Nous Portal: one-shot setup, status CLI, and Nous-included markers. ([#30860](https://github.com/NousResearch/hermes-agent/pull/30860))
+- Anthropic adapter: extract 7 helpers from `convert_messages_to_anthropic`. (salvage [#27784](https://github.com/NousResearch/hermes-agent/pull/27784)) ([#30386](https://github.com/NousResearch/hermes-agent/pull/30386))
+- Catalog: add `qwen3.7-max` to Alibaba + Alibaba-Coding-Plan model lists. ([#33129](https://github.com/NousResearch/hermes-agent/pull/33129))
+- opencode-go: route `qwen3.7-max` via `anthropic_messages`. (@beardthelion) ([#32780](https://github.com/NousResearch/hermes-agent/pull/32780))
+- opencode-go: expose Kimi K2 + DeepSeek reasoning controls. ([#30845](https://github.com/NousResearch/hermes-agent/pull/30845))
+- Remove Vercel AI Gateway and Vercel Sandbox.
+- MiniMax OAuth: refresh short-lived access tokens per request. ([#30619](https://github.com/NousResearch/hermes-agent/pull/30619))
+- Codex OAuth: quarantine terminal refresh errors. ([#28118](https://github.com/NousResearch/hermes-agent/pull/28118))
+- Codex: drop dead model slugs that HTTP 400 on ChatGPT Pro. ([#33424](https://github.com/NousResearch/hermes-agent/pull/33424))
+- Codex: sync `manual:device_code` pool entries on re-auth. ([#33744](https://github.com/NousResearch/hermes-agent/pull/33744))
+- MiniMax OAuth: quarantine terminal refresh errors. ([#28119](https://github.com/NousResearch/hermes-agent/pull/28119))
+
+---
+
+## 🔑 Secrets
+
+- **Bitwarden Secrets Manager** integration with lazy `bws` install. ([#30035](https://github.com/NousResearch/hermes-agent/pull/30035))
+- Bitwarden: EU Cloud + self-hosted server URL support. ([#31378](https://github.com/NousResearch/hermes-agent/pull/31378))
+- Label detected credentials with their source (Bitwarden). ([#30364](https://github.com/NousResearch/hermes-agent/pull/30364))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### Gateway core
+
+- **Deliverable mode** — agents ship artifacts as native uploads from any platform (Slack/Discord/Telegram/Teams/Email). ([#27813](https://github.com/NousResearch/hermes-agent/pull/27813))
+- `hermes send` — pipe any script's output to any messaging platform. (salvage of [#19631](https://github.com/NousResearch/hermes-agent/pull/19631)) ([#27188](https://github.com/NousResearch/hermes-agent/pull/27188))
+- Debounce queued text follow-ups during active sessions. (salvage of [#31235](https://github.com/NousResearch/hermes-agent/pull/31235)) ([#31341](https://github.com/NousResearch/hermes-agent/pull/31341))
+- Plugin-transformed `final_response` delivered through streaming gate. ([#31433](https://github.com/NousResearch/hermes-agent/pull/31433))
+- Refresh cached agent tools on `/reload-mcp`. ([#32815](https://github.com/NousResearch/hermes-agent/pull/32815))
+- Harden kanban + provider cleanup races on long-running workloads. ([#29479](https://github.com/NousResearch/hermes-agent/pull/29479))
+
+### New / reorganized adapters
+
+- **ntfy** — 23rd platform, push notifications, plugin shape, zero core edits. (salvages [#30625](https://github.com/NousResearch/hermes-agent/pull/30625) → [#4043](https://github.com/NousResearch/hermes-agent/pull/4043)) ([#30867](https://github.com/NousResearch/hermes-agent/pull/30867))
+- **Discord** adapter migrated to bundled plugin. (salvage of [#24356](https://github.com/NousResearch/hermes-agent/pull/24356)) ([#30591](https://github.com/NousResearch/hermes-agent/pull/30591))
+- **Mattermost** adapter migrated to bundled plugin. (salvage of [#30916](https://github.com/NousResearch/hermes-agent/pull/30916)) ([#31748](https://github.com/NousResearch/hermes-agent/pull/31748))
+
+### Telegram
+
+- Edit status messages in place instead of appending. (based on [#30141](https://github.com/NousResearch/hermes-agent/pull/30141) by @qike-ms) ([#30864](https://github.com/NousResearch/hermes-agent/pull/30864))
+- Skip-STT audio path + 2GB cap via local Bot API server. ([#28541](https://github.com/NousResearch/hermes-agent/pull/28541))
+- Route image documents (.png/.jpg/.webp/.gif) through vision pipeline. ([#28519](https://github.com/NousResearch/hermes-agent/pull/28519))
+- Route audio file attachments away from STT pipeline. ([#28478](https://github.com/NousResearch/hermes-agent/pull/28478))
+- `disable_topic_auto_rename` gateway flag. ([#28523](https://github.com/NousResearch/hermes-agent/pull/28523))
+- `ignore_root_dm` config to drop messages without thread_id. ([#28536](https://github.com/NousResearch/hermes-agent/pull/28536))
+- Chat-scoped auth without sender user_id. ([#28525](https://github.com/NousResearch/hermes-agent/pull/28525))
+- Fail-closed auth fallback when `TELEGRAM_ALLOWED_USERS` is empty. ([#28494](https://github.com/NousResearch/hermes-agent/pull/28494))
+- Roll over tool progress bubbles + scope audio_file_paths. ([#28482](https://github.com/NousResearch/hermes-agent/pull/28482))
+- Avoid duplicate text after auto-TTS voice replies. ([#28509](https://github.com/NousResearch/hermes-agent/pull/28509))
+- Mark final voice reply notify-worthy so Telegram delivers it audibly. ([#28504](https://github.com/NousResearch/hermes-agent/pull/28504))
+
+### Discord
+
+- Recover Windows voice opus decoding. ([#33182](https://github.com/NousResearch/hermes-agent/pull/33182))
+- `allow_any_attachment` config to accept arbitrary file types. ([#27245](https://github.com/NousResearch/hermes-agent/pull/27245))
+- Transcribe native voice notes. ([#28993](https://github.com/NousResearch/hermes-agent/pull/28993))
+- Define UI view classes after lazy install. ([#28817](https://github.com/NousResearch/hermes-agent/pull/28817))
+
+### Signal / Matrix / Feishu / Slack / WeCom
+
+- Signal: `require_mention` filter for group chats. ([#28574](https://github.com/NousResearch/hermes-agent/pull/28574))
+- Matrix: warn on clock-skew silent message drops. ([#27330](https://github.com/NousResearch/hermes-agent/pull/27330))
+- Matrix E2EE installs full dep set; plugins respect `is_connected`. ([#31688](https://github.com/NousResearch/hermes-agent/pull/31688))
+- Feishu: require webhook auth secret + honor config extras. ([#30746](https://github.com/NousResearch/hermes-agent/pull/30746))
+- Feishu: enforce auth and chat binding for approval buttons. ([#30744](https://github.com/NousResearch/hermes-agent/pull/30744))
+- Slack: socket recovery + Windows restart dedupe. ([#28873](https://github.com/NousResearch/hermes-agent/pull/28873))
+- WeCom: safe-parse untrusted XML. ([#32442](https://github.com/NousResearch/hermes-agent/pull/32442))
+
+### DingTalk / Webhooks / Microsoft Graph
+
+- DingTalk: transcribe native voice notes. ([#28993](https://github.com/NousResearch/hermes-agent/pull/28993))
+- Webhook: enforce `INSECURE_NO_AUTH` safety rail on dynamic route reloads. ([#30863](https://github.com/NousResearch/hermes-agent/pull/30863))
+- Webhook: restrict default toolset capabilities. ([#30745](https://github.com/NousResearch/hermes-agent/pull/30745))
+- Microsoft Graph: harden webhook auth requirements. ([#30169](https://github.com/NousResearch/hermes-agent/pull/30169))
+
+---
+
+## 🖥️ CLI & TUI
+
+### CLI
+
+- `/update` slash command in CLI and TUI. ([#23854](https://github.com/NousResearch/hermes-agent/pull/23854))
+- Update auto-rollback when post-pull syntax check fails. ([#28669](https://github.com/NousResearch/hermes-agent/pull/28669))
+- `--branch` flag for `hermes update`. (@jquesnelle) ([#29591](https://github.com/NousResearch/hermes-agent/pull/29591))
+- `/exit --delete` flag to remove session on quit. (salvage of [#17665](https://github.com/NousResearch/hermes-agent/pull/17665)) ([#27101](https://github.com/NousResearch/hermes-agent/pull/27101))
+- `▶ N` indicator in status bar for running `/background` tasks. ([#27175](https://github.com/NousResearch/hermes-agent/pull/27175))
+- Live background terminal-process count in status bar. ([#32061](https://github.com/NousResearch/hermes-agent/pull/32061))
+- Append session recap to `/status` output. (salvage of [#18587](https://github.com/NousResearch/hermes-agent/pull/18587)) ([#27176](https://github.com/NousResearch/hermes-agent/pull/27176))
+- Configurable paste-collapse thresholds (TUI + CLI). (salvage [#29723](https://github.com/NousResearch/hermes-agent/pull/29723)) ([#32087](https://github.com/NousResearch/hermes-agent/pull/32087))
+- `/resume` accepts position numbers. ([#31709](https://github.com/NousResearch/hermes-agent/pull/31709))
+- Bring tool-call display back — verbose mode, specific failure reasons, todo progress. ([#31293](https://github.com/NousResearch/hermes-agent/pull/31293))
+- Validate runtime token refresh in Qwen auth status. ([#31196](https://github.com/NousResearch/hermes-agent/pull/31196))
+
+### TUI
+
+- **TUI session orchestrator** — multiple live sessions in one TUI window. (salvages [#27642](https://github.com/NousResearch/hermes-agent/pull/27642)) ([#32980](https://github.com/NousResearch/hermes-agent/pull/32980))
+- `mouse_tracking` DEC mode presets. (salvage of [#26681](https://github.com/NousResearch/hermes-agent/pull/26681) by @OutThisLife) ([#30084](https://github.com/NousResearch/hermes-agent/pull/30084))
+- Termux scrollback preservation + touch-friendly defaults. ([#28910](https://github.com/NousResearch/hermes-agent/pull/28910))
+- Full assistant text in scrollback (no history truncation). ([#28829](https://github.com/NousResearch/hermes-agent/pull/28829))
+- Preserve scrollback when branching sessions. ([#30162](https://github.com/NousResearch/hermes-agent/pull/30162))
+- Preserve Python dunder identifiers in markdown. ([#28582](https://github.com/NousResearch/hermes-agent/pull/28582))
+- Active profile shown in TUI prompt. ([#28581](https://github.com/NousResearch/hermes-agent/pull/28581))
+- Improve Charizard completion menu contrast. ([#28346](https://github.com/NousResearch/hermes-agent/pull/28346))
+- Stop slash dropdown chopping last char of `/goal`. ([#31311](https://github.com/NousResearch/hermes-agent/pull/31311))
+- Clipboard copy on Linux/Wayland. ([#29342](https://github.com/NousResearch/hermes-agent/pull/29342))
+- Anchor `splitReasoning` unclosed-tag regex; stop eating last paragraph. ([#29426](https://github.com/NousResearch/hermes-agent/pull/29426))
+- Surface verbose tool details. ([#30225](https://github.com/NousResearch/hermes-agent/pull/30225))
+- Load Linux skills on Termux + salvage @adybag14-cyber's Termux gates. ([#30166](https://github.com/NousResearch/hermes-agent/pull/30166))
+- Handle images with codex app-server. ([#31220](https://github.com/NousResearch/hermes-agent/pull/31220))
+- Refresh virtual transcript on viewport resize. ([#31077](https://github.com/NousResearch/hermes-agent/pull/31077))
+- Ignore late thinking deltas after completion. ([#31055](https://github.com/NousResearch/hermes-agent/pull/31055))
+- Commit composer input bursts immediately. ([#31053](https://github.com/NousResearch/hermes-agent/pull/31053))
+- Log parent gateway lifecycle exits. ([#31051](https://github.com/NousResearch/hermes-agent/pull/31051))
+- Clear TTS env var on voice off + TTS indicator in status bar. ([#30987](https://github.com/NousResearch/hermes-agent/pull/30987))
+- Pass `--expose-gc` as node argv instead of `NODE_OPTIONS`. ([#29998](https://github.com/NousResearch/hermes-agent/pull/29998))
+- Align composer cursorLayout with wrap-ansi to kill multiline cursor drift. ([#27489](https://github.com/NousResearch/hermes-agent/pull/27489))
+- Harden Terminal.app rendering and color paths. ([#27251](https://github.com/NousResearch/hermes-agent/pull/27251))
+- Keep `/goal` verdict out of compact status row. ([#27971](https://github.com/NousResearch/hermes-agent/pull/27971))
+- Clamp curses color 8 for 8-color terminals (Docker). ([#30260](https://github.com/NousResearch/hermes-agent/pull/30260))
+
+---
+
+## 🔒 Security & Reliability
+
+### Promptware & memory hardening
+
+- **Promptware defense** — shared threat patterns + memory load-time scan + tool-result delimiters. ([#32269](https://github.com/NousResearch/hermes-agent/pull/32269))
+- Expand memory content scanning patterns to parity with skills guard. ([#9151](https://github.com/NousResearch/hermes-agent/pull/9151))
+- Harden Skills Guard multi-word prompt patterns. (@YLChen-007) ([#26852](https://github.com/NousResearch/hermes-agent/pull/26852))
+- Split cron scanner so skill prose stops false-positiving exfil patterns. ([#32339](https://github.com/NousResearch/hermes-agent/pull/32339))
+
+### File safety
+
+- Protect Hermes control-plane files from prompt injection (`auth.json`, `config.yaml`, `webhook_subscriptions.json`, `mcp-tokens/`). (salvages @PratikRai0101's [#14157](https://github.com/NousResearch/hermes-agent/pull/14157)) ([#30397](https://github.com/NousResearch/hermes-agent/pull/30397))
+- Write-deny `<root>/.env` when running under a profile. ([#29687](https://github.com/NousResearch/hermes-agent/pull/29687))
+- Defense-in-depth read-deny on credential stores. (salvages [#17659](https://github.com/NousResearch/hermes-agent/pull/17659) + [#8055](https://github.com/NousResearch/hermes-agent/pull/8055)) ([#30721](https://github.com/NousResearch/hermes-agent/pull/30721))
+- Reject TTS `output_path` traversal and symlinks in update ZIPs. (salvage [#6693](https://github.com/NousResearch/hermes-agent/pull/6693) + [#15881](https://github.com/NousResearch/hermes-agent/pull/15881)) ([#32056](https://github.com/NousResearch/hermes-agent/pull/32056))
+- Reject symlinked audio inputs. ([#10082](https://github.com/NousResearch/hermes-agent/pull/10082))
+
+### Credential safety
+
+- Avoid persisting borrowed credential secrets — runtime env-sourced keys no longer leak into `auth.json`. ([#31416](https://github.com/NousResearch/hermes-agent/pull/31416))
+- Validate Nous Portal `inference_base_url` against host allowlist. (salvages [#27612](https://github.com/NousResearch/hermes-agent/pull/27612)) ([#30611](https://github.com/NousResearch/hermes-agent/pull/30611))
+- Harden API server key placeholder handling. ([#30738](https://github.com/NousResearch/hermes-agent/pull/30738))
+- Harden Google Chat OAuth credential persistence. (@Zyrixtrex) ([#24788](https://github.com/NousResearch/hermes-agent/pull/24788))
+- xAI OAuth: pin inference `base_url` to x.ai origin. ([#28952](https://github.com/NousResearch/hermes-agent/pull/28952))
+- Quarantine dead OAuth tokens on terminal refresh failure (xAI, Codex, MiniMax). ([#28116](https://github.com/NousResearch/hermes-agent/pull/28116), [#28118](https://github.com/NousResearch/hermes-agent/pull/28118), [#28119](https://github.com/NousResearch/hermes-agent/pull/28119))
+
+### Supply-chain
+
+- **On-demand supply-chain audit via OSV.dev** — `hermes audit`. ([#31460](https://github.com/NousResearch/hermes-agent/pull/31460))
+- `hermes update` syntax-validates critical files post-pull, auto-rollback on failure. ([#28669](https://github.com/NousResearch/hermes-agent/pull/28669))
+- Quarantine `hermes.exe` vs concurrent Windows instance. ([#26677](https://github.com/NousResearch/hermes-agent/pull/26677))
+
+### Other hardening
+
+- Restrict default webhook toolset capabilities. ([#30745](https://github.com/NousResearch/hermes-agent/pull/30745))
+- Harden Microsoft Graph webhook auth requirements. ([#30169](https://github.com/NousResearch/hermes-agent/pull/30169))
+- Require source CIDR allowlisting for public msgraph webhook binds. ([#33722](https://github.com/NousResearch/hermes-agent/pull/33722))
+- Require `API_SERVER_KEY` before dispatching API server work. ([#33232](https://github.com/NousResearch/hermes-agent/pull/33232))
+- env_passthrough: apply GHSA-rhgp-j443-p4rf filter to config.yaml path. (@roadhero) ([#27794](https://github.com/NousResearch/hermes-agent/pull/27794))
+- Dashboard + WeCom: restrict markdown link schemes; safe-parse untrusted XML. ([#32442](https://github.com/NousResearch/hermes-agent/pull/32442))
+- Salvage project-plugin RCE bypass fix from PR [#29311](https://github.com/NousResearch/hermes-agent/pull/29311) (GHSA-5qr3-c538-wm9j). ([#30837](https://github.com/NousResearch/hermes-agent/pull/30837))
+- Cross-profile soft guard on file-write tools + system-prompt hint. ([#31290](https://github.com/NousResearch/hermes-agent/pull/31290))
+- Reject unsafe tar members in Android psutil compatibility installer. ([#33742](https://github.com/NousResearch/hermes-agent/pull/33742))
+- Reject non-regular tar members during tirith auto-install. ([#33786](https://github.com/NousResearch/hermes-agent/pull/33786))
+
+---
+
+## 🪟 Native Windows (Beta Continued)
+
+- Complete Windows bootstrap — `dep_ensure` + `install.ps1` + detection. (@alt-glitch) ([#27845](https://github.com/NousResearch/hermes-agent/pull/27845))
+- `install.ps1`: strip BOM, `-Commit`/`-Tag` pin params, harden git ops. (@jquesnelle) ([#28169](https://github.com/NousResearch/hermes-agent/pull/28169))
+- Consolidate ACP browser bootstrap into `install.{sh,ps1}`. (@alt-glitch) ([#27851](https://github.com/NousResearch/hermes-agent/pull/27851))
+- `hermes update` quarantines live `hermes.exe`. ([#26677](https://github.com/NousResearch/hermes-agent/pull/26677))
+- Discord voice opus decoding on Windows. ([#33182](https://github.com/NousResearch/hermes-agent/pull/33182))
+- Windows Docker Desktop compatible compose file. (@Sunil123135) ([#31031](https://github.com/NousResearch/hermes-agent/pull/31031))
+
+---
+
+## 🖥️ Web Dashboard
+
+- Hardened Slack socket recovery + Windows restart dedupe. ([#28873](https://github.com/NousResearch/hermes-agent/pull/28873))
+- Web dashboard: migrate checkboxes to `@nous-research/ui` + design-system polish. (@austinpickett) ([#28814](https://github.com/NousResearch/hermes-agent/pull/28814))
+- Web dashboard: collapsible sidebar. (@austinpickett) ([#33421](https://github.com/NousResearch/hermes-agent/pull/33421))
+- Dashboard typography & contrast pass. (salvage of [#28832](https://github.com/NousResearch/hermes-agent/pull/28832)) ([#30714](https://github.com/NousResearch/hermes-agent/pull/30714))
+- Skills page: lazy-fetch catalog instead of bundling 34MB into JS. ([#33809](https://github.com/NousResearch/hermes-agent/pull/33809))
+
+---
+
+## 🐳 Docker
+
+- **s6-overlay container supervision** — abstract `ServiceManager` protocol (systemd/launchd/Windows/s6 backends), per-profile gateway supervision in-container, container-restart reconciliation, hadolint/shellcheck CI. (salvage of [#30136](https://github.com/NousResearch/hermes-agent/pull/30136), @benbarclay) ([#31760](https://github.com/NousResearch/hermes-agent/pull/31760))
+- Auto-redirect `gateway run` to supervised mode inside the s6 image. (@benbarclay) ([#33583](https://github.com/NousResearch/hermes-agent/pull/33583))
+- Tee supervised gateway stdout to docker logs. (@benbarclay) ([#33621](https://github.com/NousResearch/hermes-agent/pull/33621))
+- Drop `docker exec` to hermes uid before invoking the CLI. (@benbarclay) ([#33628](https://github.com/NousResearch/hermes-agent/pull/33628))
+- Align HOME for dashboard and s6 gateway services. (@Dusk1e) ([#33481](https://github.com/NousResearch/hermes-agent/pull/33481))
+- Bake build-time git SHA into image so `hermes dump` reports it. (@benbarclay) ([#33655](https://github.com/NousResearch/hermes-agent/pull/33655))
+- `hermes update` prints `docker pull` guidance instead of bogus git error. (@benbarclay) ([#33659](https://github.com/NousResearch/hermes-agent/pull/33659))
+- Upgrade Node to 22 LTS via multi-stage from `node:22-bookworm-slim`. (@benbarclay) ([#33060](https://github.com/NousResearch/hermes-agent/pull/33060))
+- Drop `build-essential` from apt install. (@benbarclay) ([#33028](https://github.com/NousResearch/hermes-agent/pull/33028))
+- Propagate env through s6 to cont-init and main CMD. ([#32412](https://github.com/NousResearch/hermes-agent/pull/32412))
+- Targeted chown to preserve host file ownership in `HERMES_HOME`. ([#33033](https://github.com/NousResearch/hermes-agent/pull/33033))
+- `mkdir HERMES_HOME` as root in stage2 before chown / privilege drop. ([#33078](https://github.com/NousResearch/hermes-agent/pull/33078))
+- chown `ui-tui` and `node_modules` on UID remap so TUI esbuild works. ([#33045](https://github.com/NousResearch/hermes-agent/pull/33045))
+- Include `anthropic`, `bedrock`, `azure-identity` extras in image. ([#30504](https://github.com/NousResearch/hermes-agent/pull/30504))
+- Stop pushing per-commit SHA tags to Docker Hub. ([#29387](https://github.com/NousResearch/hermes-agent/pull/29387))
+- Simplify Docker tagging — push both `:main` and `:latest` on main push. ([#33225](https://github.com/NousResearch/hermes-agent/pull/33225))
+- Test slicing across GH Actions jobs. (@ethernet8023) ([#30575](https://github.com/NousResearch/hermes-agent/pull/30575))
+- Discover agent-browser Chromium binary at boot. ([#33184](https://github.com/NousResearch/hermes-agent/pull/33184))
+
+---
+
+## 🌐 API Server
+
+- **Session control API** — `/api/sessions/*` (list/create/read/patch/delete/fork) + SSE-streaming chat. (salvages [#29302](https://github.com/NousResearch/hermes-agent/pull/29302) by @Codename-11 + multimodal followup by @Schwartz10) ([#33134](https://github.com/NousResearch/hermes-agent/pull/33134))
+- `GET /v1/skills` and `/v1/toolsets`. ([#33016](https://github.com/NousResearch/hermes-agent/pull/33016))
+- Coerce stringified booleans in stream/store/approval payloads. (salvage [#26639](https://github.com/NousResearch/hermes-agent/pull/26639)) ([#27293](https://github.com/NousResearch/hermes-agent/pull/27293))
+- Honor `key_env` in auth-failure fallback resolution. ([#30840](https://github.com/NousResearch/hermes-agent/pull/30840))
+
+---
+
+## 🎟️ ACP (VS Code / Zed / JetBrains)
+
+- Session edit auto-approval modes. (salvage of [#27034](https://github.com/NousResearch/hermes-agent/pull/27034)) ([#27862](https://github.com/NousResearch/hermes-agent/pull/27862))
+- Enrich Zed permission cards — command in title + `reject_always`. ([#28148](https://github.com/NousResearch/hermes-agent/pull/28148))
+- Replay session history before responding to `session/load`. ([#26957](https://github.com/NousResearch/hermes-agent/pull/26957), [#26943](https://github.com/NousResearch/hermes-agent/pull/26943))
+- Plugin-transformed `final_response` delivered through streaming gate. ([#31433](https://github.com/NousResearch/hermes-agent/pull/31433))
+
+---
+
+## 🔌 Plugin Surface
+
+- `register_tts_provider()` plugin hook. (salvage of [#30420](https://github.com/NousResearch/hermes-agent/pull/30420)) ([#31745](https://github.com/NousResearch/hermes-agent/pull/31745))
+- `register_transcription_provider()` hook + `stt.providers` command-provider registry. (salvage of [#30493](https://github.com/NousResearch/hermes-agent/pull/30493)) ([#31907](https://github.com/NousResearch/hermes-agent/pull/31907))
+- `register_auxiliary_task()` in PluginContext API. (salvage [#29817](https://github.com/NousResearch/hermes-agent/pull/29817)) ([#31177](https://github.com/NousResearch/hermes-agent/pull/31177))
+- Bundled `security-guidance` plugin. ([#33131](https://github.com/NousResearch/hermes-agent/pull/33131))
+- Discord and Mattermost migrated to bundled plugins. ([#30591](https://github.com/NousResearch/hermes-agent/pull/30591), [#31748](https://github.com/NousResearch/hermes-agent/pull/31748))
+- ntfy as platform plugin. ([#30867](https://github.com/NousResearch/hermes-agent/pull/30867))
+- Surface category-namespaced plugins in `hermes plugins list`. ([#27187](https://github.com/NousResearch/hermes-agent/pull/27187))
+- Plugin discovery failures raised to WARNING level. ([#28318](https://github.com/NousResearch/hermes-agent/pull/28318))
+- `hermes_plugins` included in gateway.log component filter. ([#28313](https://github.com/NousResearch/hermes-agent/pull/28313))
+- Seed plugin extras before `is_connected` gate. ([#31703](https://github.com/NousResearch/hermes-agent/pull/31703))
+- Dashboard: allowlist plugin assets + denylist subprocess-influencing env vars. ([#32277](https://github.com/NousResearch/hermes-agent/pull/32277))
+
+---
+
+## 📦 Distribution & Install
+
+- Install-method stamping + Docker detection. (@alt-glitch) ([#27843](https://github.com/NousResearch/hermes-agent/pull/27843))
+- Nix `.#messaging` and `.#full` package variants. (@alt-glitch) ([#33108](https://github.com/NousResearch/hermes-agent/pull/33108))
+- Pre-load messaging gateway deps via `--extra messaging`. (salvage [#26394](https://github.com/NousResearch/hermes-agent/pull/26394)) ([#27558](https://github.com/NousResearch/hermes-agent/pull/27558))
+- Avoid piping installer directly into `iex` (Windows). ([#28347](https://github.com/NousResearch/hermes-agent/pull/28347))
+- Ship bundled skills in wheel. ([#28421](https://github.com/NousResearch/hermes-agent/pull/28421))
+- Ship dashboard plugin assets in wheel. ([#28406](https://github.com/NousResearch/hermes-agent/pull/28406))
+- Make Camofox lazy-installed instead of eager. ([#27055](https://github.com/NousResearch/hermes-agent/pull/27055))
+- Wire STT lazy-install into `transcription_tools.py`. ([#30256](https://github.com/NousResearch/hermes-agent/pull/30256))
+
+---
+
+## 🐛 Notable Bug Fixes (highlights only)
+
+- Match bare custom provider by active base URL in `hermes model`. ([#28908](https://github.com/NousResearch/hermes-agent/pull/28908))
+- Route `auxiliary.vision.provider=openai` to api.openai.com, skip text-only main. ([#31452](https://github.com/NousResearch/hermes-agent/pull/31452))
+- Lint: skip per-file shell linter when LSP will handle the file. ([#29054](https://github.com/NousResearch/hermes-agent/pull/29054))
+- Treat empty credential pool entries as unauthenticated in `/model` picker. ([#28312](https://github.com/NousResearch/hermes-agent/pull/28312))
+- Reverted within window: Firecrawl integration tag, `send_message` `@username` auto-mentions, Telegram quick-command-only menus, Telegram pin-on-turn.
+
+---
+
+## 🧪 Testing
+
+- Disarm lazy-install probe so `_HAS_FASTER_WHISPER` patches work. ([#30334](https://github.com/NousResearch/hermes-agent/pull/30334))
+- Cover default board dashboard pin. ([#28361](https://github.com/NousResearch/hermes-agent/pull/28361))
+- Cover `_task_dict` `task_age` fallback. ([#28365](https://github.com/NousResearch/hermes-agent/pull/28365))
+- Allowlist `tmp_path` for `kanban_notify` artifact delivery tests. ([#30851](https://github.com/NousResearch/hermes-agent/pull/30851), [#30852](https://github.com/NousResearch/hermes-agent/pull/30852))
+- Cover null output stream terminal events in Codex. ([#33137](https://github.com/NousResearch/hermes-agent/pull/33137))
+
+---
+
+## 📚 Documentation
+
+- **30-day docs overhaul** — full correctness audit, every PR in the window covered, Nous Portal weave, sidebar reorg. ([#33782](https://github.com/NousResearch/hermes-agent/pull/33782))
+- Dedicated Nous Portal integration page and setup guide. ([#31296](https://github.com/NousResearch/hermes-agent/pull/31296))
+- Providers: move Nous Portal first, Google Gemini OAuth last. ([#31287](https://github.com/NousResearch/hermes-agent/pull/31287))
+- `session_search` rewrite for single-shape tool. ([#27840](https://github.com/NousResearch/hermes-agent/pull/27840))
+- Kanban: document failure_limit, max_retries, inline create shortcuts, goals & kanban settings. ([#28357](https://github.com/NousResearch/hermes-agent/pull/28357), [#28358](https://github.com/NousResearch/hermes-agent/pull/28358), [#28359](https://github.com/NousResearch/hermes-agent/pull/28359), [#28360](https://github.com/NousResearch/hermes-agent/pull/28360), [#28362](https://github.com/NousResearch/hermes-agent/pull/28362))
+- Kanban Codex lane skill. ([#28430](https://github.com/NousResearch/hermes-agent/pull/28430))
+- xAI OAuth: note X Premium+ also unlocks Grok OAuth. ([#29055](https://github.com/NousResearch/hermes-agent/pull/29055))
+- Docs site: Docker audio bridge notes, "Installing more tools in the container", xurl auth HOME in Docker.
+- Email: clarify gateway vs Himalaya setup. (@helix4u) ([#33634](https://github.com/NousResearch/hermes-agent/pull/33634))
+- Auth docs: replace stale `hermes login` references with `hermes auth add`. ([#32859](https://github.com/NousResearch/hermes-agent/pull/32859))
+
+---
+
+## 👥 Contributors
+
+### Core
+- @teknium1 (lead)
+
+### Notable salvages & cherry-picks
+
+- **@benbarclay** — s6-overlay container supervision (29 commits salvaged), Node 22 LTS upgrade, build-essential cleanup, `gateway run` auto-redirect in s6, tee supervised stdout to docker logs, `hermes update` Docker guidance, build-time SHA stamping
+- **@OutThisLife** — `mouse_tracking` DEC mode presets
+- **@jquesnelle** — Windows installer hardening, `--branch` flag for `hermes update`, install.ps1 BOM strip / commit-pin
+- **@alt-glitch** — Windows `dep_ensure` bootstrap, Nix package variants (`.#messaging`, `.#full`), install-method stamping, ACP browser bootstrap consolidation
+- **@austinpickett** — `/update` slash command, dashboard checkboxes → `@nous-research/ui`, mobile dashboard polish, collapsible sidebar
+- **@ethernet8023** — CI test slicing across GH Actions jobs, TUI clipboard copy fix
+- **@kshitijk4poor** — doctor section banner + fail-and-issue helpers extraction, post-tag salvage cluster (curator-fallout, kanban SQLite hardening, install world-readable uv dirs, xAI bare-code paste)
+- **@rewbs** — Nous JWT inference switch + refresh-token replay fix
+- **@Codename-11** + **@Schwartz10** — session control API (REST + SSE + multimodal followup)
+- **@Niraven** — kanban swarm topology helper
+- **@Interstellar-code** — kanban worker visibility endpoints
+- **@adybag14-cyber** — termux cold-start optimizations (multiple PRs)
+- **@qike-ms** — Telegram in-place status edits design
+- **@sprmn24** — ntfy adapter
+- **@Jaaneek** — xAI Web Search provider plugin
+- **@yannsunn** — xAI upstream adapter for `hermes proxy`
+- **@Cybourgeoisie** — OpenRouter sticky routing via session_id
+- **@memosr** — Nous Portal base_url allowlist validation
+- **@Sunil123135** — Windows Docker Desktop compose file
+- **@Dusk1e** — Docker HOME alignment for dashboard + s6 gateway services
+- **@beardthelion** — opencode-go anthropic_messages routing
+- **@YLChen-007** — Skills Guard multi-word prompt patterns
+- **@roadhero** — env_passthrough GHSA-rhgp-j443-p4rf filter
+- **@Zyrixtrex** — Google Chat OAuth credential persistence hardening
+- **@briandevans**, **@tomqiaozc** — defense-in-depth read-deny on credential stores
+- **@PratikRai0101** — control-plane file write protection
+- **@helix4u**, **@Bartok9**, **@zccyman** — auxiliary fallback ladder components
+- **@ms-alan**, **@ticketclosed-wontfix**, **@donovan-yohan** — TUI session orchestrator + follow-ups
+- **@daimon-nous[bot]** — cron per-job profile support
+- **@bisko** — re-pad `reasoning_content` on cross-provider fallback
+
+### All Contributors
+
+@02356abc, @0xchainer, @0xDevNinja, @0xjackyang, @0xsir0000, @0z1-ghb, @8bit64k, @aaronlab, @AceWattGit,
+@ACR27, @adam91holt, @AdamPlatin123, @Ade5954, @AdityaRajeshGadgil, @adybag14-cyber, @AhmetArif0, @ai-hana-ai,
+@alaamohanad169-ship-it, @alber70g, @albert748, @alt-glitch, @aqilaziz, @argabor, @asdlem, @austinpickett,
+@avifenesh, @awizemann, @B0Tch1, @Bartok9, @BaxBit, @Beandon13, @beardthelion, @benbarclay, @bensargotest-sys,
+@binhnt92, @bird, @bisko, @BlackishGreen33, @booker1207, @bradhallett, @briandevans, @Brixyy, @brndnsvr,
+@BROCCOLO1D, @btorresgil, @burjorjee, @carltonawong, @Carry00, @chaconne67, @chdlc, @chromalinx, @ChyuWei,
+@CipherFrame, @cmullins70, @CNSeniorious000, @codeblackhole1024, @Codename-11, @colin-chang, @counterposition,
+@cresslank, @CryptoByz, @cyb0rgk1tty, @Cybourgeoisie, @daizhonggeng, @darvsum, @davidcampbelldc, @deas,
+@dgians, @dillweed, @DoGMaTiiC, @donovan-yohan, @draplater, @Drexuxux, @dskwe, @dsr-restyn, @Dusk1e,
+@dusterbloom, @duyua9, @egilewski, @el-analista, @eliteworkstation94-ai, @eloklam, @EloquentBrush0x, @emonty,
+@emozilla, @erhnysr, @erikengervall, @Erosika, @ether-btc, @ethernet8023, @EvilHumphrey, @fabiosiqueira,
+@falasi, @falconexe, @fardoche6, @felix-windsor, @Fewmanism, @ffr31mr, @flamiinngo, @flanny7, @flooryyyy,
+@fonhal, @francip, @fujinice, @gianfrancopiana, @glennc, @Glucksberg, @godlin-gh, @Grogger, @guillaumemeyer,
+@Gutslabs, @H-Ali13381, @hanzckernel, @haran2001, @hawknewton, @hayka-pacha, @hehehe0803, @helix4u, @HenkDz,
+@Hermes, @hermesagent26, @Hinotoi-agent, @hongchen1993, @honor2030, @houenyang-momo, @ht1072, @hueilau,
+@iamfoz, @ilonagaja509-glitch, @InB4DevOps, @indigokarasu, @Interstellar-code, @iqdoctor, @iRonin, @Jaaneek,
+@JabberELF, @jacevys, @jackey8616, @jackjin1997, @jdelmerico, @jfuenmayor, @Jiahui-Gu, @JimLiu, @joe102084,
+@JohnC1009, @jonpol01, @Jpalmer95, @Julientalbot, @justemu, @justincc, @jvinals, @karthikeyann, @kasunvinod,
+@kchuang1015, @kenyonxu, @khungate, @kiranvk-2011, @kjames2001, @konsisumer, @kpadilha, @kriscolab,
+@krislidimo, @kronexoi, @kshitijk4poor, @kunci115, @Kylejeong2, @kylekahraman, @LaPhilosophie, @leeseoki0,
+@lemassykoi, @Lempkey, @LeonJS, @LeonSGP43, @lidge-jun, @LifeJiggy, @liuhao1024, @LizerAIDev, @loicnico96,
+@loongfay, @m0n3r0, @malaiwah, @matthewlai, @mavrickdeveloper, @maxmilian, @McClean-Edison, @memosr,
+@Mind-Dragon, @momowind, @MoonJuhan, @MoonRay305, @moortekweb-art, @MorAlekss, @ms-alan, @Nami4D,
+@nehaaprasaad, @nekwo, @nftpoetrist, @NickLarcombe, @nidhi-singh02, @Niraven, @nnnet, @noctilust, @novax635,
+@nthrow, @nv-kasikritc, @nycomar, @OCWC22, @oemtalks, @OmX, @ooovenenoso, @orcool, @oseftg, @outsourc-e,
+@OutThisLife, @Paperclip, @PaTTeeL, @pepelax, @phoenixshen, @Pluviobyte, @pnascimento9596, @pochi-gio, @pr7426,
+@PratikRai0101, @Prithvi1994, @psionic73, @ptichalouf, @Que0x, @QuenVix, @quocanh261997, @qWaitCrypto, @Qwinty,
+@r266-tech, @rak135, @rdasilva1016-ui, @rewbs, @roadhero, @rodrigoeqnit, @RonHillDev, @roycepersonalassistant,
+@rudi193-cmd, @RyanRana, @sadiksaifi, @samahn0601, @samggggflynn, @SamuelZ12, @sanghyuk-seo-nexcube,
+@Saurav0989, @savanne-kham, @Schrotti77, @Schwartz10, @SerenityTn, @sgtworkman, @sharziki, @shaun0927,
+@shellybotmoyer, @shunsuke-hikiyama, @SimbaKingjoe, @SimoKiihamaki, @sir-ad, @Slimydog21, @slowtokki0409,
+@Soju06, @someaka, @soynchux, @sprmn24, @Stark-X, @steezkelly, @stepanov1975, @stephenschoettler,
+@stevehq26-bot, @steveonjava, @Strontvod, @subtract0, @Sunil123135, @superearn-fisher, @Sylw3ster, @tchanee,
+@that-ambuj, @thedavidmurray, @TheOnlyMika, @therahul-yo, @thewillhuang, @ticketclosed-wontfix, @Timur00Kh,
+@tomqiaozc, @Tosko4, @Tranquil-Flow, @tw2818, @uzunkuyruk, @vaddisrinivas, @vanthinh6886, @vgocoder,
+@victorGPT, @vynxevainglory-ai, @waefrebeorn, @walli, @wangpuv, @wanwan2qq, @wesleysimplicio, @worlldz,
+@wpengpeng168, @WuKongAI-CMU, @wuli666, @Wysie, @xxxigm, @yannsunn, @YanzhongSu, @YarrowQiao, @ygd58,
+@YLChen-007, @yoniebans, @yu-xin-c, @YuanHanzhong, @zapabob, @zccyman, @ziliangpeng, @zwolniony, @Zyrixtrex
+
+---
+
+**Full Changelog**: [v2026.5.16...v2026.5.28](https://github.com/NousResearch/hermes-agent/compare/v2026.5.16...v2026.5.28)
@@ -0,0 +1,183 @@
+"""Custom PEP 517 build backend for hermes-agent.
+
+At wheel build time, rewrites [project.optional-dependencies] so that
+plugin extras (e.g. ``anthropic = ["hermes-agent-anthropic"]``) are
+inlined with the actual deps from each plugin's pyproject.toml.
+
+In the source repo (and on Nix), uv resolves workspace members natively
+so this backend is NOT used — it's only invoked when building a wheel
+for PyPI publication.
+
+Usage in pyproject.toml::
+
+    [build-system]
+    requires = ["setuptools>=61.0"]
+    build-backend = "_build_backend"
+    backend-path = ["."]
+
+How it works:
+1.  ``build_wheel`` intercepts the call before setuptools sees pyproject.toml.
+2.  It reads the workspace member dirs from [tool.uv.workspace].members.
+3.  For each member, it reads the member's pyproject.toml and extracts
+    ``project.dependencies`` (excluding the ``hermes-agent`` base dep).
+4.  It rewrites the main pyproject.toml's optional-dependencies to inline
+    those deps instead of the workspace member references.
+5.  It backs up pyproject.toml to ``pyproject.toml.bak``, overwrites
+    pyproject.toml in place with the rewritten data, delegates to
+    ``setuptools.build_meta.build_wheel``, then restores the original
+    from the backup.
+"""
+
+from __future__ import annotations
+
+import os
+import shutil
+import tempfile
+from pathlib import Path
+from typing import Any
+
+import tomllib
+
+# The original setuptools backend we delegate to.
+_BACKEND = "setuptools.build_meta"
+
+
+def _load_pyproject(path: Path) -> dict:
+    """Parse the TOML file at ``path`` and return its contents as a dict."""
+    with open(path, "rb") as stream:
+        parsed = tomllib.load(stream)
+    return parsed
+
+
+def _save_pyproject(path: Path, data: dict) -> None:
+    """Serialize ``data`` as TOML and write it to ``path``.
+
+    ``tomli_w`` is imported lazily inside the function, so it only has to
+    be installed when a pyproject.toml is actually rewritten.
+    """
+    from tomli_w import dump as dump_toml
+
+    with open(path, "wb") as sink:
+        dump_toml(data, sink)
+
+
+def _inline_plugin_deps(root: Path, data: dict) -> dict:
+    """Replace workspace-member references in the extras with real deps.
+
+    Every workspace member found under ``root`` (via the globs listed in
+    ``[tool.uv.workspace].members``) contributes its ``project.dependencies``,
+    minus any entry that starts with ``hermes-agent``.  An extra entry that
+    exactly equals a member's package name is swapped for that member's
+    filtered dependency list; every other entry is kept as it is.
+
+    ``data`` is modified in place (the ``[tool.uv]`` table is dropped as
+    well) and the same dict is returned.
+    """
+    extras = data.get("project", {}).get("optional-dependencies", {})
+    member_globs = (
+        data.get("tool", {}).get("uv", {}).get("workspace", {}).get("members", [])
+    )
+
+    # Package name -> that member's dependencies without the hermes-agent dep.
+    deps_by_package: dict[str, list[str]] = {}
+    for pattern in member_globs:
+        for plugin_dir in sorted(root.glob(pattern)):
+            manifest = plugin_dir / "pyproject.toml"
+            if not manifest.exists():
+                continue
+            plugin_project = _load_pyproject(manifest).get("project", {})
+            name = plugin_project.get("name", "")
+            if name:
+                deps_by_package[name] = [
+                    dep
+                    for dep in plugin_project.get("dependencies", [])
+                    # Prefix match after stripping spaces, so this also drops
+                    # any other requirement whose name begins "hermes-agent".
+                    if not dep.replace(" ", "").startswith("hermes-agent")
+                ]
+
+    # Expand each extra: member references become their deps, rest pass through.
+    rewritten: dict[str, list[str]] = {}
+    for extra, requirements in extras.items():
+        expanded: list[str] = []
+        for requirement in requirements:
+            expanded.extend(deps_by_package.get(requirement, [requirement]))
+        rewritten[extra] = expanded
+    data["project"]["optional-dependencies"] = rewritten
+
+    # The uv workspace configuration is dropped from the rewritten pyproject.
+    data.get("tool", {}).pop("uv", None)
+
+    return data
+
+
+# ---------------------------------------------------------------------------
+# PEP 517 hooks
+# ---------------------------------------------------------------------------
+
+def build_wheel(wheel_directory: str, config_settings: dict[str, Any] | None = None, metadata_directory: str | None = None) -> str:
+    """PEP 517 hook: build a wheel with plugin dependencies inlined.
+
+    The pyproject.toml in the current working directory is backed up to
+    ``pyproject.toml.bak``, overwritten with the rewritten
+    optional-dependencies, handed to the setuptools backend, and restored
+    in the ``finally`` block whether or not the build succeeds.
+
+    ``wheel_directory``, ``config_settings`` and ``metadata_directory`` are
+    forwarded unchanged to ``setuptools.build_meta.build_wheel``.
+    ``metadata_directory`` must be passed through so that metadata already
+    produced by ``prepare_metadata_for_build_wheel`` is not silently ignored.
+
+    Returns whatever the setuptools backend returns (per PEP 517, the
+    basename of the built wheel).
+    """
+    import importlib
+
+    root = Path.cwd()
+    pyproject_path = root / "pyproject.toml"
+
+    # Compute the rewritten data before touching anything on disk.
+    data = _load_pyproject(pyproject_path)
+    data = _inline_plugin_deps(root, data)
+
+    # Swap in the rewritten pyproject.toml, build, then restore the original.
+    backup = pyproject_path.with_suffix(".toml.bak")
+    shutil.copy2(pyproject_path, backup)
+    try:
+        _save_pyproject(pyproject_path, data)
+
+        backend = importlib.import_module(_BACKEND)
+        return backend.build_wheel(wheel_directory, config_settings, metadata_directory)
+    finally:
+        shutil.copy2(backup, pyproject_path)
+        backup.unlink()
+
+
+def build_sdist(sdist_directory: str, config_settings: dict[str, Any] | None = None) -> str:
+    """PEP 517 hook: build an sdist by delegating straight to setuptools.
+
+    pyproject.toml is not rewritten for sdists; both arguments are passed
+    through and the backend's return value is returned unchanged.
+    """
+    from importlib import import_module
+
+    return import_module(_BACKEND).build_sdist(sdist_directory, config_settings)
+
+
+def get_requires_for_build_wheel(config_settings: dict[str, Any] | None = None) -> list[str]:
+    """PEP 517 hook: list the packages needed to build a wheel.
+
+    ``tomli_w`` is included because the wheel path rewrites pyproject.toml
+    through ``_save_pyproject``.  ``config_settings`` is accepted but unused.
+    """
+    wheel_build_requirements = ["setuptools>=61.0", "tomli_w"]
+    return wheel_build_requirements
+
+
+def get_requires_for_build_sdist(config_settings: dict[str, Any] | None = None) -> list[str]:
+    """PEP 517 hook: list the packages needed to build an sdist.
+
+    Only setuptools is required, since the sdist path does not rewrite
+    pyproject.toml.  ``config_settings`` is accepted but unused.
+    """
+    sdist_build_requirements = ["setuptools>=61.0"]
+    return sdist_build_requirements
+
+
+def prepare_metadata_for_build_wheel(metadata_directory: str, config_settings: dict[str, Any] | None = None) -> str:
+    """PEP 517 hook: generate wheel metadata with plugin deps inlined.
+
+    Uses the same swap-and-restore sequence as the wheel build: the
+    pyproject.toml in the current working directory is copied to
+    ``pyproject.toml.bak``, overwritten with the rewritten data, passed to
+    the setuptools backend, and restored in ``finally``.
+
+    Returns whatever the setuptools backend returns.
+    """
+    import importlib
+
+    project_root = Path.cwd()
+    manifest = project_root / "pyproject.toml"
+    rewritten = _inline_plugin_deps(project_root, _load_pyproject(manifest))
+
+    saved_copy = manifest.with_suffix(".toml.bak")
+    shutil.copy2(manifest, saved_copy)
+    try:
+        _save_pyproject(manifest, rewritten)
+        setuptools_backend = importlib.import_module(_BACKEND)
+        return setuptools_backend.prepare_metadata_for_build_wheel(metadata_directory, config_settings)
+    finally:
+        # Put the original file back even when the backend raises.
+        shutil.copy2(saved_copy, manifest)
+        saved_copy.unlink()
+
+
+def build_editable(wheel_directory: str, config_settings: dict[str, Any] | None = None, metadata_directory: str | None = None) -> str:
+    """PEP 660 hook: build an editable wheel by delegating to setuptools.
+
+    pyproject.toml is left untouched.  ``metadata_directory`` is forwarded
+    as a keyword argument only when the caller supplied one.
+    """
+    from importlib import import_module
+
+    optional_kwargs: dict[str, Any] = (
+        {} if metadata_directory is None else {"metadata_directory": metadata_directory}
+    )
+    return import_module(_BACKEND).build_editable(
+        wheel_directory, config_settings=config_settings, **optional_kwargs
+    )
+
+
+def get_requires_for_build_editable(config_settings: dict[str, Any] | None = None) -> list[str]:
+    """PEP 660 hook: list the packages needed for an editable build.
+
+    Only setuptools is required, since editable builds do not rewrite
+    pyproject.toml.  ``config_settings`` is accepted but unused.
+    """
+    editable_build_requirements = ["setuptools>=61.0"]
+    return editable_build_requirements
@@ -1,7 +1,7 @@
 {
  "id": "hermes-agent",
  "name": "Hermes Agent",
-  "version": "0.14.0",
+  "version": "0.15.0",
  "description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.",
  "repository": "https://github.com/NousResearch/hermes-agent",
  "website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp",
@@ -9,7 +9,7 @@
  "license": "MIT",
  "distribution": {
    "uvx": {
-      "package": "hermes-agent[acp]==0.14.0",
+      "package": "hermes-agent[acp]==0.15.0",
      "args": ["hermes-acp"]
    }
  }
@@ -4,3 +4,5 @@ These modules contain pure utility functions and self-contained classes
 that were previously embedded in the 3,600-line run_agent.py. Extracting
 them makes run_agent.py focused on the AIAgent orchestrator class.
 """
+
+from . import jiter_preload as _jiter_preload  # noqa: F401
@@ -6,7 +6,9 @@ from typing import Any, Optional

 import httpx

-from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
+from agent.plugin_registries import registries
+_is_oauth_token = registries.get_provider_service("anthropic", "_is_oauth_token")
+resolve_anthropic_token = registries.get_provider_service("anthropic", "resolve_anthropic_token")
 from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
 from hermes_cli.runtime_provider import resolve_runtime_provider

@@ -176,7 +178,7 @@ def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
    token = (resolve_anthropic_token() or "").strip()
    if not token:
        return None
-    if not _is_oauth_token(token):
+    if _is_oauth_token is not None and not _is_oauth_token(token):
        return AccountUsageSnapshot(
            provider="anthropic",
            source="oauth_usage_api",
@@ -404,7 +404,7 @@ def init_agent(
    agent.status_callback = status_callback
    agent.tool_gen_callback = tool_gen_callback

-    
+
    # Tool execution state — allows _vprint during tool execution
    # even when stream consumers are registered (no tokens streaming then)
    agent._executing_tools = False
@@ -437,12 +437,12 @@ def init_agent(
    # their tids explicitly.
    agent._tool_worker_threads: set[int] = set()
    agent._tool_worker_threads_lock = threading.Lock()
-    
+
    # Subagent delegation state
    agent._delegate_depth = 0        # 0 = top-level agent, incremented for children
    agent._active_children = []      # Running child AIAgents (for interrupt propagation)
    agent._active_children_lock = threading.Lock()
-    
+
    # Store OpenRouter provider preferences
    agent.providers_allowed = providers_allowed
    agent.providers_ignored = providers_ignored
@@ -455,7 +455,7 @@ def init_agent(
    # Store toolset filtering options
    agent.enabled_toolsets = enabled_toolsets
    agent.disabled_toolsets = disabled_toolsets
-    
+
    # Model response configuration
    agent.max_tokens = max_tokens  # None = use model default
    agent.reasoning_config = reasoning_config  # None = use default (medium for OpenRouter)
@@ -463,7 +463,7 @@ def init_agent(
    agent.request_overrides = dict(request_overrides or {})
    agent.prefill_messages = prefill_messages or []  # Prefilled conversation turns
    agent._force_ascii_payload = False
-    
+
    # Anthropic prompt caching: auto-enabled for Claude models on native
    # Anthropic, OpenRouter, and third-party gateways that speak the
    # Anthropic protocol (``api_mode == 'anthropic_messages'``). Reduces
@@ -535,7 +535,7 @@ def init_agent(
        # console. Any future noise reduction belongs at the
        # handler level inside hermes_logging.py, not here.
        pass
-    
+
    # Internal stream callback (set during streaming TTS).
    # Initialized here so _vprint can reference it before run_conversation.
    agent._stream_callback = None
@@ -585,12 +585,14 @@ def init_agent(
    _provider_timeout = get_provider_request_timeout(agent.provider, agent.model)

    if agent.api_mode == "anthropic_messages":
-        from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
+        from agent.plugin_registries import registries
+        build_anthropic_client = registries.get_provider_service("anthropic", "build_anthropic_client")
+        resolve_anthropic_token = registries.get_provider_service("anthropic", "resolve_anthropic_token")
        # Bedrock + Claude → use AnthropicBedrock SDK for full feature parity
        # (prompt caching, thinking budgets, adaptive thinking).
        _is_bedrock_anthropic = agent.provider == "bedrock"
        if _is_bedrock_anthropic:
-            from agent.anthropic_adapter import build_anthropic_bedrock_client
+            build_anthropic_bedrock_client = registries.get_provider_service("anthropic", "build_anthropic_bedrock_client")
            _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "")
            _br_region = _region_match.group(1) if _region_match else "us-east-1"
            agent._bedrock_region = _br_region
@@ -644,8 +646,8 @@ def init_agent(
            # so injects Claude-Code identity headers and system prompts
            # that cause 401/403 on their endpoints.  Guards #1739 and
            # the third-party identity-injection bug.
-            from agent.anthropic_adapter import _is_oauth_token as _is_oat
-            agent._is_anthropic_oauth = _is_oat(effective_key) if (_is_native_anthropic and isinstance(effective_key, str)) else False
+            _is_oauth_token = registries.get_provider_service("anthropic", "_is_oauth_token")
+            agent._is_anthropic_oauth = _is_oauth_token(effective_key) if (_is_oauth_token is not None and _is_native_anthropic and isinstance(effective_key, str)) else False
            agent._anthropic_client = build_anthropic_client(effective_key, base_url, timeout=_provider_timeout)
            # No OpenAI client needed for Anthropic mode
            agent.client = None
@@ -657,9 +659,10 @@ def init_agent(
                # The Anthropic adapter installs an httpx event hook
                # that mints a fresh JWT per request — we never
                # invoke or inspect the callable in the banner.
-                from agent.azure_identity_adapter import is_token_provider
+                from agent.plugin_registries import registries
+                is_token_provider = registries.get_provider_service("azure", "is_token_provider")

-                if is_token_provider(effective_key):
+                if is_token_provider and is_token_provider(effective_key):
                    print("🔑 Using credentials: Microsoft Entra ID")
                elif isinstance(effective_key, str) and len(effective_key) > 12:
                    print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
@@ -869,10 +872,11 @@ def init_agent(
                # provider (Azure Foundry). The OpenAI SDK mints a
                # fresh JWT per request internally — the banner
                # never invokes or inspects the callable.
-                from agent.azure_identity_adapter import is_token_provider
+                from agent.plugin_registries import registries
+                is_token_provider = registries.get_provider_service("azure", "is_token_provider")

                key_used = client_kwargs.get("api_key", "none")
-                if is_token_provider(key_used):
+                if is_token_provider and is_token_provider(key_used):
                    print("🔑 Using credentials: Microsoft Entra ID")
                elif isinstance(key_used, str) and key_used and key_used != "dummy-key" and len(key_used) > 12:
                    print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
@@ -880,7 +884,7 @@ def init_agent(
                    print("⚠️  Warning: API key appears invalid or missing")
        except Exception as e:
            raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
-    
+
    # Provider fallback chain — ordered list of backup providers tried
    # when the primary is exhausted (rate-limit, overload, connection
    # failure).  Supports both legacy single-dict ``fallback_model`` and
@@ -912,7 +916,7 @@ def init_agent(
        disabled_toolsets=disabled_toolsets,
        quiet_mode=agent.quiet_mode,
    )
-    
+
    # Show tool configuration and store valid tool names for validation
    agent.valid_tool_names = set()
    if agent.tools:
@@ -945,16 +949,16 @@ def init_agent(
        missing_reqs = [name for name, available in requirements.items() if not available]
        if missing_reqs:
            print(f"⚠️  Some tools may not work due to missing requirements: {missing_reqs}")
-    
+
    # Show trajectory saving status
    if agent.save_trajectories and not agent.quiet_mode:
        print("📝 Trajectory saving enabled")
-    
+
    # Show ephemeral system prompt status
    if agent.ephemeral_system_prompt and not agent.quiet_mode:
        prompt_preview = agent.ephemeral_system_prompt[:60] + "..." if len(agent.ephemeral_system_prompt) > 60 else agent.ephemeral_system_prompt
        print(f"🔒 Ephemeral system prompt: '{prompt_preview}' (not saved to trajectories)")
-    
+
    # Show prompt caching status
    if agent._use_prompt_caching and not agent.quiet_mode:
        if agent._use_native_cache_layout and agent.provider == "anthropic":
@@ -964,7 +968,7 @@ def init_agent(
        else:
            source = "Claude via OpenRouter"
        print(f"💾 Prompt caching: ENABLED ({source}, {agent._cache_ttl} TTL)")
-    
+
    # Session logging setup - auto-save conversation trajectories for debugging
    agent.session_start = datetime.now()
    if session_id:
@@ -1004,7 +1008,7 @@ def init_agent(
        pass
    # logs_dir is retained unconditionally for request_dump_*.json (debug
    # breadcrumb path written by agent_runtime_helpers.dump_api_request_debug).
-    
+
    # Track conversation messages for session logging
    agent._session_messages: List[Dict[str, Any]] = []
    # Responses encrypted reasoning replay state.  Some OpenAI-compatible
@@ -1016,10 +1020,10 @@ def init_agent(
    agent._codex_reasoning_replay_enabled = True
    agent._memory_write_origin = "assistant_tool"
    agent._memory_write_context = "foreground"
-    
+
    # Cached system prompt -- built once per session, only rebuilt on compression
    agent._cached_system_prompt: Optional[str] = None
-    
+
    # Filesystem checkpoint manager (transparent — not a tool)
    from tools.checkpoint_manager import CheckpointManager
    agent._checkpoint_mgr = CheckpointManager(
@@ -1028,7 +1032,7 @@ def init_agent(
        max_total_size_mb=checkpoint_max_total_size_mb,
        max_file_size_mb=checkpoint_max_file_size_mb,
    )
-    
+
    # SQLite session store (optional -- provided by CLI or gateway)
    agent._session_db = session_db
    agent._parent_session_id = parent_session_id
@@ -1039,11 +1043,11 @@ def init_agent(
        "reasoning_config": reasoning_config,
        "max_tokens": max_tokens,
    }
-    
+
    # In-memory todo list for task planning (one per agent/session)
    from tools.todo_tool import TodoStore
    agent._todo_store = TodoStore()
-    
+
    # Load config once for memory, skills, and compression sections
    try:
        from hermes_cli.config import load_config as _load_agent_config
@@ -1085,7 +1089,7 @@ def init_agent(
                agent._memory_store.load_from_disk()
        except Exception:
            pass  # Memory is optional -- don't break agent init
-    
+


    # Memory provider plugin (external — one at a time, alongside built-in)
@@ -1522,6 +1526,7 @@ def init_agent(
                platform=agent.platform or "cli",
                model=agent.model,
                context_length=getattr(agent.context_compressor, "context_length", 0),
+                conversation_id=getattr(agent, "_gateway_session_key", None),
            )
        except Exception as _ce_err:
            _ra().logger.debug("Context engine on_session_start: %s", _ce_err)
@@ -1544,7 +1549,7 @@ def init_agent(
    agent.session_estimated_cost_usd = 0.0
    agent.session_cost_status = "unknown"
    agent.session_cost_source = "none"
-    
+
    # ── Ollama num_ctx injection ──
    # Ollama defaults to 2048 context regardless of the model's capabilities.
    # When running against an Ollama server, detect the model's max context
@@ -766,7 +766,8 @@ def try_recover_primary_transport(
        agent.api_key = rt["api_key"]

        if agent.api_mode == "anthropic_messages":
-            from agent.anthropic_adapter import build_anthropic_client
+            from agent.plugin_registries import registries
+            build_anthropic_client = registries.get_provider_service("anthropic", "build_anthropic_client")
            agent._anthropic_api_key = rt["anthropic_api_key"]
            agent._anthropic_base_url = rt["anthropic_base_url"]
            agent._anthropic_client = build_anthropic_client(
@@ -930,7 +931,8 @@ def restore_primary_runtime(agent) -> bool:

        # ── Rebuild client for the primary provider ──
        if agent.api_mode == "anthropic_messages":
-            from agent.anthropic_adapter import build_anthropic_client
+            from agent.plugin_registries import registries
+            build_anthropic_client = registries.get_provider_service("anthropic", "build_anthropic_client")
            agent._anthropic_api_key = rt["anthropic_api_key"]
            agent._anthropic_base_url = rt["anthropic_base_url"]
            agent._anthropic_client = build_anthropic_client(
@@ -1436,11 +1438,10 @@ def switch_model(agent, new_model, new_provider, api_key='', base_url='', api_mo

        # ── Build new client ──
        if api_mode == "anthropic_messages":
-            from agent.anthropic_adapter import (
-                build_anthropic_client,
-                resolve_anthropic_token,
-                _is_oauth_token,
-            )
+            from agent.plugin_registries import registries
+            build_anthropic_client = registries.get_provider_service("anthropic", "build_anthropic_client")
+            resolve_anthropic_token = registries.get_provider_service("anthropic", "resolve_anthropic_token")
+            _is_oauth_token = registries.get_provider_service("anthropic", "_is_oauth_token")
            # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic.
            # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own
            # API key — falling back would send Anthropic credentials to third-party endpoints.
@@ -1994,6 +1995,36 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No
    api_msg.pop("reasoning_content", None)


+def reapply_reasoning_echo_for_provider(agent, api_messages: list) -> int:
+    """Backfill ``reasoning_content`` on assistant turns for the current provider.
+
+    ``api_messages`` is assembled once, ahead of the retry loop, while the
+    *primary* provider is active.  When a mid-conversation fallback switches
+    to a provider that requires the reasoning echo (DeepSeek / Kimi / MiMo
+    thinking mode), assistant turns built for the earlier provider lack
+    ``reasoning_content`` and the new provider answers with HTTP 400
+    ("The reasoning_content in the thinking mode must be passed back").
+
+    Call this right before building the request kwargs so the pad is applied
+    against the provider that is active *now*.  It returns immediately when
+    ``agent._needs_thinking_reasoning_pad()`` is False and skips assistant
+    turns that already carry a truthy ``reasoning_content``, so calling it
+    on every iteration is safe.
+
+    Returns how many assistant turns ended up with reasoning_content that
+    did not have it before the call.
+    """
+    if not agent._needs_thinking_reasoning_pad():
+        return 0
+
+    gained = 0
+    for message in api_messages:
+        if message.get("role") == "assistant" and not message.get("reasoning_content"):
+            # The message serves as both source and destination of the copy.
+            copy_reasoning_content_for_api(agent, message, message)
+            if message.get("reasoning_content"):
+                gained += 1
+    return gained
+

 def _iter_pool_sockets(client: Any):
    """Yield raw sockets reachable from an OpenAI/httpx client pool.
@@ -0,0 +1,166 @@
+"""Anthropic auxiliary client wrappers — core module, no SDK dependency.
+
+Provides OpenAI-client-compatible shims over native Anthropic SDK clients,
+so auxiliary tasks (compression, vision, web extract, etc.) can call
+``client.chat.completions.create()`` regardless of the underlying SDK.
+
+The wrapper classes themselves never import the anthropic SDK.  They delegate
+wire-format conversion to :mod:`agent.anthropic_format` and response
+normalization to the ``anthropic_messages`` transport registered in
+:mod:`agent.transports`.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from types import SimpleNamespace
+from typing import Any, Optional
+
+from agent.anthropic_format import (
+    build_anthropic_kwargs,
+    _forbids_sampling_params,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Adapter: Anthropic SDK → OpenAI-compatible completions.create()
+# ---------------------------------------------------------------------------
+
+class _AnthropicCompletionsAdapter:
+    """OpenAI-client-compatible adapter for Anthropic Messages API.
+
+    Exposes a single ``create(**kwargs)`` method that accepts OpenAI-style
+    chat-completion keyword arguments, forwards the request to
+    ``real_client.messages.create()`` and returns an OpenAI-shaped response
+    assembled from ``SimpleNamespace`` objects.
+    """
+
+    def __init__(self, real_client: Any, model: str, is_oauth: bool = False):
+        # real_client: object exposing ``messages.create(**kwargs)``.
+        # model: default model used when ``create()`` is called without one.
+        # is_oauth: forwarded to build_anthropic_kwargs and reused as
+        #   ``strip_tool_prefix`` when the response is normalized.
+        self._client = real_client
+        self._model = model
+        self._is_oauth = is_oauth
+
+    def create(self, **kwargs) -> Any:
+        """Run one Messages request from OpenAI-style keyword arguments.
+
+        Reads ``messages``, ``model``, ``tools``, ``tool_choice``,
+        ``max_tokens`` / ``max_completion_tokens``, ``temperature`` and the
+        private ``_skip_zai_max_tokens`` flag from ``kwargs``; any other key
+        is ignored (``kwargs`` itself is never forwarded to the SDK).
+
+        Returns a ``SimpleNamespace`` with ``choices`` (a single choice
+        carrying ``index``, ``message`` and ``finish_reason``), ``model`` and
+        ``usage`` (``None`` when the response carries no usage data).
+        """
+        from agent.transports import get_transport
+
+        messages = kwargs.get("messages", [])
+        model = kwargs.get("model", self._model)
+        tools = kwargs.get("tools")
+        tool_choice = kwargs.get("tool_choice")
+        # ZAI's Anthropic-compatible endpoint rejects max_tokens on vision
+        # models (glm-4v-flash etc.) with error code 1210.  When the caller
+        # signals this by setting _skip_zai_max_tokens in kwargs, omit it.
+        _skip_mt = kwargs.pop("_skip_zai_max_tokens", False)
+        if _skip_mt:
+            max_tokens = None
+        else:
+            # First truthy value wins; 2000 is the default when neither
+            # max_tokens nor max_completion_tokens is supplied.
+            max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000
+        temperature = kwargs.get("temperature")
+
+        # Collapse the OpenAI tool_choice forms into one string: a plain
+        # string passes through, {"type": "function", ...} becomes the
+        # function name, and {"type": "auto" | "required" | "none"} becomes
+        # that type.  Anything else leaves the value as None.
+        normalized_tool_choice = None
+        if isinstance(tool_choice, str):
+            normalized_tool_choice = tool_choice
+        elif isinstance(tool_choice, dict):
+            choice_type = str(tool_choice.get("type", "")).lower()
+            if choice_type == "function":
+                normalized_tool_choice = tool_choice.get("function", {}).get("name")
+            elif choice_type in {"auto", "required", "none"}:
+                normalized_tool_choice = choice_type
+
+        anthropic_kwargs = build_anthropic_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=max_tokens,
+            reasoning_config=None,
+            tool_choice=normalized_tool_choice,
+            is_oauth=self._is_oauth,
+        )
+        # Opus 4.7+ rejects any non-default temperature/top_p/top_k; only set
+        # temperature for models that still accept it. build_anthropic_kwargs
+        # additionally strips these keys as a safety net — keep both layers.
+        if temperature is not None:
+            if not _forbids_sampling_params(model):
+                anthropic_kwargs["temperature"] = temperature
+
+        response = self._client.messages.create(**anthropic_kwargs)
+        _transport = get_transport("anthropic_messages")
+        _nr = _transport.normalize_response(
+            response, strip_tool_prefix=self._is_oauth
+        )
+
+        # tool_calls are passed through exactly as the transport returned
+        # them; no re-wrapping happens here.
+        assistant_message = SimpleNamespace(
+            content=_nr.content,
+            tool_calls=_nr.tool_calls,
+            reasoning=_nr.reasoning,
+        )
+        finish_reason = _nr.finish_reason
+
+        # Map the response's input_tokens / output_tokens onto the OpenAI
+        # field names; total_tokens is summed locally when the response does
+        # not provide a non-zero value.
+        usage = None
+        if hasattr(response, "usage") and response.usage:
+            prompt_tokens = getattr(response.usage, "input_tokens", 0) or 0
+            completion_tokens = getattr(response.usage, "output_tokens", 0) or 0
+            total_tokens = getattr(response.usage, "total_tokens", 0) or (prompt_tokens + completion_tokens)
+            usage = SimpleNamespace(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+
+        choice = SimpleNamespace(
+            index=0,
+            message=assistant_message,
+            finish_reason=finish_reason,
+        )
+        return SimpleNamespace(
+            choices=[choice],
+            model=model,
+            usage=usage,
+        )
+
+
+class _AnthropicChatShim:
+    """Holds the completions adapter under ``.completions``.
+
+    Assigned to a client's ``chat`` attribute so that
+    ``client.chat.completions.create()`` resolves to the adapter's ``create``.
+    """
+
+    def __init__(self, adapter: _AnthropicCompletionsAdapter):
+        self.completions = adapter
+
+
+# ---------------------------------------------------------------------------
+# Public wrappers
+# ---------------------------------------------------------------------------
+
+class AnthropicAuxiliaryClient:
+    """Wraps a native Anthropic client behind an OpenAI-client-shaped surface.
+
+    Exposes ``chat.completions.create()`` plus the ``api_key`` and
+    ``base_url`` attributes, and keeps the wrapped client on ``_real_client``.
+    """
+
+    def __init__(self, real_client: Any, model: str, api_key: str, base_url: str, is_oauth: bool = False):
+        self._real_client = real_client
+        # Build the adapter and hang it off ``chat`` in one step.
+        self.chat = _AnthropicChatShim(
+            _AnthropicCompletionsAdapter(real_client, model, is_oauth=is_oauth)
+        )
+        self.api_key = api_key
+        self.base_url = base_url
+
+    def close(self):
+        """Close the wrapped client when it offers a callable ``close``."""
+        closer = getattr(self._real_client, "close", None)
+        if not callable(closer):
+            return
+        closer()
+
+
+class _AsyncAnthropicCompletionsAdapter:
+    """Async facade over a synchronous :class:`_AnthropicCompletionsAdapter`."""
+
+    def __init__(self, sync_adapter: _AnthropicCompletionsAdapter):
+        self._sync = sync_adapter
+
+    async def create(self, **kwargs) -> Any:
+        """Await the wrapped adapter's ``create(**kwargs)`` and return its result.
+
+        The synchronous call is dispatched through ``asyncio.to_thread`` so it
+        executes in a separate thread instead of blocking the event loop.
+        """
+        return await asyncio.to_thread(self._sync.create, **kwargs)
+
+
+class _AsyncAnthropicChatShim:
+    """Holds the async completions adapter under ``.completions``.
+
+    Assigned to the async client's ``chat`` attribute so that
+    ``await client.chat.completions.create()`` resolves to the adapter.
+    """
+
+    def __init__(self, adapter: _AsyncAnthropicCompletionsAdapter):
+        self.completions = adapter
+
+
+class AsyncAnthropicAuxiliaryClient:
+    """Async view of an existing :class:`AnthropicAuxiliaryClient`.
+
+    Reuses the sync wrapper's completions adapter, credentials and underlying
+    client; only ``chat.completions.create()`` becomes awaitable.
+    """
+
+    def __init__(self, sync_wrapper: AnthropicAuxiliaryClient):
+        self.chat = _AsyncAnthropicChatShim(
+            _AsyncAnthropicCompletionsAdapter(sync_wrapper.chat.completions)
+        )
+        # Mirror _real_client so cache eviction on a poisoned underlying
+        # client also drops this entry.
+        self._real_client = sync_wrapper._real_client
+        self.api_key = sync_wrapper.api_key
+        self.base_url = sync_wrapper.base_url
@@ -106,6 +106,41 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_

 logger = logging.getLogger(__name__)

+# ---------------------------------------------------------------------------
+# Core anthropic wire-format modules (no SDK dependency)
+# ---------------------------------------------------------------------------
+
+from agent.anthropic_aux import (  # noqa: F401
+    AnthropicAuxiliaryClient,
+    AsyncAnthropicAuxiliaryClient,
+)
+
+# ---------------------------------------------------------------------------
+# Plugin-registry helper — access *plugin-provided* anthropic services
+# (resolve.py functions: maybe_wrap_anthropic, is_anthropic_compat_endpoint, etc.)
+# Wire-format code (message conversion, aux client wrappers) lives in core
+# and is imported directly above.
+# ---------------------------------------------------------------------------
+
+def _anthropic_plugin_service(name: str):
+    """Lazy accessor for anthropic plugin resolve services.
+
+    Only the SDK-dependent orchestration (maybe_wrap_anthropic,
+    is_anthropic_compat_endpoint, convert_openai_images_to_anthropic) lives
+    in the plugin.  Core accesses it through
+    ``registries.get_provider_service("anthropic", name)`` so that:
+      - Core never imports from a plugin package directly.
+      - The plugin need only be installed when the user actually uses it.
+
+    The three transport helpers above are invoked unconditionally on generic
+    (non-Anthropic) resolution and retry paths, so a missing plugin must not
+    break those paths.  When the service is not registered they degrade to
+    OpenAI-wire no-ops:
+      - ``maybe_wrap_anthropic`` returns the client it was given, unchanged.
+      - ``is_anthropic_compat_endpoint`` returns ``False``.
+      - ``convert_openai_images_to_anthropic`` returns the messages unchanged.
+
+    Args:
+        name: Service name registered under the ``"anthropic"`` provider.
+
+    Returns:
+        The registered service, or the no-op fallback for the three helpers
+        listed above when nothing is registered.
+
+    Raises:
+        ImportError: ``name`` is not registered and has no fallback.
+    """
+    from agent.plugin_registries import registries
+    svc = registries.get_provider_service("anthropic", name)
+    if svc is not None:
+        return svc
+
+    # Keep the plain OpenAI-wire behaviour when the plugin is absent instead of
+    # failing every auxiliary call, including those for unrelated providers.
+    fallbacks = {
+        "maybe_wrap_anthropic": lambda client_obj, *_args, **_kwargs: client_obj,
+        "is_anthropic_compat_endpoint": lambda *_args, **_kwargs: False,
+        "convert_openai_images_to_anthropic": lambda messages, *_args, **_kwargs: messages,
+    }
+    fallback = fallbacks.get(name)
+    if fallback is None:
+        raise ImportError(
+            f"anthropic plugin service {name!r} not available — "
+            f"the hermes_agent_anthropic package may not be installed"
+        )
+    logger.debug(
+        "anthropic plugin service %r not registered — using OpenAI-wire no-op fallback",
+        name,
+    )
+    return fallback
+

 def _safe_isinstance(obj: Any, maybe_type: Any) -> bool:
    """Return False instead of raising when a patched symbol is not a type."""
@@ -417,7 +452,6 @@ auxiliary_is_nous: bool = False
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
-_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"

 # Codex OAuth endpoint used when a caller explicitly requests
@@ -948,253 +982,6 @@ class AsyncCodexAuxiliaryClient:
        self._real_client = sync_wrapper._real_client


-class _AnthropicCompletionsAdapter:
-    """OpenAI-client-compatible adapter for Anthropic Messages API."""
-
-    def __init__(self, real_client: Any, model: str, is_oauth: bool = False):
-        self._client = real_client
-        self._model = model
-        self._is_oauth = is_oauth
-
-    def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs
-        from agent.transports import get_transport
-
-        messages = kwargs.get("messages", [])
-        model = kwargs.get("model", self._model)
-        tools = kwargs.get("tools")
-        tool_choice = kwargs.get("tool_choice")
-        # ZAI's Anthropic-compatible endpoint rejects max_tokens on vision
-        # models (glm-4v-flash etc.) with error code 1210.  When the caller
-        # signals this by setting _skip_zai_max_tokens in kwargs, omit it.
-        _skip_mt = kwargs.pop("_skip_zai_max_tokens", False)
-        if _skip_mt:
-            max_tokens = None
-        else:
-            max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000
-        temperature = kwargs.get("temperature")
-
-        normalized_tool_choice = None
-        if isinstance(tool_choice, str):
-            normalized_tool_choice = tool_choice
-        elif isinstance(tool_choice, dict):
-            choice_type = str(tool_choice.get("type", "")).lower()
-            if choice_type == "function":
-                normalized_tool_choice = tool_choice.get("function", {}).get("name")
-            elif choice_type in {"auto", "required", "none"}:
-                normalized_tool_choice = choice_type
-
-        anthropic_kwargs = build_anthropic_kwargs(
-            model=model,
-            messages=messages,
-            tools=tools,
-            max_tokens=max_tokens,
-            reasoning_config=None,
-            tool_choice=normalized_tool_choice,
-            is_oauth=self._is_oauth,
-        )
-        # Opus 4.7+ rejects any non-default temperature/top_p/top_k; only set
-        # temperature for models that still accept it. build_anthropic_kwargs
-        # additionally strips these keys as a safety net — keep both layers.
-        if temperature is not None:
-            from agent.anthropic_adapter import _forbids_sampling_params
-            if not _forbids_sampling_params(model):
-                anthropic_kwargs["temperature"] = temperature
-
-        response = self._client.messages.create(**anthropic_kwargs)
-        _transport = get_transport("anthropic_messages")
-        _nr = _transport.normalize_response(
-            response, strip_tool_prefix=self._is_oauth
-        )
-
-        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
-        # .function.arguments) via properties, so no wrapping needed.
-        assistant_message = SimpleNamespace(
-            content=_nr.content,
-            tool_calls=_nr.tool_calls,
-            reasoning=_nr.reasoning,
-        )
-        finish_reason = _nr.finish_reason
-
-        usage = None
-        if hasattr(response, "usage") and response.usage:
-            prompt_tokens = getattr(response.usage, "input_tokens", 0) or 0
-            completion_tokens = getattr(response.usage, "output_tokens", 0) or 0
-            total_tokens = getattr(response.usage, "total_tokens", 0) or (prompt_tokens + completion_tokens)
-            usage = SimpleNamespace(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=total_tokens,
-            )
-
-        choice = SimpleNamespace(
-            index=0,
-            message=assistant_message,
-            finish_reason=finish_reason,
-        )
-        return SimpleNamespace(
-            choices=[choice],
-            model=model,
-            usage=usage,
-        )
-
-
-class _AnthropicChatShim:
-    def __init__(self, adapter: _AnthropicCompletionsAdapter):
-        self.completions = adapter
-
-
-class AnthropicAuxiliaryClient:
-    """OpenAI-client-compatible wrapper over a native Anthropic client."""
-
-    def __init__(self, real_client: Any, model: str, api_key: str, base_url: str, is_oauth: bool = False):
-        self._real_client = real_client
-        adapter = _AnthropicCompletionsAdapter(real_client, model, is_oauth=is_oauth)
-        self.chat = _AnthropicChatShim(adapter)
-        self.api_key = api_key
-        self.base_url = base_url
-
-    def close(self):
-        close_fn = getattr(self._real_client, "close", None)
-        if callable(close_fn):
-            close_fn()
-
-
-class _AsyncAnthropicCompletionsAdapter:
-    def __init__(self, sync_adapter: _AnthropicCompletionsAdapter):
-        self._sync = sync_adapter
-
-    async def create(self, **kwargs) -> Any:
-        import asyncio
-        return await asyncio.to_thread(self._sync.create, **kwargs)
-
-
-class _AsyncAnthropicChatShim:
-    def __init__(self, adapter: _AsyncAnthropicCompletionsAdapter):
-        self.completions = adapter
-
-
-class AsyncAnthropicAuxiliaryClient:
-    def __init__(self, sync_wrapper: "AnthropicAuxiliaryClient"):
-        sync_adapter = sync_wrapper.chat.completions
-        async_adapter = _AsyncAnthropicCompletionsAdapter(sync_adapter)
-        self.chat = _AsyncAnthropicChatShim(async_adapter)
-        self.api_key = sync_wrapper.api_key
-        self.base_url = sync_wrapper.base_url
-        # See AsyncCodexAuxiliaryClient: mirror _real_client so cache
-        # eviction on a poisoned underlying client also drops this entry.
-        self._real_client = sync_wrapper._real_client
-
-
-def _endpoint_speaks_anthropic_messages(base_url: str) -> bool:
-    """True if the endpoint at ``base_url`` speaks the Anthropic Messages
-    protocol instead of OpenAI chat.completions.
-
-    Mirrors ``hermes_cli.runtime_provider._detect_api_mode_for_url`` so the
-    auxiliary client and the main agent stay in sync on transport selection.
-    Covers:
-
-    - Any URL ending in ``/anthropic`` (MiniMax, Zhipu GLM, LiteLLM proxies,
-      Anthropic-compatible gateways).
-    - ``api.kimi.com/coding`` (Kimi Coding Plan — the /coding route only
-      speaks Claude-Code's native Anthropic shape; ``chat.completions``
-      returns 404 on Anthropic-only model aliases like ``kimi-for-coding``).
-    - ``api.anthropic.com`` (native Anthropic).
-    """
-    normalized = (base_url or "").strip().lower().rstrip("/")
-    if not normalized:
-        return False
-    if normalized.endswith("/anthropic"):
-        return True
-    hostname = base_url_hostname(normalized)
-    if hostname == "api.anthropic.com":
-        return True
-    if hostname == "api.kimi.com" and "/coding" in normalized:
-        return True
-    return False
-
-
-def _maybe_wrap_anthropic(
-    client_obj: Any,
-    model: str,
-    api_key: str,
-    base_url: str,
-    api_mode: Optional[str] = None,
-) -> Any:
-    """Rewrap a plain OpenAI client in ``AnthropicAuxiliaryClient`` when
-    the endpoint actually speaks Anthropic Messages.
-
-    This is the single chokepoint for aux-client transport correction.
-    Runs at the end of every ``resolve_provider_client`` branch so that
-    api_key providers (Kimi Coding Plan), the ``custom`` endpoint, and
-    future /anthropic gateways all land on the right wire format
-    regardless of which branch built the client.
-
-    Returns ``client_obj`` unchanged when:
-
-    - It's already an Anthropic/Codex/Gemini/CopilotACP wrapper.
-    - The endpoint is an OpenAI-wire endpoint.
-    - ``api_mode`` is explicitly set to a non-Anthropic transport.
-    - The ``anthropic`` SDK is not installed (falls back to OpenAI wire).
-    """
-    # Already wrapped — don't double-wrap.
-    if _safe_isinstance(client_obj, AnthropicAuxiliaryClient):
-        return client_obj
-    # Other specialized adapters we should never re-dispatch.
-    if _safe_isinstance(client_obj, CodexAuxiliaryClient):
-        return client_obj
-    try:
-        from agent.gemini_native_adapter import GeminiNativeClient
-        if _safe_isinstance(client_obj, GeminiNativeClient):
-            return client_obj
-    except ImportError:
-        pass
-    try:
-        from agent.copilot_acp_client import CopilotACPClient
-        if _safe_isinstance(client_obj, CopilotACPClient):
-            return client_obj
-    except ImportError:
-        pass
-
-    # Explicit non-anthropic api_mode wins over URL heuristics.
-    if api_mode and api_mode != "anthropic_messages":
-        return client_obj
-
-    should_wrap = (
-        api_mode == "anthropic_messages"
-        or _endpoint_speaks_anthropic_messages(base_url)
-    )
-    if not should_wrap:
-        return client_obj
-
-    try:
-        from agent.anthropic_adapter import build_anthropic_client
-    except ImportError:
-        logger.warning(
-            "Endpoint %s speaks Anthropic Messages but the anthropic SDK is "
-            "not installed — falling back to OpenAI-wire (will likely 404).",
-            base_url,
-        )
-        return client_obj
-
-    try:
-        real_client = build_anthropic_client(api_key, base_url)
-    except Exception as exc:
-        logger.warning(
-            "Failed to build Anthropic client for %s (%s) — falling back to "
-            "OpenAI-wire client.", base_url, exc,
-        )
-        return client_obj
-
-    logger.debug(
-        "Auxiliary transport: wrapping client in AnthropicAuxiliaryClient "
-        "(model=%s, base_url=%s, api_mode=%s)",
-        model, base_url[:60] if base_url else "", api_mode or "auto-detected",
-    )
-    return AnthropicAuxiliaryClient(
-        real_client, model, api_key, base_url, is_oauth=False,
-    )
-

 def _read_nous_auth() -> Optional[dict]:
    """Read and validate ~/.hermes/auth.json for an active Nous provider.
@@ -1405,7 +1192,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                    continue
            except ImportError:
                pass
-            return _try_anthropic()
+            # Delegate to the anthropic plugin resolver via the registry
+            from agent.plugin_registries import registries as _ar
+            _anthro_resolver = _ar.get_provider_resolver("anthropic")
+            if _anthro_resolver is not None:
+                _ac, _am = _anthro_resolver()
+                if _ac is not None:
+                    return _ac, _am
+            continue

        pool_present, entry = _select_pool_entry(provider_id)
        if pool_present:
@@ -1442,7 +1236,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                except Exception:
                    pass
            _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
-            _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
+            _client = _anthropic_plugin_service("maybe_wrap_anthropic")(_client, model, api_key, raw_base_url)
            return _client, model

        creds = resolve_api_key_provider_credentials(provider_id)
@@ -1479,7 +1273,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            except Exception:
                pass
        _client = OpenAI(api_key=api_key, base_url=base_url, **extra)
-        _client = _maybe_wrap_anthropic(_client, model, api_key, raw_base_url)
+        _client = _anthropic_plugin_service("maybe_wrap_anthropic")(_client, model, api_key, raw_base_url)
        return _client, model

    return None, None
@@ -1488,7 +1282,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
 # ── Provider resolution helpers ─────────────────────────────────────────────


-
 def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
    pool_present, entry = _select_pool_entry("openrouter")
    if pool_present:
@@ -1810,7 +1603,11 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
        # LiteLLM proxies, etc.).  Must NEVER be treated as OAuth —
        # Anthropic OAuth claims only apply to api.anthropic.com.
        try:
-            from agent.anthropic_adapter import build_anthropic_client
+            from agent.plugin_registries import registries
+            _anthropic = registries.get_provider_namespace("anthropic")
+            build_anthropic_client = _anthropic.get("build_anthropic_client")
+            if build_anthropic_client is None:
+                raise ImportError("anthropic provider not registered")
            real_client = build_anthropic_client(custom_key, custom_base)
        except ImportError:
            logger.warning(
@@ -1825,7 +1622,7 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
    # URL-based anthropic detection for custom endpoints that didn't set
    # api_mode explicitly (e.g. kimi.com/coding reached via custom config).
    _fallback_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
-    _fallback_client = _maybe_wrap_anthropic(
+    _fallback_client = _anthropic_plugin_service("maybe_wrap_anthropic")(
        _fallback_client, model, custom_key, custom_base, custom_mode,
    )
    return _fallback_client, model
@@ -2003,7 +1800,7 @@ def _try_azure_foundry(
        # for Entra ID it's a callable. ``_maybe_wrap_anthropic`` →
        # ``build_anthropic_client`` detects the callable and installs
        # the bearer-injecting httpx hook.
-        return _maybe_wrap_anthropic(
+        return _anthropic_plugin_service("maybe_wrap_anthropic")(
            client, final_model, api_key,
            base_url, runtime_api_mode,
        ), final_model
@@ -2012,54 +1809,6 @@ def _try_azure_foundry(
    return client, final_model


-def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]:
-    try:
-        from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
-    except ImportError:
-        return None, None
-
-    pool_present, entry = _select_pool_entry("anthropic")
-    if pool_present:
-        if entry is None:
-            return None, None
-        token = explicit_api_key or _pool_runtime_api_key(entry)
-    else:
-        entry = None
-        token = explicit_api_key or resolve_anthropic_token()
-    if not token:
-        return None, None
-
-    # Allow base URL override from config.yaml model.base_url, but only
-    # when the configured provider is anthropic — otherwise a non-Anthropic
-    # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
-    base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
-    try:
-        from hermes_cli.config import load_config
-        cfg = load_config()
-        model_cfg = cfg.get("model")
-        if isinstance(model_cfg, dict):
-            cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-            if cfg_provider == "anthropic":
-                cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
-                if cfg_base_url:
-                    base_url = cfg_base_url
-    except Exception:
-        pass
-
-    from agent.anthropic_adapter import _is_oauth_token
-    is_oauth = _is_oauth_token(token)
-    model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
-    logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
-    try:
-        real_client = build_anthropic_client(token, base_url)
-    except ImportError:
-        # The anthropic_adapter module imports fine but the SDK itself is
-        # missing — build_anthropic_client raises ImportError at call time
-        # when _anthropic_sdk is None.  Treat as unavailable.
-        return None, None
-    return AnthropicAuxiliaryClient(real_client, model, token, base_url, is_oauth=is_oauth), model
-
-
 _AUTO_PROVIDER_LABELS = {
    "_try_openrouter": "openrouter",
    "_try_nous": "nous",
@@ -2244,11 +1993,15 @@ def _is_payment_error(exc: Exception) -> bool:
    # but sometimes wrap them in 429 or other codes.
    # Daily quota exhaustion from Bedrock, Vertex AI, and similar providers
    # uses different language but is semantically identical to credit exhaustion.
-    if status in {402, 429, None}:
+    if status in {402, 404, 429, None}:
        if any(kw in err_lower for kw in (
            "credits", "insufficient funds",
            "can only afford", "billing",
            "payment required",
+            "out of funds", "run out of funds",
+            "balance_depleted", "no usable credits",
+            "model_not_supported_on_free_tier",
+            "not available on the free tier",
            # Daily / monthly / weekly quota exhaustion keywords
            "quota exceeded", "quota_exceeded",
            "too many tokens per day", "daily limit",
@@ -2260,6 +2013,18 @@ def _is_payment_error(exc: Exception) -> bool:
    return False


+def _nous_portal_account_has_fresh_paid_access() -> bool:
+    """Report whether a freshly fetched Nous account record allows paid access.
+
+    Any exception raised while importing or querying the account helper is
+    logged at debug level and reported as ``False``.
+    """
+    paid_access = False
+    try:
+        from hermes_cli.nous_account import get_nous_portal_account_info
+
+        fresh_info = get_nous_portal_account_info(force_fresh=True)
+        # Identity check: only a literal True counts, not merely truthy values.
+        paid_access = fresh_info.paid_service_access is True
+    except Exception as exc:
+        logger.debug("Auxiliary Nous paid-entitlement refresh check failed: %s", exc)
+    return paid_access
+
+
 def _is_rate_limit_error(exc: Exception) -> bool:
    """Detect rate-limit errors that warrant provider fallback.

@@ -2288,6 +2053,10 @@ def _is_rate_limit_error(exc: Exception) -> bool:
        if not any(kw in err_lower for kw in (
            "credits", "insufficient funds", "billing",
            "payment required", "can only afford",
+            "out of funds", "run out of funds",
+            "balance_depleted", "no usable credits",
+            "model_not_supported_on_free_tier",
+            "not available on the free tier",
        )):
            return True
    return False
@@ -2609,8 +2378,8 @@ def _retry_same_provider_sync(
        extra_body=effective_extra_body,
        base_url=retry_base or resolved_base_url,
    )
-    if _is_anthropic_compat_endpoint(resolved_provider, retry_base):
-        retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
+    if _anthropic_plugin_service("is_anthropic_compat_endpoint")(resolved_provider, retry_base):
+        retry_kwargs["messages"] = _anthropic_plugin_service("convert_openai_images_to_anthropic")(retry_kwargs["messages"])
    return _validate_llm_response(
        retry_client.chat.completions.create(**retry_kwargs), task,
    )
@@ -2666,8 +2435,8 @@ async def _retry_same_provider_async(
        extra_body=effective_extra_body,
        base_url=retry_base or resolved_base_url,
    )
-    if _is_anthropic_compat_endpoint(resolved_provider, retry_base):
-        retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
+    if _anthropic_plugin_service("is_anthropic_compat_endpoint")(resolved_provider, retry_base):
+        retry_kwargs["messages"] = _anthropic_plugin_service("convert_openai_images_to_anthropic")(retry_kwargs["messages"])
    return _validate_llm_response(
        await retry_client.chat.completions.create(**retry_kwargs), task,
    )
@@ -2701,12 +2470,19 @@ def _refresh_provider_credentials(provider: str) -> bool:
            _evict_cached_clients(normalized)
            return True
        if normalized == "anthropic":
-            from agent.anthropic_adapter import read_claude_code_credentials, _refresh_oauth_token, resolve_anthropic_token
+            from agent.plugin_registries import registries
+            _anthropic = registries.get_provider_namespace("anthropic")
+            read_claude_code_credentials = _anthropic.get("read_claude_code_credentials")
+            _refresh_oauth_token = _anthropic.get("_refresh_oauth_token")
+            resolve_anthropic_token = _anthropic.get("resolve_anthropic_token")
+            if read_claude_code_credentials is None:
+                return False

            creds = read_claude_code_credentials()
-            token = _refresh_oauth_token(creds) if isinstance(creds, dict) and creds.get("refreshToken") else None
+            token = _refresh_oauth_token(creds) if isinstance(creds, dict) and creds.get("refreshToken") and _refresh_oauth_token else None
            if not str(token or "").strip():
-                token = resolve_anthropic_token()
+                if resolve_anthropic_token is not None:
+                    token = resolve_anthropic_token()
            if not str(token or "").strip():
                return False
            _evict_cached_clients(normalized)
@@ -3027,7 +2803,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):

    if isinstance(sync_client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(sync_client), model
-    if isinstance(sync_client, AnthropicAuxiliaryClient):
+    if _safe_isinstance(sync_client, AnthropicAuxiliaryClient):
        return AsyncAnthropicAuxiliaryClient(sync_client), model
    try:
        from agent.gemini_native_adapter import GeminiNativeClient, AsyncGeminiNativeClient
@@ -3213,7 +2989,7 @@ def resolve_provider_client(
            return CodexAuxiliaryClient(client_obj, final_model_str)
        # Anthropic-wire endpoints: rewrap plain OpenAI clients so
        # chat.completions.create() is translated to /v1/messages.
-        return _maybe_wrap_anthropic(
+        return _anthropic_plugin_service("maybe_wrap_anthropic")(
            client_obj, final_model_str, api_key_str, base_url_str, api_mode,
        )

@@ -3445,7 +3221,11 @@ def resolve_provider_client(
                # branch in _try_custom_endpoint(). See #15033.
                if entry_api_mode == "anthropic_messages":
                    try:
-                        from agent.anthropic_adapter import build_anthropic_client
+                        from agent.plugin_registries import registries
+                        _anthropic = registries.get_provider_namespace("anthropic")
+                        build_anthropic_client = _anthropic.get("build_anthropic_client")
+                        if build_anthropic_client is None:
+                            raise ImportError("anthropic provider not registered")
                        real_client = build_anthropic_client(custom_key, custom_base)
                    except ImportError:
                        logger.warning(
@@ -3488,39 +3268,32 @@ def resolve_provider_client(
    except ImportError:
        pass

-    # ── Azure Foundry (delegates to runtime resolver for auth_mode-aware routing) ─
-    #
-    # The generic PROVIDER_REGISTRY path below uses
-    # ``resolve_api_key_provider_credentials`` which only knows about the
-    # static ``AZURE_FOUNDRY_API_KEY`` env var. That misses two important
-    # cases for the ``azure-foundry`` provider:
-    #
-    #   1. ``model.auth_mode: entra_id`` — no static key exists; we need
-    #      a callable bearer-token provider from ``azure_identity_adapter``.
-    #   2. Non-default ``model.base_url`` (Foundry projects path) — the
-    #      env-var-only resolver doesn't apply config-yaml-driven URL
-    #      overrides.
-    #
-    # Delegate to the same runtime resolver the main agent uses so
-    # auxiliary tasks (title generation, compression, vision, embedding,
-    # session search) inherit the user's full Azure config.
-    if provider == "azure-foundry":
-        client, default_model = _try_azure_foundry(
+    # ── Plugin-registered resolvers (azure-foundry, etc.) ──────────────
+    # Providers with complex auth (Entra ID, OAuth, etc.) register a
+    # resolver callable so core doesn't need per-provider if/elif branches.
+    from agent.plugin_registries import registries as _reg_early
+    _early_resolver = _reg_early.get_provider_resolver(provider)
+    if _early_resolver is not None:
+        client, default_model = _early_resolver(
            model=model,
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
+            async_mode=async_mode,
+            is_vision=is_vision,
+            main_runtime=main_runtime,
            api_mode=api_mode,
        )
-        if client is None:
-            logger.warning(
-                "resolve_provider_client: azure-foundry requested but "
-                "runtime resolution failed (run: hermes doctor for "
-                "diagnostics)"
-            )
-            return None, None
-        final_model = _normalize_resolved_model(model or default_model, provider)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
-                else (client, final_model))
+        if client is not None:
+            final_model = _normalize_resolved_model(model or default_model, provider)
+            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
+                    else (client, final_model))
+        # Resolver returned None — provider unavailable
+        logger.warning(
+            "resolve_provider_client: %s requested but resolver returned "
+            "no client (run: hermes doctor for diagnostics)",
+            provider,
+        )
+        return None, None

    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
    try:
@@ -3539,14 +3312,6 @@ def resolve_provider_client(
        return None, None

    if pconfig.auth_type == "api_key":
-        if provider == "anthropic":
-            client, default_model = _try_anthropic(explicit_api_key=explicit_api_key)
-            if client is None:
-                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
-                return None, None
-            final_model = _normalize_resolved_model(model or default_model, provider)
-            return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode else (client, final_model))
-
        creds = resolve_api_key_provider_credentials(provider)
        api_key = str(creds.get("api_key", "")).strip()
        # Honour an explicit api_key override (e.g. from a fallback_model entry
@@ -3679,37 +3444,14 @@ def resolve_provider_client(
        return None, None

    elif pconfig.auth_type == "aws_sdk":
-        # AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
-        # boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
-        try:
-            from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
-            from agent.anthropic_adapter import build_anthropic_bedrock_client
-        except ImportError:
-            logger.warning("resolve_provider_client: bedrock requested but "
-                           "boto3 or anthropic SDK not installed")
-            return None, None
-
-        if not has_aws_credentials():
-            logger.debug("resolve_provider_client: bedrock requested but "
-                         "no AWS credentials found")
-            return None, None
-
-        region = resolve_bedrock_region()
-        default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
-        final_model = _normalize_resolved_model(model or default_model, provider)
-        try:
-            real_client = build_anthropic_bedrock_client(region)
-        except ImportError as exc:
-            logger.warning("resolve_provider_client: cannot create Bedrock "
-                           "client: %s", exc)
-            return None, None
-        client = AnthropicAuxiliaryClient(
-            real_client, final_model, api_key="aws-sdk",
-            base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
+        # AWS SDK providers (e.g. Bedrock) are handled by the plugin-registered
+        # resolver lookup earlier in this function.  Reaching this branch means
+        # no resolver was registered for the provider.
+        logger.warning(
+            "resolve_provider_client: aws_sdk provider %s has no "
+            "registered resolver (plugin not loaded?)", provider,
        )
-        logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
-        return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
-                else (client, final_model))
+        return None, None

    elif pconfig.auth_type in {"oauth_device_code", "oauth_external"}:
        # OAuth providers — route through their specific try functions
@@ -3833,7 +3575,12 @@ def _resolve_strict_vision_backend(
        # allow-list); callers must specify via auxiliary.<task>.model.
        return resolve_provider_client("openai-codex", model, is_vision=True)
    if provider == "anthropic":
-        return _try_anthropic()
+        from agent.plugin_registries import registries as _reg
+        _resolver = _reg.get_provider_resolver("anthropic")
+        if _resolver is not None:
+            return _resolver(model=model)
+        # Fallback: no resolver registered (plugin not loaded)
+        return None, None
    if provider == "custom":
        return _try_custom_endpoint()
    return None, None
@@ -4563,69 +4310,6 @@ def _get_task_extra_body(task: str) -> Dict[str, Any]:

 # Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
 # Their image content blocks must use Anthropic format, not OpenAI format.
-_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-oauth", "minimax-cn"})
-
-
-def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
-    """Detect if an endpoint expects Anthropic-format content blocks.
-
-    Returns True for known Anthropic-compatible providers (MiniMax) and
-    any endpoint whose URL contains ``/anthropic`` in the path.
-    """
-    if provider in _ANTHROPIC_COMPAT_PROVIDERS:
-        return True
-    url_lower = (base_url or "").lower()
-    return "/anthropic" in url_lower
-
-
-def _convert_openai_images_to_anthropic(messages: list) -> list:
-    """Convert OpenAI ``image_url`` content blocks to Anthropic ``image`` blocks.
-
-    Only touches messages that have list-type content with ``image_url`` blocks;
-    plain text messages pass through unchanged.
-    """
-    converted = []
-    for msg in messages:
-        content = msg.get("content")
-        if not isinstance(content, list):
-            converted.append(msg)
-            continue
-        new_content = []
-        changed = False
-        for block in content:
-            if block.get("type") == "image_url":
-                image_url_val = (block.get("image_url") or {}).get("url", "")
-                if image_url_val.startswith("data:"):
-                    # Parse data URI: data:<media_type>;base64,<data>
-                    header, _, b64data = image_url_val.partition(",")
-                    media_type = "image/png"
-                    if ":" in header and ";" in header:
-                        media_type = header.split(":", 1)[1].split(";", 1)[0]
-                    new_content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "base64",
-                            "media_type": media_type,
-                            "data": b64data,
-                        },
-                    })
-                else:
-                    # URL-based image
-                    new_content.append({
-                        "type": "image",
-                        "source": {
-                            "type": "url",
-                            "url": image_url_val,
-                        },
-                    })
-                changed = True
-            else:
-                new_content.append(block)
-        converted.append({**msg, "content": new_content} if changed else msg)
-    return converted
-
-
-
 def _build_call_kwargs(
    provider: str,
    model: str,
@@ -4655,8 +4339,10 @@ def _build_call_kwargs(
    # structured-JSON extraction) don't 400 the moment
    # the aux model is flipped to 4.7.
    if temperature is not None:
-        from agent.anthropic_adapter import _forbids_sampling_params
-        if _forbids_sampling_params(model):
+        from agent.plugin_registries import registries
+        _anthropic = registries.get_provider_namespace("anthropic")
+        _forbids_sampling_params = _anthropic.get("_forbids_sampling_params")
+        if _forbids_sampling_params is not None and _forbids_sampling_params(model):
            temperature = None

    if temperature is not None:
@@ -4868,8 +4554,8 @@ def call_llm(

    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
    _client_base = str(getattr(client, "base_url", "") or "")
-    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
-        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
+    if _anthropic_plugin_service("is_anthropic_compat_endpoint")(resolved_provider, _client_base):
+        kwargs["messages"] = _anthropic_plugin_service("convert_openai_images_to_anthropic")(kwargs["messages"])

    # Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
    # then payment fallback.
@@ -4937,6 +4623,41 @@ def call_llm(
            resolved_provider == "nous"
            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
        )
+        if (
+            _is_payment_error(first_err)
+            and client_is_nous
+            and _nous_portal_account_has_fresh_paid_access()
+        ):
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=False,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                main_runtime=main_runtime,
+                is_vision=(task == "vision"),
+            )
+            if refreshed_client is not None:
+                logger.info(
+                    "Auxiliary %s: refreshed Nous runtime credentials after paid account check, retrying",
+                    task or "call",
+                )
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                try:
+                    return _validate_llm_response(
+                        refreshed_client.chat.completions.create(**kwargs), task)
+                except Exception as retry_err:
+                    if not (
+                        _is_auth_error(retry_err)
+                        or _is_payment_error(retry_err)
+                        or _is_connection_error(retry_err)
+                        or _is_rate_limit_error(retry_err)
+                    ):
+                        raise
+                    first_err = retry_err
+
        if _is_auth_error(first_err) and client_is_nous:
            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
                cache_provider=resolved_provider or "nous",
@@ -5276,8 +4997,8 @@ async def async_call_llm(
        base_url=_client_base or resolved_base_url)

    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
-    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
-        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
+    if _anthropic_plugin_service("is_anthropic_compat_endpoint")(resolved_provider, _client_base):
+        kwargs["messages"] = _anthropic_plugin_service("convert_openai_images_to_anthropic")(kwargs["messages"])

    try:
        return _validate_llm_response(
@@ -5339,6 +5060,40 @@ async def async_call_llm(
            resolved_provider == "nous"
            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
        )
+        if (
+            _is_payment_error(first_err)
+            and client_is_nous
+            and _nous_portal_account_has_fresh_paid_access()
+        ):
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=True,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                is_vision=(task == "vision"),
+            )
+            if refreshed_client is not None:
+                logger.info(
+                    "Auxiliary %s (async): refreshed Nous runtime credentials after paid account check, retrying",
+                    task or "call",
+                )
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                try:
+                    return _validate_llm_response(
+                        await refreshed_client.chat.completions.create(**kwargs), task)
+                except Exception as retry_err:
+                    if not (
+                        _is_auth_error(retry_err)
+                        or _is_payment_error(retry_err)
+                        or _is_connection_error(retry_err)
+                        or _is_rate_limit_error(retry_err)
+                    ):
+                        raise
+                    first_err = retry_err
+
        if _is_auth_error(first_err) and client_is_nous:
            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
                cache_provider=resolved_provider or "nous",
@@ -483,6 +483,11 @@ def _run_review_in_thread(
            finally:
                clear_thread_tool_whitelist()

+            # Snapshot review actions before teardown. close() is allowed to
+            # clean per-session state, but the user-visible self-improvement
+            # summary still needs the completed review agent's tool results.
+            review_messages = list(getattr(review_agent, "_session_messages", []))
+
            # Tear down memory providers while stdout is still
            # redirected so background thread teardown (Honcho flush,
            # Hindsight sync, etc.) stays silent.  The finally block
@@ -495,7 +500,6 @@ def _run_review_in_thread(
                review_agent.close()
            except Exception:
                pass
-            review_messages = list(getattr(review_agent, "_session_messages", []))
            review_agent = None

        # Scan the review agent's messages for successful tool actions
@@ -235,12 +235,14 @@ def interruptible_api_call(agent, api_kwargs: dict):
                # normalize_converse_response produces an OpenAI-compatible
                # SimpleNamespace so the rest of the agent loop can treat
                # bedrock responses like chat_completions responses.
-                from agent.bedrock_adapter import (
-                    _get_bedrock_runtime_client,
-                    invalidate_runtime_client,
-                    is_stale_connection_error,
-                    normalize_converse_response,
-                )
+                from agent.plugin_registries import registries  # bedrock helpers now come from the plugin registry
+                _bedrock = registries.get_provider_namespace("bedrock")
+                _get_bedrock_runtime_client = _bedrock.get("_get_bedrock_runtime_client")
+                invalidate_runtime_client = _bedrock.get("invalidate_runtime_client")
+                is_stale_connection_error = _bedrock.get("is_stale_connection_error")
+                normalize_converse_response = _bedrock.get("normalize_converse_response")
+                if None in (_get_bedrock_runtime_client, invalidate_runtime_client, is_stale_connection_error, normalize_converse_response):
+                    raise ImportError("bedrock provider not registered")  # the hard import this replaces failed if ANY of the four names was missing
                region = api_kwargs.pop("__bedrock_region__", "us-east-1")
                api_kwargs.pop("__bedrock_converse__", None)
                client = _get_bedrock_runtime_client(region)
@@ -403,13 +405,13 @@ def interruptible_api_call(agent, api_kwargs: dict):
                _elapsed, _ttfb_timeout, api_kwargs.get("model", "unknown"),
            )
            if _silent_hint:
-                agent._emit_status(
+                agent._buffer_status(
                    f"⚠️ No first byte from provider in {int(_elapsed)}s "
                    f"(codex stream, model: {api_kwargs.get('model', 'unknown')}). "
                    f"Reconnecting. {_silent_hint}"
                )
            else:
-                agent._emit_status(
+                agent._buffer_status(
                    f"⚠️ No first byte from provider in {int(_elapsed)}s "
                    f"(codex stream, model: {api_kwargs.get('model', 'unknown')}). "
                    f"Reconnecting."
@@ -455,7 +457,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
                api_kwargs.get("model", "unknown"),
                f"{_est_tokens_for_codex_watchdog:,}",
            )
-            agent._emit_status(
+            agent._buffer_status(
                f"⚠️ Codex stream sent no events for {int(_event_stale_elapsed)}s "
                f"after first byte (model: {api_kwargs.get('model', 'unknown')}). "
                f"Reconnecting."
@@ -493,13 +495,13 @@ def interruptible_api_call(agent, api_kwargs: dict):
                api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
            )
            if _silent_hint:
-                agent._emit_status(
+                agent._buffer_status(
                    f"⚠️ No response from provider for {int(_elapsed)}s "
                    f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
                    f"{_silent_hint}"
                )
            else:
-                agent._emit_status(
+                agent._buffer_status(
                    f"⚠️ No response from provider for {int(_elapsed)}s "
                    f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
                    f"Aborting call."
@@ -696,8 +698,11 @@ def build_api_kwargs(agent, api_messages: list) -> dict:
    _ant_max = None
    if (_is_or or _is_nous) and "claude" in (agent.model or "").lower():
        try:
-            from agent.anthropic_adapter import _get_anthropic_max_output
-            _ant_max = _get_anthropic_max_output(agent.model)
+            from agent.plugin_registries import registries
+            _anthropic = registries.get_provider_namespace("anthropic")
+            _get_anthropic_max_output = _anthropic.get("_get_anthropic_max_output")
+            if _get_anthropic_max_output is not None:
+                _ant_max = _get_anthropic_max_output(agent.model)
        except Exception:
            pass

@@ -1182,15 +1187,20 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool

        if fb_api_mode == "anthropic_messages":
            # Build native Anthropic client instead of using OpenAI client
-            from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token, _is_oauth_token
-            effective_key = (fb_client.api_key or resolve_anthropic_token() or "") if fb_provider == "anthropic" else (fb_client.api_key or "")
+            from agent.plugin_registries import registries
+            _anthropic = registries.get_provider_namespace("anthropic")
+            build_anthropic_client = _anthropic.get("build_anthropic_client")
+            resolve_anthropic_token = _anthropic.get("resolve_anthropic_token")
+            _is_oauth_token = _anthropic.get("_is_oauth_token")
+            effective_key = (fb_client.api_key or (resolve_anthropic_token() if resolve_anthropic_token else "") or "") if fb_provider == "anthropic" else (fb_client.api_key or "")
            agent.api_key = effective_key
            agent._anthropic_api_key = effective_key
            agent._anthropic_base_url = fb_base_url
-            agent._anthropic_client = build_anthropic_client(
-                effective_key, agent._anthropic_base_url, timeout=_fb_timeout,
-            )
-            agent._is_anthropic_oauth = _is_oauth_token(effective_key) if fb_provider == "anthropic" else False
+            if build_anthropic_client is not None:
+                agent._anthropic_client = build_anthropic_client(
+                    effective_key, agent._anthropic_base_url, timeout=_fb_timeout,
+                )
+            agent._is_anthropic_oauth = _is_oauth_token(effective_key) if fb_provider == "anthropic" and _is_oauth_token else False
            agent.client = None
            agent._client_kwargs = {}
        else:
@@ -1262,7 +1272,7 @@ def try_activate_fallback(agent, reason: "FailoverReason | None" = None) -> bool
                api_mode=agent.api_mode,
            )

-        agent._emit_status(
+        agent._buffer_status(
            f"🔄 Primary model failed — switching to fallback: "
            f"{fb_model} via {fb_provider}"
        )
@@ -1574,12 +1584,14 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=

        def _bedrock_call():
            try:
-                from agent.bedrock_adapter import (
-                    _get_bedrock_runtime_client,
-                    invalidate_runtime_client,
-                    is_stale_connection_error,
-                    stream_converse_with_callbacks,
-                )
+                from agent.plugin_registries import registries  # bedrock helpers now come from the plugin registry
+                _bedrock = registries.get_provider_namespace("bedrock")
+                _get_bedrock_runtime_client = _bedrock.get("_get_bedrock_runtime_client")
+                invalidate_runtime_client = _bedrock.get("invalidate_runtime_client")
+                is_stale_connection_error = _bedrock.get("is_stale_connection_error")
+                stream_converse_with_callbacks = _bedrock.get("stream_converse_with_callbacks")
+                if None in (_get_bedrock_runtime_client, invalidate_runtime_client, is_stale_connection_error, stream_converse_with_callbacks):
+                    raise ImportError("bedrock provider not registered")  # the hard import this replaces failed if ANY of the four names was missing
                region = api_kwargs.pop("__bedrock_region__", "us-east-1")
                api_kwargs.pop("__bedrock_converse__", None)
                client = _get_bedrock_runtime_client(region)
@@ -2251,7 +2263,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            mid_tool_call=False,
                            diag=request_client_holder.get("diag"),
                        )
-                        agent._emit_status(
+                        agent._buffer_status(
                            "❌ Provider returned malformed streaming data after "
                            f"{_max_stream_retries + 1} attempts. "
                            "The provider may be experiencing issues — "
@@ -2358,7 +2370,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                _stale_elapsed, _stream_stale_timeout,
                api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
            )
-            agent._emit_status(
+            agent._buffer_status(
                f"⚠️ No response from provider for {int(_stale_elapsed)}s "
                f"(model: {api_kwargs.get('model', 'unknown')}, "
                f"context: ~{_est_ctx:,} tokens). "
@@ -221,6 +221,114 @@ def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    return json.dumps(shrunken, ensure_ascii=False)


+_IMAGE_PART_TYPES = frozenset({"image_url", "input_image", "image"})
+
+
+def _is_image_part(part: Any) -> bool:
+    """Tell whether ``part`` is a dict whose ``"type"`` names an image block.
+
+    The accepted ``"type"`` values are the members of ``_IMAGE_PART_TYPES``:
+      - ``"image_url"``   (OpenAI chat.completions style)
+      - ``"input_image"`` (OpenAI Responses API style)
+      - ``"image"``       (Anthropic native style)
+
+    Anything that is not a dict is never an image part.
+    """
+    return isinstance(part, dict) and part.get("type") in _IMAGE_PART_TYPES
+
+
+def _content_has_images(content: Any) -> bool:
+    """Report whether ``content`` is a list holding at least one image part."""
+    if isinstance(content, list):
+        return any(_is_image_part(item) for item in content)
+    return False
+
+
+def _strip_images_from_content(content: Any) -> Any:
+    """Swap every image part of a multimodal ``content`` list for a text stub.
+
+    Behavior by input:
+      - not a list (string or anything else): the same object is returned.
+      - a list with no image parts: the same list object is returned.
+      - a list with image parts: a new list is returned in which each image
+        part is replaced by ``{"type": "text", "text": "[Attached image —
+        stripped after compression]"}`` and all other parts are carried over
+        as the same objects.
+
+    The caller's list is never modified.
+    """
+    has_image = isinstance(content, list) and any(
+        _is_image_part(item) for item in content
+    )
+    if not has_image:
+        return content
+
+    # Build a fresh placeholder dict per image so results share no state.
+    return [
+        {"type": "text", "text": "[Attached image — stripped after compression]"}
+        if _is_image_part(item)
+        else item
+        for item in content
+    ]
+
+
+def _strip_historical_media(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Drop image payloads from messages older than the latest image-bearing user turn.
+
+    The *anchor* is the highest index whose message is a dict with
+    ``role == "user"`` and image parts in its ``content``. Messages at or
+    after the anchor are passed through untouched; every dict message before
+    it that carries image parts is shallow-copied with its ``content``
+    rewritten by ``_strip_images_from_content``. This keeps the outgoing
+    request from re-shipping the same multi-MB base-64 image blobs each turn.
+
+    Anchoring on image-bearing user messages (rather than on any user
+    message) means a plain-text follow-up after a big-image turn does not
+    move the anchor past that image turn.
+
+    The original ``messages`` object is returned unchanged when it is empty,
+    when no user message carries images, when the anchor is index 0, or when
+    nothing before the anchor contains an image. Input is never mutated.
+
+    Port of Kilo-Org/kilocode#9434 (adapted for the OpenAI-style message
+    shape the hermes compressor emits).
+    """
+    if not messages:
+        return messages
+
+    # Walk backwards so the first hit is the newest image-bearing user turn.
+    anchor = next(
+        (
+            idx
+            for idx in range(len(messages) - 1, -1, -1)
+            if isinstance(messages[idx], dict)
+            and messages[idx].get("role") == "user"
+            and _content_has_images(messages[idx].get("content"))
+        ),
+        -1,
+    )
+    if anchor <= 0:
+        # Either no anchor exists or nothing precedes it.
+        return messages
+
+    # Start from a shallow copy of the list and overwrite only touched slots.
+    result: List[Dict[str, Any]] = list(messages)
+    changed = False
+    for idx in range(anchor):
+        original = messages[idx]
+        if not isinstance(original, dict):
+            continue
+        content = original.get("content")
+        if not _content_has_images(content):
+            continue
+        stripped = original.copy()
+        stripped["content"] = _strip_images_from_content(content)
+        result[idx] = stripped
+        changed = True
+
+    return result if changed else messages
+
+
 def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
    """Create an informative 1-line summary of a tool call + result.

@@ -1609,6 +1717,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        compressed = self._sanitize_tool_pairs(compressed)

+        # Replace image parts in all compressed messages before the newest
+        # image-bearing user turn with a short text placeholder. Without
+        # this, tail messages keep their original multi-MB base-64 image
+        # payloads forever, which can push every subsequent API request
+        # past the provider's body-size limit and wedge the session.
+        # Port of Kilo-Org/kilocode#9434.
+        compressed = _strip_historical_media(compressed)
+
        new_estimate = estimate_messages_tokens_rough(compressed)
        saved_estimate = display_tokens - new_estimate

@@ -71,7 +71,12 @@ class ContextEngine(ABC):
    def update_from_response(self, usage: Dict[str, Any]) -> None:
        """Update tracked token usage from an API response.

-        Called after every LLM call with the usage dict from the response.
+        Called after every LLM call with a normalized usage dict. The legacy
+        keys ``prompt_tokens``, ``completion_tokens``, and ``total_tokens``
+        are always present. Newer hosts also include canonical buckets:
+        ``input_tokens``, ``output_tokens``, ``cache_read_tokens``,
+        ``cache_write_tokens``, and ``reasoning_tokens``. Engines should
+        treat those fields as optional for compatibility with older hosts.
        """

    @abstractmethod
@@ -421,6 +421,7 @@ def compress_context(
                agent.session_id or "",
                boundary_reason="compression",
                old_session_id=_old_sid,
+                conversation_id=getattr(agent, "_gateway_session_key", None),
            )
    except Exception as _ce_err:
        logger.debug("context engine on_session_start (compression): %s", _ce_err)
@@ -27,7 +27,7 @@ import time
 import uuid
 from typing import Any, Dict, List, Optional

-from agent.anthropic_adapter import _is_oauth_token
+from agent.plugin_registries import registries as _registries
 from agent.auxiliary_client import set_runtime_main
 from agent.codex_responses_adapter import _summarize_user_message_for_log
 from agent.display import KawaiiSpinner
@@ -49,9 +49,8 @@ from agent.model_metadata import (
    MINIMUM_CONTEXT_LENGTH,
    estimate_messages_tokens_rough,
    estimate_request_tokens_rough,
-    get_next_probe_tier,
+    get_context_length_from_provider_error,
    parse_available_output_tokens_from_error,
-    parse_context_limit_from_error,
    save_context_length,
 )
 from agent.nous_rate_guard import (
@@ -127,6 +126,106 @@ def _ra():
    return run_agent


+def _nous_entitlement_message(capability: str) -> str:
+    """Return Nous Portal entitlement guidance for ``capability``, or ``""``.
+
+    Fetches account info with ``force_fresh=True`` and formats it with
+    ``format_nous_portal_entitlement_message``.  This helper is best-effort:
+    any exception (including a failed ``hermes_cli`` import) is logged at
+    debug level and reported to the caller as an empty string.
+
+    Args:
+        capability: Passed through to the formatter as ``capability``.
+
+    Returns:
+        The formatted message, or ``""`` when the formatter returns a falsy
+        value or any step raises.
+    """
+    try:
+        from hermes_cli.nous_account import (
+            format_nous_portal_entitlement_message,
+            get_nous_portal_account_info,
+        )
+
+        account_info = get_nous_portal_account_info(force_fresh=True)
+        message = format_nous_portal_entitlement_message(
+            account_info,
+            capability=capability,
+        )
+        return message or ""
+    except Exception as exc:
+        # Never raise to the caller, but leave a trace so a failed lookup
+        # can be diagnosed instead of vanishing silently.
+        logger.debug("Nous entitlement message lookup failed: %s", exc)
+        return ""
+
+
+def _print_nous_entitlement_guidance(agent, capability: str) -> bool:
+    """Print Nous entitlement guidance for ``capability`` via ``agent._vprint``.
+
+    Each line of the message from ``_nous_entitlement_message`` is emitted
+    with ``force=True``, prefixed by ``agent.log_prefix`` and a 💡 marker.
+
+    Returns:
+        ``True`` when a non-empty message was available, ``False`` otherwise.
+    """
+    guidance = _nous_entitlement_message(capability)
+    if guidance:
+        for hint in guidance.splitlines():
+            agent._vprint(f"{agent.log_prefix}   💡 {hint}", force=True)
+        return True
+    return False
+
+
+def _is_nous_inference_route(provider: str, base_url: str) -> bool:
+    """Tell whether a provider / base URL pair points at Nous inference.
+
+    The provider wins when, after trimming and lower-casing, it equals
+    ``"nous"``.  Otherwise the answer is whatever ``base_url_host_matches``
+    reports for either of the two Nous inference hostnames.  ``None`` or
+    empty inputs are treated as empty strings.
+    """
+    normalized_provider = (provider or "").strip().lower()
+    if normalized_provider != "nous":
+        url = str(base_url or "")
+        return base_url_host_matches(
+            url, "inference-api.nousresearch.com"
+        ) or base_url_host_matches(url, "inference.nousresearch.com")
+    return True
+
+
+def _billing_or_entitlement_message(
+    *,
+    capability: str,
+    provider: str,
+    base_url: str,
+    model: str,
+) -> str:
+    """Build user-facing guidance for a billing / entitlement failure.
+
+    Nous inference routes (per ``_is_nous_inference_route``) delegate to
+    ``_nous_entitlement_message(capability)``; ``capability`` is not used on
+    any other path.  Every other route gets a generic multi-line message
+    naming the provider and model (with placeholder labels when either is
+    blank), plus an OpenRouter credits link when ``base_url_host_matches``
+    accepts ``"openrouter.ai"`` for the base URL.
+
+    Returns:
+        The guidance text with lines joined by ``"\n"``.  On the Nous path
+        this is whatever ``_nous_entitlement_message`` returns.
+    """
+    if _is_nous_inference_route(provider, base_url):
+        return _nous_entitlement_message(capability)
+
+    who = (provider or "").strip() or "the selected provider"
+    what = (model or "").strip() or "the selected model"
+    headline = (
+        f"{who} reported that billing, credits, or account "
+        f"entitlement is exhausted for {what}."
+    )
+    guidance = [headline, "Add credits or update billing with that provider, then retry."]
+    on_openrouter = base_url_host_matches(str(base_url or ""), "openrouter.ai")
+    if on_openrouter:
+        guidance.append("OpenRouter credits: https://openrouter.ai/settings/credits")
+    guidance.append("You can switch providers temporarily with /model <model> --provider <provider>.")
+    return "\n".join(guidance)
+
+
+def _print_billing_or_entitlement_guidance(
+    agent,
+    *,
+    capability: str,
+    provider: str,
+    base_url: str,
+    model: str,
+) -> bool:
+    """Print billing / entitlement guidance through ``agent._vprint``.
+
+    The text comes from ``_billing_or_entitlement_message``.  Each line is
+    emitted with ``force=True``, prefixed by ``agent.log_prefix`` and a 💡
+    marker.
+
+    Returns:
+        ``True`` when a non-empty message was available, ``False`` otherwise.
+    """
+    guidance = _billing_or_entitlement_message(
+        capability=capability,
+        provider=provider,
+        base_url=base_url,
+        model=model,
+    )
+    if guidance:
+        for hint in guidance.splitlines():
+            agent._vprint(f"{agent.log_prefix}   💡 {hint}", force=True)
+        return True
+    return False
+
+
+def _try_refresh_nous_paid_entitlement_credentials(agent) -> bool:
+    """Refresh Nous runtime credentials after a fresh paid-entitlement check.
+
+    Fetches account info with ``force_fresh=True`` and attempts a refresh
+    only when ``paid_service_access`` is exactly ``True``.  The refresh is
+    requested with ``force=False`` and the legacy inference auth mode.
+    This helper is best-effort: any exception (including a failed
+    ``hermes_cli`` import) is logged at debug level and reported as
+    ``False``.
+
+    Returns:
+        The result of ``agent._try_refresh_nous_client_credentials(...)``,
+        or ``False`` when paid access is not confirmed or any step raises.
+    """
+    try:
+        from hermes_cli.auth import NOUS_INFERENCE_AUTH_MODE_LEGACY
+        from hermes_cli.nous_account import get_nous_portal_account_info
+
+        account_info = get_nous_portal_account_info(force_fresh=True)
+        if account_info.paid_service_access is not True:
+            return False
+        return agent._try_refresh_nous_client_credentials(
+            force=False,
+            inference_auth_mode=NOUS_INFERENCE_AUTH_MODE_LEGACY,
+        )
+    except Exception as exc:
+        # Never raise to the caller, but record why the refresh was skipped
+        # so a failed entitlement check can be diagnosed.
+        logger.debug("Nous paid-entitlement credential refresh failed: %s", exc)
+        return False
+
+
 def _restore_or_build_system_prompt(agent, system_message, conversation_history):
    """Restore the cached system prompt from the session DB or build it fresh.

@@ -1017,6 +1116,7 @@ def run_conversation(
        codex_auth_retry_attempted=False
        anthropic_auth_retry_attempted=False
        nous_auth_retry_attempted=False
+        nous_paid_entitlement_refresh_attempted=False
        copilot_auth_retry_attempted=False
        thinking_sig_retry_attempted = False
        invalid_encrypted_content_retry_attempted = False
@@ -1050,17 +1150,18 @@ def run_conversation(
                            f"Nous Portal rate limit active — "
                            f"resets in {_fmt_nous_remaining(_nous_remaining)}."
                        )
-                        agent._vprint(
-                            f"{agent.log_prefix}⏳ {_nous_msg} Trying fallback...",
-                            force=True,
+                        agent._buffer_vprint(
+                            f"⏳ {_nous_msg} Trying fallback..."
                        )
-                        agent._emit_status(f"⏳ {_nous_msg}")
+                        agent._buffer_status(f"⏳ {_nous_msg}")
                        if agent._try_activate_fallback():
                            retry_count = 0
                            compression_attempts = 0
                            primary_recovery_attempted = False
                            continue
-                        # No fallback available — return with clear message
+                        # No fallback available — surface buffered context
+                        # so user sees the rate-limit message that led here.
+                        agent._flush_status_buffer()
                        agent._persist_session(messages, conversation_history)
                        return {
                            "final_response": (
@@ -1082,6 +1183,14 @@ def run_conversation(

            try:
                agent._reset_stream_delivery_tracking()
+                # api_messages is built once, before this retry loop, while the
+                # primary provider is active.  A mid-conversation fallback can
+                # switch to a "require-side" provider (DeepSeek / Kimi / MiMo),
+                # i.e. one that rejects assistant turns lacking
+                # reasoning_content.  Re-apply the echo-back pad for the
+                # *current* provider here (an idempotent no-op unless the active
+                # provider needs it) so the fallback request isn't sent with
+                # stale, primary-shaped reasoning fields.
+                agent._reapply_reasoning_echo_for_provider(api_messages)
                api_kwargs = agent._build_api_kwargs(api_messages)
                if agent._force_ascii_payload:
                    _sanitize_structure_non_ascii(api_kwargs)
@@ -1275,9 +1384,10 @@ def run_conversation(
                            error_details.append("response.choices is empty")

                if response_invalid:
-                    # Stop spinner before printing error messages
+                    # Stop spinner silently — retry status is now buffered
+                    # and only surfaced if every retry+fallback exhausts.
                    if thinking_spinner:
-                        thinking_spinner.stop("(´;ω;`) oops, retrying...")
+                        thinking_spinner.stop("")
                        thinking_spinner = None
                    if agent.thinking_callback:
                        agent.thinking_callback("")
@@ -1290,7 +1400,7 @@ def run_conversation(
                    # rate-limit symptom.  Switch to fallback immediately
                    # rather than retrying with extended backoff.
                    if agent._fallback_index < len(agent._fallback_chain):
-                        agent._emit_status("⚠️ Empty/malformed response — switching to fallback...")
+                        agent._buffer_status("⚠️ Empty/malformed response — switching to fallback...")
                    if agent._try_activate_fallback():
                        retry_count = 0
                        compression_attempts = 0
@@ -1352,20 +1462,22 @@ def run_conversation(
                    else:
                        _failure_hint = f"response time {api_duration:.1f}s"

-                    agent._vprint(f"{agent.log_prefix}⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}", force=True)
-                    agent._vprint(f"{agent.log_prefix}   🏢 Provider: {provider_name}", force=True)
+                    agent._buffer_vprint(f"⚠️  Invalid API response (attempt {retry_count}/{max_retries}): {', '.join(error_details)}")
+                    agent._buffer_vprint(f"   🏢 Provider: {provider_name}")
                    cleaned_provider_error = agent._clean_error_message(error_msg)
-                    agent._vprint(f"{agent.log_prefix}   📝 Provider message: {cleaned_provider_error}", force=True)
-                    agent._vprint(f"{agent.log_prefix}   ⏱️  {_failure_hint}", force=True)
+                    agent._buffer_vprint(f"   📝 Provider message: {cleaned_provider_error}")
+                    agent._buffer_vprint(f"   ⏱️  {_failure_hint}")
                    
                    if retry_count >= max_retries:
                        # Try fallback before giving up
-                        agent._emit_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
+                        agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
                        if agent._try_activate_fallback():
                            retry_count = 0
                            compression_attempts = 0
                            primary_recovery_attempted = False
                            continue
+                        # Terminal — flush buffered retry trace so user sees what happened.
+                        agent._flush_status_buffer()
                        agent._emit_status(f"❌ Max retries ({max_retries}) exceeded for invalid responses. Giving up.")
                        logger.error(f"{agent.log_prefix}Invalid API response after {max_retries} retries.")
                        agent._persist_session(messages, conversation_history)
@@ -1379,7 +1491,7 @@ def run_conversation(
                    
                    # Backoff before retry — jittered exponential: 5s base, 120s cap
                    wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0)
-                    agent._vprint(f"{agent.log_prefix}⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...", force=True)
+                    agent._buffer_vprint(f"⏳ Retrying in {wait_time:.1f}s ({_failure_hint})...")
                    logger.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}")
                    
                    # Sleep in small increments to stay responsive to interrupts
@@ -1606,14 +1718,14 @@ def run_conversation(
                        if assistant_message is not None and _trunc_has_tool_calls:
                            if truncated_tool_call_retries < 1:
                                truncated_tool_call_retries += 1
-                                agent._vprint(
-                                    f"{agent.log_prefix}⚠️  Truncated tool call detected — retrying API call...",
-                                    force=True,
+                                agent._buffer_vprint(
+                                    f"⚠️  Truncated tool call detected — retrying API call..."
                                )
                                # Don't append the broken response to messages;
                                # just re-run the same API call from the current
                                # message state, giving the model another chance.
                                continue
+                            agent._flush_status_buffer()
                            agent._vprint(
                                f"{agent.log_prefix}⚠️  Truncated tool call response detected again — refusing to execute incomplete tool arguments.",
                                force=True,
@@ -1647,6 +1759,7 @@ def run_conversation(
                        }
                    else:
                        # First message was truncated - mark as failed
+                        agent._flush_status_buffer()
                        agent._vprint(f"{agent.log_prefix}❌ First response truncated - cannot recover", force=True)
                        agent._persist_session(messages, conversation_history)
                        return {
@@ -1668,10 +1781,19 @@ def run_conversation(
                    prompt_tokens = canonical_usage.prompt_tokens
                    completion_tokens = canonical_usage.output_tokens
                    total_tokens = canonical_usage.total_tokens
+                    # Forward canonical token + cache buckets so context engines
+                    # can make decisions on cache hit ratios / reasoning costs,
+                    # not just legacy aggregate tokens. Legacy keys stay for
+                    # back-compat with engines that only read prompt/completion/total.
                    usage_dict = {
                        "prompt_tokens": prompt_tokens,
                        "completion_tokens": completion_tokens,
                        "total_tokens": total_tokens,
+                        "input_tokens": canonical_usage.input_tokens,
+                        "output_tokens": canonical_usage.output_tokens,
+                        "cache_read_tokens": canonical_usage.cache_read_tokens,
+                        "cache_write_tokens": canonical_usage.cache_write_tokens,
+                        "reasoning_tokens": canonical_usage.reasoning_tokens,
                    }
                    agent.context_compressor.update_from_response(usage_dict)

@@ -1789,6 +1911,11 @@ def run_conversation(
                        )
                
                has_retried_429 = False  # Reset on success
+                # Note: don't clear the retry buffer here — an "API call
+                # success" only means we got bytes back, not that we got
+                # usable content. Empty responses still loop through the
+                # empty-retry path below; the buffer is cleared when
+                # genuinely successful content is detected later (~L4127).
                # Clear Nous rate limit state on successful request —
                # proves the limit has reset and other sessions can
                # resume hitting Nous.
@@ -1815,9 +1942,10 @@ def run_conversation(
                break

            except Exception as api_error:
-                # Stop spinner before printing error messages
+                # Stop spinner silently — retry status is buffered and
+                # only flushed when every retry+fallback is exhausted.
                if thinking_spinner:
-                    thinking_spinner.stop("(╥_╥) error, retrying...")
+                    thinking_spinner.stop("")
                    thinking_spinner = None
                if agent.thinking_callback:
                    agent.thinking_callback("")
@@ -1872,14 +2000,12 @@ def run_conversation(
                    if _surrogates_found or _is_surrogate_error:
                        agent._unicode_sanitization_passes += 1
                        if _surrogates_found:
-                            agent._vprint(
-                                f"{agent.log_prefix}⚠️  Stripped invalid surrogate characters from messages. Retrying...",
-                                force=True,
+                            agent._buffer_vprint(
+                                f"⚠️  Stripped invalid surrogate characters from messages. Retrying..."
                            )
                        else:
-                            agent._vprint(
-                                f"{agent.log_prefix}⚠️  Surrogate encoding error — retrying after full-payload sanitization...",
-                                force=True,
+                            agent._buffer_vprint(
+                                f"⚠️  Surrogate encoding error — retrying after full-payload sanitization..."
                            )
                        continue
                    if _is_ascii_codec:
@@ -2093,6 +2219,23 @@ def run_conversation(
                    classified.should_rotate_credential, classified.should_fallback,
                )

+                if (
+                    classified.reason == FailoverReason.billing
+                    and _is_nous_inference_route(
+                        getattr(agent, "provider", "") or "",
+                        getattr(agent, "base_url", "") or "",
+                    )
+                    and not nous_paid_entitlement_refresh_attempted
+                ):
+                    nous_paid_entitlement_refresh_attempted = True
+                    if _try_refresh_nous_paid_entitlement_credentials(agent):
+                        agent._vprint(
+                            f"{agent.log_prefix}🔐 Nous paid access verified — "
+                            "refreshed runtime credentials and retrying request...",
+                            force=True,
+                        )
+                        continue
+
                recovered_with_pool, has_retried_429 = agent._recover_with_credential_pool(
                    status_code=status_code,
                    has_retried_429=has_retried_429,
@@ -2190,7 +2333,7 @@ def run_conversation(
                    codex_auth_retry_attempted = True
                    if agent._try_refresh_codex_client_credentials(force=True):
                        _label = "xAI OAuth" if agent.provider == "xai-oauth" else "Codex"
-                        agent._vprint(f"{agent.log_prefix}🔐 {_label} auth refreshed after 401. Retrying request...")
+                        agent._buffer_vprint(f"🔐 {_label} auth refreshed after 401. Retrying request...")
                        continue
                if (
                    agent.api_mode == "chat_completions"
@@ -2217,7 +2360,8 @@ def run_conversation(
                    print(f"{agent.log_prefix}🔐 Nous 401 — Portal authentication failed.")
                    if _body_text:
                        print(f"{agent.log_prefix}   Response: {_body_text}")
-                    print(f"{agent.log_prefix}   Most likely: Portal OAuth expired, account out of credits, or agent key revoked.")
+                    if not _print_nous_entitlement_guidance(agent, "Nous model access"):
+                        print(f"{agent.log_prefix}   Most likely: Portal OAuth expired, account out of credits, or agent key revoked.")
                    print(f"{agent.log_prefix}   Troubleshooting:")
                    print(f"{agent.log_prefix}     • Re-authenticate: hermes auth add nous")
                    print(f"{agent.log_prefix}     • Check credits / billing: https://portal.nousresearch.com")
@@ -2230,7 +2374,7 @@ def run_conversation(
                ):
                    copilot_auth_retry_attempted = True
                    if agent._try_refresh_copilot_client_credentials():
-                        agent._vprint(f"{agent.log_prefix}🔐 Copilot credentials refreshed after 401. Retrying request...")
+                        agent._buffer_vprint(f"🔐 Copilot credentials refreshed after 401. Retrying request...")
                        continue
                if (
                    agent.api_mode == "anthropic_messages"
@@ -2239,8 +2383,8 @@ def run_conversation(
                    and not anthropic_auth_retry_attempted
                ):
                    anthropic_auth_retry_attempted = True
-                    from agent.anthropic_adapter import _is_oauth_token
-                    from agent.azure_identity_adapter import is_token_provider
+                    _is_oauth_token = _registries.get_provider_service("anthropic", "_is_oauth_token")
+                    is_token_provider = _registries.get_provider_service("azure", "is_token_provider")
                    if agent._try_refresh_anthropic_client_credentials():
                        print(f"{agent.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...")
                        continue
@@ -2257,7 +2401,7 @@ def run_conversation(
                        print(f"{agent.log_prefix}   Run `hermes doctor` for credential-chain diagnostics, or")
                        print(f"{agent.log_prefix}   `az login` if your developer session expired.")
                    else:
-                        auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
+                        auth_method = "Bearer (OAuth/setup-token)" if (_is_oauth_token is not None and _is_oauth_token(key)) else "x-api-key (API key)"
                        print(f"{agent.log_prefix}   Auth method: {auth_method}")
                        print(f"{agent.log_prefix}   Token prefix: {key[:12]}..." if isinstance(key, str) and len(key) > 12 else f"{agent.log_prefix}   Token: (empty or short)")
                    print(f"{agent.log_prefix}   Troubleshooting:")
@@ -2405,41 +2549,37 @@ def run_conversation(
                _base = getattr(agent, "base_url", "unknown")
                _model = getattr(agent, "model", "unknown")
                _status_code_str = f" [HTTP {status_code}]" if status_code else ""
-                agent._vprint(f"{agent.log_prefix}⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}{_status_code_str}", force=True)
-                agent._vprint(f"{agent.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
-                agent._vprint(f"{agent.log_prefix}   🌐 Endpoint: {_base}", force=True)
-                agent._vprint(f"{agent.log_prefix}   📝 Error: {_error_summary}", force=True)
+                agent._buffer_vprint(f"⚠️  API call failed (attempt {retry_count}/{max_retries}): {error_type}{_status_code_str}")
+                agent._buffer_vprint(f"   🔌 Provider: {_provider}  Model: {_model}")
+                agent._buffer_vprint(f"   🌐 Endpoint: {_base}")
+                agent._buffer_vprint(f"   📝 Error: {_error_summary}")
                if status_code and status_code < 500:
                    _err_body = getattr(api_error, "body", None)
                    _err_body_str = str(_err_body)[:300] if _err_body else None
                    if _err_body_str:
-                        agent._vprint(f"{agent.log_prefix}   📋 Details: {_err_body_str}", force=True)
-                agent._vprint(f"{agent.log_prefix}   ⏱️  Elapsed: {elapsed_time:.2f}s  Context: {len(api_messages)} msgs, ~{approx_tokens:,} tokens")
+                        agent._buffer_vprint(f"   📋 Details: {_err_body_str}")
+                agent._buffer_vprint(f"   ⏱️  Elapsed: {elapsed_time:.2f}s  Context: {len(api_messages)} msgs, ~{approx_tokens:,} tokens")

                # Actionable hint for OpenRouter "no tool endpoints" error.
-                # This fires regardless of whether fallback succeeds — the
-                # user needs to know WHY their model failed so they can fix
-                # their provider routing, not just silently fall back.
+                # Buffered like the rest of the retry trace — surfaced only
+                # if every retry+fallback exhausts.  Avoids spamming users
+                # who recover automatically via fallback.
                if (
                    agent._is_openrouter_url()
                    and "support tool use" in error_msg
                ):
-                    agent._vprint(
-                        f"{agent.log_prefix}   💡 No OpenRouter providers for {_model} support tool calling with your current settings.",
-                        force=True,
+                    agent._buffer_vprint(
+                        f"   💡 No OpenRouter providers for {_model} support tool calling with your current settings."
                    )
                    if agent.providers_allowed:
-                        agent._vprint(
-                            f"{agent.log_prefix}      Your provider_routing.only restriction is filtering out tool-capable providers.",
-                            force=True,
+                        agent._buffer_vprint(
+                            f"      Your provider_routing.only restriction is filtering out tool-capable providers."
                        )
-                        agent._vprint(
-                            f"{agent.log_prefix}      Try removing the restriction or adding providers that support tools for this model.",
-                            force=True,
+                        agent._buffer_vprint(
+                            f"      Try removing the restriction or adding providers that support tools for this model."
                        )
-                    agent._vprint(
-                        f"{agent.log_prefix}      Check which providers support tools: https://openrouter.ai/models/{_model}",
-                        force=True,
+                    agent._buffer_vprint(
+                        f"      Check which providers support tools: https://openrouter.ai/models/{_model}"
                    )

                # Check for interrupt before deciding to retry
@@ -2489,11 +2629,10 @@ def run_conversation(
                            # user later enables extra usage the 1M limit
                            # should come back automatically.
                            compressor._context_probe_persistable = False
-                        agent._vprint(
-                            f"{agent.log_prefix}⚠️  Anthropic long-context tier "
+                        agent._buffer_vprint(
+                            f"⚠️  Anthropic long-context tier "
                            f"requires extra usage — reducing context: "
-                            f"{old_ctx:,} → {_reduced_ctx:,} tokens",
-                            force=True,
+                            f"{old_ctx:,} → {_reduced_ctx:,} tokens"
                        )

                    compression_attempts += 1
@@ -2509,7 +2648,7 @@ def run_conversation(
                        # messages to the new session, not skipping them.
                        conversation_history = None
                        if len(messages) < original_len or old_ctx > _reduced_ctx:
-                            agent._emit_status(
+                            agent._buffer_status(
                                f"🗜️ Context reduced to {_reduced_ctx:,} tokens "
                                f"(was {old_ctx:,}), retrying..."
                            )
@@ -2538,7 +2677,12 @@ def run_conversation(
                        base_url=getattr(agent, "base_url", None),
                    )
                    if not pool_may_recover:
-                        agent._emit_status("⚠️ Rate limited — switching to fallback provider...")
+                        if classified.reason == FailoverReason.billing:
+                            agent._buffer_status(
+                                "⚠️ Billing or credits exhausted — switching to fallback provider..."
+                            )
+                        else:
+                            agent._buffer_status("⚠️ Rate limited — switching to fallback provider...")
                        if agent._try_activate_fallback(reason=classified.reason):
                            retry_count = 0
                            compression_attempts = 0
@@ -2650,6 +2794,8 @@ def run_conversation(
                if is_payload_too_large:
                    compression_attempts += 1
                    if compression_attempts > max_compression_attempts:
+                        # Terminal — surface the buffered retry trace.
+                        agent._flush_status_buffer()
                        agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached for payload-too-large error.", force=True)
                        agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                        logger.error(f"{agent.log_prefix}413 compression failed after {max_compression_attempts} attempts.")
@@ -2663,7 +2809,7 @@ def run_conversation(
                            "failed": True,
                            "compression_exhausted": True,
                        }
-                    agent._emit_status(f"⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")
+                    agent._buffer_status(f"⚠️  Request payload too large (413) — compression attempt {compression_attempts}/{max_compression_attempts}...")

                    original_len = len(messages)
                    messages, active_system_prompt = agent._compress_context(
@@ -2676,11 +2822,14 @@ def run_conversation(
                    conversation_history = None

                    if len(messages) < original_len:
-                        agent._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                        agent._buffer_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
                        time.sleep(2)  # Brief pause between compression retries
                        restart_with_compressed_messages = True
                        break
                    else:
+                        # Terminal — surface buffered context so the user
+                        # sees what compression attempts were made.
+                        agent._flush_status_buffer()
                        agent._vprint(f"{agent.log_prefix}❌ Payload too large and cannot compress further.", force=True)
                        agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                        logger.error(f"{agent.log_prefix}413 payload too large. Cannot compress further.")
@@ -2724,16 +2873,16 @@ def run_conversation(
                        # touching context_length or triggering compression.
                        safe_out = max(1, available_out - 64)  # small safety margin
                        agent._ephemeral_max_output_tokens = safe_out
-                        agent._vprint(
-                            f"{agent.log_prefix}⚠️  Output cap too large for current prompt — "
+                        agent._buffer_vprint(
+                            f"⚠️  Output cap too large for current prompt — "
                            f"retrying with max_tokens={safe_out:,} "
-                            f"(available_tokens={available_out:,}; context_length unchanged at {old_ctx:,})",
-                            force=True,
+                            f"(available_tokens={available_out:,}; context_length unchanged at {old_ctx:,})"
                        )
                        # Still count against compression_attempts so we don't
                        # loop forever if the error keeps recurring.
                        compression_attempts += 1
                        if compression_attempts > max_compression_attempts:
+                            agent._flush_status_buffer()
                            agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
                            agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                            logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
@@ -2750,9 +2899,13 @@ def run_conversation(
                        restart_with_compressed_messages = True
                        break

-                    # Error is about the INPUT being too large — reduce context_length.
-                    # Try to parse the actual limit from the error message
-                    parsed_limit = parse_context_limit_from_error(error_msg)
+                    # Error is about the INPUT being too large.  Only reduce
+                    # context_length when the provider explicitly reports the
+                    # real lower limit.  If the provider only says "input
+                    # exceeds the context window", keep the configured window
+                    # and try compression; guessing probe tiers can incorrectly
+                    # turn a user-configured 1M window into 256K/128K/64K.
+                    new_ctx = get_context_length_from_provider_error(error_msg, old_ctx)
                    _provider_lower = (getattr(agent, "provider", "") or "").lower()
                    _base_lower = (getattr(agent, "base_url", "") or "").rstrip("/").lower()
                    is_minimax_provider = (
@@ -2764,24 +2917,12 @@ def run_conversation(
                    )
                    minimax_delta_only_overflow = (
                        is_minimax_provider
-                        and parsed_limit is None
+                        and new_ctx is None
                        and "context window exceeds limit (" in error_msg
                    )
-                    if parsed_limit and parsed_limit < old_ctx:
-                        new_ctx = parsed_limit
-                        agent._vprint(f"{agent.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
-                    elif minimax_delta_only_overflow:
-                        new_ctx = old_ctx
-                        agent._vprint(
-                            f"{agent.log_prefix}Provider reported overflow amount only; "
-                            f"keeping context_length at {old_ctx:,} tokens and compressing.",
-                            force=True,
-                        )
-                    else:
-                        # Step down to the next probe tier
-                        new_ctx = get_next_probe_tier(old_ctx)

-                    if new_ctx and new_ctx < old_ctx:
+                    if new_ctx is not None:
+                        agent._buffer_vprint(f"Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})")
                        compressor.update_model(
                            model=agent.model,
                            context_length=new_ctx,
@@ -2791,23 +2932,26 @@ def run_conversation(
                            api_mode=agent.api_mode,
                        )
                        # Context probing flags — only set on built-in
-                        # compressor (plugin engines manage their own).
+                        # compressor (plugin engines manage their own).  This
+                        # value came from the provider, so it is safe to cache.
                        if hasattr(compressor, "_context_probed"):
                            compressor._context_probed = True
-                            # Only persist limits parsed from the provider's
-                            # error message (a real number).  Guessed fallback
-                            # tiers from get_next_probe_tier() should stay
-                            # in-memory only — persisting them pollutes the
-                            # cache with wrong values.
-                            compressor._context_probe_persistable = bool(
-                                parsed_limit and parsed_limit == new_ctx
-                            )
-                        agent._vprint(f"{agent.log_prefix}⚠️  Context length exceeded — stepping down: {old_ctx:,} → {new_ctx:,} tokens", force=True)
+                            compressor._context_probe_persistable = True
+                        agent._buffer_vprint(f"⚠️  Context length exceeded — using provider limit: {old_ctx:,} → {new_ctx:,} tokens")
+                    elif minimax_delta_only_overflow:
+                        agent._buffer_vprint(
+                            f"Provider reported overflow amount only; "
+                            f"keeping context_length at {old_ctx:,} tokens and compressing."
+                        )
                    else:
-                        agent._vprint(f"{agent.log_prefix}⚠️  Context length exceeded at minimum tier — attempting compression...", force=True)
+                        agent._buffer_vprint(
+                            f"⚠️  Context length exceeded, but provider did not report a max context length; "
+                            f"keeping context_length at {old_ctx:,} tokens and compressing."
+                        )

                    compression_attempts += 1
                    if compression_attempts > max_compression_attempts:
+                        agent._flush_status_buffer()
                        agent._vprint(f"{agent.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
                        agent._vprint(f"{agent.log_prefix}   💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
                        logger.error(f"{agent.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
@@ -2821,7 +2965,7 @@ def run_conversation(
                            "failed": True,
                            "compression_exhausted": True,
                        }
-                    agent._emit_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...")
+                    agent._buffer_status(f"🗜️ Context too large (~{approx_tokens:,} tokens) — compressing ({compression_attempts}/{max_compression_attempts})...")

                    original_len = len(messages)
                    messages, active_system_prompt = agent._compress_context(
@@ -2835,12 +2979,13 @@ def run_conversation(

                    if len(messages) < original_len or new_ctx and new_ctx < old_ctx:
                        if len(messages) < original_len:
-                            agent._emit_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
+                            agent._buffer_status(f"🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
                        time.sleep(2)  # Brief pause between compression retries
                        restart_with_compressed_messages = True
                        break
                    else:
                        # Can't compress further and already at minimum tier
+                        agent._flush_status_buffer()
                        agent._vprint(f"{agent.log_prefix}❌ Context length exceeded and cannot compress further.", force=True)
                        agent._vprint(f"{agent.log_prefix}   💡 The conversation has accumulated too much content. Try /new to start fresh, or /compress to manually trigger compression.", force=True)
                        logger.error(f"{agent.log_prefix}Context length exceeded: {approx_tokens:,} tokens. Cannot compress further.")
@@ -2929,7 +3074,10 @@ def run_conversation(
                if is_client_error:
                    # Try fallback before aborting — a different provider
                    # may not have the same issue (rate limit, auth, etc.)
-                    agent._emit_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
+                    if classified.reason == FailoverReason.content_policy_blocked:
+                        agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
+                    else:
+                        agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
                    if agent._try_activate_fallback():
                        retry_count = 0
                        compression_attempts = 0
@@ -2939,16 +3087,38 @@ def run_conversation(
                        agent._dump_api_request_debug(
                            api_kwargs, reason="non_retryable_client_error", error=api_error,
                        )
-                    agent._emit_status(
-                        f"❌ Non-retryable error (HTTP {status_code}): "
-                        f"{agent._summarize_api_error(api_error)}"
-                    )
+                    # Terminal — flush buffered context so the user sees
+                    # what was tried before the abort.
+                    agent._flush_status_buffer()
+                    if classified.reason == FailoverReason.content_policy_blocked:
+                        agent._emit_status(
+                            f"❌ Provider safety filter blocked this request: "
+                            f"{agent._summarize_api_error(api_error)}"
+                        )
+                    else:
+                        agent._emit_status(
+                            f"❌ Non-retryable error (HTTP {status_code}): "
+                            f"{agent._summarize_api_error(api_error)}"
+                        )
                    agent._vprint(f"{agent.log_prefix}❌ Non-retryable client error (HTTP {status_code}). Aborting.", force=True)
                    agent._vprint(f"{agent.log_prefix}   🔌 Provider: {_provider}  Model: {_model}", force=True)
                    agent._vprint(f"{agent.log_prefix}   🌐 Endpoint: {_base}", force=True)
                    # Actionable guidance for common auth errors
                    if classified.is_auth or classified.reason == FailoverReason.billing:
-                        if _provider in {"openai-codex", "xai-oauth", "nous"} and status_code == 401:
+                        if classified.reason == FailoverReason.billing and _print_billing_or_entitlement_guidance(
+                            agent,
+                            capability="model access",
+                            provider=_provider,
+                            base_url=str(_base),
+                            model=_model,
+                        ):
+                            pass
+                        elif _provider == "nous" and _print_nous_entitlement_guidance(
+                            agent,
+                            "Nous model access",
+                        ):
+                            pass
+                        elif _provider in {"openai-codex", "xai-oauth", "nous"} and status_code == 401:
                            if _provider == "openai-codex":
                                agent._vprint(f"{agent.log_prefix}   💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True)
                                agent._vprint(f"{agent.log_prefix}      refreshed by another client (Codex CLI, VS Code). To fix:", force=True)
@@ -2976,6 +3146,28 @@ def run_conversation(
                                agent._vprint(f"{agent.log_prefix}      • Check credits: https://openrouter.ai/settings/credits", force=True)
                    else:
                        agent._vprint(f"{agent.log_prefix}   💡 This type of error won't be fixed by retrying.", force=True)
+                    # Content-policy blocks deserve their own actionable
+                    # guidance — neither "fix your API key" nor "retry won't
+                    # help" tells the user what to actually do. The provider
+                    # has refused this specific prompt, so the recovery is
+                    # either a rephrase or routing to a different model.
+                    if classified.reason == FailoverReason.content_policy_blocked:
+                        agent._vprint(
+                            f"{agent.log_prefix}   💡 The provider's safety filter rejected this specific prompt.",
+                            force=True,
+                        )
+                        agent._vprint(
+                            f"{agent.log_prefix}      • Try rephrasing the request, narrowing the context, or splitting into smaller steps.",
+                            force=True,
+                        )
+                        agent._vprint(
+                            f"{agent.log_prefix}      • Configure a fallback provider so future blocks route automatically:",
+                            force=True,
+                        )
+                        agent._vprint(
+                            f"{agent.log_prefix}        hermes fallback add   (interactive picker — same as `hermes model`)",
+                            force=True,
+                        )
                    logger.error(f"{agent.log_prefix}Non-retryable client error: {api_error}")
                    # Skip session persistence when the error is likely
                    # context-overflow related (status 400 + large session).
@@ -2990,6 +3182,23 @@ def run_conversation(
                        )
                    else:
                        agent._persist_session(messages, conversation_history)
+                    if classified.reason == FailoverReason.content_policy_blocked:
+                        _summary = agent._summarize_api_error(api_error)
+                        _policy_response = (
+                            f"⚠️  The model provider's safety filter blocked this request "
+                            f"(not a Hermes/gateway failure).\n\n"
+                            f"Provider message: {_summary}\n\n"
+                            f"Try rephrasing the request, narrowing the context, or "
+                            f"adding a fallback provider with `hermes fallback add`."
+                        )
+                        return {
+                            "final_response": _policy_response,
+                            "messages": messages,
+                            "api_calls": api_call_count,
+                            "completed": False,
+                            "failed": True,
+                            "error": f"content_policy_blocked: {_summary}",
+                        }
                    return {
                        "final_response": None,
                        "messages": messages,
@@ -3011,14 +3220,32 @@ def run_conversation(
                        retry_count = 0
                        continue
                    # Try fallback before giving up entirely
-                    agent._emit_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
+                    agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
                    if agent._try_activate_fallback():
                        retry_count = 0
                        compression_attempts = 0
                        primary_recovery_attempted = False
                        continue
+                    # Terminal — flush buffered retry/fallback trace.
+                    agent._flush_status_buffer()
                    _final_summary = agent._summarize_api_error(api_error)
-                    if is_rate_limited:
+                    _billing_guidance = ""
+                    if classified.reason == FailoverReason.billing:
+                        agent._emit_status(f"❌ Billing or credits exhausted — {_final_summary}")
+                        _billing_guidance = _billing_or_entitlement_message(
+                            capability="model access",
+                            provider=_provider,
+                            base_url=str(_base),
+                            model=_model,
+                        )
+                        _print_billing_or_entitlement_guidance(
+                            agent,
+                            capability="model access",
+                            provider=_provider,
+                            base_url=str(_base),
+                            model=_model,
+                        )
+                    elif is_rate_limited:
                        agent._emit_status(f"❌ Rate limited after {max_retries} retries — {_final_summary}")
                    else:
                        agent._emit_status(f"❌ API failed after {max_retries} retries — {_final_summary}")
@@ -3063,7 +3290,12 @@ def run_conversation(
                            api_kwargs, reason="max_retries_exhausted", error=api_error,
                        )
                    agent._persist_session(messages, conversation_history)
-                    _final_response = f"API call failed after {max_retries} retries: {_final_summary}"
+                    if classified.reason == FailoverReason.billing:
+                        _final_response = f"Billing or credits exhausted: {_final_summary}"
+                        if _billing_guidance:
+                            _final_response += f"\n\n{_billing_guidance}"
+                    else:
+                        _final_response = f"API call failed after {max_retries} retries: {_final_summary}"
                    if _is_stream_drop:
                        _final_response += (
                            "\n\nThe provider's stream connection keeps "
@@ -3095,9 +3327,9 @@ def run_conversation(
                                pass
                wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0)
                if is_rate_limited:
-                    agent._emit_status(f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries})...")
+                    agent._buffer_status(f"⏱️ Rate limited. Waiting {wait_time:.1f}s (attempt {retry_count + 1}/{max_retries})...")
                else:
-                    agent._emit_status(f"⏳ Retrying in {wait_time:.1f}s (attempt {retry_count}/{max_retries})...")
+                    agent._buffer_status(f"⏳ Retrying in {wait_time:.1f}s (attempt {retry_count}/{max_retries})...")
                logger.warning(
                    "Retrying API call in %ss (attempt %s/%s) %s error=%s",
                    wait_time,
@@ -3256,14 +3488,15 @@ def run_conversation(
            if has_incomplete_scratchpad(assistant_message.content or ""):
                agent._incomplete_scratchpad_retries += 1
                
-                agent._vprint(f"{agent.log_prefix}⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
+                agent._buffer_vprint(f"⚠️  Incomplete <REASONING_SCRATCHPAD> detected (opened but never closed)")
                
                if agent._incomplete_scratchpad_retries <= 2:
-                    agent._vprint(f"{agent.log_prefix}🔄 Retrying API call ({agent._incomplete_scratchpad_retries}/2)...")
+                    agent._buffer_vprint(f"🔄 Retrying API call ({agent._incomplete_scratchpad_retries}/2)...")
                    # Don't add the broken message, just retry
                    continue
                else:
                    # Max retries - discard this turn and save as partial
+                    agent._flush_status_buffer()
                    agent._vprint(f"{agent.log_prefix}❌ Max retries (2) for incomplete scratchpad. Saving as partial.", force=True)
                    agent._incomplete_scratchpad_retries = 0
                    
@@ -3371,9 +3604,10 @@ def run_conversation(
                    available = ", ".join(sorted(agent.valid_tool_names))
                    invalid_name = invalid_tool_calls[0]
                    invalid_preview = invalid_name[:80] + "..." if len(invalid_name) > 80 else invalid_name
-                    agent._vprint(f"{agent.log_prefix}⚠️  Unknown tool '{invalid_preview}' — sending error to model for agent-correction ({agent._invalid_tool_retries}/3)")
+                    agent._buffer_vprint(f"⚠️  Unknown tool '{invalid_preview}' — sending error to model for agent-correction ({agent._invalid_tool_retries}/3)")

                    if agent._invalid_tool_retries >= 3:
+                        agent._flush_status_buffer()
                        agent._vprint(f"{agent.log_prefix}❌ Max retries (3) for invalid tool calls exceeded. Stopping as partial.", force=True)
                        agent._invalid_tool_retries = 0
                        agent._persist_session(messages, conversation_history)
@@ -3457,16 +3691,16 @@ def run_conversation(
                    agent._invalid_json_retries += 1

                    tool_name, error_msg = invalid_json_args[0]
-                    agent._vprint(f"{agent.log_prefix}⚠️  Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")
+                    agent._buffer_vprint(f"⚠️  Invalid JSON in tool call arguments for '{tool_name}': {error_msg}")

                    if agent._invalid_json_retries < 3:
-                        agent._vprint(f"{agent.log_prefix}🔄 Retrying API call ({agent._invalid_json_retries}/3)...")
+                        agent._buffer_vprint(f"🔄 Retrying API call ({agent._invalid_json_retries}/3)...")
                        # Don't add anything to messages, just retry the API call
                        continue
                    else:
                        # Instead of returning partial, inject tool error results so the model can recover.
                        # Using tool results (not user messages) preserves role alternation.
-                        agent._vprint(f"{agent.log_prefix}⚠️  Injecting recovery tool results for invalid JSON...")
+                        agent._buffer_vprint(f"⚠️  Injecting recovery tool results for invalid JSON...")
                        agent._invalid_json_retries = 0  # Reset for next attempt
                        
                        # Append the assistant message with its (broken) tool_calls
@@ -3774,7 +4008,7 @@ def run_conversation(
                            "Empty response after tool calls — nudging model "
                            "to continue processing"
                        )
-                        agent._emit_status(
+                        agent._buffer_status(
                            "⚠️ Model returned empty after tool calls — "
                            "nudging to continue"
                        )
@@ -3820,7 +4054,7 @@ def run_conversation(
                            "prefilling to continue (%d/2)",
                            agent._thinking_prefill_retries,
                        )
-                        agent._emit_status(
+                        agent._buffer_status(
                            f"↻ Thinking-only response — prefilling to continue "
                            f"({agent._thinking_prefill_retries}/2)"
                        )
@@ -3855,7 +4089,7 @@ def run_conversation(
                            "retry %d/3 (model=%s)",
                            agent._empty_content_retries, agent.model,
                        )
-                        agent._emit_status(
+                        agent._buffer_status(
                            f"⚠️ Empty response from model — retrying "
                            f"({agent._empty_content_retries}/3)"
                        )
@@ -3874,13 +4108,13 @@ def run_conversation(
                            agent._empty_content_retries, agent.model,
                            agent.provider,
                        )
-                        agent._emit_status(
+                        agent._buffer_status(
                            "⚠️ Model returning empty responses — "
                            "switching to fallback provider..."
                        )
                        if agent._try_activate_fallback():
                            agent._empty_content_retries = 0
-                            agent._emit_status(
+                            agent._buffer_status(
                                f"↻ Switched to fallback: {agent.model} "
                                f"({agent.provider})"
                            )
@@ -3894,6 +4128,9 @@ def run_conversation(
                    # Exhausted retries and fallback chain (or no
                    # fallback configured).  Fall through to the
                    # "(empty)" terminal.
+                    # Surface the buffered retry/fallback trace so the
+                    # user can see what was attempted before "(empty)".
+                    agent._flush_status_buffer()
                    _turn_exit_reason = "empty_response_exhausted"
                    reasoning_text = agent._extract_reasoning(assistant_message)
                    agent._drop_trailing_empty_response_scaffolding(messages)
@@ -3938,6 +4175,9 @@ def run_conversation(
                # Reset retry counter/signature on successful content
                agent._empty_content_retries = 0
                agent._thinking_prefill_retries = 0
+                # Successful content reached — drop any buffered retry
+                # status from earlier failed attempts in this turn.
+                agent._clear_status_buffer()

                if (
                    agent.api_mode == "codex_responses"
@@ -4321,6 +4561,7 @@ def run_conversation(
        original_user_message=original_user_message,
        final_response=final_response,
        interrupted=interrupted,
+        messages=messages,
    )

    # Background memory/skill review — runs AFTER the response is delivered
@@ -458,43 +458,6 @@ class CredentialPool:
        self._persist()
        return updated

-    def _sync_anthropic_entry_from_credentials_file(self, entry: PooledCredential) -> PooledCredential:
-        """Sync a claude_code pool entry from ~/.claude/.credentials.json if tokens differ.
-
-        OAuth refresh tokens are single-use. When something external (e.g.
-        Claude Code CLI, or another profile's pool) refreshes the token, it
-        writes the new pair to ~/.claude/.credentials.json. The pool entry's
-        refresh token becomes stale. This method detects that and syncs.
-        """
-        if self.provider != "anthropic" or entry.source != "claude_code":
-            return entry
-        try:
-            from agent.anthropic_adapter import read_claude_code_credentials
-            creds = read_claude_code_credentials()
-            if not creds:
-                return entry
-            file_refresh = creds.get("refreshToken", "")
-            file_access = creds.get("accessToken", "")
-            file_expires = creds.get("expiresAt", 0)
-            # If the credentials file has a different token pair, sync it
-            if file_refresh and file_refresh != entry.refresh_token:
-                logger.debug("Pool entry %s: syncing tokens from credentials file (refresh token changed)", entry.id)
-                updated = replace(
-                    entry,
-                    access_token=file_access,
-                    refresh_token=file_refresh,
-                    expires_at_ms=file_expires,
-                    last_status=None,
-                    last_status_at=None,
-                    last_error_code=None,
-                )
-                self._replace_entry(entry, updated)
-                self._persist()
-                return updated
-        except Exception as exc:
-            logger.debug("Failed to sync from credentials file: %s", exc)
-        return entry
-
    def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
        """Sync a Codex device_code pool entry from auth.json if tokens differ.

@@ -784,32 +747,11 @@ class CredentialPool:
            return None

        try:
-            if self.provider == "anthropic":
-                from agent.anthropic_adapter import refresh_anthropic_oauth_pure
-
-                refreshed = refresh_anthropic_oauth_pure(
-                    entry.refresh_token,
-                    use_json=entry.source.endswith("hermes_pkce"),
-                )
-                updated = replace(
-                    entry,
-                    access_token=refreshed["access_token"],
-                    refresh_token=refreshed["refresh_token"],
-                    expires_at_ms=refreshed["expires_at_ms"],
-                )
-                # Keep ~/.claude/.credentials.json in sync so that the
-                # fallback path (resolve_anthropic_token) and other profiles
-                # see the latest tokens.
-                if entry.source == "claude_code":
-                    try:
-                        from agent.anthropic_adapter import _write_claude_code_credentials
-                        _write_claude_code_credentials(
-                            refreshed["access_token"],
-                            refreshed["refresh_token"],
-                            refreshed["expires_at_ms"],
-                        )
-                    except Exception as wexc:
-                        logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
+            # ── Plugin-registered credential pool hooks ──
+            from agent.plugin_registries import registries as _cph_reg2
+            _hook = _cph_reg2.get_credential_pool_hook(self.provider)
+            if _hook is not None and _hook.refresh_oauth is not None:
+                updated = _hook.refresh_oauth(entry, pool=self)
            elif self.provider == "openai-codex":
                # Adopt fresher tokens from auth.json before spending the
                # refresh_token — single-use tokens consumed by another Hermes
@@ -864,46 +806,18 @@ class CredentialPool:
                return entry
        except Exception as exc:
            logger.debug("Credential refresh failed for %s/%s: %s", self.provider, entry.id, exc)
-            # For anthropic claude_code entries: the refresh token may have been
-            # consumed by another process. Check if ~/.claude/.credentials.json
-            # has a newer token pair and retry once.
-            if self.provider == "anthropic" and entry.source == "claude_code":
-                synced = self._sync_anthropic_entry_from_credentials_file(entry)
-                if synced.refresh_token != entry.refresh_token:
-                    logger.debug("Retrying refresh with synced token from credentials file")
-                    try:
-                        from agent.anthropic_adapter import refresh_anthropic_oauth_pure
-                        refreshed = refresh_anthropic_oauth_pure(
-                            synced.refresh_token,
-                            use_json=synced.source.endswith("hermes_pkce"),
-                        )
-                        updated = replace(
-                            synced,
-                            access_token=refreshed["access_token"],
-                            refresh_token=refreshed["refresh_token"],
-                            expires_at_ms=refreshed["expires_at_ms"],
-                            last_status=STATUS_OK,
-                            last_status_at=None,
-                            last_error_code=None,
-                        )
-                        self._replace_entry(synced, updated)
-                        self._persist()
-                        try:
-                            from agent.anthropic_adapter import _write_claude_code_credentials
-                            _write_claude_code_credentials(
-                                refreshed["access_token"],
-                                refreshed["refresh_token"],
-                                refreshed["expires_at_ms"],
-                            )
-                        except Exception as wexc:
-                            logger.debug("Failed to write refreshed token to credentials file (retry path): %s", wexc)
-                        return updated
-                    except Exception as retry_exc:
-                        logger.debug("Retry refresh also failed: %s", retry_exc)
-                elif not self._entry_needs_refresh(synced):
-                    # Credentials file had a valid (non-expired) token — use it directly
-                    logger.debug("Credentials file has valid token, using without refresh")
-                    return synced
+            # ── Plugin-registered credential pool hooks ──
+            # A hook's refresh_oauth is expected to retry-with-sync on its own; this
+            # is a last-chance sync for providers whose hook exposes a credentials file.
+            from agent.plugin_registries import registries as _cph_reg3
+            _hook = _cph_reg3.get_credential_pool_hook(self.provider)
+            if _hook is not None and _hook.sync_from_credentials_file is not None:
+                # Give the hook a chance to sync from external file
+                synced = _hook.sync_from_credentials_file(entry)
+                if synced is not entry:
+                    entry = synced
+                    self._replace_entry(entry, synced)
+                    self._persist()
            # For xai-oauth: same race as nous — another process may have
            # consumed the refresh token between our proactive sync and the
            # HTTP call.  Re-check auth.json and adopt the fresh tokens if
@@ -1124,10 +1038,11 @@ class CredentialPool:
    def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
        if entry.auth_type != AUTH_TYPE_OAUTH:
            return False
-        if self.provider == "anthropic":
-            if entry.expires_at_ms is None:
-                return False
-            return int(entry.expires_at_ms) <= int(time.time() * 1000) + 120_000
+        # ── Plugin-registered credential pool hooks ──
+        from agent.plugin_registries import registries as _cph_reg
+        _hook = _cph_reg.get_credential_pool_hook(self.provider)
+        if _hook is not None and _hook.needs_refresh is not None:
+            return _hook.needs_refresh(entry)
        if self.provider == "openai-codex":
            return _codex_access_token_is_expiring(
                entry.access_token,
@@ -1160,12 +1075,16 @@ class CredentialPool:
        cleared_any = False
        available: List[PooledCredential] = []
        for entry in self._entries:
-            # For anthropic claude_code entries, sync from the credentials file
-            # before any status/refresh checks. This picks up tokens refreshed
-            # by other processes (Claude Code CLI, other Hermes profiles).
-            if (self.provider == "anthropic" and entry.source == "claude_code"
+            # ── Plugin-registered credential pool hooks ──
+            # Sync exhausted entries from external credentials files before
+            # status/refresh checks. This picks up tokens refreshed by other
+            # processes (e.g. Claude Code CLI, other Hermes profiles).
+            from agent.plugin_registries import registries as _cph_reg4
+            _avail_hook = _cph_reg4.get_credential_pool_hook(self.provider)
+            if (_avail_hook is not None
+                    and _avail_hook.sync_from_credentials_file is not None
                    and entry.last_status == STATUS_EXHAUSTED):
-                synced = self._sync_anthropic_entry_from_credentials_file(entry)
+                synced = _avail_hook.sync_from_credentials_file(entry)
                if synced is not entry:
                    entry = synced
                    cleared_any = True
@@ -1515,84 +1434,15 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        def _is_suppressed(_p, _s):  # type: ignore[misc]
            return False

-    if provider == "anthropic":
-        # Only auto-discover external credentials (Claude Code, Hermes PKCE)
-        # when the user has explicitly configured anthropic as their provider.
-        # Without this gate, auxiliary client fallback chains silently read
-        # ~/.claude/.credentials.json without user consent.  See PR #4210.
-        try:
-            from hermes_cli.auth import is_provider_explicitly_configured
-            if not is_provider_explicitly_configured("anthropic"):
-                return changed, active_sources
-        except ImportError:
-            pass
-
-        # API-key vs OAuth is a user-visible choice at `hermes setup` ("Claude
-        # Pro/Max subscription" vs "Anthropic API key").  The signal that the
-        # user picked the API-key path is: ANTHROPIC_API_KEY set in the env,
-        # AND no OAuth env vars set — `save_anthropic_api_key()` writes the
-        # API key and zeros ANTHROPIC_TOKEN; `save_anthropic_oauth_token()`
-        # does the inverse.  When that signal is present we MUST NOT seed
-        # autodiscovered OAuth tokens (~/.claude/.credentials.json from the
-        # Claude Code CLI, hermes_pkce creds from a previous OAuth login)
-        # into the anthropic pool — otherwise rotation on a 401/429 silently
-        # flips the session onto an OAuth credential, which forces the Claude
-        # Code identity injection, `mcp_` tool-name rewrite, and claude-cli
-        # User-Agent header (`agent/anthropic_adapter.py:2128`).  Users who
-        # explicitly opted into the API-key path are explicitly opting OUT of
-        # that masquerade.  Prefer ~/.hermes/.env over os.environ for the
-        # same reason `_seed_from_env` does — that's the authoritative file
-        # that `hermes setup` writes.
-        _env_file = load_env()
-
-        def _env_val(key: str) -> str:
-            return (_env_file.get(key) or os.environ.get(key) or "").strip()
-
-        anthropic_api_key = _env_val("ANTHROPIC_API_KEY")
-        anthropic_oauth_env = (
-            _env_val("ANTHROPIC_TOKEN") or _env_val("CLAUDE_CODE_OAUTH_TOKEN")
+    # ── Plugin-registered credential pool hooks ──
+    from agent.plugin_registries import registries as _cp_reg
+    _cp_hook = _cp_reg.get_credential_pool_hook(provider)
+    if _cp_hook is not None and _cp_hook.discover_credentials is not None:
+        hook_changed, hook_sources = _cp_hook.discover_credentials(
+            entries, provider, _is_suppressed,
        )
-        api_key_path_explicit = bool(anthropic_api_key and not anthropic_oauth_env)
-
-        if api_key_path_explicit:
-            # Prune any stale autodiscovered OAuth entries that may have been
-            # seeded into the on-disk pool during a previous OAuth session.
-            # Without this, switching OAuth -> API key at setup leaves the
-            # OAuth entries dormant in auth.json forever and rotation on a
-            # transient 401 could revive them.
-            retained = [
-                entry for entry in entries
-                if entry.source not in {"hermes_pkce", "claude_code"}
-            ]
-            if len(retained) != len(entries):
-                entries[:] = retained
-                changed = True
-            return changed, active_sources
-
-        from agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials
-
-        for source_name, creds in (
-            ("hermes_pkce", read_hermes_oauth_credentials()),
-            ("claude_code", read_claude_code_credentials()),
-        ):
-            if creds and creds.get("accessToken"):
-                if _is_suppressed(provider, source_name):
-                    continue
-                active_sources.add(source_name)
-                changed |= _upsert_entry(
-                    entries,
-                    provider,
-                    source_name,
-                    {
-                        "source": source_name,
-                        "auth_type": AUTH_TYPE_OAUTH,
-                        "access_token": creds.get("accessToken", ""),
-                        "refresh_token": creds.get("refreshToken"),
-                        "expires_at_ms": creds.get("expiresAt"),
-                        "label": label_from_token(creds.get("accessToken", ""), source_name),
-                    },
-                )
-
+        changed |= hook_changed
+        active_sources |= hook_sources
    elif provider == "nous":
        state = _load_provider_state(auth_store, "nous")
        has_runtime_material = bool(
@@ -1903,12 +1753,11 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        env_url = _get_env_prefer_dotenv(pconfig.base_url_env_var).rstrip("/")

    env_vars = list(pconfig.api_key_env_vars)
-    if provider == "anthropic":
-        env_vars = [
-            "ANTHROPIC_TOKEN",
-            "CLAUDE_CODE_OAUTH_TOKEN",
-            "ANTHROPIC_API_KEY",
-        ]
+    # ── Plugin-registered credential pool hooks: env var order override ──
+    from agent.plugin_registries import registries as _env_reg
+    _env_hook = _env_reg.get_credential_pool_hook(provider)
+    if _env_hook is not None and _env_hook.env_var_order is not None:
+        env_vars = list(_env_hook.env_var_order)  # copy, like the default above; never alias the hook's sequence

    for env_var in env_vars:
        # Prefer ~/.hermes/.env over os.environ
@@ -1919,7 +1768,11 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        if _is_source_suppressed(provider, source):
            continue
        active_sources.add(source)
-        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
+        # ── Plugin-registered credential pool hooks: auth type detection ──
+        if _env_hook is not None and _env_hook.detect_auth_type is not None:
+            auth_type = _env_hook.detect_auth_type(token)
+        else:
+            auth_type = AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
        if provider == "kimi-coding":
            base_url = _resolve_kimi_base_url(token, pconfig.inference_base_url, env_url)
@@ -904,10 +904,6 @@ def get_cute_tool_message(
            extra = f" +{len(urls)-1}" if len(urls) > 1 else ""
            return _wrap(f"┊ 📄 fetch     {_trunc(domain, 35)}{extra}  {dur}")
        return _wrap(f"┊ 📄 fetch     pages  {dur}")
-    if tool_name == "web_crawl":
-        url = args.get("url", "")
-        domain = url.replace("https://", "").replace("http://", "").split("/")[0]
-        return _wrap(f"┊ 🕸️  crawl     {_trunc(domain, 35)}  {dur}")
    if tool_name == "terminal":
        return _wrap(f"┊ 💻 $         {_trunc(args.get('command', ''), 42)}  {dur}")
    if tool_name == "process":
@@ -44,9 +44,10 @@ class FailoverReason(enum.Enum):
    payload_too_large = "payload_too_large"  # 413 — compress payload
    image_too_large = "image_too_large"   # Native image part exceeds provider's per-image limit — shrink and retry

-    # Model
+    # Model / provider policy
    model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
    provider_policy_blocked = "provider_policy_blocked"  # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy
+    content_policy_blocked = "content_policy_blocked"  # Provider safety filter rejected this prompt — deterministic per-request, don't retry unchanged

    # Request format
    format_error = "format_error"        # 400 bad request — abort or strip + retry
@@ -97,13 +98,20 @@ _BILLING_PATTERNS = [
    "insufficient_quota",
    "insufficient balance",
    "credit balance",
+    "credits exhausted",
    "credits have been exhausted",
+    "no usable credits",
    "top up your credits",
    "payment required",
    "billing hard limit",
    "exceeded your current quota",
    "account is deactivated",
    "plan does not include",
+    "out of funds",
+    "run out of funds",
+    "balance_depleted",
+    "model_not_supported_on_free_tier",
+    "not available on the free tier",
 ]

 # Patterns that indicate rate limiting (transient, will resolve)
@@ -282,6 +290,45 @@ _PROVIDER_POLICY_BLOCKED_PATTERNS = [
    "no endpoints found matching your data policy",
 ]

+# Provider content-policy / safety-filter blocks. Distinct from
+# ``provider_policy_blocked`` above (which is an OpenRouter *account*-level
+# data/privacy guardrail) — these are *per-prompt* safety decisions made by
+# the upstream model provider. They are deterministic for the unchanged
+# request, so retrying the same prompt three times just reproduces the same
+# block and burns paid attempts on a refusal. The recovery is to switch to a
+# configured fallback model/provider immediately, or surface the block to
+# the user with actionable guidance if no fallback exists.
+#
+# Patterns are intentionally narrow — each phrase is a verbatim string from
+# a specific provider's safety pipeline, not a generic word like "policy" or
+# "violation" that could collide with billing/auth/format errors:
+#   • OpenAI Codex cybersecurity refusal (gpt-5.5, the case from #18028)
+#   • OpenAI moderation refusal ("violates our usage policies", with
+#     "usage policies" disambiguating from billing's "exceeded ... policy")
+#   • Anthropic safety refusal ("prompt was flagged by ... safety system")
+#   • OpenAI Responses content filter
+_CONTENT_POLICY_BLOCKED_PATTERNS = [
+    # OpenAI Codex (#18028) — message may arrive without an HTTP status
+    "flagged for possible cybersecurity risk",
+    "trusted access for cyber",
+    # OpenAI moderation — chat completions / responses
+    "violates our usage policies",
+    "violates openai's usage policies",
+    "your request was flagged by",
+    # Anthropic safety system
+    "prompt was flagged by our safety",
+    "responses cannot be generated due to safety",
+    # Generic content-filter wording seen on Azure / OpenAI Responses.
+    # ``content_filter`` (underscore) is the OpenAI-standard error/finish
+    # token surfaced verbatim by their SDKs when a request is blocked.
+    # ``responsibleaipolicyviolation`` is Azure OpenAI's error code.
+    # Deliberately NOT matching the space variant ("content filter") — it
+    # appears in benign config descriptions and tooltip text that providers
+    # echo back; the underscore form is provider-specific enough.
+    "content_filter",
+    "responsibleaipolicyviolation",
+]
+
 # Auth patterns (non-status-code signals)
 _AUTH_PATTERNS = [
    "invalid api key",
@@ -485,6 +532,20 @@ def classify_api_error(

    # ── 1. Provider-specific patterns (highest priority) ────────────

+    # Provider content-policy / safety-filter block. The provider has made a
+    # deterministic refusal decision about THIS prompt — retrying unchanged
+    # just reproduces the same refusal and burns paid attempts. Must run
+    # before status-based classification so a 400 safety block isn't
+    # downgraded to a generic ``format_error`` and a status-less block
+    # (OpenAI Codex SDK can raise without one) isn't left in the retryable
+    # ``unknown`` bucket. See issue #18028.
+    if any(p in error_msg for p in _CONTENT_POLICY_BLOCKED_PATTERNS):
+        return _result(
+            FailoverReason.content_policy_blocked,
+            retryable=False,
+            should_fallback=True,
+        )
+
    # Anthropic thinking block signature invalid (400).
    # Don't gate on provider — OpenRouter proxies Anthropic errors, so the
    # provider may be "openrouter" even though the error is Anthropic-specific.
@@ -690,8 +751,13 @@ def _classify_by_status(
        )

    if status_code == 403:
-        # OpenRouter 403 "key limit exceeded" is actually billing
-        if "key limit exceeded" in error_msg or "spending limit" in error_msg:
+        # OpenRouter 403 "key limit exceeded" is actually billing. Other
+        # providers also use 403 for account-plan or credit exhaustion.
+        if (
+            "key limit exceeded" in error_msg
+            or "spending limit" in error_msg
+            or any(p in error_msg for p in _BILLING_PATTERNS)
+        ):
            return result_fn(
                FailoverReason.billing,
                retryable=False,
@@ -708,6 +774,17 @@ def _classify_by_status(
        return _classify_402(error_msg, result_fn)

    if status_code == 404:
+        # Nous API currently surfaces HA/NAS credit depletion as a paid model
+        # becoming unavailable on the Free Tier, returned as 404 rather than
+        # 402. Treat that as entitlement/billing exhaustion, not a missing
+        # model, so the retry loop can show credit/top-up guidance.
+        if any(p in error_msg for p in _BILLING_PATTERNS):
+            return result_fn(
+                FailoverReason.billing,
+                retryable=False,
+                should_rotate_credential=True,
+                should_fallback=True,
+            )
        # OpenRouter policy-block 404 — distinct from "model not found".
        # The model exists; the user's account privacy setting excludes the
        # only endpoint serving it. Falling back to another provider won't
@@ -973,7 +1050,15 @@ def _classify_by_error_code(
            should_rotate_credential=True,
        )

-    if code_lower in {"insufficient_quota", "billing_not_active", "payment_required"}:
+    if code_lower in {
+        "insufficient_quota",
+        "billing_not_active",
+        "payment_required",
+        "insufficient_credits",
+        "no_usable_credits",
+        "balance_depleted",
+        "model_not_supported_on_free_tier",
+    }:
        return result_fn(
            FailoverReason.billing,
            retryable=False,
@@ -0,0 +1,39 @@
+"""Best-effort early import for the OpenAI SDK's native streaming parser.
+
+The OpenAI SDK imports ``jiter`` while constructing streaming chat-completion
+responses.  On some Windows installs the native extension can be imported
+directly from the Hermes venv, but the first import fails when it happens later
+inside the threaded streaming request path.  Loading it once during agent
+package import avoids that import-order failure while preserving the normal
+SDK error path for genuinely missing or broken installs.
+"""
+
+from __future__ import annotations
+
+import importlib
+
+_JITER_PRELOADED = False
+_JITER_PRELOAD_ERROR: Exception | None = None
+
+
+def preload_jiter_native_extension() -> bool:
+    """Eagerly load jiter's compiled module; return True once it is loaded.
+
+    Any import failure is remembered in ``_JITER_PRELOAD_ERROR`` and reported
+    as ``False`` rather than raised; a later call tries the import again.
+    """
+    global _JITER_PRELOADED, _JITER_PRELOAD_ERROR
+
+    if not _JITER_PRELOADED:
+        try:
+            importlib.import_module("jiter.jiter")
+            from jiter import from_json as _from_json  # noqa: F401
+        except Exception as exc:
+            _JITER_PRELOAD_ERROR = exc
+        else:
+            _JITER_PRELOADED = True
+            _JITER_PRELOAD_ERROR = None
+    return _JITER_PRELOADED
+
+
+preload_jiter_native_extension()
@@ -368,11 +368,42 @@ class MemoryManager:

    # -- Sync ----------------------------------------------------------------

-    def sync_all(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+    @staticmethod
+    def _provider_sync_accepts_messages(provider: MemoryProvider) -> bool:
+        """Report whether ``provider.sync_turn`` can be called with ``messages=...``."""
+        try:
+            accepted = inspect.signature(provider.sync_turn).parameters
+        except (TypeError, ValueError):
+            # Signature could not be introspected: answer True (caller passes messages).
+            return True
+        if "messages" in accepted:
+            return True
+        return any(p.kind is inspect.Parameter.VAR_KEYWORD for p in accepted.values())
+
+    def sync_all(
+        self,
+        user_content: str,
+        assistant_content: str,
+        *,
+        session_id: str = "",
+        messages: Optional[List[Dict[str, Any]]] = None,
+    ) -> None:
        """Sync a completed turn to all providers."""
        for provider in self._providers:
            try:
-                provider.sync_turn(user_content, assistant_content, session_id=session_id)
+                if messages is not None and self._provider_sync_accepts_messages(provider):
+                    provider.sync_turn(
+                        user_content,
+                        assistant_content,
+                        session_id=session_id,
+                        messages=messages,
+                    )
+                else:
+                    provider.sync_turn(
+                        user_content,
+                        assistant_content,
+                        session_id=session_id,
+                    )
            except Exception as e:
                logger.warning(
                    "Memory provider '%s' sync_turn failed: %s",
@@ -112,11 +112,22 @@ class MemoryProvider(ABC):
        that do background prefetching should override this.
        """

-    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+    def sync_turn(
+        self,
+        user_content: str,
+        assistant_content: str,
+        *,
+        session_id: str = "",
+        messages: Optional[List[Dict[str, Any]]] = None,
+    ) -> None:
        """Persist a completed turn to the backend.

        Called after each turn. Should be non-blocking — queue for
        background processing if the backend has latency.
+
+        ``messages`` is the OpenAI-style conversation message list as of the
+        completed turn, including any assistant tool calls and tool results.
+        Providers that do not need raw turn context can ignore it.
        """

    @abstractmethod
@@ -141,6 +141,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    # fuzzy-match collisions (e.g. "anthropic/claude-sonnet-4" is a
    # substring of "anthropic/claude-sonnet-4.6").
    # OpenRouter-prefixed models resolve via OpenRouter live API or models.dev.
+    "claude-opus-4-8": 1000000,
+    "claude-opus-4.8": 1000000,
    "claude-opus-4-7": 1000000,
    "claude-opus-4.7": 1000000,
    "claude-opus-4-6": 1000000,
@@ -911,12 +913,33 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
    return None


+def get_context_length_from_provider_error(
+    error_msg: str,
+    current_context_length: int,
+) -> Optional[int]:
+    """Extract a smaller context window from a provider error, when stated.
+
+    Returns the limit parsed from ``error_msg`` only if it is strictly below
+    ``current_context_length``; otherwise ``None``.  Providers that merely
+    report an overflow without naming a maximum therefore leave the
+    configured window untouched, and callers should rely on compression
+    instead of walking down guessed probe tiers (1M → 256K → 128K → ...).
+    """
+    reported = parse_context_limit_from_error(error_msg)
+    # Only ever shrink: a missing limit, or one at/above the current window,
+    # yields None so the caller keeps its configured context length.
+    if reported is not None and reported < current_context_length:
+        return reported
+    return None
+
+
 def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
    """Detect an "output cap too large" error and return how many output tokens are available.

    Background — two distinct context errors exist:
      1. "Prompt too long"  — the INPUT itself exceeds the context window.
-           Fix: compress history and/or halve context_length.
+           Fix: compress history, and only reduce context_length if the
+           provider explicitly reports the actual lower limit.
      2. "max_tokens too large" — input is fine, but input + requested_output > window.
           Fix: reduce max_tokens (the output cap) for this call.
           Do NOT touch context_length — the window hasn't shrunk.
@@ -1544,8 +1567,11 @@ def get_model_context_length(
        and base_url_host_matches(base_url, "amazonaws.com")
    ):
        try:
-            from agent.bedrock_adapter import get_bedrock_context_length
-            return get_bedrock_context_length(model)
+            from agent.plugin_registries import registries
+            _bedrock = registries.get_provider_namespace("bedrock")
+            get_bedrock_context_length = _bedrock.get("get_bedrock_context_length")
+            if get_bedrock_context_length is not None:
+                return get_bedrock_context_length(model)
        except ImportError:
            pass  # boto3 not installed — fall through to generic resolution

@@ -15,6 +15,18 @@ and MoonshotAI/kimi-cli#1595:
 2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
   the parent.  Presence of both causes "type should be defined in anyOf
   items instead of the parent schema".
+3. ``enum`` arrays on scalar-typed nodes may not contain ``null`` or empty
+   strings.  Strip those entries (drop the enum entirely if it becomes empty).
+4. ``$ref`` nodes may not carry sibling keywords.  Moonshot expands the
+   reference before validation and then rejects the node if sibling keys
+   like ``description`` remain on the same node as ``$ref``.  Strip every
+   sibling from ``$ref`` nodes so only ``{"$ref": "..."}`` survives.
+   (Ported from anomalyco/opencode#24730.)
+5. ``items`` may not be a tuple-style array (``items: [schemaA, schemaB]``
+   for positional element schemas).  Moonshot's schema engine requires a
+   single object schema applied to every array element.  Collapse tuple
+   ``items`` to the first element schema (or ``{}`` if the tuple is empty).
+   (Ported from anomalyco/opencode#24730.)

 The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
 handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
@@ -66,6 +78,16 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
            }
        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
            repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
+        elif key == "items" and isinstance(value, list):
+            # Rule 5: tuple-style ``items`` arrays (positional element
+            # schemas) are not accepted by Moonshot.  Collapse to the
+            # first element schema if present, else to ``{}``.  This
+            # matches opencode's behaviour for moonshotai / kimi models.
+            first = value[0] if value else {}
+            if isinstance(first, dict):
+                repaired[key] = _repair_schema(first, is_schema=True)
+            else:
+                repaired[key] = first
        elif key in _SCHEMA_NODE_KEYS:
            # items / not / additionalProperties: single nested schema.
            # additionalProperties can also be a bool — leave those alone.
@@ -130,6 +152,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
            else:
                repaired.pop("enum")

+    # Rule 4: $ref nodes must not have sibling keywords.  Moonshot expands
+    # the reference before validation and then rejects the node if siblings
+    # like ``description`` / ``type`` / ``default`` appear alongside $ref.
+    # The referenced definition still carries its own description on the
+    # target node, which Moonshot accepts.
+    # (Ported from anomalyco/opencode#24730.)
+    if "$ref" in repaired:
+        return {"$ref": repaired["$ref"]}
+
    return repaired


@@ -0,0 +1,586 @@
+"""Plugin capability registries.
+
+Each plugin's ``register(ctx)`` function populates these registries via
+``ctx.register_<capability>()``.  The core codebase then queries the
+registries instead of importing from plugin packages directly.
+
+This is the **only** coupling point between the core and plugins: the core
+imports from ``agent.plugin_registries``, never from ``hermes_agent_*``.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Protocol,
+    Sequence,
+    Tuple,
+    Type,
+    runtime_checkable,
+)
+
+
+# ---------------------------------------------------------------------------
+# Auth providers
+# ---------------------------------------------------------------------------
+
+@runtime_checkable
+class AuthProvider(Protocol):
+    """A plugin that can provide or check authentication credentials.
+
+    Registered via ``ctx.register_auth_provider(name, provider)``.
+    Queried by ``hermes_cli/auth_commands.py``, ``doctor.py``, etc.
+    """
+
+    @property
+    def name(self) -> str: ...
+
+    def has_credentials(self) -> bool:
+        """Return True if the required credentials are present in env/config."""
+        ...
+
+    def check_env_vars(self) -> Dict[str, str | None]:
+        """Return a dict of env-var-name → current-value (or None if unset).
+
+        Used by ``hermes doctor`` to display credential status.
+        """
+        ...
+
+    def resolve_token(self, **kwargs: Any) -> Any:
+        """Resolve and return an auth token/credential for the provider.
+
+        The return type is provider-specific (string, tuple, object, etc.).
+        """
+        ...
+
+    def refresh_token(self, **kwargs: Any) -> Any:
+        """Refresh an existing token.  Raises if refresh is not supported."""
+        ...
+
+
+@dataclass
+class AuthProviderEntry:
+    provider: AuthProvider
+    """The auth provider instance."""
+
+    cli_group: str = ""
+    """CLI argument group name (e.g. 'Anthropic', 'AWS / Bedrock')."""
+
+    setup_subcommands: bool = False
+    """Whether this provider adds CLI auth subcommands (login, logout, etc.)."""
+
+
+# ---------------------------------------------------------------------------
+# Transport builders
+# ---------------------------------------------------------------------------
+
+@runtime_checkable
+class TransportBuilder(Protocol):
+    """A plugin that builds clients and converts messages for a model transport.
+
+    Registered via ``ctx.register_transport(name, builder)``.
+    Queried by ``agent/transports/`` and ``agent/auxiliary_client.py``.
+    """
+
+    def build_client(self, **kwargs: Any) -> Any:
+        """Build and return a provider-specific API client."""
+        ...
+
+    def build_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
+        """Build the kwargs dict for a provider-specific API call."""
+        ...
+
+    def convert_messages(self, messages: Sequence[Any], **kwargs: Any) -> Any:
+        """Convert internal message format to provider-specific format."""
+        ...
+
+    def convert_tools(self, tools: Sequence[Any], **kwargs: Any) -> Any:
+        """Convert internal tool format to provider-specific format."""
+        ...
+
+    def normalize_response(self, response: Any, **kwargs: Any) -> Any:
+        """Normalize a provider-specific response into the internal format."""
+        ...
+
+
+# ---------------------------------------------------------------------------
+# Platform adapters
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PlatformAdapterEntry:
+    """A registered platform adapter.
+
+    Registered via ``ctx.register_platform(name, entry)``.
+    Queried by ``gateway/run.py`` and ``tools/send_message_tool.py``.
+    """
+    name: str
+    """Platform identifier (e.g. 'telegram', 'slack')."""
+
+    adapter_class: Type
+    """The adapter class (e.g. TelegramAdapter)."""
+
+    check_requirements: Callable[[], bool]
+    """Check if the platform's dependencies are installed and configured."""
+
+    available_flag: str = ""
+    """Name of the module-level AVAILABLE boolean, if any."""
+
+    constants: Dict[str, Any] = field(default_factory=dict)
+    """Platform-specific constants (e.g. FEISHU_DOMAIN, LARK_DOMAIN)."""
+
+    helper_functions: Dict[str, Callable] = field(default_factory=dict)
+    """Platform-specific helper functions (e.g. probe_bot, qr_register)."""
+
+
+# ---------------------------------------------------------------------------
+# Tool providers
+# ---------------------------------------------------------------------------
+
+@dataclass
+class ToolProviderEntry:
+    """A registered tool provider.
+
+    Registered via ``ctx.register_tool_provider(name, entry)``.
+    Queried by ``tools/`` modules.
+    """
+    name: str
+    """Tool identifier (e.g. 'tts', 'stt', 'fal', 'daytona')."""
+
+    tool_functions: Dict[str, Callable] = field(default_factory=dict)
+    """Tool functions keyed by name (e.g. 'text_to_speech_tool', 'transcribe_audio')."""
+
+    check_fn: Optional[Callable] = None
+    """Check if the tool's dependencies are available."""
+
+    constants: Dict[str, Any] = field(default_factory=dict)
+    """Tool-specific constants (e.g. MAX_FILE_SIZE)."""
+
+    config_functions: Dict[str, Callable] = field(default_factory=dict)
+    """Config/utility functions (e.g. _get_provider, _load_stt_config)."""
+
+    environment_classes: Dict[str, Type] = field(default_factory=dict)
+    """Environment classes for terminal backends (e.g. DaytonaEnvironment)."""
+
+
+# ---------------------------------------------------------------------------
+# Model metadata providers
+# ---------------------------------------------------------------------------
+
+@dataclass
+class ModelMetadataEntry:
+    """A registered model metadata provider.
+
+    Registered via ``ctx.register_model_metadata(name, entry)``.
+    Queried by ``agent/model_metadata.py`` and CLI model commands.
+
+    ``PluginRegistries.register_model_metadata`` stores the entry under
+    ``entry.name``.  Both callables are optional and default to ``None``;
+    ``constants`` defaults to a fresh empty dict per instance.
+    """
+    name: str
+    """Provider identifier (e.g. 'anthropic', 'bedrock')."""
+
+    get_context_length: Optional[Callable[[str], int | None]] = None
+    """Return the context length for a model name, or None if unknown."""
+
+    list_models: Optional[Callable[[], List[str]]] = None
+    """Return a list of known model IDs for this provider."""
+
+    constants: Dict[str, Any] = field(default_factory=dict)
+    """Provider-specific constants (e.g. _COMMON_BETAS, betas lists)."""
+
+
+# ---------------------------------------------------------------------------
+# Credential pool entries
+# ---------------------------------------------------------------------------
+
+@dataclass
+class CredentialPoolEntry:
+    """A registered credential pool provider.
+
+    Registered via ``ctx.register_credential_pool(name, entry)``.
+    Queried by ``agent/credential_pool.py``.
+
+    ``PluginRegistries.register_credential_pool`` stores the entry under
+    ``entry.name``.  All four callables are optional and default to
+    ``None``; they are typed as bare ``Callable``, so their signatures are
+    not specified by this class.
+    """
+    name: str
+    """Provider identifier (e.g. 'anthropic')."""
+
+    read_credentials: Optional[Callable] = None
+    """Read stored credentials."""
+
+    write_credentials: Optional[Callable] = None
+    """Write/store credentials."""
+
+    refresh_credentials: Optional[Callable] = None
+    """Refresh stored credentials."""
+
+    read_oauth: Optional[Callable] = None
+    """Read OAuth credentials."""
+
+
+# ---------------------------------------------------------------------------
+# Provider resolvers
+# ---------------------------------------------------------------------------
+
+@runtime_checkable
+class ProviderResolver(Protocol):
+    """A plugin that resolves an auxiliary client for a specific provider.
+
+    Registered via ``ctx.register_provider_resolver(provider_name, resolver)``.
+    Queried by ``agent/auxiliary_client.py`` in ``resolve_provider_client()``.
+
+    The protocol is structural: any object whose ``__call__`` accepts the
+    keyword-only arguments below satisfies it.  Because the class is
+    ``@runtime_checkable``, ``isinstance(obj, ProviderResolver)`` only
+    verifies that ``__call__`` exists — it does not validate the signature.
+    ``PluginRegistries.register_provider_resolver`` annotates resolvers as
+    plain ``Callable`` and performs no protocol check.
+    """
+
+    def __call__(
+        self,
+        *,
+        model: str | None = None,
+        explicit_api_key: str | None = None,
+        explicit_base_url: str | None = None,
+        async_mode: bool = False,
+        is_vision: bool = False,
+        main_runtime: dict | None = None,
+        api_mode: str | None = None,
+    ) -> tuple[Any, str] | tuple[None, None]:
+        """Return ``(client, default_model)`` or ``(None, None)`` if unavailable."""
+        ...
+
+
+# ---------------------------------------------------------------------------
+# Credential pool hooks
+# ---------------------------------------------------------------------------
+
+@dataclass
+class CredentialPoolHook:
+    """Provider-specific credential pool operations.
+
+    Registered via ``ctx.register_credential_pool_hook(provider_name, hook)``.
+    Queried by ``agent/credential_pool.py``.
+
+    The hook carries no name of its own:
+    ``PluginRegistries.register_credential_pool_hook`` stores it under the
+    provider name passed alongside it.  Every field is optional and
+    defaults to ``None``, so a plugin sets only the operations it provides.
+    """
+
+    sync_from_credentials_file: Optional[Callable] = None
+    """Sync a pool entry from an external credentials file (e.g. ~/.claude/.credentials.json)."""
+
+    refresh_oauth: Optional[Callable] = None
+    """Refresh an OAuth token for a pool entry."""
+
+    should_include_in_pool: Optional[Callable] = None
+    """Return True if this provider's credentials should be included in the pool."""
+
+    needs_refresh: Optional[Callable] = None
+    """Return True if an OAuth entry needs a token refresh."""
+
+    source_priority: Optional[Callable] = None
+    """Return integer priority for a credential source (lower = preferred)."""
+
+    discover_credentials: Optional[Callable] = None
+    """Discover external credentials and upsert into the pool entries.
+
+    Signature: (entries: list, provider: str, is_suppressed: Callable) -> (changed: bool, active_sources: set)
+    """
+
+    env_var_order: Optional[list] = None
+    """Override env var scan order for this provider (e.g. ['ANTHROPIC_TOKEN', 'CLAUDE_CODE_OAUTH_TOKEN', 'ANTHROPIC_API_KEY'])."""
+
+    detect_auth_type: Optional[Callable] = None
+    """Given a token string, return the auth type for this provider.
+
+    Signature: (token: str) -> str  (e.g. AUTH_TYPE_OAUTH or AUTH_TYPE_API_KEY)
+    """
+
+
+# ---------------------------------------------------------------------------
+# Pricing providers
+# ---------------------------------------------------------------------------
+
+# Re-export PricingEntry from usage_pricing — that's the canonical definition
+# with Decimal fields. The registry stores these directly keyed by (provider, model).
+# Lazy import to avoid circular dependency (usage_pricing imports registries at runtime).
+def _get_pricing_entry_class():
+    """Return the ``PricingEntry`` class defined in ``agent.usage_pricing``.
+
+    The import is performed inside the function, on each call, rather than
+    at module import time.
+    """
+    from agent.usage_pricing import PricingEntry as _pricing_entry_cls
+    return _pricing_entry_cls
+
+
+# ---------------------------------------------------------------------------
+# Provider overlays
+# ---------------------------------------------------------------------------
+
+@dataclass
+class ProviderOverlayEntry:
+    """A provider overlay registered by a plugin.
+
+    Registered via ``ctx.register_provider_overlay(provider_name, entry)``.
+    Queried by ``hermes_cli/providers.py``.
+
+    This mirrors the fields of ``HermesOverlay`` so that providers.py
+    can merge plugin-registered overlays seamlessly.
+
+    ``PluginRegistries.register_provider_overlay`` stores the entry under
+    ``entry.provider_name`` and ``get_provider_overlay`` looks it up by
+    that exact key; the registry does not index ``aliases``.  Only
+    ``provider_name`` is required — every other field has a default.
+    """
+
+    provider_name: str
+    """Primary provider name (e.g. 'anthropic', 'bedrock')."""
+
+    transport: str = "openai_chat"
+    """Transport type: openai_chat | anthropic_messages | codex_responses | bedrock_converse"""
+
+    is_aggregator: bool = False
+    """Whether this provider aggregates multiple model providers."""
+
+    auth_type: str = "api_key"
+    """Auth type: api_key | oauth_device_code | oauth_external | aws_sdk | external_process"""
+
+    extra_env_vars: Tuple[str, ...] = ()
+    """Environment variable names that indicate this provider is configured."""
+
+    base_url_override: str = ""
+    """Override if models.dev URL is wrong/missing."""
+
+    base_url_env_var: str = ""
+    """Env var for user-custom base URL."""
+
+    display_name: str = ""
+    """Human-readable name for the provider (e.g. 'Anthropic', 'AWS Bedrock')."""
+
+    aliases: List[str] = field(default_factory=list)
+    """Alternative names that resolve to this provider."""
+
+
+# ---------------------------------------------------------------------------
+# The global registries (singleton)
+# ---------------------------------------------------------------------------
+
+class _LazyRef:
+    """Proxy that re-reads ``<module>.<attr>`` from ``sys.modules`` on every use.
+
+    ``PluginRegistries.register_provider_services`` wraps simple module-level
+    callables in this proxy so that ``unittest.mock.patch("pkg.mod.fn")`` is
+    honoured: the attribute is looked up at call time instead of the object
+    being captured at registration time.  ``fallback`` (the object that was
+    originally registered) is used when the module is not in ``sys.modules``
+    or no longer has the attribute.
+    """
+
+    __slots__ = ("_mod", "_attr", "_fallback")
+
+    def __init__(self, mod: str, attr: str, fallback: Any) -> None:
+        self._mod = mod
+        self._attr = attr
+        self._fallback = fallback
+
+    def _resolve(self) -> Any:
+        """Return the current ``module.attr`` object, or the fallback."""
+        # Imported locally so this helper does not depend on the module's
+        # top-level import list.
+        import sys
+
+        mod = sys.modules.get(self._mod)
+        if not mod:
+            return self._fallback
+        target = getattr(mod, self._attr, self._fallback)
+        # If the module attribute has been rebound to this very proxy,
+        # delegating to it would recurse forever — use the original object.
+        return self._fallback if target is self else target
+
+    def __call__(self, *args: Any, **kwargs: Any) -> Any:
+        return self._resolve()(*args, **kwargs)
+
+    def __getattr__(self, name: str) -> Any:
+        # Only reached when normal lookup fails.  Underscore-prefixed names
+        # are never forwarded, so a missing slot raises AttributeError
+        # instead of recursing through _resolve().
+        if name.startswith("_"):
+            raise AttributeError(name)
+        return getattr(self._resolve(), name)
+
+    def __instancecheck__(self, instance: Any) -> bool:
+        # Lets ``isinstance(obj, ref)`` work when the wrapped symbol is a class.
+        return isinstance(instance, self._resolve())
+
+    def __subclasscheck__(self, subclass: Any) -> bool:
+        # Lets ``issubclass(cls, ref)`` work when the wrapped symbol is a class.
+        return issubclass(subclass, self._resolve())
+
+    def __repr__(self) -> str:  # pragma: no cover
+        return f"<LazyRef {self._mod}.{self._attr}>"
+
+    def __bool__(self) -> bool:
+        # A registered service is always truthy, so ``if fn:`` guards on
+        # lookup results behave the same as for the unwrapped object.
+        return True
+
+
+def _lazy_service_ref(value: Any) -> Any:
+    """Wrap *value* in a :class:`_LazyRef` when it can be re-read from its module.
+
+    Non-callables are returned unchanged.  Callables without a
+    ``__module__``, without a ``__qualname__``, or with a dotted
+    ``__qualname__`` (methods, nested functions) are also returned
+    unchanged because they cannot be fetched as ``getattr(module, name)``.
+    """
+    if not callable(value):
+        return value
+    module = getattr(value, "__module__", None)
+    qualname = getattr(value, "__qualname__", None)
+    if not module or not qualname or "." in qualname:
+        return value
+    return _LazyRef(module, qualname, value)
+
+
+class PluginRegistries:
+    """Central store for all plugin-registered capabilities.
+
+    A single instance is created at import time and shared across the
+    process.  Plugins populate it during ``register()``; the core
+    queries it at runtime.
+
+    Every ``register_*`` method overwrites any previous registration under
+    the same key.  ``get_*`` helpers return ``None`` for unknown keys and
+    ``all_*`` helpers return a fresh ``list``/``dict`` copy.
+    """
+
+    def __init__(self) -> None:
+        self.auth_providers: Dict[str, AuthProviderEntry] = {}
+        self.transport_builders: Dict[str, TransportBuilder] = {}
+        self._transports: Dict[str, type] = {}
+        self.platform_adapters: Dict[str, PlatformAdapterEntry] = {}
+        self.tool_providers: Dict[str, ToolProviderEntry] = {}
+        self.model_metadata: Dict[str, ModelMetadataEntry] = {}
+        self.credential_pools: Dict[str, CredentialPoolEntry] = {}
+        self._provider_services: Dict[str, Dict[str, Any]] = {}
+        self._provider_resolvers: Dict[str, Callable] = {}
+        self._credential_pool_hooks: Dict[str, CredentialPoolHook] = {}
+        # Keyed by (provider, model) tuples.
+        self._pricing_providers: Dict[tuple, Any] = {}
+        self._provider_overlays: Dict[str, ProviderOverlayEntry] = {}
+
+    # -- registration methods (called from PluginContext) --------------------
+
+    def register_auth_provider(
+        self,
+        name: str,
+        provider: AuthProvider,
+        *,
+        cli_group: str = "",
+        setup_subcommands: bool = False,
+    ) -> None:
+        """Wrap *provider* in an ``AuthProviderEntry`` and store it under *name*."""
+        self.auth_providers[name] = AuthProviderEntry(
+            provider=provider,
+            cli_group=cli_group,
+            setup_subcommands=setup_subcommands,
+        )
+
+    def register_transport(self, name: str, builder: TransportBuilder) -> None:
+        """Store a transport builder under *name*."""
+        self.transport_builders[name] = builder
+
+    def register_platform(self, entry: PlatformAdapterEntry) -> None:
+        """Store a platform adapter entry under ``entry.name``."""
+        self.platform_adapters[entry.name] = entry
+
+    def register_tool_provider(self, entry: ToolProviderEntry) -> None:
+        """Store a tool provider entry under ``entry.name``."""
+        self.tool_providers[entry.name] = entry
+
+    def register_model_metadata(self, entry: ModelMetadataEntry) -> None:
+        """Store a model metadata entry under ``entry.name``."""
+        self.model_metadata[entry.name] = entry
+
+    def register_credential_pool(self, entry: CredentialPoolEntry) -> None:
+        """Store a credential pool entry under ``entry.name``."""
+        self.credential_pools[entry.name] = entry
+
+    def register_provider_resolver(self, name: str, resolver: Callable) -> None:
+        """Register a provider resolver callable.
+
+        The resolver is called by ``resolve_provider_client()`` to create an
+        auxiliary client for a specific provider.  Signature::
+
+            def resolver(
+                *,
+                model: str | None,
+                explicit_api_key: str | None,
+                explicit_base_url: str | None,
+                async_mode: bool,
+                is_vision: bool,
+                main_runtime: dict | None,
+                api_mode: str | None,
+            ) -> tuple[Any, str] | tuple[None, None]:
+                ...
+
+        Returns ``(client, default_model)`` or ``(None, None)``.
+        """
+        self._provider_resolvers[name] = resolver
+
+    def register_credential_pool_hook(self, name: str, hook: CredentialPoolHook) -> None:
+        """Register a credential pool hook for provider-specific pool operations."""
+        self._credential_pool_hooks[name] = hook
+
+    def register_pricing_provider(self, name: str, entries: List[tuple]) -> None:
+        """Register pricing entries for a provider.
+
+        Each entry is a (provider, model, PricingEntry) tuple so the
+        lookup key matches the (provider, model) pattern used by
+        _OFFICIAL_DOCS_PRICING.
+
+        *name* is accepted for symmetry with the other ``register_*``
+        methods but is not used as a key: each tuple carries its own
+        provider.
+        """
+        for prov, model, entry in entries:
+            self._pricing_providers[(prov, model)] = entry
+
+    def register_provider_overlay(self, entry: ProviderOverlayEntry) -> None:
+        """Register a provider overlay entry from a plugin."""
+        self._provider_overlays[entry.provider_name] = entry
+
+    # -- query helpers -------------------------------------------------------
+
+    def get_auth_provider(self, name: str) -> AuthProviderEntry | None:
+        """Return the auth provider entry registered under *name*, or None."""
+        return self.auth_providers.get(name)
+
+    def get_transport(self, name: str) -> TransportBuilder | None:
+        """Return the transport builder registered under *name*, or None."""
+        return self.transport_builders.get(name)
+
+    def get_platform(self, name: str) -> PlatformAdapterEntry | None:
+        """Return the platform adapter entry registered under *name*, or None."""
+        return self.platform_adapters.get(name)
+
+    def get_tool_provider(self, name: str) -> ToolProviderEntry | None:
+        """Return the tool provider entry registered under *name*, or None."""
+        return self.tool_providers.get(name)
+
+    def get_model_metadata(self, name: str) -> ModelMetadataEntry | None:
+        """Return the model metadata entry registered under *name*, or None."""
+        return self.model_metadata.get(name)
+
+    def get_credential_pool(self, name: str) -> CredentialPoolEntry | None:
+        """Return the credential pool entry registered under *name*, or None."""
+        return self.credential_pools.get(name)
+
+    def get_provider_resolver(self, name: str) -> Callable | None:
+        """Return the registered resolver for a provider, or None."""
+        return self._provider_resolvers.get(name)
+
+    def get_credential_pool_hook(self, name: str) -> CredentialPoolHook | None:
+        """Return the registered credential pool hook for a provider, or None."""
+        return self._credential_pool_hooks.get(name)
+
+    def get_pricing_entry(self, provider: str, model: str) -> Any:
+        """Return a registered pricing entry for (provider, model), or None."""
+        return self._pricing_providers.get((provider, model))
+
+    def all_pricing_entries(self) -> Dict[tuple, Any]:
+        """Return all registered pricing entries (keyed by (provider, model))."""
+        return dict(self._pricing_providers)
+
+    def get_provider_overlay(self, name: str) -> ProviderOverlayEntry | None:
+        """Return a registered provider overlay, or None."""
+        return self._provider_overlays.get(name)
+
+    def all_provider_overlays(self) -> Dict[str, ProviderOverlayEntry]:
+        """Return all registered provider overlays."""
+        return dict(self._provider_overlays)
+
+    def all_auth_providers(self) -> List[AuthProviderEntry]:
+        """Return a list of every registered auth provider entry."""
+        return list(self.auth_providers.values())
+
+    def all_platforms(self) -> List[PlatformAdapterEntry]:
+        """Return a list of every registered platform adapter entry."""
+        return list(self.platform_adapters.values())
+
+    def all_tool_providers(self) -> List[ToolProviderEntry]:
+        """Return a list of every registered tool provider entry."""
+        return list(self.tool_providers.values())
+
+    # -- provider services (model-provider namespace) -----------------------
+
+    def register_provider_services(self, name: str, services: Dict[str, Any]) -> None:
+        """Register a namespace dict of provider-specific services.
+
+        This is the escape hatch for model-provider plugins that expose many
+        symbols (anthropic has 50+).  Each plugin registers its public surface
+        as a flat dict of ``{symbol_name: callable_or_value}``.  Core code
+        looks up specific symbols instead of importing from the plugin
+        package directly.
+
+        Each simple module-level callable is stored as a *lazy
+        module-attribute reference* so that
+        ``unittest.mock.patch("pkg.mod.fn")`` works correctly in tests — the
+        registry re-reads ``mod.fn`` on every lookup instead of capturing the
+        function object at register time.  Registered classes are wrapped
+        the same way; the wrapper forwards ``isinstance`` / ``issubclass``
+        checks to the current class.  Non-callables, methods and nested
+        functions are stored as-is.
+
+        Calling this again for the same *name* replaces the whole namespace.
+
+        Example::
+
+            registries.register_provider_services("anthropic", {
+                "build_anthropic_client": build_anthropic_client,
+                "resolve_anthropic_token": resolve_anthropic_token,
+                "_is_oauth_token": _is_oauth_token,
+                ...
+            })
+        """
+        self._provider_services[name] = {
+            key: _lazy_service_ref(value) for key, value in services.items()
+        }
+
+    def get_provider_service(self, provider: str, name: str) -> Any:
+        """Look up a single symbol from a provider's service namespace.
+
+        Returns ``None`` if the provider is not registered or the symbol
+        doesn't exist.
+        """
+        ns = self._provider_services.get(provider)
+        if ns is None:
+            return None
+        return ns.get(name)
+
+    def get_provider_namespace(self, provider: str) -> Dict[str, Any]:
+        """Return the full service namespace dict for a provider (empty dict if unregistered).
+
+        For a registered provider this is the registry's own dict, not a
+        copy; for an unregistered provider it is a new empty dict that is
+        not stored.
+        """
+        return self._provider_services.get(provider, {})
+
+
+# Module-level singleton — the one and only instance.
+registries = PluginRegistries()
@@ -406,19 +406,14 @@ def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = F
    if "eyJ" in text:
        text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)

-    # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
-    # DB schemes are handled above by _DB_CONNSTR_RE.
-    if "://" in text:
-        text = _redact_url_userinfo(text)
-
-        # URL query params containing opaque tokens (?access_token=…&code=…)
-        if "?" in text:
-            text = _redact_url_query_params(text)
-
-    # HTTP access logs can contain relative request targets with query params
-    # and no URL scheme, e.g. `"POST /hook?password=... HTTP/1.1"`.
-    if "?" in text and "=" in text and _has_http_method_substring(text):
-        text = _redact_http_request_target_query_params(text)
+    # NOTE: Web-URL redaction (query params + userinfo + HTTP access-log
+    # request targets) is intentionally OFF. Many legitimate workflows pass
+    # opaque tokens through query strings — magic-link checkouts, OAuth
+    # callbacks the agent is meant to follow, pre-signed share URLs — and
+    # blanket-redacting param values by name breaks those skills mid-flow.
+    # Known credential shapes (sk-, ghp_, JWTs, etc.) inside URLs are still
+    # caught by _PREFIX_RE and _JWT_RE above. DB connection-string passwords
+    # are still caught by _DB_CONNSTR_RE.

    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
    if "&" in text and "=" in text:
@@ -258,7 +258,7 @@ def emit_stream_drop(
        except Exception:
            pass
    try:
-        agent._emit_status(
+        agent._buffer_status(
            f"⚠️ {provider} stream {kind} ({type(error).__name__}){_suffix} "
            f"— reconnecting, retry {attempt}/{max_attempts}"
        )
@@ -47,9 +47,16 @@ def get_transport(api_mode: str):


 def _discover_transports() -> None:
-    """Import all transport modules to trigger auto-registration."""
+    """Import all transport modules to trigger auto-registration.
+
+    Also checks the plugin registry for transports registered by plugins
+    (e.g. anthropic_messages from the anthropic plugin, bedrock_converse
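+    from the bedrock plugin).  Plugin-registered transports are added with
+    ``setdefault``, so they only fill api_modes that are not registered yet;
+    an entry already present in ``_REGISTRY`` is kept, not overridden.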
+    """
    global _discovered
    _discovered = True
+    # Core transport modules (registered automatically — no plugin needed)
    try:
        import agent.transports.anthropic  # noqa: F401
    except ImportError:
@@ -62,7 +69,10 @@ def _discover_transports() -> None:
        import agent.transports.chat_completions  # noqa: F401
    except ImportError:
        pass
+    # Plugin-registered transports (setdefault: only fill api_modes not already registered)
    try:
-        import agent.transports.bedrock  # noqa: F401
+        from agent.plugin_registries import registries
+        for api_mode, transport_cls in registries._transports.items():
+            _REGISTRY.setdefault(api_mode, transport_cls)
    except ImportError:
        pass
@@ -1,41 +1,53 @@
-"""Anthropic Messages API transport.
+"""Anthropic Messages API transport — core module.

-Delegates to the existing adapter functions in agent/anthropic_adapter.py.
-This transport owns format conversion and normalization — NOT client lifecycle.
+Owns format conversion and response normalization for the ``anthropic_messages``
+wire format.  No SDK dependency; all wire-format logic lives in
+:mod:`agent.anthropic_format`.
 """

+import json
 from typing import Any, Dict, List, Optional

+from agent.anthropic_format import (
+    build_anthropic_kwargs,
+    convert_messages_to_anthropic,
+    convert_tools_to_anthropic,
+    _to_plain_data,
+)
 from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse
+from agent.transports.types import NormalizedResponse, ToolCall


 class AnthropicTransport(ProviderTransport):
    """Transport for api_mode='anthropic_messages'.

-    Wraps the existing functions in anthropic_adapter.py behind the
-    ProviderTransport ABC.  Each method delegates — no logic is duplicated.
+    Uses core functions directly from :mod:`agent.anthropic_format` — no
+    plugin registry lookups needed.  This means core tests, bedrock tests,
+    and any other consumer of the anthropic wire format work without the
+    anthropic plugin being registered.
    """

+    _STOP_REASON_MAP = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+
    @property
    def api_mode(self) -> str:
        return "anthropic_messages"

    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI messages to Anthropic (system, messages) tuple.
-
-        kwargs:
-            base_url: Optional[str] — affects thinking signature handling.
-        """
-        from agent.anthropic_adapter import convert_messages_to_anthropic
-
+        """Convert OpenAI messages to Anthropic (system, messages) tuple."""
        base_url = kwargs.get("base_url")
-        return convert_messages_to_anthropic(messages, base_url=base_url)
+        return convert_messages_to_anthropic(messages, base_url=base_url,
+                                             model=kwargs.get("model"))

    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Convert OpenAI tool schemas to Anthropic input_schema format."""
-        from agent.anthropic_adapter import convert_tools_to_anthropic
-
        return convert_tools_to_anthropic(tools)

    def build_kwargs(
@@ -45,23 +57,7 @@ class AnthropicTransport(ProviderTransport):
        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
    ) -> Dict[str, Any]:
-        """Build Anthropic messages.create() kwargs.
-
-        Calls convert_messages and convert_tools internally.
-
-        params (all optional):
-            max_tokens: int
-            reasoning_config: dict | None
-            tool_choice: str | None
-            is_oauth: bool
-            preserve_dots: bool
-            context_length: int | None
-            base_url: str | None
-            fast_mode: bool
-            drop_context_1m_beta: bool
-        """
-        from agent.anthropic_adapter import build_anthropic_kwargs
-
+        """Build Anthropic messages.create() kwargs."""
        return build_anthropic_kwargs(
            model=model,
            messages=messages,
@@ -78,15 +74,7 @@ class AnthropicTransport(ProviderTransport):
        )

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize Anthropic response to NormalizedResponse.
-
-        Parses content blocks (text, thinking, tool_use), maps stop_reason
-        to OpenAI finish_reason, and collects reasoning_details in provider_data.
-        """
-        import json
-        from agent.anthropic_adapter import _to_plain_data
-        from agent.transports.types import ToolCall
-
+        """Normalize Anthropic response to NormalizedResponse."""
        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
        _MCP_PREFIX = "mcp_"

@@ -107,12 +95,6 @@ class AnthropicTransport(ProviderTransport):
                name = block.name
                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
                    stripped = name[len(_MCP_PREFIX):]
-                    # Only strip the mcp_ prefix for OAuth-injected tools
-                    # (where Hermes adds the prefix when sending to Anthropic
-                    # and must remove it on the way back).  Native MCP server
-                    # tools (from mcp_servers: in config.yaml) are registered
-                    # in the tool registry under their FULL mcp_<server>_<tool>
-                    # name and must NOT be stripped.  GH-25255.
                    from tools.registry import registry as _tool_registry
                    if (_tool_registry.get_entry(stripped)
                            and not _tool_registry.get_entry(name)):
@@ -141,13 +123,7 @@ class AnthropicTransport(ProviderTransport):
        )

    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid.
-
-        An empty content list is legitimate when ``stop_reason == "end_turn"``
-        — the model's canonical way of signalling "nothing more to add" after
-        a tool turn that already delivered the user-facing text. Treating it
-        as invalid falsely retries a completed response.
-        """
+        """Check Anthropic response structure is valid."""
        if response is None:
            return False
        content_blocks = getattr(response, "content", None)
@@ -168,16 +144,6 @@ class AnthropicTransport(ProviderTransport):
            return {"cached_tokens": cached, "creation_tokens": written}
        return None

-    # Promote the adapter's canonical mapping to module level so it's shared
-    _STOP_REASON_MAP = {
-        "end_turn": "stop",
-        "tool_use": "tool_calls",
-        "max_tokens": "length",
-        "stop_sequence": "stop",
-        "refusal": "content_filter",
-        "model_context_window_exceeded": "length",
-    }
-
    def map_finish_reason(self, raw_reason: str) -> str:
        """Map Anthropic stop_reason to OpenAI finish_reason."""
        return self._STOP_REASON_MAP.get(raw_reason, "stop")
@@ -83,10 +83,40 @@ _UTC_NOW = lambda: datetime.now(timezone.utc)
 # Official docs snapshot entries. Models whose published pricing and cache
 # semantics are stable enough to encode exactly.
 _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
+    # ── Anthropic Claude 4.8 ─────────────────────────────────────────────
+    # Same $5/$25 base pricing as 4.6/4.7.  Fast-mode variant is a separate
+    # model ID with 2x premium (vs the 6x premium on older Opus generations).
+    # Source: https://openrouter.ai/anthropic/claude-opus-4.8
+    (
+        "anthropic",
+        "claude-opus-4-8",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("5.00"),
+        output_cost_per_million=Decimal("25.00"),
+        cache_read_cost_per_million=Decimal("0.50"),
+        cache_write_cost_per_million=Decimal("6.25"),
+        source="official_docs_snapshot",
+        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
+    (
+        "anthropic",
+        "claude-opus-4-8-fast",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("10.00"),
+        output_cost_per_million=Decimal("50.00"),
+        cache_read_cost_per_million=Decimal("1.00"),
+        cache_write_cost_per_million=Decimal("12.50"),
+        source="official_docs_snapshot",
+        source_url="https://openrouter.ai/anthropic/claude-opus-4.8-fast",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
    # ── Anthropic Claude 4.7 ─────────────────────────────────────────────
    # Opus 4.5/4.6/4.7 share $5/$25 pricing (new tokenizer, up to 35% more
    # tokens for the same text).
    # Source: https://platform.claude.com/docs/en/about-claude/pricing
+    # NOTE: The anthropic plugin also registers these — plugin takes priority
+    # at runtime, but these static entries ensure costs work without the plugin.
    (
        "anthropic",
        "claude-opus-4-7",
@@ -111,7 +141,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
        pricing_version="anthropic-pricing-2026-05",
    ),
-    # ── Anthropic Claude 4.6 ─────────────────────────────────────────────
    (
        "anthropic",
        "claude-opus-4-6",
@@ -160,7 +189,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
        pricing_version="anthropic-pricing-2026-05",
    ),
-    # ── Anthropic Claude 4.5 ─────────────────────────────────────────────
    (
        "anthropic",
        "claude-opus-4-5",
@@ -197,7 +225,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
        pricing_version="anthropic-pricing-2026-05",
    ),
-    # ── Anthropic Claude 4 / 4.1 ─────────────────────────────────────────
    (
        "anthropic",
        "claude-opus-4-20250514",
@@ -222,7 +249,56 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
        pricing_version="anthropic-pricing-2026-05",
    ),
-    # OpenAI
+    # ── Anthropic older models (pre-4.5 generation) ────────────────────────
+    (
+        "anthropic",
+        "claude-3-5-sonnet-20241022",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("3.00"),
+        output_cost_per_million=Decimal("15.00"),
+        cache_read_cost_per_million=Decimal("0.30"),
+        cache_write_cost_per_million=Decimal("3.75"),
+        source="official_docs_snapshot",
+        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
+    (
+        "anthropic",
+        "claude-3-5-haiku-20241022",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.80"),
+        output_cost_per_million=Decimal("4.00"),
+        cache_read_cost_per_million=Decimal("0.08"),
+        cache_write_cost_per_million=Decimal("1.00"),
+        source="official_docs_snapshot",
+        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
+    (
+        "anthropic",
+        "claude-3-opus-20240229",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("15.00"),
+        output_cost_per_million=Decimal("75.00"),
+        cache_read_cost_per_million=Decimal("1.50"),
+        cache_write_cost_per_million=Decimal("18.75"),
+        source="official_docs_snapshot",
+        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
+    (
+        "anthropic",
+        "claude-3-haiku-20240307",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.25"),
+        output_cost_per_million=Decimal("1.25"),
+        cache_read_cost_per_million=Decimal("0.03"),
+        cache_write_cost_per_million=Decimal("0.30"),
+        source="official_docs_snapshot",
+        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
+        pricing_version="anthropic-pricing-2026-05",
+    ),
+    # ── OpenAI ────────────────────────────────────────────────────────────
    (
        "openai",
        "gpt-4o",
@@ -300,55 +376,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://openai.com/api/pricing/",
        pricing_version="openai-pricing-2026-03-16",
    ),
-    # ── Anthropic older models (pre-4.5 generation) ────────────────────────
-    (
-        "anthropic",
-        "claude-3-5-sonnet-20241022",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("3.00"),
-        output_cost_per_million=Decimal("15.00"),
-        cache_read_cost_per_million=Decimal("0.30"),
-        cache_write_cost_per_million=Decimal("3.75"),
-        source="official_docs_snapshot",
-        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
-        pricing_version="anthropic-pricing-2026-05",
-    ),
-    (
-        "anthropic",
-        "claude-3-5-haiku-20241022",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("0.80"),
-        output_cost_per_million=Decimal("4.00"),
-        cache_read_cost_per_million=Decimal("0.08"),
-        cache_write_cost_per_million=Decimal("1.00"),
-        source="official_docs_snapshot",
-        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
-        pricing_version="anthropic-pricing-2026-05",
-    ),
-    (
-        "anthropic",
-        "claude-3-opus-20240229",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("15.00"),
-        output_cost_per_million=Decimal("75.00"),
-        cache_read_cost_per_million=Decimal("1.50"),
-        cache_write_cost_per_million=Decimal("18.75"),
-        source="official_docs_snapshot",
-        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
-        pricing_version="anthropic-pricing-2026-05",
-    ),
-    (
-        "anthropic",
-        "claude-3-haiku-20240307",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("0.25"),
-        output_cost_per_million=Decimal("1.25"),
-        cache_read_cost_per_million=Decimal("0.03"),
-        cache_write_cost_per_million=Decimal("0.30"),
-        source="official_docs_snapshot",
-        source_url="https://platform.claude.com/docs/en/about-claude/pricing",
-        pricing_version="anthropic-pricing-2026-05",
-    ),
    # DeepSeek
    (
        "deepseek",
@@ -412,80 +439,6 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://ai.google.dev/pricing",
        pricing_version="google-pricing-2026-03-16",
    ),
-    # AWS Bedrock — pricing per the Bedrock pricing page.
-    # Bedrock charges the same per-token rates as the model provider but
-    # through AWS billing.  These are the on-demand prices (no commitment).
-    # Source: https://aws.amazon.com/bedrock/pricing/
-    (
-        "bedrock",
-        "anthropic.claude-opus-4-6",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("15.00"),
-        output_cost_per_million=Decimal("75.00"),
-        source="official_docs_snapshot",
-        source_url="https://aws.amazon.com/bedrock/pricing/",
-        pricing_version="bedrock-pricing-2026-04",
-    ),
-    (
-        "bedrock",
-        "anthropic.claude-sonnet-4-6",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("3.00"),
-        output_cost_per_million=Decimal("15.00"),
-        source="official_docs_snapshot",
-        source_url="https://aws.amazon.com/bedrock/pricing/",
-        pricing_version="bedrock-pricing-2026-04",
-    ),
-    (
-        "bedrock",
-        "anthropic.claude-sonnet-4-5",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("3.00"),
-        output_cost_per_million=Decimal("15.00"),
-        source="official_docs_snapshot",
-        source_url="https://aws.amazon.com/bedrock/pricing/",
-        pricing_version="bedrock-pricing-2026-04",
-    ),
-    (
-        "bedrock",
-        "anthropic.claude-haiku-4-5",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("0.80"),
-        output_cost_per_million=Decimal("4.00"),
-        source="official_docs_snapshot",
-        source_url="https://aws.amazon.com/bedrock/pricing/",
-        pricing_version="bedrock-pricing-2026-04",
-    ),
-    (
-        "bedrock",
-        "amazon.nova-pro",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("0.80"),
-        output_cost_per_million=Decimal("3.20"),
-        source="official_docs_snapshot",
-        source_url="https://aws.amazon.com/bedrock/pricing/",
-        pricing_version="bedrock-pricing-2026-04",
-    ),
-    (
-        "bedrock",
-        "amazon.nova-lite",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("0.06"),
-        output_cost_per_million=Decimal("0.24"),
-        source="official_docs_snapshot",
-        source_url="https://aws.amazon.com/bedrock/pricing/",
-        pricing_version="bedrock-pricing-2026-04",
-    ),
-    (
-        "bedrock",
-        "amazon.nova-micro",
-    ): PricingEntry(
-        input_cost_per_million=Decimal("0.035"),
-        output_cost_per_million=Decimal("0.14"),
-        source="official_docs_snapshot",
-        source_url="https://aws.amazon.com/bedrock/pricing/",
-        pricing_version="bedrock-pricing-2026-04",
-    ),
    # MiniMax
    (
        "minimax",
@@ -553,36 +506,40 @@ def resolve_billing_route(
    return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")


-def _normalize_anthropic_model_name(model: str) -> str:
-    """Normalize Anthropic model name variants to canonical form.
-
-    Handles:
-      - Dot notation: claude-opus-4.7 → claude-opus-4-7
-      - Short aliases: claude-opus-4.7 → claude-opus-4-7
-      - Strips anthropic/ prefix if present
-    """
-    name = model.lower().strip()
-    if name.startswith("anthropic/"):
-        name = name[len("anthropic/"):]
-    # Normalize dots to dashes in version numbers (e.g. 4.7 → 4-7, 4.6 → 4-6)
-    # But preserve the rest of the name structure
-    name = re.sub(r"(\d+)\.(\d+)", r"\1-\2", name)
-    return name
-
-
 def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]:
+    """Return official-docs pricing for ``route``, or ``None`` if unknown.
+
+    Lookup order: the plugin-registered entry for the lowercased model
+    name, the plugin-registered entry for the provider-normalized name,
+    then the static ``_OFFICIAL_DOCS_PRICING`` table under the lowercased
+    name and finally under the normalized name.
+    """
    model = route.model.lower()
-    # Direct lookup first
+
+    # ── Plugin-registered pricing entries take priority ──
+    from agent.plugin_registries import registries as _preg
+    plugin_entry = _preg.get_pricing_entry(route.provider, model)
+    if plugin_entry:
+        return plugin_entry
+    # Try provider-specific name normalization via registry
+    _norm = _preg.get_provider_service(route.provider, "normalize_model_name")
+    normalized = _norm(model) if _norm is not None else model
+    if normalized != model:
+        plugin_entry = _preg.get_pricing_entry(route.provider, normalized)
+        if plugin_entry:
+            return plugin_entry
+
+    # Fall back to static dict
    entry = _OFFICIAL_DOCS_PRICING.get((route.provider, model))
    if entry:
        return entry
-    # Try normalized name for Anthropic (handles dot-notation like opus-4.7)
-    if route.provider == "anthropic":
-        normalized = _normalize_anthropic_model_name(model)
-        if normalized != model:
-            entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized))
-            if entry:
-                return entry
+    # Retry the static table with the provider-normalized name so
+    # variants such as dot-notation versions (opus-4.7 vs. opus-4-7)
+    # still resolve to a static entry.
+    if normalized != model:
+        entry = _OFFICIAL_DOCS_PRICING.get((route.provider, normalized))
+        if entry:
+            return entry
    return None


@@ -61,14 +61,14 @@ from typing import Any, Dict, List


 class WebSearchProvider(abc.ABC):
-    """Abstract base class for a web search/extract/crawl backend.
+    """Abstract base class for a web search/extract backend.

    Subclasses must implement :meth:`is_available` and at least one of
-    :meth:`search` / :meth:`extract` / :meth:`crawl`. The
-    :meth:`supports_search` / :meth:`supports_extract` / :meth:`supports_crawl`
-    capability flags let the registry route each tool call to the right
-    provider, and let multi-capability providers (Firecrawl, Tavily, Exa,
-    …) advertise multiple capabilities from a single class.
+    :meth:`search` / :meth:`extract`. The :meth:`supports_search` /
+    :meth:`supports_extract` capability flags let the registry route each
+    tool call to the right provider, and let multi-capability providers
+    (Firecrawl, Tavily, Exa, …) advertise multiple capabilities from a
+    single class.
    """

    @property
@@ -113,22 +113,6 @@ class WebSearchProvider(abc.ABC):
        """
        return False

-    def supports_crawl(self) -> bool:
-        """Return True if this provider implements :meth:`crawl`.
-
-        Crawl differs from extract in that the agent provides a *seed URL*
-        and the provider walks linked pages on its own — useful for
-        documentation sites where the agent doesn't know all relevant
-        URLs upfront. Tavily is the only built-in backend that natively
-        crawls today; Firecrawl provides a similar capability that we
-        don't currently surface as a tool.
-
-        Providers that don't crawl should leave this as False; the
-        dispatcher in :func:`tools.web_tools.web_crawl_tool` will fall
-        back to its auxiliary-model summarization path.
-        """
-        return False
-
    def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
        """Execute a web search.

@@ -173,26 +157,6 @@ class WebSearchProvider(abc.ABC):
            f"{self.name} does not support extract (override supports_extract)"
        )

-    def crawl(self, url: str, **kwargs: Any) -> Any:
-        """Crawl a seed URL and return results.
-
-        Override when :meth:`supports_crawl` returns True. The default
-        raises NotImplementedError; callers should gate on
-        :meth:`supports_crawl` before calling.
-
-        Return shape: ``{"results": [{"url": str, "title": str,
-        "content": str, ...}, ...]}`` matching what
-        :func:`tools.web_tools.web_crawl_tool` post-processing expects.
-
-        Implementations MAY be ``async def``.
-
-        ``kwargs`` may carry forward-compat fields (e.g. ``max_depth``,
-        ``include_domains``) — implementations should ignore unknown keys.
-        """
-        raise NotImplementedError(
-            f"{self.name} does not support crawl (override supports_crawl)"
-        )
-
    def get_setup_schema(self) -> Dict[str, Any]:
        """Return provider metadata for the ``hermes tools`` picker.

@@ -11,7 +11,7 @@ Active selection
 ----------------
 The active provider is chosen by configuration with this precedence:

-1. ``web.search_backend`` / ``web.extract_backend`` / ``web.crawl_backend``
+1. ``web.search_backend`` / ``web.extract_backend``
   (per-capability override).
 2. ``web.backend`` (shared fallback).
 3. If exactly one capability-eligible provider is registered AND available,
@@ -24,10 +24,10 @@ The active provider is chosen by configuration with this precedence:
 5. Otherwise ``None`` — the tool surfaces a helpful error pointing at
   ``hermes tools``.

-The capability filter (``supports_search`` / ``supports_extract`` /
-``supports_crawl``) is applied at every step so a search-only provider
-(``brave-free``) configured as ``web.extract_backend`` correctly falls
-through to an extract-capable backend.
+The capability filter (``supports_search`` / ``supports_extract``) is
+applied at every step so a search-only provider (``brave-free``)
+configured as ``web.extract_backend`` correctly falls through to an
+extract-capable backend.
 """

 from __future__ import annotations
@@ -131,7 +131,7 @@ _LEGACY_PREFERENCE = (


 def _resolve(configured: Optional[str], *, capability: str) -> Optional[WebSearchProvider]:
-    """Resolve the active provider for a capability ("search" | "extract" | "crawl").
+    """Resolve the active provider for a capability ("search" | "extract").

    Resolution rules (in order):

@@ -168,8 +168,6 @@ def _resolve(configured: Optional[str], *, capability: str) -> Optional[WebSearc
            return bool(p.supports_search())
        if capability == "extract":
            return bool(p.supports_extract())
-        if capability == "crawl":
-            return bool(p.supports_crawl())
        return False

    def _is_available_safe(p: WebSearchProvider) -> bool:
@@ -241,21 +239,6 @@ def get_active_extract_provider() -> Optional[WebSearchProvider]:
    return _resolve(explicit, capability="extract")


-def get_active_crawl_provider() -> Optional[WebSearchProvider]:
-    """Resolve the currently-active web crawl provider.
-
-    Reads ``web.crawl_backend`` (preferred) or ``web.backend`` (shared
-    fallback) from config.yaml; falls back per the module docstring.
-
-    Crawl is a niche capability — among built-in providers only Tavily and
-    Firecrawl implement it. Callers should expect ``None`` and fall back to
-    a different strategy (e.g. summarize-via-LLM) when neither is
-    configured.
-    """
-    explicit = _read_config_key("web", "crawl_backend") or _read_config_key("web", "backend")
-    return _resolve(explicit, capability="crawl")
-
-
 def _reset_for_tests() -> None:
    """Clear the registry. **Test-only.**"""
    with _lock:
@@ -1,40 +0,0 @@
-# Rust / Cargo
-/src-tauri/target/
-/src-tauri/Cargo.lock
-
-# Vite / build output
-/dist/
-/dist-ssr/
-*.local
-
-# TypeScript build info + tsc emit (we don't ship .js for the
-# vite.config.ts; Vite reads it directly via ts-node-style loader).
-*.tsbuildinfo
-vite.config.d.ts
-vite.config.js
-
-# Tauri generated artifacts (regenerated on each build)
-/src-tauri/gen/schemas/
-
-# Logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-
-# Editor
-.vscode/*
-!.vscode/extensions.json
-.idea/
-.DS_Store
-*.suo
-*.ntvs*
-*.njsproj
-*.sln
-*.sw?
-
-# Node
-node_modules/
-
-# Internal placeholder (re-create if needed)
-.tauri-note
@@ -1,12 +0,0 @@
-<!doctype html>
-<html lang="en" class="h-full">
-  <head>
-    <meta charset="UTF-8" />
-    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Hermes Setup</title>
-  </head>
-  <body class="h-full antialiased">
-    <div id="root" class="h-full"></div>
-    <script type="module" src="/src/main.tsx"></script>
-  </body>
-</html>
@@ -1,46 +0,0 @@
-{
-  "name": "@hermes/bootstrap-installer",
-  "private": true,
-  "version": "0.0.1",
-  "description": "Hermes Setup — signed installer that drives scripts/install.ps1 with a polished native UI.",
-  "type": "module",
-  "scripts": {
-    "dev": "vite --host 127.0.0.1 --port 5175",
-    "build": "tsc -b && vite build",
-    "preview": "vite preview",
-    "tauri": "tauri",
-    "tauri:dev": "tauri dev",
-    "tauri:build": "tauri build",
-    "tauri:build:debug": "tauri build --debug"
-  },
-  "dependencies": {
-    "@nous-research/ui": "0.16.0",
-    "@tailwindcss/vite": "^4.2.1",
-    "@tailwindcss/typography": "^0.5.19",
-    "@tauri-apps/api": "^2.0.0",
-    "@tauri-apps/plugin-dialog": "^2.0.0",
-    "@tauri-apps/plugin-opener": "^2.0.0",
-    "@tauri-apps/plugin-process": "^2.0.0",
-    "@tauri-apps/plugin-shell": "^2.0.0",
-    "@vscode/codicons": "^0.0.45",
-    "class-variance-authority": "^0.7.1",
-    "clsx": "^2.1.1",
-    "katex": "^0.16.45",
-    "lucide-react": "^0.577.0",
-    "nanostores": "^1.3.0",
-    "radix-ui": "^1.4.3",
-    "react": "^19.2.4",
-    "react-dom": "^19.2.4",
-    "tailwind-merge": "^3.5.0",
-    "tailwindcss": "^4.2.1",
-    "tw-shimmer": "^0.4.11"
-  },
-  "devDependencies": {
-    "@tauri-apps/cli": "^2.0.0",
-    "@types/react": "^19.2.14",
-    "@types/react-dom": "^19.2.3",
-    "@vitejs/plugin-react": "^5.2.0",
-    "typescript": "~5.9.3",
-    "vite": "^7.3.1"
-  }
-}
@@ -1,75 +0,0 @@
-[package]
-name = "hermes-bootstrap"
-version = "0.0.1"
-description = "Hermes Setup — signed installer that drives scripts/install.ps1"
-authors = ["Nous Research <info@nousresearch.com>"]
-edition = "2021"
-rust-version = "1.77"
-
-# Rename the output binary so the distributed artifact is literally
-# `Hermes-Setup.exe` on disk — not `hermes-bootstrap.exe`. Grandma sees
-# what we hand her, period. Tauri honors [[bin]] over [package].name
-# for the produced executable name.
-[[bin]]
-name = "Hermes-Setup"
-path = "src/main.rs"
-
-# The library target name MUST match the `withGlobalTauri` binding name that
-# tauri.conf.json's `app.windows[].label` references. We don't ship a separate
-# lib for now; everything is in src/.
-[lib]
-name = "hermes_bootstrap_lib"
-crate-type = ["staticlib", "cdylib", "rlib"]
-
-[build-dependencies]
-tauri-build = { version = "2", features = [] }
-
-[dependencies]
-# Tauri runtime + plugins
-tauri = { version = "2", features = [] }
-tauri-plugin-dialog = "2"
-tauri-plugin-opener = "2"
-tauri-plugin-process = "2"
-tauri-plugin-shell = "2"
-
-# Async + IO
-tokio = { version = "1", features = ["full"] }
-futures = "0.3"
-
-# Serialization
-serde = { version = "1", features = ["derive"] }
-serde_json = "1"
-
-# HTTP — rustls so we don't need OpenSSL on the build box
-reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "stream"] }
-
-# Logging — emitted to a file under HERMES_HOME/logs/ and (optionally) the
-# webview console via Tauri's event channel.
-tracing = "0.1"
-tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] }
-tracing-appender = "0.2"
-
-# Paths + utils
-dirs = "5"
-which = "6"
-anyhow = "1"
-thiserror = "1"
-once_cell = "1"
-uuid = { version = "1", features = ["v4"] }
-
-# Process control on Windows (CREATE_NO_WINDOW etc.)
-[target.'cfg(windows)'.dependencies]
-windows-sys = { version = "0.59", features = [
-    "Win32_Foundation",
-    "Win32_System_Threading",
-    "Win32_System_Console",
-    "Win32_UI_WindowsAndMessaging",
-] }
-
-[profile.release]
-# A 5-10MB signed installer is the goal. LTO + size-opt + single codegen unit.
-panic = "abort"
-codegen-units = 1
-lto = true
-opt-level = "s"
-strip = true
@@ -1,150 +0,0 @@
-use std::process::Command;
-
-fn main() {
-    // -----------------------------------------------------------------
-    // Bake the install.ps1 pin into the binary at compile time.
-    //
-    // BUILD_PIN_COMMIT and BUILD_PIN_BRANCH are read by bootstrap.rs's
-    // `option_env!()` macro to default the install-script reference.
-    // Precedence (matches install.ps1's own arg precedence): commit > branch.
-    //
-    // Resolution order:
-    //   1. Env var override at build time (HERMES_BUILD_PIN_COMMIT, etc.).
-    //      Useful for CI builds that want to pin to a tagged release SHA
-    //      rather than whatever the checkout's HEAD happens to be.
-    //   2. `git rev-parse HEAD` + `git rev-parse --abbrev-ref HEAD` against
-    //      the repo this build.rs lives in. Default for `cargo tauri build`
-    //      from a dev machine — pins the produced .exe to your current
-    //      checkout state.
-    //   3. Last-resort fallback: hardcoded `main` branch, no commit. The
-    //      installer will fetch HEAD-of-main at runtime. Used when the
-    //      build is happening outside a git checkout (e.g. cargo install
-    //      from a packaged crate, unlikely for this binary but defensive).
-    //
-    // Build script reruns on git HEAD change so a new commit triggers
-    // a rebuild without `cargo clean`.
-    // -----------------------------------------------------------------
-
-    let commit = resolve_commit_pin();
-    let branch = resolve_branch_pin();
-
-    if let Some(c) = &commit {
-        println!("cargo:rustc-env=BUILD_PIN_COMMIT={c}");
-        println!("cargo:warning=hermes-bootstrap: pinning to commit {}", short(c));
-    }
-    if let Some(b) = &branch {
-        println!("cargo:rustc-env=BUILD_PIN_BRANCH={b}");
-        println!("cargo:warning=hermes-bootstrap: pinning to branch {b}");
-    }
-    if commit.is_none() && branch.is_none() {
-        // Fail loudly rather than silently produce a binary that errors
-        // at runtime with "no install-script pin supplied". A build that
-        // can't resolve a pin almost certainly indicates a misconfigured
-        // build environment.
-        println!(
-            "cargo:warning=hermes-bootstrap: no pin resolved at build time; binary will fail at runtime without HERMES_SETUP_DEV_REPO_ROOT or runtime args"
-        );
-    }
-
-    // Rerun build.rs when HEAD moves so successive builds pick up new
-    // commits without needing `cargo clean`. .git/HEAD changes on every
-    // commit / branch switch / rebase.
-    let git_dir = locate_git_dir();
-    if let Some(gd) = &git_dir {
-        println!("cargo:rerun-if-changed={}/HEAD", gd.display());
-        // .git/HEAD often points at a ref (e.g. `ref: refs/heads/bb/gui`);
-        // also watch the ref itself so a new commit on the same branch
-        // re-triggers.
-        if let Ok(head) = std::fs::read_to_string(gd.join("HEAD")) {
-            if let Some(rest) = head.trim().strip_prefix("ref: ") {
-                println!("cargo:rerun-if-changed={}/{}", gd.display(), rest);
-            }
-        }
-    }
-    println!("cargo:rerun-if-env-changed=HERMES_BUILD_PIN_COMMIT");
-    println!("cargo:rerun-if-env-changed=HERMES_BUILD_PIN_BRANCH");
-
-    // -----------------------------------------------------------------
-    // Tauri windows manifest. See hermes-setup.manifest for rationale —
-    // declares level="asInvoker" so Windows's installer-detection
-    // heuristic doesn't refuse to launch us without UAC elevation.
-    // -----------------------------------------------------------------
-    #[cfg(target_os = "windows")]
-    let attrs = {
-        let manifest = include_str!("hermes-setup.manifest");
-        let win = tauri_build::WindowsAttributes::new().app_manifest(manifest);
-        tauri_build::Attributes::new().windows_attributes(win)
-    };
-
-    #[cfg(not(target_os = "windows"))]
-    let attrs = tauri_build::Attributes::new();
-
-    tauri_build::try_build(attrs).expect("failed to run tauri-build");
-}
-
-fn resolve_commit_pin() -> Option<String> {
-    if let Ok(v) = std::env::var("HERMES_BUILD_PIN_COMMIT") {
-        if !v.trim().is_empty() {
-            return Some(v.trim().to_string());
-        }
-    }
-    let out = Command::new("git")
-        .args(["rev-parse", "HEAD"])
-        .output()
-        .ok()?;
-    if !out.status.success() {
-        return None;
-    }
-    let s = String::from_utf8(out.stdout).ok()?.trim().to_string();
-    if s.is_empty() {
-        None
-    } else {
-        Some(s)
-    }
-}
-
-fn resolve_branch_pin() -> Option<String> {
-    if let Ok(v) = std::env::var("HERMES_BUILD_PIN_BRANCH") {
-        if !v.trim().is_empty() {
-            return Some(v.trim().to_string());
-        }
-    }
-    let out = Command::new("git")
-        .args(["rev-parse", "--abbrev-ref", "HEAD"])
-        .output()
-        .ok()?;
-    if !out.status.success() {
-        return None;
-    }
-    let s = String::from_utf8(out.stdout).ok()?.trim().to_string();
-    // "HEAD" is what you get on a detached checkout — no meaningful branch
-    // to pin to. The commit pin still applies; just don't emit a branch.
-    if s.is_empty() || s == "HEAD" {
-        None
-    } else {
-        Some(s)
-    }
-}
-
-fn locate_git_dir() -> Option<std::path::PathBuf> {
-    let out = Command::new("git")
-        .args(["rev-parse", "--git-dir"])
-        .output()
-        .ok()?;
-    if !out.status.success() {
-        return None;
-    }
-    let s = String::from_utf8(out.stdout).ok()?.trim().to_string();
-    if s.is_empty() {
-        return None;
-    }
-    Some(std::path::PathBuf::from(s))
-}
-
-fn short(commit: &str) -> &str {
-    if commit.len() >= 12 {
-        &commit[..12]
-    } else {
-        commit
-    }
-}
@@ -1,16 +0,0 @@
-{
-  "$schema": "https://schema.tauri.app/config/2/capability",
-  "identifier": "default",
-  "description": "Capabilities required by Hermes Setup. Narrowly scoped: we don't write user files outside HERMES_HOME, we don't read arbitrary paths, and the only external network call goes through reqwest (Rust side, not exposed to the webview).",
-  "windows": ["main"],
-  "permissions": [
-    "core:default",
-    "core:window:allow-close",
-    "core:window:allow-minimize",
-    "core:event:default",
-    "opener:default",
-    "dialog:default",
-    "process:default",
-    "shell:default"
-  ]
-}
@@ -1,75 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<!--
-    Hermes Setup application manifest.
-
-    The TL;DR: tell Windows we are NOT an installer in the classic "needs
-    UAC elevation" sense, despite the product name. We provision into
-    %LOCALAPPDATA%\hermes which is user-scoped and never touch HKLM or
-    Program Files. install.ps1 runs as a child process and elevates
-    itself only if a future stage explicitly needs HKLM access.
-
-    Without this manifest, the "Hermes Setup" productName embedded in
-    the binary's resource trips Windows's installer-detection heuristic
-    (https://learn.microsoft.com/en-us/windows/security/identity-protection/
-    user-account-control/how-user-account-control-works#installer-detection)
-    and CreateProcess fails with ERROR_ELEVATION_REQUIRED (740) when the
-    user double-clicks. asInvoker disables that.
-->
-<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
-    <assemblyIdentity
-        version="0.0.1.0"
-        processorArchitecture="*"
-        name="NousResearch.Hermes.Setup"
-        type="win32"
-    />
-    <description>Hermes Setup</description>
-
-    <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
-        <security>
-            <requestedPrivileges>
-                <requestedExecutionLevel level="asInvoker" uiAccess="false"/>
-            </requestedPrivileges>
-        </security>
-    </trustInfo>
-
-    <!-- Tell Windows we know about all supported OSes (10 + 11) so it
-         doesn't shim us into Vista-compat mode. -->
-    <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
-        <application>
-            <!-- Windows 10 / 11 -->
-            <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
-            <!-- Windows 8.1 -->
-            <supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
-            <!-- Windows 8 -->
-            <supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
-            <!-- Windows 7 -->
-            <supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
-            <!-- Windows Vista -->
-            <supportedOS Id="{e2011457-1546-43c5-a5fe-008deee3d3f0}"/>
-        </application>
-    </compatibility>
-
-    <!-- Per-monitor v2 DPI awareness so the installer doesn't go blurry
-         on high-DPI displays when dragged between monitors. -->
-    <application xmlns="urn:schemas-microsoft-com:asm.v3">
-        <windowsSettings>
-            <dpiAwareness xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">PerMonitorV2</dpiAwareness>
-            <activeCodePage xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">UTF-8</activeCodePage>
-        </windowsSettings>
-    </application>
-
-    <!-- Use the modern common controls (v6 themes). Without this, our
-         file picker / shell dialogs fall back to 1990s-era visuals. -->
-    <dependency>
-        <dependentAssembly>
-            <assemblyIdentity
-                type="win32"
-                name="Microsoft.Windows.Common-Controls"
-                version="6.0.0.0"
-                processorArchitecture="*"
-                publicKeyToken="6595b64144ccf1df"
-                language="*"
-            />
-        </dependentAssembly>
-    </dependency>
-</assembly>
@@ -1,700 +0,0 @@
-//! Bootstrap orchestration.
-//!
-//! Direct port of `runBootstrap` from `apps/desktop/electron/bootstrap-runner.cjs`.
-//! Drives install.ps1 / install.sh stage-by-stage, emits progress events
-//! over the Tauri `bootstrap` channel, writes a forensic log to
-//! HERMES_HOME/logs/bootstrap-<timestamp>.log.
-//!
-//! Lifecycle:
-//!   1. `start_bootstrap` (Tauri command) → spawns the worker task.
-//!   2. Worker resolves install script (dev/cache/download).
-//!   3. Worker calls `install.ps1 -Manifest` → emits `manifest` event.
-//!   4. Worker iterates stages, calling `install.ps1 -Stage NAME -NonInteractive -Json`.
-//!   5. On success → `complete`. On any stage failure → `failed`. On cancel → `failed`.
-
-use std::path::PathBuf;
-use std::sync::Arc;
-use std::time::Instant;
-
-use anyhow::{anyhow, Result};
-use serde::{Deserialize, Serialize};
-use tauri::{AppHandle, Emitter, State};
-use tokio::sync::{mpsc, Mutex};
-
-use crate::events::{BootstrapEvent, Manifest, StageState};
-use crate::install_script::{self, Pin, ScriptKind, ScriptSource};
-use crate::powershell::{self, StreamSink};
-use crate::AppState;
-
-// ---------------------------------------------------------------------------
-// Public Tauri commands
-// ---------------------------------------------------------------------------
-
-/// Frontend → Rust: kick off the install.
-#[derive(Debug, Deserialize)]
-pub struct StartBootstrapArgs {
-    /// Optional override for the commit pin. Defaults to the build-time
-    /// pin baked in via `BUILD_PIN_COMMIT`.
-    pub commit: Option<String>,
-    /// Optional override for the branch pin. Defaults to `BUILD_PIN_BRANCH`.
-    pub branch: Option<String>,
-    /// Include Stage-Desktop (build apps/desktop) in the manifest. The
-    /// signed bootstrap installer passes true; the deprecated Electron-side
-    /// bootstrap-runner passes false to avoid building-while-running.
-    #[serde(default = "default_true")]
-    pub include_desktop: bool,
-    /// Optional override for HERMES_HOME. Tests use this; production
-    /// almost always falls back to the OS default.
-    pub hermes_home: Option<String>,
-}
-
-fn default_true() -> bool {
-    true
-}
-
-#[derive(Debug, Serialize)]
-pub struct BootstrapStatus {
-    pub running: bool,
-    pub completed: bool,
-    pub install_root: Option<String>,
-    pub last_error: Option<String>,
-}
-
-/// Handle stored in AppState while a bootstrap run is in flight. Carries
-/// the cancellation channel and the most recent terminal status so the
-/// frontend can re-query after a window refresh.
-pub struct BootstrapHandle {
-    pub cancel_tx: mpsc::Sender<()>,
-    pub started_at: Instant,
-    pub status: BootstrapStatus,
-}
-
-#[tauri::command]
-pub async fn start_bootstrap(
-    app: AppHandle,
-    state: State<'_, Arc<AppState>>,
-    args: StartBootstrapArgs,
-) -> Result<(), String> {
-    let mut guard = state.bootstrap.lock().await;
-    if let Some(h) = guard.as_ref() {
-        if h.status.running {
-            return Err("Bootstrap is already running".into());
-        }
-    }
-
-    let (cancel_tx, cancel_rx) = mpsc::channel::<()>(1);
-    let handle = BootstrapHandle {
-        cancel_tx,
-        started_at: Instant::now(),
-        status: BootstrapStatus {
-            running: true,
-            completed: false,
-            install_root: None,
-            last_error: None,
-        },
-    };
-    *guard = Some(handle);
-    drop(guard);
-
-    let app_for_task = app.clone();
-    let state_for_task = state.inner().clone();
-    let args_for_task = args;
-    let cancel_rx = Arc::new(Mutex::new(Some(cancel_rx)));
-
-    tokio::spawn(async move {
-        let result = run_bootstrap(app_for_task.clone(), args_for_task, cancel_rx).await;
-
-        // Reflect terminal state into AppState so get_bootstrap_status()
-        // can serve it after the task exits.
-        let mut guard = state_for_task.bootstrap.lock().await;
-        if let Some(h) = guard.as_mut() {
-            h.status.running = false;
-            match &result {
-                Ok(install_root) => {
-                    h.status.completed = true;
-                    h.status.install_root = Some(install_root.clone());
-                    h.status.last_error = None;
-                }
-                Err(err) => {
-                    h.status.completed = false;
-                    h.status.last_error = Some(err.to_string());
-                }
-            }
-        }
-    });
-
-    Ok(())
-}
-
-#[tauri::command]
-pub async fn cancel_bootstrap(state: State<'_, Arc<AppState>>) -> Result<(), String> {
-    let guard = state.bootstrap.lock().await;
-    if let Some(h) = guard.as_ref() {
-        let _ = h.cancel_tx.try_send(());
-    }
-    Ok(())
-}
-
-#[tauri::command]
-pub async fn get_bootstrap_status(
-    state: State<'_, Arc<AppState>>,
-) -> Result<BootstrapStatus, String> {
-    let guard = state.bootstrap.lock().await;
-    Ok(match guard.as_ref() {
-        Some(h) => BootstrapStatus {
-            running: h.status.running,
-            completed: h.status.completed,
-            install_root: h.status.install_root.clone(),
-            last_error: h.status.last_error.clone(),
-        },
-        None => BootstrapStatus {
-            running: false,
-            completed: false,
-            install_root: None,
-            last_error: None,
-        },
-    })
-}
-
-/// Spawn the locally-built Hermes desktop binary, then close the installer
-/// window. Caller resolves the binary path from `install_root`.
-///
-/// Returns Err with a human-readable message if the binary doesn't exist
-/// (e.g. when Stage-Desktop was skipped) so the frontend can present
-/// actionable failure UI rather than silently doing nothing.
-#[tauri::command]
-pub async fn launch_hermes_desktop(
-    app: AppHandle,
-    install_root: String,
-) -> Result<(), String> {
-    let install_root = PathBuf::from(install_root);
-    let exe_path = resolve_hermes_desktop_exe(&install_root).ok_or_else(|| {
-        format!(
-            "Couldn't find a built Hermes desktop at {}. The desktop build step \
-             may have been skipped or failed. Run `hermes desktop` from a \
-             terminal to build and launch it.",
-            install_root.join("apps").join("desktop").join("release").display()
-        )
-    })?;
-
-    tracing::info!(?exe_path, "launching Hermes desktop");
-
-    // Detach from us — the installer is about to exit.
-    let mut cmd = tokio::process::Command::new(&exe_path);
-    cmd.current_dir(exe_path.parent().unwrap_or(&install_root));
-    #[cfg(target_os = "windows")]
-    {
-        use std::os::windows::process::CommandExt;
-        // DETACHED_PROCESS = 0x00000008
-        cmd.creation_flags(0x0000_0008);
-    }
-
-    cmd.spawn().map_err(|e| {
-        format!(
-            "failed to launch {}: {e}",
-            exe_path.display()
-        )
-    })?;
-
-    // Give Windows ~150ms to actually start the new process before we exit.
-    tokio::time::sleep(std::time::Duration::from_millis(150)).await;
-
-    // Exit the installer cleanly. Tauri's process plugin gives us the
-    // right hook regardless of platform.
-    app.exit(0);
-    Ok(())
-}
-
-/// Walks the well-known electron-builder unpacked-app paths under
-/// `install_root`. Mirrors the resolver in `cmd_gui` (apps/desktop/release/
-/// <os>-unpacked/<exe>).
-fn resolve_hermes_desktop_exe(install_root: &std::path::Path) -> Option<PathBuf> {
-    let release_dir = install_root.join("apps").join("desktop").join("release");
-    let candidates: &[(&str, &str)] = if cfg!(target_os = "windows") {
-        &[
-            ("win-unpacked", "Hermes.exe"),
-            ("win-arm64-unpacked", "Hermes.exe"),
-        ]
-    } else if cfg!(target_os = "macos") {
-        &[
-            ("mac/Hermes.app/Contents/MacOS", "Hermes"),
-            ("mac-arm64/Hermes.app/Contents/MacOS", "Hermes"),
-        ]
-    } else {
-        &[("linux-unpacked", "hermes")]
-    };
-    for (subdir, exe) in candidates {
-        let p = release_dir.join(subdir).join(exe);
-        if p.exists() {
-            return Some(p);
-        }
-    }
-    None
-}
-
-// ---------------------------------------------------------------------------
-// Bootstrap implementation
-// ---------------------------------------------------------------------------
-
-async fn run_bootstrap(
-    app: AppHandle,
-    args: StartBootstrapArgs,
-    cancel_rx_holder: Arc<Mutex<Option<mpsc::Receiver<()>>>>,
-) -> Result<String> {
-    let kind = ScriptKind::for_current_os();
-
-    let pin = Pin {
-        commit: args.commit.or_else(|| option_env_string("BUILD_PIN_COMMIT")),
-        branch: args.branch.or_else(|| option_env_string("BUILD_PIN_BRANCH")),
-    };
-
-    tracing::info!(
-        ?pin,
-        kind = ?kind,
-        include_desktop = args.include_desktop,
-        "bootstrap starting"
-    );
-
-    let app_for_log = app.clone();
-    let emit_log = move |line: &str| {
-        emit_event(
-            &app_for_log,
-            BootstrapEvent::Log {
-                stage: None,
-                line: line.to_string(),
-            },
-        );
-        // Bump to info-level so the line shows in bootstrap-installer.log
-        // under the default INFO filter. Previously this was debug! which
-        // got dropped on the floor, leaving us blind whenever install.ps1
-        // failed — the log only had the "bootstrap starting" banner.
-        tracing::info!(target: "bootstrap.log", "{line}");
-    };
-
-    // 1. Resolve install.ps1
-    let script = install_script::resolve(kind, &pin, &emit_log)
-        .await
-        .map_err(|e| {
-            let msg = format!("resolve install script failed: {e:#}");
-            emit_event(
-                &app,
-                BootstrapEvent::Failed {
-                    stage: None,
-                    error: msg.clone(),
-                },
-            );
-            anyhow!(msg)
-        })?;
-
-    let source_note = match &script.source {
-        ScriptSource::DevCheckout => "dev checkout",
-        ScriptSource::Bundled => "bundled",
-        ScriptSource::Cached => "cached",
-        ScriptSource::Downloaded => "downloaded",
-    };
-    emit_log(&format!(
-        "[bootstrap] script {} via {}",
-        script.path.display(),
-        source_note
-    ));
-
-    // 2. Fetch manifest
-    //
-    // -IncludeDesktop MUST be passed to the manifest call too — install.ps1
-    // gates the desktop stage inclusion on this flag, so without it here
-    // the manifest comes back missing the desktop stage and we never run
-    // it. The per-stage call below also passes -IncludeDesktop to keep
-    // the contracts identical.
-    let manifest_args = build_pin_args(&script);
-    let mut manifest_args_full = vec!["-Manifest".to_string()];
-    manifest_args_full.extend(manifest_args.clone());
-    if args.include_desktop {
-        manifest_args_full.push("-IncludeDesktop".to_string());
-    }
-
-    let manifest_result = run_install_script(
-        &app,
-        &script.path,
-        &manifest_args_full,
-        args.hermes_home.as_deref(),
-        None,
-        Some("__manifest__".to_string()),
-    )
-    .await?;
-
-    if manifest_result.exit_code != Some(0) {
-        let err = format!(
-            "install.ps1 -Manifest failed: exit {:?}\n{}",
-            manifest_result.exit_code,
-            manifest_result.stderr.trim()
-        );
-        emit_event(
-            &app,
-            BootstrapEvent::Failed {
-                stage: None,
-                error: err.clone(),
-            },
-        );
-        return Err(anyhow!(err));
-    }
-
-    let manifest: Manifest = powershell::parse_manifest(&manifest_result.stdout).ok_or_else(|| {
-        let err = format!(
-            "install.ps1 -Manifest produced no parseable JSON payload\n{}",
-            truncate(&manifest_result.stdout, 4000)
-        );
-        emit_event(
-            &app,
-            BootstrapEvent::Failed {
-                stage: None,
-                error: err.clone(),
-            },
-        );
-        anyhow!(err)
-    })?;
-
-    emit_event(
-        &app,
-        BootstrapEvent::Manifest {
-            stages: manifest.stages.clone(),
-            protocol_version: manifest.protocol_version,
-        },
-    );
-
-    // 3. Iterate stages.
-    for stage in &manifest.stages {
-        // Skip Stage-Desktop unless explicitly requested. install.ps1 may
-        // or may not include it in the manifest depending on the flag we
-        // pass, but if it slipped in, gate client-side too.
-        if !args.include_desktop && stage.name.eq_ignore_ascii_case("desktop") {
-            emit_event(
-                &app,
-                BootstrapEvent::Stage {
-                    name: stage.name.clone(),
-                    state: StageState::Skipped,
-                    duration_ms: Some(0),
-                    result: None,
-                    error: Some("skipped by include_desktop=false".into()),
-                },
-            );
-            continue;
-        }
-
-        if cancellation_signalled(&cancel_rx_holder).await {
-            let err = "bootstrap cancelled by user".to_string();
-            emit_event(
-                &app,
-                BootstrapEvent::Failed {
-                    stage: Some(stage.name.clone()),
-                    error: err.clone(),
-                },
-            );
-            return Err(anyhow!(err));
-        }
-
-        let started = Instant::now();
-        emit_event(
-            &app,
-            BootstrapEvent::Stage {
-                name: stage.name.clone(),
-                state: StageState::Running,
-                duration_ms: None,
-                result: None,
-                error: None,
-            },
-        );
-
-        let mut stage_args = vec![
-            "-Stage".to_string(),
-            stage.name.clone(),
-            "-NonInteractive".to_string(),
-            "-Json".to_string(),
-        ];
-        stage_args.extend(manifest_args.clone());
-        if args.include_desktop {
-            stage_args.push("-IncludeDesktop".to_string());
-        }
-
-        // Each stage gets its own cancel receiver because tokio::select!
-        // in run_script consumes it. Take/return through the Arc<Mutex>.
-        let local_cancel_rx = cancel_rx_holder.lock().await.take();
-
-        let stage_result = run_install_script(
-            &app,
-            &script.path,
-            &stage_args,
-            args.hermes_home.as_deref(),
-            local_cancel_rx,
-            Some(stage.name.clone()),
-        )
-        .await?;
-
-        let duration_ms = started.elapsed().as_millis() as u64;
-
-        if stage_result.killed {
-            emit_event(
-                &app,
-                BootstrapEvent::Stage {
-                    name: stage.name.clone(),
-                    state: StageState::Failed,
-                    duration_ms: Some(duration_ms),
-                    result: None,
-                    error: Some("cancelled by user".into()),
-                },
-            );
-            emit_event(
-                &app,
-                BootstrapEvent::Failed {
-                    stage: Some(stage.name.clone()),
-                    error: "cancelled by user".into(),
-                },
-            );
-            return Err(anyhow!("cancelled by user"));
-        }
-
-        let result_frame = powershell::parse_stage_result(&stage_result.stdout);
-
-        match result_frame {
-            None => {
-                let err = format!(
-                    "install.ps1 -Stage {} produced no JSON result frame (exit={:?})",
-                    stage.name, stage_result.exit_code
-                );
-                emit_event(
-                    &app,
-                    BootstrapEvent::Stage {
-                        name: stage.name.clone(),
-                        state: StageState::Failed,
-                        duration_ms: Some(duration_ms),
-                        result: None,
-                        error: Some(err.clone()),
-                    },
-                );
-                emit_event(
-                    &app,
-                    BootstrapEvent::Failed {
-                        stage: Some(stage.name.clone()),
-                        error: err.clone(),
-                    },
-                );
-                return Err(anyhow!(err));
-            }
-            Some(frame) if frame.ok && frame.skipped => {
-                emit_event(
-                    &app,
-                    BootstrapEvent::Stage {
-                        name: stage.name.clone(),
-                        state: StageState::Skipped,
-                        duration_ms: Some(duration_ms),
-                        result: Some(frame),
-                        error: None,
-                    },
-                );
-            }
-            Some(frame) if frame.ok => {
-                emit_event(
-                    &app,
-                    BootstrapEvent::Stage {
-                        name: stage.name.clone(),
-                        state: StageState::Succeeded,
-                        duration_ms: Some(duration_ms),
-                        result: Some(frame),
-                        error: None,
-                    },
-                );
-            }
-            Some(frame) => {
-                let err = frame
-                    .reason
-                    .clone()
-                    .unwrap_or_else(|| format!("exit code {:?}", stage_result.exit_code));
-                emit_event(
-                    &app,
-                    BootstrapEvent::Stage {
-                        name: stage.name.clone(),
-                        state: StageState::Failed,
-                        duration_ms: Some(duration_ms),
-                        result: Some(frame),
-                        error: Some(err.clone()),
-                    },
-                );
-                emit_event(
-                    &app,
-                    BootstrapEvent::Failed {
-                        stage: Some(stage.name.clone()),
-                        error: err.clone(),
-                    },
-                );
-                return Err(anyhow!(err));
-            }
-        }
-    }
-
-    // 4. Resolve install_root. install.ps1 doesn't (yet) report this back
-    // explicitly; we infer it from $HermesHome which Stage-Repository clones
-    // the repo INTO at $HermesHome\hermes-agent. Mirrors hermes_constants.
-    let hermes_home = args
-        .hermes_home
-        .clone()
-        .unwrap_or_else(|| crate::paths::hermes_home().to_string_lossy().into_owned());
-    let install_root = PathBuf::from(&hermes_home).join("hermes-agent");
-
-    emit_event(
-        &app,
-        BootstrapEvent::Complete {
-            install_root: install_root.to_string_lossy().into_owned(),
-            marker: Some(serde_json::json!({
-                "pinnedCommit": pin.commit,
-                "pinnedBranch": pin.branch,
-            })),
-        },
-    );
-
-    Ok(install_root.to_string_lossy().into_owned())
-}
-
-async fn cancellation_signalled(holder: &Arc<Mutex<Option<mpsc::Receiver<()>>>>) -> bool {
-    let mut guard = holder.lock().await;
-    if let Some(rx) = guard.as_mut() {
-        rx.try_recv().is_ok()
-    } else {
-        false
-    }
-}
-
-async fn run_install_script(
-    app: &AppHandle,
-    script_path: &std::path::Path,
-    args: &[String],
-    hermes_home_override: Option<&str>,
-    cancel_rx: Option<mpsc::Receiver<()>>,
-    stage_name: Option<String>,
-) -> Result<powershell::ScriptResult> {
-    let app_for_stdout = app.clone();
-    let stage_for_stdout = stage_name.clone();
-    let app_for_stderr = app.clone();
-    let stage_for_stderr = stage_name.clone();
-    let stage_for_stdout_log = stage_name.clone();
-    let stage_for_stderr_log = stage_name.clone();
-
-    let sink = StreamSink {
-        on_stdout_line: Box::new(move |line: &str| {
-            emit_event(
-                &app_for_stdout,
-                BootstrapEvent::Log {
-                    stage: stage_for_stdout.clone(),
-                    line: line.to_string(),
-                },
-            );
-            // Tee to the rolling installer log so we have a persistent
-            // record of every install.ps1 line. Without this, the only
-            // log evidence of a failure was the Tauri event stream —
-            // which gets discarded the moment the failure route mounts.
-            match &stage_for_stdout_log {
-                Some(name) => {
-                    tracing::info!(target: "bootstrap.log", stage = %name, "{line}")
-                }
-                None => tracing::info!(target: "bootstrap.log", "{line}"),
-            }
-        }),
-        on_stderr_line: Box::new(move |line: &str| {
-            emit_event(
-                &app_for_stderr,
-                BootstrapEvent::Log {
-                    stage: stage_for_stderr.clone(),
-                    line: format!("stderr: {line}"),
-                },
-            );
-            // stderr-level lines get warn! so they're visually distinct
-            // when scrolling through the log later.
-            match &stage_for_stderr_log {
-                Some(name) => {
-                    tracing::warn!(target: "bootstrap.log", stage = %name, "stderr: {line}")
-                }
-                None => tracing::warn!(target: "bootstrap.log", "stderr: {line}"),
-            }
-        }),
-    };
-
-    powershell::run_script(script_path, args, sink, hermes_home_override, cancel_rx)
-        .await
-        .map_err(|e| {
-            tracing::error!(?e, "install script invocation failed");
-            anyhow!("install script invocation failed: {e:#}")
-        })
-}
-
-fn build_pin_args(script: &install_script::ResolvedScript) -> Vec<String> {
-    let mut out = Vec::new();
-    if let Some(c) = &script.commit {
-        out.push("-Commit".to_string());
-        out.push(c.clone());
-    }
-    if let Some(b) = &script.branch {
-        out.push("-Branch".to_string());
-        out.push(b.clone());
-    }
-    out
-}
-
-fn emit_event(app: &AppHandle, event: BootstrapEvent) {
-    // Tee important state transitions to the rolling installer log so
-    // bootstrap-installer.log isn't just "starting" + final summary.
-    // Log lines (the noisy stuff) handle their own tracing in
-    // run_install_script's sink; here we cover the lifecycle frames.
-    match &event {
-        BootstrapEvent::Manifest { stages, .. } => {
-            tracing::info!(
-                stage_count = stages.len(),
-                names = ?stages.iter().map(|s| s.name.as_str()).collect::<Vec<_>>(),
-                "manifest received"
-            );
-        }
-        BootstrapEvent::Stage {
-            name,
-            state,
-            duration_ms,
-            error,
-            ..
-        } => {
-            tracing::info!(
-                stage = %name,
-                ?state,
-                duration_ms = ?duration_ms,
-                error = ?error,
-                "stage transition"
-            );
-        }
-        BootstrapEvent::Complete { install_root, .. } => {
-            tracing::info!(install_root = %install_root, "bootstrap complete");
-        }
-        BootstrapEvent::Failed { stage, error } => {
-            tracing::error!(stage = ?stage, error = %error, "bootstrap FAILED");
-        }
-        BootstrapEvent::Log { .. } => {
-            // Log lines are teed via the sink callbacks in
-            // run_install_script — don't double-emit here.
-        }
-    }
-    if let Err(e) = app.emit(BootstrapEvent::CHANNEL, &event) {
-        tracing::warn!(?e, "failed to emit bootstrap event");
-    }
-}
-
-fn option_env_string(key: &str) -> Option<String> {
-    // option_env! only accepts literals, so we hardcode the known keys.
-    let val = match key {
-        "BUILD_PIN_COMMIT" => option_env!("BUILD_PIN_COMMIT"),
-        "BUILD_PIN_BRANCH" => option_env!("BUILD_PIN_BRANCH"),
-        _ => None,
-    };
-    val.map(|s| s.to_string())
-}
-
-fn truncate(s: &str, max: usize) -> String {
-    if s.len() <= max {
-        s.to_string()
-    } else {
-        format!("{}...", &s[..max])
-    }
-}
@@ -1,99 +0,0 @@
-//! Event types streamed from Rust → React.
-//!
-//! These mirror `apps/desktop/electron/bootstrap-runner.cjs`'s event shape
-//! 1:1 so the React installer code can be roughly identical to the Electron
-//! install-overlay we'll replace.
-//!
-//! The Tauri event channel name is `"bootstrap"` for all of these — the
-//! `type` discriminator on each payload is how the frontend routes.
-
-use serde::{Deserialize, Serialize};
-
-/// Stage definition as reported by `install.ps1 -Manifest`.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct StageInfo {
-    pub name: String,
-    pub title: String,
-    pub category: String,
-    /// `needs_user_input=true` stages run with -NonInteractive and emit
-    /// skipped=true; the post-install wizard takes over for those.
-    #[serde(rename = "needs_user_input", alias = "needsUserInput")]
-    pub needs_user_input: bool,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Manifest {
-    pub stages: Vec<StageInfo>,
-    #[serde(rename = "protocol_version", alias = "protocolVersion", default)]
-    pub protocol_version: Option<u32>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct StageResultPayload {
-    pub stage: String,
-    pub ok: bool,
-    #[serde(default)]
-    pub skipped: bool,
-    #[serde(default)]
-    pub reason: Option<String>,
-    /// install.ps1 may attach stage-specific structured data here.
-    #[serde(default)]
-    pub data: Option<serde_json::Value>,
-}
-
-/// Run-state for a single stage as we transition through it.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum StageState {
-    Running,
-    Succeeded,
-    Skipped,
-    Failed,
-}
-
-/// The single event channel `bootstrap` emits these. `type` discriminates.
-#[derive(Debug, Clone, Serialize)]
-#[serde(tag = "type", rename_all = "lowercase")]
-pub enum BootstrapEvent {
-    /// Sent once at the start with the full stage list.
-    Manifest {
-        stages: Vec<StageInfo>,
-        #[serde(rename = "protocolVersion")]
-        protocol_version: Option<u32>,
-    },
-    /// Stage state transition. `result` populated only on terminal states.
-    Stage {
-        name: String,
-        state: StageState,
-        #[serde(rename = "durationMs", skip_serializing_if = "Option::is_none")]
-        duration_ms: Option<u64>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        result: Option<StageResultPayload>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        error: Option<String>,
-    },
-    /// Raw stdout/stderr line from install.ps1 (or our wrapper).
-    Log {
-        #[serde(skip_serializing_if = "Option::is_none")]
-        stage: Option<String>,
-        line: String,
-    },
-    /// Sent once when all stages complete successfully.
-    Complete {
-        #[serde(rename = "installRoot")]
-        install_root: String,
-        marker: Option<serde_json::Value>,
-    },
-    /// Sent once if the run aborts.
-    Failed {
-        #[serde(skip_serializing_if = "Option::is_none")]
-        stage: Option<String>,
-        error: String,
-    },
-}
-
-impl BootstrapEvent {
-    /// Tauri event name. Single channel for all bootstrap events; the
-    /// `type` tag tells the renderer how to interpret the payload.
-    pub const CHANNEL: &'static str = "bootstrap";
-}
@@ -1,273 +0,0 @@
-//! Resolves and downloads `scripts/install.ps1` (and `install.sh`).
-//!
-//! Resolution order:
-//!   1. Dev shortcut: a sibling repo checkout via $HERMES_SETUP_DEV_REPO_ROOT
-//!      env var. Lets devs iterate without re-publishing the script.
-//!   2. Bundled fallback: if the installer was bundled with a script (e.g.
-//!      tauri's `resource` mechanism), serve from there. Not used today.
-//!   3. Network: download from GitHub raw at a pinned commit or branch.
-//!      Commit pins are immutable; branch pins are HEAD-tracking.
-//!
-//! Mirrors `apps/desktop/electron/bootstrap-runner.cjs`'s `resolveInstallScript`,
-//! but the dev-checkout resolution is driven by an env var rather than the
-//! Electron app's APP_ROOT/../.. trick, because Hermes-Setup.exe is meant
-//! to live OUTSIDE any repo checkout.
-
-use anyhow::{anyhow, Context, Result};
-use std::path::{Path, PathBuf};
-use tokio::io::AsyncWriteExt;
-
-use crate::paths;
-
-/// Identity of the install.ps1 we'll execute. Used by both the manifest
-/// fetch and the per-stage runs.
-#[derive(Debug, Clone)]
-pub struct ResolvedScript {
-    pub path: PathBuf,
-    pub source: ScriptSource,
-    /// Commit pin (40-char SHA) if known. install.ps1's `-Commit` arg is
-    /// what makes the repo stage clone the exact tested SHA.
-    pub commit: Option<String>,
-    pub branch: Option<String>,
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum ScriptSource {
-    DevCheckout,
-    Bundled,
-    Cached,
-    Downloaded,
-}
-
-/// What flavor of script (Windows .ps1 vs Unix .sh).
-#[derive(Debug, Clone, Copy)]
-pub enum ScriptKind {
-    Ps1,
-    Sh,
-}
-
-impl ScriptKind {
-    pub fn for_current_os() -> Self {
-        if cfg!(target_os = "windows") {
-            Self::Ps1
-        } else {
-            Self::Sh
-        }
-    }
-
-    fn filename(&self) -> &'static str {
-        match self {
-            Self::Ps1 => "install.ps1",
-            Self::Sh => "install.sh",
-        }
-    }
-}
-
-/// Validates a string looks like a git SHA (7+ hex chars). Mirrors
-/// `STAMP_COMMIT_RE` from bootstrap-runner.cjs.
-fn is_valid_commit(s: &str) -> bool {
-    let len = s.len();
-    (7..=40).contains(&len) && s.chars().all(|c| c.is_ascii_hexdigit())
-}
-
-/// Resolves the install script to use for this run.
-///
-/// `pin` is the commit-or-branch from either Hermes-Setup's build-time
-/// constant (compiled into the installer) or a runtime override.
-pub async fn resolve(
-    kind: ScriptKind,
-    pin: &Pin,
-    emit_log: &impl Fn(&str),
-) -> Result<ResolvedScript> {
-    // 1. Dev shortcut.
-    if let Ok(repo_root) = std::env::var("HERMES_SETUP_DEV_REPO_ROOT") {
-        let candidate = PathBuf::from(repo_root).join("scripts").join(kind.filename());
-        if candidate.exists() {
-            emit_log(&format!(
-                "[bootstrap] dev mode — using local {} at {}",
-                kind.filename(),
-                candidate.display()
-            ));
-            return Ok(ResolvedScript {
-                path: candidate,
-                source: ScriptSource::DevCheckout,
-                commit: pin.commit.clone(),
-                branch: pin.branch.clone(),
-            });
-        }
-    }
-
-    // 2. (Not implemented) bundled fallback.
-
-    // 3. Network. Pin must be a real commit or a branch ref.
-    let commit_or_ref = match (&pin.commit, &pin.branch) {
-        (Some(c), _) if is_valid_commit(c) => c.clone(),
-        (_, Some(b)) if !b.trim().is_empty() => b.clone(),
-        (Some(other), _) => {
-            return Err(anyhow!(
-                "install script pin commit `{other}` is not a valid git SHA"
-            ));
-        }
-        _ => {
-            return Err(anyhow!(
-                "no install-script pin supplied — installer cannot resolve a script source"
-            ));
-        }
-    };
-
-    let cached = cached_path(kind, &commit_or_ref);
-    if cached.exists() {
-        emit_log(&format!(
-            "[bootstrap] using cached {} for {}",
-            kind.filename(),
-            truncate_ref(&commit_or_ref)
-        ));
-        return Ok(ResolvedScript {
-            path: cached,
-            source: ScriptSource::Cached,
-            commit: pin.commit.clone(),
-            branch: pin.branch.clone(),
-        });
-    }
-
-    emit_log(&format!(
-        "[bootstrap] downloading {} for {} from GitHub",
-        kind.filename(),
-        truncate_ref(&commit_or_ref)
-    ));
-
-    download(kind, &commit_or_ref, &cached).await?;
-
-    emit_log(&format!("[bootstrap] cached to {}", cached.display()));
-
-    Ok(ResolvedScript {
-        path: cached,
-        source: ScriptSource::Downloaded,
-        commit: pin.commit.clone(),
-        branch: pin.branch.clone(),
-    })
-}
-
-#[derive(Debug, Clone, Default)]
-pub struct Pin {
-    pub commit: Option<String>,
-    pub branch: Option<String>,
-}
-
-fn cached_path(kind: ScriptKind, commit_or_ref: &str) -> PathBuf {
-    let safe = sanitize_ref(commit_or_ref);
-    let filename = match kind {
-        ScriptKind::Ps1 => format!("install-{safe}.ps1"),
-        ScriptKind::Sh => format!("install-{safe}.sh"),
-    };
-    paths::bootstrap_cache_dir().join(filename)
-}
-
-/// Replace anything that's not [A-Za-z0-9._-] with `_`. Branch refs can
-/// contain `/`, dots, etc.; we want a flat filename.
-fn sanitize_ref(s: &str) -> String {
-    s.chars()
-        .map(|c| {
-            if c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' {
-                c
-            } else {
-                '_'
-            }
-        })
-        .collect()
-}
-
-fn truncate_ref(s: &str) -> &str {
-    if is_valid_commit(s) && s.len() >= 12 {
-        &s[..12]
-    } else {
-        s
-    }
-}
-
-/// Downloads to `dest_path` via reqwest with rustls. Atomically renames
-/// `dest_path.tmp` → `dest_path` so partial writes don't poison the cache.
-async fn download(kind: ScriptKind, commit_or_ref: &str, dest_path: &Path) -> Result<()> {
-    let url = format!(
-        "https://raw.githubusercontent.com/NousResearch/hermes-agent/{}/scripts/{}",
-        commit_or_ref,
-        kind.filename()
-    );
-
-    if let Some(parent) = dest_path.parent() {
-        std::fs::create_dir_all(parent).with_context(|| {
-            format!("creating bootstrap-cache parent dir {}", parent.display())
-        })?;
-    }
-
-    let tmp_path = dest_path.with_extension({
-        let ext = dest_path
-            .extension()
-            .and_then(|s| s.to_str())
-            .unwrap_or("tmp");
-        format!("{ext}.tmp")
-    });
-
-    let response = reqwest::Client::new()
-        .get(&url)
-        .header("User-Agent", "hermes-setup/0.0.1")
-        .send()
-        .await
-        .with_context(|| format!("GET {url}"))?;
-
-    if !response.status().is_success() {
-        return Err(anyhow!(
-            "Failed to download {}: HTTP {} from {}",
-            kind.filename(),
-            response.status(),
-            url
-        ));
-    }
-
-    let bytes = response
-        .bytes()
-        .await
-        .with_context(|| format!("reading body of {url}"))?;
-
-    let mut file = tokio::fs::File::create(&tmp_path)
-        .await
-        .with_context(|| format!("creating temp file {}", tmp_path.display()))?;
-    file.write_all(&bytes)
-        .await
-        .with_context(|| format!("writing temp file {}", tmp_path.display()))?;
-    file.flush().await.context("flushing temp file")?;
-    drop(file);
-
-    tokio::fs::rename(&tmp_path, dest_path)
-        .await
-        .with_context(|| {
-            format!(
-                "renaming {} → {}",
-                tmp_path.display(),
-                dest_path.display()
-            )
-        })?;
-
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn is_valid_commit_accepts_short_and_full_shas() {
-        assert!(is_valid_commit("02d26981d3d4ad50e142399b8476f59ad5953ff0"));
-        assert!(is_valid_commit("02d2698"));
-        assert!(!is_valid_commit("02d269"));
-        assert!(!is_valid_commit("not-a-sha"));
-        assert!(!is_valid_commit(""));
-    }
-
-    #[test]
-    fn sanitize_ref_replaces_slashes() {
-        assert_eq!(sanitize_ref("bb/gui"), "bb_gui");
-        assert_eq!(sanitize_ref("main"), "main");
-        assert_eq!(sanitize_ref("release/1.2.3"), "release_1.2.3");
-    }
-}
@@ -1,66 +0,0 @@
-//! Hermes Setup — Tauri entrypoint.
-//!
-//! Spawns a single window pointed at the React frontend (apps/bootstrap-installer/src/).
-//! All install-time work lives in `bootstrap.rs` and is invoked through the Tauri
-//! commands registered at the bottom of `run()`.
-//!
-//! The Windows-subsystem strip lives on the binary crate (src/main.rs), not
-//! here — a crate-level attribute on a lib doesn't propagate to the linker
-//! flags of the executable that consumes it.
-
-mod bootstrap;
-mod events;
-mod install_script;
-mod powershell;
-mod paths;
-
-use std::sync::Arc;
-use tokio::sync::Mutex;
-
-/// Process-wide install state, shared across Tauri commands.
-///
-/// The bootstrap is a one-shot, single-tenant process — we only need one
-/// of these per window. `Arc<Mutex<...>>` lets command handlers grab it
-/// without lifetime gymnastics.
-pub struct AppState {
-    pub bootstrap: Mutex<Option<bootstrap::BootstrapHandle>>,
-}
-
-impl Default for AppState {
-    fn default() -> Self {
-        Self {
-            bootstrap: Mutex::new(None),
-        }
-    }
-}
-
-#[cfg_attr(mobile, tauri::mobile_entry_point)]
-pub fn run() {
-    // Tracing → bootstrap-installer.log under HERMES_HOME/logs/ so install
-    // failures leave a trail for support. Console output also goes here in
-    // debug builds.
-    let _guard = paths::init_logging();
-
-    tracing::info!("Hermes Setup starting");
-
-    tauri::Builder::default()
-        .plugin(tauri_plugin_dialog::init())
-        .plugin(tauri_plugin_opener::init())
-        .plugin(tauri_plugin_process::init())
-        .plugin(tauri_plugin_shell::init())
-        .manage(Arc::new(AppState::default()))
-        .invoke_handler(tauri::generate_handler![
-            // Bootstrap lifecycle
-            bootstrap::start_bootstrap,
-            bootstrap::cancel_bootstrap,
-            bootstrap::get_bootstrap_status,
-            // Hand-off
-            bootstrap::launch_hermes_desktop,
-            // Diagnostics
-            paths::get_log_path,
-            paths::get_hermes_home,
-            paths::open_log_dir,
-        ])
-        .run(tauri::generate_context!())
-        .expect("error while running Hermes Setup");
-}
@@ -1,19 +0,0 @@
-// Hermes Setup — process entrypoint. All logic lives in lib.rs so it can
-// be unit-tested as a library; this file just calls into it.
-//
-// The windows_subsystem attribute MUST live here on the binary crate
-// (not lib.rs) — placing it on the lib was the bug that left a stray
-// cmd window behind Hermes-Setup.exe on release builds.
-//
-// `windows_subsystem = "windows"` strips the console allocation that
-// the default `windows_subsystem = "console"` would do, so double-clicking
-// the .exe gives you ONLY the Tauri window.
-//
-// debug_assertions guard: dev builds keep the console so tracing output
-// is visible during `cargo tauri dev`.
-
-#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
-
-fn main() {
-    hermes_bootstrap_lib::run()
-}
@@ -1,119 +0,0 @@
-//! Filesystem paths + logging setup.
-//!
-//! Mirrors `hermes_constants.get_hermes_home()` from the Python CLI:
-//!   Windows: %LOCALAPPDATA%\hermes
-//!   macOS:   ~/Library/Application Support/hermes
-//!   Linux:   ~/.hermes  (XDG override via $HERMES_HOME)
-//!
-//! IMPORTANT: this must match exactly. Drift here means install.ps1
-//! writes to one place and the installer reads from another, breaking
-//! the bootstrap-complete check.
-
-use std::path::{Path, PathBuf};
-use tracing_appender::non_blocking::WorkerGuard;
-
-/// Returns the canonical Hermes home directory, respecting $HERMES_HOME if set.
-pub fn hermes_home() -> PathBuf {
-    if let Ok(override_path) = std::env::var("HERMES_HOME") {
-        if !override_path.trim().is_empty() {
-            return PathBuf::from(override_path);
-        }
-    }
-
-    #[cfg(target_os = "windows")]
-    {
-        // %LOCALAPPDATA%\hermes — matches scripts/install.ps1's $HermesHome.
-        if let Some(local_app_data) = dirs::data_local_dir() {
-            return local_app_data.join("hermes");
-        }
-    }
-
-    #[cfg(target_os = "macos")]
-    {
-        // ~/Library/Application Support/hermes
-        if let Some(home) = dirs::home_dir() {
-            return home.join("Library/Application Support/hermes");
-        }
-    }
-
-    // Linux + fallback: ~/.hermes
-    if let Some(home) = dirs::home_dir() {
-        return home.join(".hermes");
-    }
-
-    // Last resort — current dir, almost certainly wrong but at least
-    // doesn't panic.
-    PathBuf::from(".hermes")
-}
-
-pub fn log_dir() -> PathBuf {
-    hermes_home().join("logs")
-}
-
-pub fn log_path() -> PathBuf {
-    log_dir().join("bootstrap-installer.log")
-}
-
-pub fn bootstrap_cache_dir() -> PathBuf {
-    hermes_home().join("bootstrap-cache")
-}
-
-/// Where install.ps1 writes the bootstrap-complete marker (existence-only file
-/// the Electron app also checks). Per main.cjs:
-///   const BOOTSTRAP_COMPLETE_MARKER = path.join(ACTIVE_HERMES_ROOT, '.hermes-bootstrap-complete')
-/// We don't always know ACTIVE_HERMES_ROOT until install.ps1 reports it, so
-/// this is a probe helper, not a definitive path.
-pub fn likely_bootstrap_marker(install_root: &Path) -> PathBuf {
-    install_root.join(".hermes-bootstrap-complete")
-}
-
-/// Initializes tracing to bootstrap-installer.log under HERMES_HOME/logs/.
-/// Returns a guard that flushes the appender on drop — keep it alive for
-/// the lifetime of the process.
-pub fn init_logging() -> Option<WorkerGuard> {
-    let dir = log_dir();
-    if let Err(err) = std::fs::create_dir_all(&dir) {
-        // No log dir → log to stderr only. Don't panic; the installer
-        // should still be usable on an exotic filesystem.
-        eprintln!("[hermes-setup] could not create log dir {dir:?}: {err}");
-        return None;
-    }
-
-    let file_appender = tracing_appender::rolling::never(&dir, "bootstrap-installer.log");
-    let (non_blocking, guard) = tracing_appender::non_blocking(file_appender);
-
-    let env_filter = tracing_subscriber::EnvFilter::try_from_env("HERMES_BOOTSTRAP_LOG")
-        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"));
-
-    tracing_subscriber::fmt()
-        .with_env_filter(env_filter)
-        .with_writer(non_blocking)
-        .with_ansi(false)
-        .with_target(true)
-        .init();
-
-    Some(guard)
-}
-
-// ---------------------------------------------------------------------------
-// Tauri commands
-// ---------------------------------------------------------------------------
-
-#[tauri::command]
-pub fn get_log_path() -> String {
-    log_path().to_string_lossy().into_owned()
-}
-
-#[tauri::command]
-pub fn get_hermes_home() -> String {
-    hermes_home().to_string_lossy().into_owned()
-}
-
-#[tauri::command]
-pub fn open_log_dir(app: tauri::AppHandle) -> Result<(), String> {
-    use tauri_plugin_opener::OpenerExt;
-    let path = log_dir();
-    app.opener()
-        .open_path(path.to_string_lossy(), None::<&str>)
-        .map_err(|e| e.to_string())
-}
@@ -1,267 +0,0 @@
-//! Drives PowerShell (Windows) or bash (Unix) for install.ps1 / install.sh.
-//!
-//! Port of `spawnPowerShell` from bootstrap-runner.cjs, with the same
-//! line-buffered stdout/stderr streaming + cancellation semantics.
-//!
-//! On Windows we pass `-NoProfile -ExecutionPolicy Bypass -File <script>`.
-//! On Unix we shell out to `bash <script>` since install.sh expects bash.
-
-use anyhow::{Context, Result};
-use std::path::Path;
-use std::process::Stdio;
-use tokio::io::{AsyncBufReadExt, BufReader};
-use tokio::process::{Child, Command};
-use tokio::sync::mpsc;
-
-/// Hooks the caller installs to receive output.
-pub struct StreamSink {
-    pub on_stdout_line: Box<dyn Fn(&str) + Send + Sync>,
-    pub on_stderr_line: Box<dyn Fn(&str) + Send + Sync>,
-}
-
-/// Outcome of a script invocation. Mirrors bootstrap-runner.cjs's
-/// `{stdout, stderr, code, signal, killed}` shape.
-#[derive(Debug)]
-pub struct ScriptResult {
-    pub stdout: String,
-    pub stderr: String,
-    pub exit_code: Option<i32>,
-    pub killed: bool,
-}
-
-/// Cancellation signal — `cancel_tx.send(()).await` aborts the running script.
-pub type CancelRx = mpsc::Receiver<()>;
-
-/// Spawns install.ps1 / install.sh with the given args and streams output.
-///
-/// `hermes_home_override` propagates to the child as $HERMES_HOME so the
-/// install script writes to the same directory the installer is reading from.
-pub async fn run_script(
-    script_path: &Path,
-    args: &[String],
-    sink: StreamSink,
-    hermes_home_override: Option<&str>,
-    mut cancel_rx: Option<CancelRx>,
-) -> Result<ScriptResult> {
-    let mut cmd = build_command(script_path, args);
-
-    if let Some(home) = hermes_home_override {
-        cmd.env("HERMES_HOME", home);
-    }
-
-    cmd.stdin(Stdio::null())
-        .stdout(Stdio::piped())
-        .stderr(Stdio::piped());
-
-    // On Windows, avoid spawning a flashing cmd window when we're hosted
-    // inside a GUI process. Tauri's main window is already created, so
-    // the side-effect console for the child is unwanted.
-    #[cfg(target_os = "windows")]
-    {
-        // CREATE_NO_WINDOW = 0x08000000
-        cmd.creation_flags(0x0800_0000);
-    }
-
-    let mut child: Child = cmd
-        .spawn()
-        .with_context(|| format!("spawning {}", script_path.display()))?;
-
-    let stdout = child.stdout.take().expect("stdout was piped");
-    let stderr = child.stderr.take().expect("stderr was piped");
-
-    let mut stdout_reader = BufReader::new(stdout).lines();
-    let mut stderr_reader = BufReader::new(stderr).lines();
-
-    let mut combined_stdout = String::new();
-    let mut combined_stderr = String::new();
-    let mut killed = false;
-
-    // Loop: poll stdout, stderr, cancel, and child exit concurrently.
-    loop {
-        tokio::select! {
-            line = stdout_reader.next_line() => {
-                match line {
-                    Ok(Some(l)) => {
-                        (sink.on_stdout_line)(&l);
-                        combined_stdout.push_str(&l);
-                        combined_stdout.push('\n');
-                    }
-                    Ok(None) => {
-                        // EOF on stdout — wait for stderr + exit.
-                        break;
-                    }
-                    Err(e) => {
-                        tracing::warn!("stdout read error: {e}");
-                        break;
-                    }
-                }
-            }
-            line = stderr_reader.next_line() => {
-                match line {
-                    Ok(Some(l)) => {
-                        (sink.on_stderr_line)(&l);
-                        combined_stderr.push_str(&l);
-                        combined_stderr.push('\n');
-                    }
-                    Ok(None) => {
-                        // stderr EOF — keep draining stdout.
-                    }
-                    Err(e) => {
-                        tracing::warn!("stderr read error: {e}");
-                    }
-                }
-            }
-            _ = recv_cancel(&mut cancel_rx) => {
-                tracing::warn!("cancellation received — killing child");
-                killed = true;
-                // best-effort kill; don't propagate errors
-                let _ = child.start_kill();
-                break;
-            }
-        }
-    }
-
-    // Drain remaining lines after the loop exited.
-    while let Ok(Some(l)) = stdout_reader.next_line().await {
-        (sink.on_stdout_line)(&l);
-        combined_stdout.push_str(&l);
-        combined_stdout.push('\n');
-    }
-    while let Ok(Some(l)) = stderr_reader.next_line().await {
-        (sink.on_stderr_line)(&l);
-        combined_stderr.push_str(&l);
-        combined_stderr.push('\n');
-    }
-
-    let status = child
-        .wait()
-        .await
-        .context("waiting for install script to exit")?;
-
-    Ok(ScriptResult {
-        stdout: combined_stdout,
-        stderr: combined_stderr,
-        exit_code: status.code(),
-        killed,
-    })
-}
-
-async fn recv_cancel(rx: &mut Option<CancelRx>) {
-    match rx {
-        Some(r) => {
-            let _ = r.recv().await;
-        }
-        None => std::future::pending::<()>().await,
-    }
-}
-
-#[cfg(target_os = "windows")]
-fn build_command(script_path: &Path, args: &[String]) -> Command {
-    // We want PowerShell 5.1 / 7. install.ps1 uses 5.1-safe syntax everywhere.
-    // Prefer `powershell.exe` (5.1 baseline, present on every Windows since 7)
-    // over `pwsh.exe` (7+, may not be present).
-    let mut cmd = Command::new("powershell.exe");
-    cmd.arg("-NoProfile");
-    cmd.arg("-ExecutionPolicy").arg("Bypass");
-    cmd.arg("-File").arg(script_path);
-    for a in args {
-        cmd.arg(a);
-    }
-    cmd
-}
-
-#[cfg(not(target_os = "windows"))]
-fn build_command(script_path: &Path, args: &[String]) -> Command {
-    // install.sh expects bash. /bin/bash is fine on macOS (Apple still
-    // ships an old 3.2 bash; install.sh is written to that baseline).
-    let mut cmd = Command::new("bash");
-    cmd.arg(script_path);
-    for a in args {
-        cmd.arg(a);
-    }
-    cmd
-}
-
-/// Parses the LAST line of stdout that looks like a JSON object matching
-/// the install.ps1 stage-result contract: `{ok: bool, stage: string, ...}`.
-///
-/// Mirrors `parseStageResult` from bootstrap-runner.cjs. install.ps1 may
-/// print info/banner lines before the result frame; we scan from the end.
-pub fn parse_stage_result(stdout: &str) -> Option<crate::events::StageResultPayload> {
-    for line in stdout.lines().rev() {
-        let trimmed = line.trim();
-        if trimmed.is_empty() {
-            continue;
-        }
-        if let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) {
-            if value.get("ok").and_then(|v| v.as_bool()).is_some()
-                && value.get("stage").and_then(|v| v.as_str()).is_some()
-            {
-                if let Ok(parsed) =
-                    serde_json::from_value::<crate::events::StageResultPayload>(value)
-                {
-                    return Some(parsed);
-                }
-            }
-        }
-    }
-    None
-}
-
-/// Same logic but for the `-Manifest` payload (the LAST line with a `stages`
-/// array). Returns the parsed manifest.
-pub fn parse_manifest(stdout: &str) -> Option<crate::events::Manifest> {
-    for line in stdout.lines().rev() {
-        let trimmed = line.trim();
-        if trimmed.is_empty() {
-            continue;
-        }
-        if let Ok(value) = serde_json::from_str::<serde_json::Value>(trimmed) {
-            if value.get("stages").and_then(|v| v.as_array()).is_some() {
-                if let Ok(parsed) = serde_json::from_value::<crate::events::Manifest>(value) {
-                    return Some(parsed);
-                }
-            }
-        }
-    }
-    None
-}
-
-#[cfg(target_os = "windows")]
-use std::os::windows::process::CommandExt;
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn parse_stage_result_picks_last_json_line() {
-        let stdout = r#"
-[bootstrap] some info
-{"ok": false, "stage": "venv", "reason": "bad python"}
-{"ok": true, "stage": "venv"}
-final non-json banner
-"#;
-        let result = parse_stage_result(stdout).unwrap();
-        assert_eq!(result.stage, "venv");
-        assert!(result.ok);
-    }
-
-    #[test]
-    fn parse_manifest_finds_stages_array() {
-        let stdout = r#"
-info line
-{"stages": [{"name": "uv", "title": "uv", "category": "prereqs", "needs_user_input": false}], "protocol_version": 1}
-"#;
-        let m = parse_manifest(stdout).unwrap();
-        assert_eq!(m.stages.len(), 1);
-        assert_eq!(m.stages[0].name, "uv");
-        assert_eq!(m.protocol_version, Some(1));
-    }
-
-    #[test]
-    fn parse_returns_none_when_no_match() {
-        assert!(parse_stage_result("just banner\n").is_none());
-        assert!(parse_manifest("just banner\n").is_none());
-    }
-}
@@ -1,67 +0,0 @@
-{
-  "$schema": "https://schema.tauri.app/config/2",
-  "productName": "Hermes Setup",
-  "version": "0.0.1",
-  "identifier": "com.nousresearch.hermes.setup",
-  "build": {
-    "beforeDevCommand": "npm run dev",
-    "devUrl": "http://127.0.0.1:5175",
-    "beforeBuildCommand": "npm run build",
-    "frontendDist": "../dist"
-  },
-  "app": {
-    "windows": [
-      {
-        "label": "main",
-        "title": "Hermes Setup",
-        "width": 880,
-        "height": 620,
-        "minWidth": 720,
-        "minHeight": 520,
-        "resizable": true,
-        "fullscreen": false,
-        "decorations": true,
-        "transparent": false,
-        "center": true
-      }
-    ],
-    "security": {
-      "csp": "default-src 'self'; img-src 'self' data:; style-src 'self' 'unsafe-inline'; script-src 'self'; font-src 'self' data:; connect-src 'self' ipc: http://ipc.localhost"
-    },
-    "withGlobalTauri": false
-  },
-  "bundle": {
-    "active": true,
-    "category": "DeveloperTool",
-    "shortDescription": "Hermes Setup",
-    "longDescription": "Installs Hermes Agent on your machine. Drives scripts/install.ps1 (Windows) and scripts/install.sh (macOS/Linux).",
-    "publisher": "Nous Research",
-    "copyright": "Copyright © 2026 Nous Research",
-    "targets": [
-      "app",
-      "dmg",
-      "appimage"
-    ],
-    "icon": [
-      "icons/32x32.png",
-      "icons/128x128.png",
-      "icons/128x128@2x.png",
-      "icons/icon.icns",
-      "icons/icon.ico"
-    ],
-    "windows": {
-      "webviewInstallMode": {
-        "type": "embedBootstrapper"
-      }
-    },
-    "macOS": {
-      "minimumSystemVersion": "11.0",
-      "hardenedRuntime": true
-    }
-  },
-  "plugins": {
-    "shell": {
-      "open": true
-    }
-  }
-}
@@ -1,35 +0,0 @@
-import { useStore } from '@nanostores/react'
-import { useEffect } from 'react'
-import { $route, $bootstrap, initialize } from './store'
-import Welcome from './routes/welcome'
-import Progress from './routes/progress'
-import Success from './routes/success'
-import Failure from './routes/failure'
-
-/*
- * App shell — Hermes Setup.
- *
- * No header chrome (the OS title bar already says "Hermes Setup"; an
- * in-window repeat of the H mark + words was redundant slop).
- *
- * Route state lives in a single $route atom — 4 screens, no react-router.
- */
-export default function App() {
-  const route = useStore($route)
-  const bootstrap = useStore($bootstrap)
-
-  useEffect(() => {
-    void initialize()
-  }, [])
-
-  return (
-    <div className="relative flex h-full flex-col overflow-hidden bg-background text-foreground">
-      <main className="relative z-10 flex flex-1 flex-col overflow-hidden">
-        {route === 'welcome' && <Welcome />}
-        {route === 'progress' && <Progress bootstrap={bootstrap} />}
-        {route === 'success' && <Success />}
-        {route === 'failure' && <Failure bootstrap={bootstrap} />}
-      </main>
-    </div>
-  )
-}
@@ -1,80 +0,0 @@
-import { cva, type VariantProps } from 'class-variance-authority'
-import { Slot } from 'radix-ui'
-import * as React from 'react'
-
-import { cn } from '../lib/utils'
-
-/*
- * Button — copied verbatim from apps/desktop/src/components/ui/button.tsx.
- *
- * We import the desktop's local shadcn-style Button rather than
- * @nous-research/ui's <Button>, because the DS Button uses bg-midground /
- * text-background-base utilities that resolve to the DS's hardcoded
- * gold/brown brand defaults (#ffac02 / #170d02) unless overridden in
- * runtime. The desktop never sets those vars; it routes through its
- * own --dt-* token chain via shadcn classes like bg-primary. We do
- * the same so visuals match exactly.
- */
-
-const buttonVariants = cva(
-  "inline-flex shrink-0 items-center justify-center gap-2 rounded-md text-sm font-medium whitespace-nowrap transition-all outline-none focus-visible:border-ring focus-visible:ring-[0.1875rem] focus-visible:ring-ring/50 disabled:pointer-events-none disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
-  {
-    variants: {
-      variant: {
-        default: 'bg-primary text-primary-foreground hover:bg-primary/90',
-        destructive:
-          'bg-destructive text-white hover:bg-destructive/90 focus-visible:ring-destructive/20 dark:bg-destructive/60 dark:focus-visible:ring-destructive/40',
-        outline:
-          'border bg-background shadow-xs hover:bg-accent hover:text-accent-foreground dark:border-input dark:bg-input/30 dark:hover:bg-input/50',
-        secondary:
-          'bg-secondary text-secondary-foreground hover:bg-secondary/80',
-        ghost:
-          'hover:bg-accent hover:text-accent-foreground dark:hover:bg-accent/50',
-        link: 'text-primary underline-offset-4 decoration-current/20 hover:underline'
-      },
-      size: {
-        default: 'h-9 px-4 py-2 has-[>svg]:px-3',
-        xs: "h-6 gap-1 rounded-md px-2 text-xs has-[>svg]:px-1.5 [&_svg:not([class*='size-'])]:size-3",
-        sm: 'h-8 gap-1.5 rounded-md px-3 has-[>svg]:px-2.5',
-        lg: 'h-10 rounded-md px-6 has-[>svg]:px-4',
-        icon: 'size-9',
-        'icon-xs':
-          "size-6 rounded-md [&_svg:not([class*='size-'])]:size-3",
-        'icon-sm': 'size-8',
-        'icon-lg': 'size-10'
-      }
-    },
-    defaultVariants: {
-      variant: 'default',
-      size: 'default'
-    }
-  }
-)
-
-interface ButtonProps
-  extends React.ComponentProps<'button'>,
-    VariantProps<typeof buttonVariants> {
-  asChild?: boolean
-}
-
-export function Button({
-  className,
-  variant = 'default',
-  size = 'default',
-  asChild = false,
-  ...props
-}: ButtonProps) {
-  const Comp = asChild ? Slot.Root : 'button'
-
-  return (
-    <Comp
-      className={cn(buttonVariants({ variant, size }), className)}
-      data-size={size}
-      data-slot="button"
-      data-variant={variant}
-      {...props}
-    />
-  )
-}
-
-export { buttonVariants }
@@ -1,12 +0,0 @@
-import { type ClassValue, clsx } from 'clsx'
-import { twMerge } from 'tailwind-merge'
-
-/*
- * cn — Tailwind-aware class merger. Same util the desktop and dashboard
- * use. clsx handles conditional classes; twMerge resolves utility
- * conflicts so `cn('px-2', condition && 'px-4')` ends up with px-4 only,
- * not both.
- */
-export function cn(...inputs: ClassValue[]) {
-  return twMerge(clsx(inputs))
-}
@@ -1,14 +0,0 @@
-import { StrictMode } from 'react'
-import { createRoot } from 'react-dom/client'
-import App from './app.tsx'
-import './styles.css'
-
-// Default to LIGHT mode — matches the Hermes desktop's default. The
-// desktop's runtime theme system can switch to .dark later, but our
-// installer ships in light mode only since we don't carry the theme
-// provider machinery.
-createRoot(document.getElementById('root')!).render(
-  <StrictMode>
-    <App />
-  </StrictMode>
-)
@@ -1,77 +0,0 @@
-import { type CSSProperties } from 'react'
-import { useStore } from '@nanostores/react'
-import { Button } from '../components/button'
-import {
-  $logPath,
-  openLogDir,
-  startInstall,
-  type BootstrapStateModel
-} from '../store'
-import { RefreshCw, FileText } from 'lucide-react'
-
-interface FailureProps {
-  bootstrap: BootstrapStateModel
-}
-
-/*
- * Failure screen. Same hero treatment as Welcome/Success — the wordmark
- * carries the brand, so we keep it across every terminal state.
- *
- * The actual error message lives below in muted text. Two clear
- * affordances: Retry (primary) and Open log folder (secondary).
- */
-export default function Failure({ bootstrap }: FailureProps) {
-  const logPath = useStore($logPath)
-
-  return (
-    <div className="hermes-fade-in flex h-full flex-col items-center justify-center gap-6 px-12 py-10">
-      <div className="w-full max-w-2xl min-w-0 text-center">
-        <p
-          className="fit-text mx-auto mb-4 w-full font-['Collapse'] font-bold uppercase leading-[0.9] tracking-[0.08em] text-destructive mix-blend-plus-lighter dark:text-destructive/90"
-          style={
-            {
-              '--fit-text-line-height': '0.9',
-              '--fit-text-max': '5rem',
-              '--fit-text-min': '2.25rem'
-            } as CSSProperties
-          }
-        >
-          <span>
-            <span>Install didn&rsquo;t finish</span>
-          </span>
-          <span aria-hidden="true">Install didn&rsquo;t finish</span>
-        </p>
-
-        <p className="m-0 mx-auto max-w-xl text-center text-sm leading-normal tracking-tight text-muted-foreground">
-          {bootstrap.error ?? 'Something went wrong during installation.'}
-        </p>
-      </div>
-
-      <div className="flex items-center gap-3">
-        <Button
-          onClick={() => void startInstall()}
-          size="lg"
-          className="inline-flex items-center gap-2 px-6"
-        >
-          <RefreshCw size={16} />
-          Retry install
-        </Button>
-        <Button
-          variant="outline"
-          size="lg"
-          onClick={() => void openLogDir()}
-          className="inline-flex items-center gap-2"
-        >
-          <FileText size={16} />
-          Open log folder
-        </Button>
-      </div>
-
-      {logPath && (
-        <p className="max-w-lg text-center text-xs text-muted-foreground/70">
-          Log: <code className="font-mono">{logPath}</code>
-        </p>
-      )}
-    </div>
-  )
-}
@@ -1,190 +0,0 @@
-import { useEffect, useRef, useState } from 'react'
-import { useStore } from '@nanostores/react'
-import { Button } from '../components/button'
-import {
-  cancelInstall,
-  $progress,
-  type BootstrapStateModel,
-  type StageState
-} from '../store'
-import { Check, X, ChevronRight, FileText, Loader2 } from 'lucide-react'
-import clsx from 'clsx'
-
-interface ProgressProps {
-  bootstrap: BootstrapStateModel
-}
-
-/*
- * Progress screen — drives a stage list + collapsible log panel. Uses
- * the DS <Progress> for the top bar so its motion + ring match the rest
- * of the product.
- */
-export default function ProgressScreen({ bootstrap }: ProgressProps) {
-  const progress = useStore($progress)
-  const [showLogs, setShowLogs] = useState(false)
-  const logEndRef = useRef<HTMLDivElement>(null)
-
-  useEffect(() => {
-    if (showLogs && logEndRef.current) {
-      logEndRef.current.scrollIntoView({ behavior: 'smooth' })
-    }
-  }, [bootstrap.logs.length, showLogs])
-
-  const currentStage =
-    bootstrap.currentStage != null
-      ? bootstrap.stages[bootstrap.currentStage]
-      : null
-
-  return (
-    <div className="hermes-fade-in flex h-full flex-col">
-      <div className="border-b border-border px-6 py-4">
-        <div className="mb-3 flex items-center justify-between text-xs">
-          <div className="flex items-center gap-2 text-foreground">
-            {bootstrap.status === 'running' && (
-              <Loader2 size={12} className="animate-spin text-primary" />
-            )}
-            <span>
-              {bootstrap.status === 'running'
-                ? currentStage
-                  ? currentStage.info.title
-                  : 'Preparing\u2026'
-                : bootstrap.status === 'completed'
-                  ? 'Done'
-                  : 'Installing'}
-            </span>
-          </div>
-          <div className="text-muted-foreground">
-            {progress.done} of {progress.total} steps
-          </div>
-        </div>
-        {/* Top progress bar — plain HTML, derived from --primary so it
-            tracks the theme accent. */}
-        <div className="h-1 w-full overflow-hidden rounded-full bg-muted">
-          <div
-            className="h-full bg-primary transition-all duration-300 ease-out"
-            style={{ width: `${Math.max(2, progress.fraction * 100)}%` }}
-          />
-        </div>
-      </div>
-
-      <div className="flex flex-1 overflow-hidden">
-        <div className="flex-1 overflow-y-auto px-6 py-4">
-          <ol className="space-y-1">
-            {bootstrap.stageOrder.map((name) => {
-              const rec = bootstrap.stages[name]
-              if (!rec) return null
-              return (
-                <li
-                  key={name}
-                  className={clsx(
-                    'flex items-center gap-3 rounded-md px-3 py-2 text-sm transition-colors',
-                    rec.state === 'running' && 'bg-card text-foreground',
-                    rec.state === 'succeeded' && 'text-foreground/80',
-                    rec.state === 'skipped' && 'text-muted-foreground',
-                    rec.state === 'failed' &&
-                      'bg-destructive/10 text-destructive',
-                    !rec.state && 'text-muted-foreground/60'
-                  )}
-                >
-                  <StateIcon state={rec.state ?? null} />
-                  <span className="flex-1 truncate">{rec.info.title}</span>
-                  {rec.durationMs != null && (
-                    <span className="text-xs text-muted-foreground">
-                      {formatDuration(rec.durationMs)}
-                    </span>
-                  )}
-                </li>
-              )
-            })}
-          </ol>
-        </div>
-
-        {showLogs && (
-          <div className="flex w-1/2 flex-col border-l border-border bg-card/40">
-            <div className="flex shrink-0 items-center justify-between border-b border-border px-3 py-2">
-              <div className="text-xs font-medium text-foreground/80">
-                Live output
-              </div>
-              <div className="text-xs text-muted-foreground">
-                {bootstrap.logs.length} lines
-              </div>
-            </div>
-            <div className="flex-1 overflow-y-auto px-3 py-2 font-mono text-[11px] leading-relaxed">
-              {bootstrap.logs.map((entry, idx) => (
-                <div
-                  key={idx}
-                  className={clsx(
-                    'whitespace-pre-wrap',
-                    entry.line.startsWith('stderr:')
-                      ? 'text-destructive'
-                      : 'text-foreground/70'
-                  )}
-                >
-                  {entry.line}
-                </div>
-              ))}
-              <div ref={logEndRef} />
-            </div>
-          </div>
-        )}
-      </div>
-
-      <div className="flex shrink-0 items-center justify-between border-t border-border px-6 py-3">
-        <button
-          type="button"
-          onClick={() => setShowLogs((v) => !v)}
-          className="inline-flex items-center gap-1.5 text-xs text-muted-foreground transition-colors hover:text-foreground"
-        >
-          <FileText size={14} />
-          {showLogs ? 'Hide details' : 'Show details'}
-          <ChevronRight
-            size={12}
-            className={clsx(
-              'transition-transform',
-              showLogs && 'rotate-90'
-            )}
-          />
-        </button>
-
-        {bootstrap.status === 'running' && (
-          <Button
-            variant="outline"
-            size="sm"
-            onClick={() => void cancelInstall()}
-          >
-            Cancel
-          </Button>
-        )}
-      </div>
-    </div>
-  )
-}
-
-function StateIcon({ state }: { state: StageState | null }) {
-  if (state === 'running') {
-    return <Loader2 size={14} className="animate-spin text-primary" />
-  }
-  if (state === 'succeeded') {
-    return <Check size={14} className="text-emerald-400" />
-  }
-  if (state === 'skipped') {
-    return <ChevronRight size={14} className="text-muted-foreground/70" />
-  }
-  if (state === 'failed') {
-    return <X size={14} className="text-destructive" />
-  }
-  return (
-    <div
-      className="h-[6px] w-[6px] rounded-full bg-muted-foreground/40"
-      aria-hidden
-    />
-  )
-}
-
-function formatDuration(ms: number): string {
-  if (ms < 1000) return `${ms}ms`
-  if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`
-  const m = Math.floor(ms / 60000)
-  const s = Math.round((ms % 60000) / 1000)
-  return `${m}m ${s}s`
-}
@@ -1,87 +0,0 @@
-import { useState } from 'react'
-import { type CSSProperties } from 'react'
-import { Button } from '../components/button'
-import { launchHermesDesktop } from '../store'
-import { Rocket, AlertCircle } from 'lucide-react'
-
-/*
- * Success screen. HERMES AGENT wordmark stays as the visual anchor
- * (same Collapse Bold treatment as Welcome + the desktop chat intro),
- * with a status line below.
- *
- * Launching the desktop can fail (e.g. Stage-Desktop was skipped and
- * Hermes.exe doesn't exist). We catch the Tauri error and surface it
- * inline rather than silently doing nothing — the previous version
- * had `onClick={() => void launchHermesDesktop()}` which swallowed
- * the rejection and left the user staring at an unresponsive button.
- */
-export default function Success() {
-  const [error, setError] = useState<string | null>(null)
-  const [launching, setLaunching] = useState(false)
-
-  async function handleLaunch() {
-    setError(null)
-    setLaunching(true)
-    try {
-      await launchHermesDesktop()
-      // On success the installer exits — control never returns here.
-    } catch (e) {
-      const msg = e instanceof Error ? e.message : String(e)
-      setError(msg)
-      setLaunching(false)
-    }
-  }
-
-  return (
-    <div className="hermes-fade-in flex h-full flex-col items-center justify-center gap-8 px-12 py-10">
-      <div className="w-full max-w-2xl min-w-0 text-center">
-        <p
-          className="fit-text mx-auto mb-4 w-full font-['Collapse'] font-bold uppercase leading-[0.9] tracking-[0.08em] text-midground mix-blend-plus-lighter dark:text-foreground/90"
-          style={
-            {
-              '--fit-text-line-height': '0.9',
-              '--fit-text-max': '5rem',
-              '--fit-text-min': '2.25rem'
-            } as CSSProperties
-          }
-        >
-          <span>
-            <span>Hermes is ready</span>
-          </span>
-          <span aria-hidden="true">Hermes is ready</span>
-        </p>
-
-        <p className="m-0 text-center text-base leading-normal tracking-tight text-muted-foreground">
-          You can launch from here, or any time from your terminal with{' '}
-          <code className="rounded bg-muted/60 px-1 py-0.5 font-mono text-sm">
-            hermes desktop
-          </code>
-          .
-        </p>
-      </div>
-
-      <Button
-        onClick={() => void handleLaunch()}
-        size="lg"
-        disabled={launching}
-        className="inline-flex items-center gap-2 px-6"
-      >
-        <Rocket size={18} />
-        {launching ? 'Launching…' : 'Launch Hermes'}
-      </Button>
-
-      {error && (
-        <div
-          role="alert"
-          className="flex max-w-2xl items-start gap-2 rounded-md border border-destructive/30 bg-destructive/10 px-4 py-3 text-sm text-destructive"
-        >
-          <AlertCircle size={16} className="mt-0.5 shrink-0" />
-          <div className="min-w-0">
-            <div className="font-medium">Couldn&rsquo;t launch the desktop app</div>
-            <div className="mt-1 text-destructive/80">{error}</div>
-          </div>
-        </div>
-      )}
-    </div>
-  )
-}
@@ -1,58 +0,0 @@
-import { type CSSProperties } from 'react'
-import { Button } from '../components/button'
-import { startInstall } from '../store'
-import { ArrowRight } from 'lucide-react'
-
-/*
- * Welcome screen.
- *
- * Mirrors the desktop's chat intro (apps/desktop/src/components/chat/intro.tsx):
- *   - HERMES AGENT wordmark rendered in Collapse Bold, uppercase, tracked
- *   - mix-blend-plus-lighter so the type "glows" on the canvas
- *   - fit-text utility so the wordmark sizes itself to the column
- *
- * No install-path footer. The default install location is correct for
- * 99% of users; the rest will use the CLI installer with a -HermesHome
- * flag. Showing %LOCALAPPDATA% to grandma is developer-brain.
- */
-export default function Welcome() {
-  return (
-    <div className="hermes-fade-in flex h-full flex-col items-center justify-center gap-10 px-12 py-10">
-      {/* Hero — same recipe the desktop's chat/intro.tsx uses */}
-      <div className="w-full max-w-2xl min-w-0 text-center">
-        <p
-          className="fit-text mx-auto mb-4 w-full font-['Collapse'] font-bold uppercase leading-[0.9] tracking-[0.08em] text-midground mix-blend-plus-lighter dark:text-foreground/90"
-          style={
-            {
-              '--fit-text-line-height': '0.9',
-              '--fit-text-max': '6rem',
-              '--fit-text-min': '2.5rem'
-            } as CSSProperties
-          }
-        >
-          <span>
-            <span>HERMES AGENT</span>
-          </span>
-          <span aria-hidden="true">HERMES AGENT</span>
-        </p>
-
-        <p className="m-0 text-center text-base leading-normal tracking-tight text-muted-foreground">
-          The agent that grows with you. We&rsquo;ll set things up in the
-          background &mdash; takes a few minutes.
-        </p>
-      </div>
-
-      <Button
-        onClick={() => void startInstall()}
-        size="lg"
-        className="group inline-flex items-center gap-2 px-6"
-      >
-        Install Hermes
-        <ArrowRight
-          size={18}
-          className="transition-transform group-hover:translate-x-0.5"
-        />
-      </Button>
-    </div>
-  )
-}
@@ -1,247 +0,0 @@
-import { atom, computed } from 'nanostores'
-import { listen, type UnlistenFn } from '@tauri-apps/api/event'
-import { invoke } from '@tauri-apps/api/core'
-
-/*
- * Bootstrap state store — single source of truth for installer screens.
- *
- * Lives in nanostores per the project's TypeScript guidelines (apps/desktop
- * AGENTS.md): "Prefer small nanostores over component state when state is
- * shared, reused, or read by distant UI."
- *
- * One channel from Rust ('bootstrap' event), discriminated by payload.type.
- * We translate those events into typed atom updates here so the rest of
- * the app only deals with React-friendly state.
- */
-
-// ---------------------------------------------------------------------------
-// Types — mirror src-tauri/src/events.rs
-// ---------------------------------------------------------------------------
-
-export interface StageInfo {
-  name: string
-  title: string
-  category: string
-  needs_user_input: boolean
-}
-
-export type StageState = 'running' | 'succeeded' | 'skipped' | 'failed'
-
-export interface StageRecord {
-  info: StageInfo
-  state: StageState | null
-  durationMs?: number
-  error?: string
-}
-
-export interface BootstrapStateModel {
-  status: 'idle' | 'running' | 'completed' | 'failed'
-  protocolVersion: number | null
-  stages: Record<string, StageRecord>
-  stageOrder: string[]
-  currentStage: string | null
-  installRoot: string | null
-  error: string | null
-  logs: Array<{ stage?: string; line: string }>
-}
-
-const INITIAL: BootstrapStateModel = {
-  status: 'idle',
-  protocolVersion: null,
-  stages: {},
-  stageOrder: [],
-  currentStage: null,
-  installRoot: null,
-  error: null,
-  logs: []
-}
-
-// ---------------------------------------------------------------------------
-// Atoms
-// ---------------------------------------------------------------------------
-
-export type Route = 'welcome' | 'progress' | 'success' | 'failure'
-
-export const $route = atom<Route>('welcome')
-export const $bootstrap = atom<BootstrapStateModel>(INITIAL)
-export const $logPath = atom<string | null>(null)
-export const $hermesHome = atom<string | null>(null)
-
-export const $progress = computed($bootstrap, (b) => {
-  const total = b.stageOrder.length
-  if (total === 0) return { done: 0, total: 0, fraction: 0 }
-  let done = 0
-  for (const name of b.stageOrder) {
-    const s = b.stages[name]?.state
-    if (s === 'succeeded' || s === 'skipped' || s === 'failed') done += 1
-  }
-  return { done, total, fraction: done / total }
-})
-
-// ---------------------------------------------------------------------------
-// Tauri event subscription
-// ---------------------------------------------------------------------------
-
-interface BootstrapManifestEvent {
-  type: 'manifest'
-  stages: StageInfo[]
-  protocolVersion: number | null
-}
-
-interface BootstrapStageEvent {
-  type: 'stage'
-  name: string
-  state: StageState
-  durationMs?: number
-  error?: string
-}
-
-interface BootstrapLogEvent {
-  type: 'log'
-  stage?: string
-  line: string
-}
-
-interface BootstrapCompleteEvent {
-  type: 'complete'
-  installRoot: string
-  marker: unknown
-}
-
-interface BootstrapFailedEvent {
-  type: 'failed'
-  stage?: string
-  error: string
-}
-
-type BootstrapEvent =
-  | BootstrapManifestEvent
-  | BootstrapStageEvent
-  | BootstrapLogEvent
-  | BootstrapCompleteEvent
-  | BootstrapFailedEvent
-
-let unlisten: UnlistenFn | null = null
-
-export async function initialize(): Promise<void> {
-  if (unlisten) return
-
-  // Pull static info on mount for the diagnostics footer.
-  try {
-    const [logPath, hermesHome] = await Promise.all([
-      invoke<string>('get_log_path'),
-      invoke<string>('get_hermes_home')
-    ])
-    $logPath.set(logPath)
-    $hermesHome.set(hermesHome)
-  } catch (err) {
-    console.warn('failed to fetch installer paths', err)
-  }
-
-  unlisten = await listen<BootstrapEvent>('bootstrap', (event) => {
-    const payload = event.payload
-    const cur = $bootstrap.get()
-    switch (payload.type) {
-      case 'manifest': {
-        const stages: Record<string, StageRecord> = {}
-        const order: string[] = []
-        for (const s of payload.stages) {
-          stages[s.name] = { info: s, state: null }
-          order.push(s.name)
-        }
-        $bootstrap.set({
-          ...cur,
-          status: 'running',
-          protocolVersion: payload.protocolVersion,
-          stages,
-          stageOrder: order,
-          currentStage: null,
-          installRoot: null,
-          error: null,
-          logs: []
-        })
-        $route.set('progress')
-        break
-      }
-      case 'stage': {
-        const existing = cur.stages[payload.name]
-        if (!existing) {
-          console.warn('stage event for unknown stage', payload.name)
-          break
-        }
-        const next: StageRecord = {
-          ...existing,
-          state: payload.state,
-          durationMs: payload.durationMs,
-          error: payload.error
-        }
-        $bootstrap.set({
-          ...cur,
-          stages: { ...cur.stages, [payload.name]: next },
-          currentStage:
-            payload.state === 'running' ? payload.name : cur.currentStage
-        })
-        break
-      }
-      case 'log': {
-        const logs = [...cur.logs, { stage: payload.stage, line: payload.line }]
-        // Keep the rolling buffer bounded so the UI doesn't get OOM'd
-        // during a long install (playwright chromium download is ~10k lines).
-        const trimmed = logs.length > 2000 ? logs.slice(-2000) : logs
-        $bootstrap.set({ ...cur, logs: trimmed })
-        break
-      }
-      case 'complete':
-        $bootstrap.set({
-          ...cur,
-          status: 'completed',
-          installRoot: payload.installRoot,
-          currentStage: null
-        })
-        $route.set('success')
-        break
-      case 'failed':
-        $bootstrap.set({
-          ...cur,
-          status: 'failed',
-          error: payload.error,
-          currentStage: null
-        })
-        $route.set('failure')
-        break
-    }
-  })
-}
-
-// ---------------------------------------------------------------------------
-// Actions
-// ---------------------------------------------------------------------------
-
-export async function startInstall(opts?: { branch?: string }): Promise<void> {
-  // Reset before kicking off so a retry from the failure screen clears
-  // the previous run's state.
-  $bootstrap.set(INITIAL)
-  $route.set('progress')
-  await invoke('start_bootstrap', {
-    args: {
-      commit: null,
-      branch: opts?.branch ?? null,
-      include_desktop: true,
-      hermes_home: null
-    }
-  })
-}
-
-export async function cancelInstall(): Promise<void> {
-  await invoke('cancel_bootstrap')
-}
-
-export async function launchHermesDesktop(): Promise<void> {
-  const installRoot = $bootstrap.get().installRoot
-  if (!installRoot) throw new Error('no install root')
-  await invoke('launch_hermes_desktop', { installRoot })
-}
-
-export async function openLogDir(): Promise<void> {
-  await invoke('open_log_dir')
-}
@@ -1,51 +0,0 @@
-/*
- * Hermes Setup — defer entirely to the desktop's styles.css.
- *
- * Rather than re-implement the Hermes design system (and inevitably drift
- * from it), we import apps/desktop/src/styles.css wholesale. The desktop
- * is the canonical source of truth for fonts, color tokens, button chrome,
- * scrollbars, layout utilities, and animations. Any change to the
- * Hermes look propagates here automatically with no copy-paste maintenance.
- *
- * Path resolution caveats:
- *   - Tailwind v4's `@import` resolves relative to this file. The desktop's
- *     `@source '../../../node_modules/...'` declarations therefore re-resolve
- *     against apps/bootstrap-installer/src/. Since both apps live two levels
- *     deep under the same repo root, `../../../node_modules` lands in the
- *     same place. (Verify if either app ever moves.)
- *   - The desktop's `@font-face url('../../../node_modules/...')` references
- *     are baked into the *imported* stylesheet; CSS resolves url()s relative
- *     to the file that contains them, so they continue to point at the
- *     correct node_modules path even from here.
- *
- * Forced light mode: the desktop ships with a runtime theme switcher
- * (ThemeProvider + applyTheme) that can flip to dark via document.documentElement.
- * The installer has no UI for theme switching, so we stay on the desktop's
- * default light surface (Nous-blue accent on near-white chrome).
- */
-@import '../../desktop/src/styles.css';
-
-/* Installer-only additions: a fade-in animation and a warm radial glow
-   for the welcome screen. Everything else inherits from the desktop. */
-@keyframes hermes-fade-in {
-  from {
-    opacity: 0;
-    transform: translateY(4px);
-  }
-  to {
-    opacity: 1;
-    transform: translateY(0);
-  }
-}
-
-.hermes-fade-in {
-  animation: hermes-fade-in 0.45s ease-out both;
-}
-
-.hermes-glow {
-  background: radial-gradient(
-    ellipse at center,
-    color-mix(in srgb, var(--ui-warm) 18%, transparent) 0%,
-    transparent 60%
-  );
-}
@@ -1 +0,0 @@
-/// <reference types="vite/client" />
@@ -1,26 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "ES2022",
-    "useDefineForClassFields": true,
-    "lib": ["ES2022", "DOM", "DOM.Iterable"],
-    "module": "ESNext",
-    "skipLibCheck": true,
-    "moduleResolution": "bundler",
-    "allowImportingTsExtensions": true,
-    "resolveJsonModule": true,
-    "isolatedModules": true,
-    "noEmit": true,
-    "jsx": "react-jsx",
-    "strict": true,
-    "noUnusedLocals": true,
-    "noUnusedParameters": true,
-    "esModuleInterop": true,
-    "noFallthroughCasesInSwitch": true,
-    "baseUrl": ".",
-    "paths": {
-      "@/*": ["src/*"]
-    }
-  },
-  "include": ["src"],
-  "references": [{ "path": "./tsconfig.node.json" }]
-}
@@ -1,11 +0,0 @@
-{
-  "compilerOptions": {
-    "composite": true,
-    "skipLibCheck": true,
-    "module": "ESNext",
-    "moduleResolution": "bundler",
-    "allowSyntheticDefaultImports": true,
-    "strict": true
-  },
-  "include": ["vite.config.ts"]
-}
@@ -1,46 +0,0 @@
-import { defineConfig } from 'vite'
-import react from '@vitejs/plugin-react'
-import tailwindcss from '@tailwindcss/vite'
-import path from 'node:path'
-
-// Hermes Setup — Tauri-targeted Vite config.
-//
-// Port 5175 keeps us out of the way of:
-//   apps/dashboard       (vite default 5173)
-//   apps/desktop dev     (5174 per its package.json)
-//
-// `clearScreen: false` is the Tauri convention — they spawn vite as a child
-// process and want our errors to stay visible.
-
-const host = process.env.TAURI_DEV_HOST
-
-export default defineConfig({
-  plugins: [react(), tailwindcss()],
-  resolve: {
-    alias: {
-      '@': path.resolve(__dirname, './src')
-    }
-  },
-  clearScreen: false,
-  server: {
-    port: 5175,
-    strictPort: true,
-    host: host || '127.0.0.1',
-    hmr: host
-      ? {
-          protocol: 'ws',
-          host,
-          port: 5176
-        }
-      : undefined,
-    watch: {
-      // Don't watch the Rust side — tauri-cli handles it.
-      ignored: ['**/src-tauri/**']
-    }
-  },
-  build: {
-    target: 'esnext',
-    outDir: 'dist',
-    emptyOutDir: true
-  }
-})
@@ -1,38 +0,0 @@
-import {
-  JsonRpcGatewayClient,
-  type ConnectionState,
-  type GatewayEvent,
-  type GatewayEventName,
-} from "@hermes/shared";
-
-import { HERMES_BASE_PATH } from "@/lib/api";
-
-export type { ConnectionState, GatewayEvent, GatewayEventName };
-
-/**
- * Browser wrapper for the shared tui_gateway JSON-RPC client.
- *
- * Dashboard resolves its token and host from the served page. Desktop uses the
- * same shared protocol client, but supplies an absolute wsUrl from Electron.
- */
-export class GatewayClient extends JsonRpcGatewayClient {
-  async connect(token?: string): Promise<void> {
-    const resolved = token ?? window.__HERMES_SESSION_TOKEN__ ?? "";
-    if (!resolved) {
-      throw new Error(
-        "Session token not available — page must be served by the Hermes dashboard",
-      );
-    }
-
-    const scheme = location.protocol === "https:" ? "wss:" : "ws:";
-    await super.connect(
-      `${scheme}//${location.host}${HERMES_BASE_PATH}/api/ws?token=${encodeURIComponent(resolved)}`,
-    );
-  }
-}
-
-declare global {
-  interface Window {
-    __HERMES_SESSION_TOKEN__?: string;
-  }
-}
@@ -1,11 +0,0 @@
-{
-  "arrowParens": "avoid",
-  "bracketSpacing": true,
-  "endOfLine": "auto",
-  "printWidth": 120,
-  "semi": false,
-  "singleQuote": true,
-  "tabWidth": 2,
-  "trailingComma": "none",
-  "useTabs": false
-}
@@ -1,284 +0,0 @@
-# Hermes Desktop
-
-Native Electron shell for Hermes. It packages the desktop renderer and installer targets for macOS and Windows; Hermes Agent itself is fetched and installed at first launch (see [Runtime Bootstrap](#runtime-bootstrap)).
-
-## Setup
-
-Install workspace dependencies from the repo root so `apps/desktop`, `apps/dashboard`, and `apps/shared` stay linked:
-
-```bash
-npm install
-```
-
-For Python, you have two options:
-
-**Option A — let the desktop provision it for you (recommended for first-time setup):** just run `npm run dev`. On first launch the desktop creates a venv at `HERMES_HOME/hermes-agent/venv` and runs `pip install -e .` against the resolved Hermes source automatically. Requires Python 3.11+ on `PATH`.
-
-**Option B — share an existing CLI install:** if you already ran `scripts/install.ps1` / `scripts/install.sh`, that's the same layout the desktop uses. The desktop reuses your existing venv and editable install — no extra steps. See [Runtime Bootstrap](#runtime-bootstrap) below for details.
-
-If you're hacking on Hermes from a clone outside `HERMES_HOME/hermes-agent`, point the desktop at it explicitly:
-
-```bash
-HERMES_DESKTOP_HERMES_ROOT=/path/to/your/clone npm run dev
-```
-
-### Runtime prerequisites
-
-Hermes Desktop needs:
-
-- **Python 3.11+** — for the agent runtime, dashboard backend, and tool execution. (required)
-- **Git for Windows** (Windows only) — provides Git Bash, which Hermes' terminal tool calls directly. Linux and macOS already ship a system bash. (required)
-- **ripgrep** — used by Hermes' `search_files` tool for fast `.gitignore`-aware file/content search. Recommended on all platforms; Hermes falls back to `grep`/`find` if missing (works but slower and noisier).
-
-The packaged Windows installer (`Hermes-*.exe`) detects all three at install time. Required items missing are auto-installed via `winget install -e --id Python.Python.3.11 --scope user` and `winget install -e --id Git.Git`. The recommended ripgrep is offered as `winget install -e --id BurntSushi.ripgrep.MSVC --scope user`. If `winget` isn't available the installer shows manual download URLs and lets you continue. The MSI installer (`Hermes-*.msi`) doesn't run the prereq page — enterprise deploys are expected to handle prereqs out-of-band.
-
-For dev (`npm run dev`) the Python and Git Bash checks happen at first launch via the Electron bootstrapper, which throws a clear error if either prereq is missing. Manual install commands you can run yourself:
-
-```powershell
-winget install -e --id Python.Python.3.11 --scope user
-winget install -e --id Git.Git
-winget install -e --id BurntSushi.ripgrep.MSVC --scope user
-```
-
-## Development
-
-```bash
-cd apps/desktop
-npm run dev
-```
-
-`npm run dev` starts Vite on `127.0.0.1:5174`, launches Electron, and lets Electron boot the Hermes backend (`hermes dashboard --no-open --tui`) on an open port in `9120-9199`. This path is for UI iteration and may still show Electron/dev identities in OS prompts.
-
-Useful overrides:
-
-```bash
-HERMES_DESKTOP_HERMES_ROOT=/path/to/hermes-agent npm run dev
-HERMES_DESKTOP_PYTHON=/path/to/python npm run dev
-HERMES_DESKTOP_CWD=/path/to/project npm run dev
-HERMES_DESKTOP_IGNORE_EXISTING=1 npm run dev
-HERMES_HOME=/tmp/throwaway-hermes-home npm run dev
-HERMES_DESKTOP_BOOT_FAKE=1 npm run dev
-HERMES_DESKTOP_BOOT_FAKE=1 HERMES_DESKTOP_BOOT_FAKE_STEP_MS=900 npm run dev
-```
-
-`HERMES_DESKTOP_IGNORE_EXISTING=1` skips any `hermes` CLI already on `PATH`, which is useful when testing the factory-image bootstrap path.
-
-`HERMES_HOME` overrides the install root (default: `%LOCALAPPDATA%\hermes` on Windows, `~/.hermes` elsewhere) — handy for sandboxed dev runs that shouldn't touch your real config.
-
-`HERMES_DESKTOP_BOOT_FAKE=1` adds deterministic per-phase delays to desktop startup so you can validate the startup overlay and progress bar. For convenience, `npm run dev:fake-boot` enables fake mode with defaults.
-
-On a fresh Hermes profile, Desktop shows a first-run setup overlay after boot. The overlay saves the minimum required provider credential (for example `OPENROUTER_API_KEY`, `ANTHROPIC_API_KEY`, or `OPENAI_API_KEY`) to the active Hermes `.env`, reloads the backend env, and then lets the user continue without opening Settings manually.
-
-## Dashboard Dev
-
-Run the Python dashboard backend with embedded chat enabled:
-
-```bash
-hermes dashboard --tui --no-open
-```
-
-For dashboard HMR, start Vite in another terminal:
-
-```bash
-cd apps/dashboard
-npm run dev
-```
-
-Open the Vite URL. The dev server proxies `/api`, `/api/pty`, and plugin assets to `http://127.0.0.1:9119` and fetches the live dashboard HTML so the ephemeral session token matches the running backend.
-
-## Build
-
-```bash
-npm run build
-npm run pack          # unpacked app at release/mac-<arch>/Hermes.app
-npm run dist:mac      # macOS DMG + zip
-npm run dist:mac:dmg  # DMG only
-npm run dist:mac:zip  # zip only
-npm run dist:win      # NSIS + MSI
-```
-
-The desktop app no longer bundles a copy of the Hermes Agent Python source at packaging time. Instead, the packaged Electron app fetches and installs Hermes Agent at first launch via `scripts/install.ps1`'s stage protocol (Windows) — see the bootstrap flow documented in `electron/main.cjs`. macOS and Linux packaged builds are temporarily non-functional until `install.sh` gains the same stage protocol; dev workflows on all three platforms continue to work since they resolve a sibling source checkout.
-
-## Automated Releases
-
-Desktop installers are published by [`.github/workflows/desktop-release.yml`](../../.github/workflows/desktop-release.yml) with two channels:
-
-- **Stable:** runs on published GitHub releases and uploads signed artifacts to that release tag.
-- **Nightly:** runs on `main` pushes and updates the rolling `desktop-nightly` prerelease.
-
-The workflow injects a channel-aware desktop version at build time:
-
-- stable: derived from the release tag (for example `v2026.5.5` -> `2026.5.5`)
-- nightly: `0.0.0-nightly.YYYYMMDD.<sha>`
-
-Artifact names include channel, platform, and architecture:
-
-```text
-Hermes-<version>-<channel>-<platform>-<arch>.<ext>
-```
-
-Each run also publishes `SHA256SUMS-<platform>.txt` so installers can be verified.
-
-### Stable release gates
-
-Stable builds fail fast if signing credentials are missing:
-
-- macOS signing + notarization: `CSC_LINK`, `CSC_KEY_PASSWORD`, `APPLE_API_KEY`, `APPLE_API_KEY_ID`, `APPLE_API_ISSUER`
-- Windows signing: `WIN_CSC_LINK`, `WIN_CSC_KEY_PASSWORD`
-
-Stable macOS builds also validate stapling and Gatekeeper assessment in CI before upload.
-
-## Icons
-
-Desktop icons live in `assets/`:
-
-- `assets/icon.icns`
-- `assets/icon.ico`
-- `assets/icon.png`
-
-The builder config points at `assets/icon`. Replace these files directly if the app icon changes.
-
-## Testing Install Paths
-
-Use the package-local test scripts from this directory:
-
-```bash
-npm run test:desktop:all
-npm run test:desktop:existing
-npm run test:desktop:fresh
-npm run test:desktop:dmg
-npm run test:desktop:platforms
-```
-
-`test:desktop:existing` builds the packaged app and opens it normally. It should use an existing `hermes` CLI if one is on `PATH`, preserving the user’s real `~/.hermes` config.
-
-`test:desktop:fresh` builds the packaged app and launches it in a throwaway fresh-install sandbox. It sets `HERMES_DESKTOP_IGNORE_EXISTING=1`, points Electron `userData` at a temp dir, points `HERMES_HOME` at a temp dir, and launches through the factory-image bootstrap path without touching your real desktop runtime or `~/.hermes`.
-
-`test:desktop:dmg` builds and opens the DMG.
-
-`test:desktop:platforms` runs platform bootstrap-path assertions, including:
-- existing-CLI vs factory-image runtime path selection semantics
-- WSL2 protection against Windows `.exe/.cmd/.bat/.ps1` overrides
-- platform-specific runtime import checks (`winpty` vs `ptyprocess`)
-
-For fast reruns without rebuilding:
-
-```bash
-HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:fresh
-HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:existing
-HERMES_DESKTOP_SKIP_BUILD=1 npm run test:desktop:dmg
-```
-
-## Installing Locally
-
-```bash
-npm run dist:mac:dmg
-open release/Hermes-0.0.0-arm64.dmg
-```
-
-Drag `Hermes` to Applications. If testing repeated installs, replace the existing app.
-
-## Runtime Bootstrap
-
-Hermes Desktop shares its install layout with the CLI installers (`scripts/install.ps1`, `scripts/install.sh`) so a desktop-only user and a CLI-only user end up with the same files in the same places.
-
-### Where things live
-
-```text
-HERMES_HOME/                       # %LOCALAPPDATA%\hermes (Windows)
-                                   # ~/.hermes (macOS / Linux)
-├── hermes-agent/                  # ACTIVE_HERMES_ROOT — git checkout
-│   ├── .git/                      # canonical install is always a git checkout
-│   ├── hermes_cli/, agent/, ...   # Python source
-│   ├── pyproject.toml             # source of truth for deps
-│   ├── venv/                      # virtualenv (Scripts\python.exe on Windows,
-│   │                              #             bin/python elsewhere)
-│   └── .hermes-bootstrap-complete # marker: first-launch install.ps1 succeeded
-├── git/                           # PortableGit (Windows; installed by install.ps1)
-├── config.yaml                    # user config
-├── .env                           # API keys
-└── logs/
-    ├── desktop.log                # Electron-side boot log
-    ├── agent.log
-    ├── errors.log
-    └── gateway.log
-```
-
-The packaged installer ships only the Electron app — Hermes Agent itself is fetched and installed at first launch by running `scripts/install.ps1` (Windows) against the git ref baked into the .exe at build time (see `apps/desktop/scripts/write-build-stamp.cjs`).
-
-### Resolution order
-
-The desktop resolves a Hermes backend in this order:
-
-1. `HERMES_DESKTOP_HERMES_ROOT` — explicit dev override.
-2. Repo source root — only when running `npm run dev` from a checkout. Takes precedence over `HERMES_HOME/hermes-agent` so devs always run their local edits.
-3. `HERMES_HOME/hermes-agent` if the `.hermes-bootstrap-complete` marker is present. The marker attests that install.ps1 succeeded and the user finished initial configuration; we trust the install and skip the bootstrap flow on every launch after the first.
-4. Existing `hermes` CLI on PATH (skipped when `HERMES_DESKTOP_IGNORE_EXISTING=1`).
-5. Pip-installed `hermes_cli` module via system Python.
-6. None of the above → bootstrap-needed sentinel. The desktop's first-launch wizard runs `scripts/install.ps1` stages, then writes the marker on success.
-
-### First-launch flow on a packaged install
-
-1. `resolveHermesBackend()` returns `kind: 'bootstrap-needed'`.
-2. The renderer shows the install overlay; main fetches `scripts/install.ps1` from GitHub at the pinned commit (from `install-stamp.json`).
-3. Main drives `install.ps1 -Manifest` to get the stage list, then iterates `install.ps1 -Stage <name> -NonInteractive -Json` with live progress events to the renderer.
-4. On all stages succeeding, main writes `.hermes-bootstrap-complete` with `{ schemaVersion, pinnedCommit, pinnedBranch, completedAt, desktopVersion }`.
-5. Renderer hands off to the existing onboarding overlay (API key / model / persona).
-6. Subsequent launches see the marker and skip everything in steps 1-5.
-
-### Updates
-
-Once bootstrapped, the install is a real git checkout. Updates flow through the in-app update path (`applyUpdates()` → `git fetch && git pull --ff-only` against the configured branch) or `hermes update` from the CLI. Both check `pyproject.toml` drift and re-run `pip install -e .` only when needed.
-
-A user who installed via `scripts/install.ps1` directly (so `HERMES_HOME/hermes-agent/.git` exists but no `.hermes-bootstrap-complete` marker) is detected via resolver step 4 (their `hermes` CLI on PATH) and the desktop reuses their install without re-running the bootstrap.
-
-## Debugging
-
-Desktop boot logs are written to:
-
-```text
-HERMES_HOME/logs/desktop.log     # %LOCALAPPDATA%\hermes\logs\desktop.log on Windows
-                                  # ~/.hermes/logs/desktop.log on macOS / Linux
-```
-
-If the UI reports `Desktop boot failed`, check that log first. It includes the backend command output and recent Python traceback context.
-
-To force a fresh first-launch bootstrap (rare — useful for development / dogfooding the install flow):
-
-```bash
-# macOS / Linux
-rm "$HOME/.hermes/hermes-agent/.hermes-bootstrap-complete"
-
-# Windows (PowerShell)
-Remove-Item "$env:LOCALAPPDATA\hermes\hermes-agent\.hermes-bootstrap-complete"
-```
-
-For a full reset of just the Python venv (rare — usually only needed if the venv is broken):
-
-```bash
-# macOS / Linux
-rm -rf "$HOME/.hermes/hermes-agent/venv"
-
-# Windows (PowerShell)
-Remove-Item -Recurse -Force "$env:LOCALAPPDATA\hermes\hermes-agent\venv"
-```
-
-To reset stale macOS microphone permission prompts:
-
-```bash
-tccutil reset Microphone com.github.Electron
-tccutil reset Microphone com.nousresearch.hermes
-```
-
-## Verification
-
-Run before handing off installer changes:
-
-```bash
-npm run fix
-npm run type-check
-npm run lint
-npm run test:desktop:all
-```
-
-Current lint may report existing warnings, but it should exit with no errors.
@@ -1,21 +0,0 @@
-{
-  "$schema": "https://ui.shadcn.com/schema.json",
-  "style": "new-york",
-  "rsc": false,
-  "tsx": true,
-  "tailwind": {
-    "config": "",
-    "css": "src/styles.css",
-    "baseColor": "neutral",
-    "cssVariables": true,
-    "prefix": ""
-  },
-  "aliases": {
-    "components": "@/components",
-    "utils": "@/lib/utils",
-    "ui": "@/components/ui",
-    "lib": "@/lib",
-    "hooks": "@/hooks"
-  },
-  "iconLibrary": "lucide"
-}
@@ -1,106 +0,0 @@
-/**
- * backend-probes.cjs
- *
- * Cheap "does this candidate backend actually work" checks used by
- * resolveHermesBackend (main.cjs). The resolver walks a ladder of
- * candidates -- bootstrap marker, `hermes` on PATH, system Python with
- * hermes_cli installed -- and historically returned the first candidate
- * whose binary existed on disk. That assumption breaks when a user has
- * a pre-installed Python 3.11-3.13 (so findSystemPython() returns a
- * path) but no hermes_cli in its site-packages: the resolver hands back
- * a backend the spawn step can't actually run, and the user gets a
- * dead-on-arrival "ModuleNotFoundError: No module named 'hermes_cli'"
- * instead of the first-launch installer.
- *
- * These probes give the resolver a way to verify a candidate before
- * trusting it. Failure (non-zero exit, exception, timeout) means "skip
- * this rung, try the next one"; success means "spawn this for real."
- * Falling off the bottom of the ladder lands on the bootstrap-needed
- * sentinel, which is exactly what we want when nothing pre-existing
- * actually works.
- *
- * Both probes are deliberately fast and forgiving:
- *   - 5s timeout (failing a hung interpreter beats waiting forever, but
- *     we still give slow disks / cold caches room to breathe)
- *   - stdio ignored (we only care about the exit code; the probe's
- *     stdout/stderr are discarded and never surfaced to the user or
- *     to the caller)
- *   - any throw -> false (never propagate -- resolver wants a boolean)
- *
- * Kept in a standalone cjs module so it can be unit-tested with
- * `node --test` without dragging in the electron runtime (same pattern
- * as bootstrap-platform.cjs and hardening.cjs).
- */
-
-const { execFileSync } = require('node:child_process')
-
-const PROBE_TIMEOUT_MS = 5000
-
-/**
- * Return true iff `python -c "import hermes_cli"` exits 0.
- *
- * Used to gate the "fallback to system Python with hermes_cli installed"
- * rung of resolveHermesBackend. Without this, a system Python 3.11-3.13
- * registered in PEP 514 makes findSystemPython() succeed regardless of
- * whether hermes_cli has actually been pip-installed into its
- * site-packages -- and the resolver returns a backend that immediately
- * dies on spawn.
- *
- * @param {string} pythonPath - Absolute path to a python.exe / python.
- * @returns {boolean}
- */
-function canImportHermesCli(pythonPath) {
-  if (!pythonPath) return false
-  try {
-    execFileSync(pythonPath, ['-c', 'import hermes_cli'], {
-      stdio: 'ignore',
-      timeout: PROBE_TIMEOUT_MS,
-      windowsHide: true
-    })
-    return true
-  } catch {
-    return false
-  }
-}
-
-/**
- * Return true iff `<hermesCommand> --version` exits 0.
- *
- * Used to gate the "existing `hermes` on PATH" rung. Without this, a
- * stale hermes.cmd shim left behind by an uninstalled pip install (or
- * a half-built venv whose `hermes` entry-point points at a deleted
- * Python) survives findOnPath() and gets selected as the backend.
- *
- * We intentionally avoid invoking the command with the dashboard args
- * here -- `--version` is the cheapest "is this binary alive" smoke
- * test that every hermes_cli entry-point has supported since 0.1.
- *
- * @param {string} hermesCommand - Resolved absolute path to a hermes
- *   executable (or an interpreter+script wrapper).
- * @param {object} [opts]
- * @param {boolean} [opts.shell] - Whether to run through a shell. For
- *   .cmd/.bat shims on Windows execFileSync needs shell:true to find
- *   the cmd interpreter; mirrors the same flag isCommandScript() drives
- *   in resolveHermesBackend.
- * @returns {boolean}
- */
-function verifyHermesCli(hermesCommand, opts = {}) {
-  if (!hermesCommand) return false
-  try {
-    execFileSync(hermesCommand, ['--version'], {
-      stdio: 'ignore',
-      timeout: PROBE_TIMEOUT_MS,
-      shell: Boolean(opts.shell),
-      windowsHide: true
-    })
-    return true
-  } catch {
-    return false
-  }
-}
-
-// Exported surface: the two probes plus the timeout constant they share.
-module.exports = {
-  canImportHermesCli,
-  verifyHermesCli,
-  PROBE_TIMEOUT_MS
-}
@@ -1,80 +0,0 @@
-/**
- * Tests for electron/backend-probes.cjs.
- *
- * Run with: node --test electron/backend-probes.test.cjs
- * (Wired into npm test:desktop:platforms in package.json.)
- */
-
-const test = require('node:test')
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const os = require('node:os')
-const path = require('node:path')
-
-const { canImportHermesCli, verifyHermesCli } = require('./backend-probes.cjs')
-
-// Resolve the host's own Node binary -- guaranteed to be on disk and
-// runnable. We use it as both a stand-in for "a python that doesn't
-// have hermes_cli" (since `node -c "import hermes_cli"` will exit
-// non-zero) and as a way to script verifyHermesCli's success path
-// (a tiny script we write to disk that exits 0 on --version).
-const NODE_BIN = process.execPath
-
-test('canImportHermesCli returns false when path is falsy', () => {
-  assert.equal(canImportHermesCli(''), false)
-  assert.equal(canImportHermesCli(null), false)
-  assert.equal(canImportHermesCli(undefined), false)
-})
-
-test('canImportHermesCli returns false when interpreter cannot run -c', () => {
-  // node IS an interpreter, but `node -c "import hermes_cli"` is a
-  // SyntaxError -- different exit reason from a real Python's
-  // ModuleNotFoundError, but the predicate is "exit 0 or not" and
-  // both land on "not", which is exactly what we want for the
-  // resolver fall-through.
-  assert.equal(canImportHermesCli(NODE_BIN), false)
-})
-
-test('canImportHermesCli returns false when binary does not exist', () => {
-  const ghost = path.join(os.tmpdir(), 'hermes-probes-ghost-' + Date.now() + '.exe')
-  assert.equal(canImportHermesCli(ghost), false)
-})
-
-test('verifyHermesCli returns false when command is falsy', () => {
-  assert.equal(verifyHermesCli(''), false)
-  assert.equal(verifyHermesCli(null), false)
-  assert.equal(verifyHermesCli(undefined), false)
-})
-
-test('verifyHermesCli returns false when binary does not exist', () => {
-  const ghost = path.join(os.tmpdir(), 'hermes-probes-ghost-' + Date.now() + '.exe')
-  assert.equal(verifyHermesCli(ghost), false)
-})
-
-test('verifyHermesCli returns true when --version exits 0', () => {
-  // Write a tiny script that exits 0 regardless of args, then invoke
-  // it through node. This stands in for a working hermes binary --
-  // verifyHermesCli only cares about the exit code.
-  const scriptPath = path.join(os.tmpdir(), `hermes-probes-ok-${Date.now()}-${process.pid}.cjs`)
-  fs.writeFileSync(scriptPath, 'process.exit(0)\n')
-  try {
-    // Use node as the launcher and our script as the "command". Pass
-    // shell:false (default) -- node is a real binary, no shim.
-    // execFileSync passes ['--version'] as args, which node ignores
-    // gracefully (well, it prints its version and exits 0, which is
-    // perfect -- exit code 0 is the only signal we read).
-    assert.equal(verifyHermesCli(NODE_BIN), true)
-  } finally {
-    try {
-      fs.unlinkSync(scriptPath)
-    } catch {}
-  }
-})
-
-test('verifyHermesCli swallows timeouts (does not throw)', () => {
-  // We can't easily provoke a real 5s hang in CI without slowing the
-  // suite, but we CAN confirm that an invocation that DOES throw
-  // (because the binary is missing) returns false rather than
-  // propagating. Same code path the timeout case takes.
-  assert.equal(verifyHermesCli('/definitely/not/a/real/binary/anywhere'), false)
-})
@@ -1,30 +0,0 @@
-function isWslEnvironment(env = process.env, platform = process.platform) {
-  if (platform !== 'linux') return false
-  return Boolean(env.WSL_DISTRO_NAME || env.WSL_INTEROP)
-}
-
-function isWindowsBinaryPathInWsl(filePath, options = {}) {
-  const isWsl = options.isWsl ?? isWslEnvironment(options.env, options.platform)
-  if (!isWsl) return false
-
-  const normalized = String(filePath || '')
-    .replace(/\\/g, '/')
-    .toLowerCase()
-
-  return (
-    normalized.endsWith('.exe') ||
-    normalized.endsWith('.cmd') ||
-    normalized.endsWith('.bat') ||
-    normalized.endsWith('.ps1')
-  )
-}
-
-function bundledRuntimeImportCheck(platform = process.platform) {
-  return platform === 'win32' ? 'import fastapi, uvicorn, winpty' : 'import fastapi, uvicorn, ptyprocess'
-}
-
-// Exported surface: the three platform helpers defined in this module.
-module.exports = {
-  bundledRuntimeImportCheck,
-  isWindowsBinaryPathInWsl,
-  isWslEnvironment
-}
@@ -1,52 +0,0 @@
-const assert = require('node:assert/strict')
-const fs = require('node:fs')
-const path = require('node:path')
-const test = require('node:test')
-
-const { bundledRuntimeImportCheck, isWindowsBinaryPathInWsl, isWslEnvironment } = require('./bootstrap-platform.cjs')
-
-test('isWslEnvironment detects WSL2 env vars on linux', () => {
-  assert.equal(isWslEnvironment({ WSL_DISTRO_NAME: 'Ubuntu' }, 'linux'), true)
-  assert.equal(isWslEnvironment({ WSL_INTEROP: '/run/WSL/123_interop' }, 'linux'), true)
-  assert.equal(isWslEnvironment({}, 'linux'), false)
-  assert.equal(isWslEnvironment({ WSL_DISTRO_NAME: 'Ubuntu' }, 'darwin'), false)
-})
-
-test('isWindowsBinaryPathInWsl blocks Windows binary types on WSL', () => {
-  assert.equal(isWindowsBinaryPathInWsl('/mnt/c/Tools/hermes.exe', { isWsl: true }), true)
-  assert.equal(isWindowsBinaryPathInWsl('/mnt/c/Tools/hermes.cmd', { isWsl: true }), true)
-  assert.equal(isWindowsBinaryPathInWsl('/mnt/c/Tools/hermes.bat', { isWsl: true }), true)
-  assert.equal(isWindowsBinaryPathInWsl('/mnt/c/Tools/install.ps1', { isWsl: true }), true)
-  assert.equal(isWindowsBinaryPathInWsl('/usr/local/bin/hermes', { isWsl: true }), false)
-  assert.equal(isWindowsBinaryPathInWsl('/mnt/c/Tools/hermes.exe', { isWsl: false }), false)
-})
-
-test('bundledRuntimeImportCheck selects platform-specific import checks', () => {
-  assert.equal(bundledRuntimeImportCheck('win32'), 'import fastapi, uvicorn, winpty')
-  assert.equal(bundledRuntimeImportCheck('darwin'), 'import fastapi, uvicorn, ptyprocess')
-  assert.equal(bundledRuntimeImportCheck('linux'), 'import fastapi, uvicorn, ptyprocess')
-})
-
-test('packaged electron entrypoints do not require unpackaged npm modules', () => {
-  const electronDir = __dirname
-  const entrypoints = ['main.cjs', 'preload.cjs', 'bootstrap-platform.cjs']
-  // - electron: provided by the electron runtime, always resolvable in packaged builds.
-  // - node-pty: hoisted by workspace dedup AND shipped via extraResources to
-  //   resources/native-deps/node-pty (see scripts/stage-native-deps.cjs). main.cjs
-  //   has a try/catch fallback at line ~38 that resolves the staged copy when the
-  //   bare require fails in the packaged asar, so the bare require itself is by
-  //   design rather than an oversight.
-  const allowedBareRequires = new Set(['electron', 'node-pty'])
-  const requirePattern = /require\(['"]([^'"]+)['"]\)/g
-
-  for (const entrypoint of entrypoints) {
-    const source = fs.readFileSync(path.join(electronDir, entrypoint), 'utf8')
-    const bareRequires = Array.from(source.matchAll(requirePattern))
-      .map(match => match[1])
-      .filter(specifier => !specifier.startsWith('node:'))
-      .filter(specifier => !specifier.startsWith('.'))
-      .filter(specifier => !allowedBareRequires.has(specifier))
-
-    assert.deepEqual(bareRequires, [], `${entrypoint} has unpackaged runtime requires`)
-  }
-})
@@ -1,466 +0,0 @@
-'use strict'
-
-/**
- * bootstrap-runner.cjs
- *
- * Drives apps/desktop's first-launch install of Hermes Agent by spawning
- * scripts/install.ps1 stage-by-stage and streaming progress events back to
- * the renderer.
- *
- * Wired from electron/main.cjs:
- *   const { runBootstrap } = require('./bootstrap-runner.cjs')
- *   const result = await runBootstrap({
- *     installStamp,        // INSTALL_STAMP from main.cjs (may be null in dev)
- *     activeRoot,          // ACTIVE_HERMES_ROOT
- *     sourceRepoRoot,      // SOURCE_REPO_ROOT (for dev install.ps1 lookup)
- *     hermesHome,          // HERMES_HOME
- *     logRoot,             // HERMES_HOME/logs
- *     emit: ev => {...}    // event sink (sender.send or similar)
- *   })
- *
- * Emits events with shape:
- *   { type: 'manifest',  stages: [{name, title, category, needs_user_input}, ...] }
- *   { type: 'stage',     name, state: 'running'|'succeeded'|'skipped'|'failed',
- *                        json?, durationMs?, error? }
- *   { type: 'log',       stage?, line }      // raw line from install.ps1
- *   { type: 'complete',  marker: <written marker payload> }
- *   { type: 'failed',    stage?, error }     // bootstrap aborted
- *
- * Resolves with the same shape as the final 'complete' or 'failed' event so
- * callers can await either way.
- *
- * NOT implemented yet (deferred to Phase 1E / 1F):
- *   - User-facing retry / cancel from the renderer (event channels exist;
- *     no UI consumes them yet)
- *   - macOS / Linux install.sh equivalent
- */
-
-const fs = require('node:fs')
-const fsp = require('node:fs/promises')
-const path = require('node:path')
-const https = require('node:https')
-const { spawn } = require('node:child_process')
-
-const STAMP_COMMIT_RE = /^[0-9a-f]{7,40}$/i
-
-// Stages flagged needs_user_input=true in the manifest are skipped by the
-// runner (passed -NonInteractive to install.ps1, which the install script
-// itself handles by emitting skipped=true frames). The renderer / 1E onboarding
-// overlay takes over for those concerns (API keys, model, persona, gateway).
-// We let install.ps1's own -NonInteractive logic drive this rather than
-// filtering client-side -- single source of truth.
-
-// ---------------------------------------------------------------------------
-// install.ps1 source resolution
-// ---------------------------------------------------------------------------
-
-function resolveLocalInstallScript(sourceRepoRoot) {
-  if (!sourceRepoRoot) return null
-  const candidate = path.join(sourceRepoRoot, 'scripts', 'install.ps1')
-  try {
-    fs.accessSync(candidate, fs.constants.R_OK)
-    return candidate
-  } catch {
-    return null
-  }
-}
-
-function bootstrapCacheDir(hermesHome) {
-  return path.join(hermesHome, 'bootstrap-cache')
-}
-
-function cachedScriptPath(hermesHome, commit) {
-  return path.join(bootstrapCacheDir(hermesHome), `install-${commit}.ps1`)
-}
-
-function downloadInstallScript(commit, destPath) {
-  // Fetch from GitHub raw at the pinned commit. The raw URL with a SHA
-  // is immutable (unlike a branch ref), so we don't need integrity
-  // verification beyond "did the file we wrote pass a syntax probe."
-  const url = `https://raw.githubusercontent.com/NousResearch/hermes-agent/${commit}/scripts/install.ps1`
-  return new Promise((resolve, reject) => {
-    fs.mkdirSync(path.dirname(destPath), { recursive: true })
-    const tmpPath = destPath + '.tmp'
-    const out = fs.createWriteStream(tmpPath)
-    https
-      .get(url, res => {
-        if (res.statusCode === 301 || res.statusCode === 302) {
-          // GitHub raw shouldn't redirect for a SHA URL, but follow once
-          // defensively.
-          out.close()
-          fs.unlinkSync(tmpPath)
-          https
-            .get(res.headers.location, res2 => {
-              if (res2.statusCode !== 200) {
-                reject(new Error(`Failed to download install.ps1: HTTP ${res2.statusCode} from redirect ${res.headers.location}`))
-                return
-              }
-              const out2 = fs.createWriteStream(tmpPath)
-              res2.pipe(out2)
-              out2.on('finish', () => {
-                out2.close()
-                fs.renameSync(tmpPath, destPath)
-                resolve(destPath)
-              })
-              out2.on('error', reject)
-            })
-            .on('error', reject)
-          return
-        }
-        if (res.statusCode !== 200) {
-          out.close()
-          try {
-            fs.unlinkSync(tmpPath)
-          } catch {}
-          reject(new Error(`Failed to download install.ps1: HTTP ${res.statusCode} from ${url}`))
-          return
-        }
-        res.pipe(out)
-        out.on('finish', () => {
-          out.close()
-          fs.renameSync(tmpPath, destPath)
-          resolve(destPath)
-        })
-        out.on('error', err => {
-          try {
-            fs.unlinkSync(tmpPath)
-          } catch {}
-          reject(err)
-        })
-      })
-      .on('error', err => {
-        try {
-          fs.unlinkSync(tmpPath)
-        } catch {}
-        reject(err)
-      })
-  })
-}
-
-async function resolveInstallScript({ installStamp, sourceRepoRoot, hermesHome, emit }) {
-  // 1. Dev shortcut: prefer a local checkout's install.ps1 so we can iterate
-  //    without pushing. SOURCE_REPO_ROOT comes from main.cjs (path.resolve
-  //    of APP_ROOT/../..).
-  const localScript = resolveLocalInstallScript(sourceRepoRoot)
-  if (localScript) {
-    emit({ type: 'log', line: `[bootstrap] using local install.ps1 at ${localScript}` })
-    return { path: localScript, source: 'local' }
-  }
-
-  // 2. Packaged path: download from GitHub at the pinned commit (1B's stamp).
-  if (!installStamp || !installStamp.commit || !STAMP_COMMIT_RE.test(installStamp.commit)) {
-    throw new Error(
-      'Cannot resolve install.ps1: no SOURCE_REPO_ROOT and no install stamp. ' +
-        'This packaged build was produced without a valid build-time stamp.'
-    )
-  }
-
-  const cached = cachedScriptPath(hermesHome, installStamp.commit)
-  try {
-    await fsp.access(cached, fs.constants.R_OK)
-    emit({ type: 'log', line: `[bootstrap] using cached install.ps1 for ${installStamp.commit.slice(0, 12)}` })
-    return { path: cached, source: 'cache', commit: installStamp.commit }
-  } catch {
-    // not cached; download
-  }
-
-  emit({ type: 'log', line: `[bootstrap] fetching install.ps1 for ${installStamp.commit.slice(0, 12)} from GitHub` })
-  await downloadInstallScript(installStamp.commit, cached)
-  emit({ type: 'log', line: `[bootstrap] saved to ${cached}` })
-  return { path: cached, source: 'download', commit: installStamp.commit }
-}
-
-// ---------------------------------------------------------------------------
-// powershell wrapper
-// ---------------------------------------------------------------------------
-
-function spawnPowerShell(scriptPath, args, { emit, stageName, abortSignal, hermesHome } = {}) {
-  return new Promise((resolve, reject) => {
-    const ps = process.platform === 'win32' ? 'powershell.exe' : 'pwsh'
-    const fullArgs = ['-NoProfile', '-ExecutionPolicy', 'Bypass', '-File', scriptPath, ...args]
-
-    const child = spawn(ps, fullArgs, {
-      stdio: ['ignore', 'pipe', 'pipe'],
-      env: {
-        ...process.env,
-        // Pass HERMES_HOME through so install.ps1 respects the caller's
-        // choice rather than re-computing the default.
-        HERMES_HOME: hermesHome || process.env.HERMES_HOME || ''
-      }
-    })
-
-    let stdout = ''
-    let stderr = ''
-    let killed = false
-
-    const onAbort = () => {
-      killed = true
-      try {
-        child.kill('SIGTERM')
-      } catch {}
-    }
-    if (abortSignal) {
-      if (abortSignal.aborted) {
-        onAbort()
-      } else {
-        abortSignal.addEventListener('abort', onAbort, { once: true })
-      }
-    }
-
-    child.stdout.setEncoding('utf8')
-    child.stderr.setEncoding('utf8')
-
-    // Stream stdout line-by-line so the renderer sees progress in real time.
-    let stdoutBuf = ''
-    child.stdout.on('data', chunk => {
-      stdout += chunk
-      stdoutBuf += chunk
-      let nl
-      while ((nl = stdoutBuf.indexOf('\n')) !== -1) {
-        const line = stdoutBuf.slice(0, nl).replace(/\r$/, '')
-        stdoutBuf = stdoutBuf.slice(nl + 1)
-        if (line) emit && emit({ type: 'log', stage: stageName, line })
-      }
-    })
-
-    let stderrBuf = ''
-    child.stderr.on('data', chunk => {
-      stderr += chunk
-      stderrBuf += chunk
-      let nl
-      while ((nl = stderrBuf.indexOf('\n')) !== -1) {
-        const line = stderrBuf.slice(0, nl).replace(/\r$/, '')
-        stderrBuf = stderrBuf.slice(nl + 1)
-        if (line) emit && emit({ type: 'log', stage: stageName, line: `stderr: ${line}` })
-      }
-    })
-
-    child.on('error', err => {
-      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
-      reject(err)
-    })
-
-    child.on('close', (code, signal) => {
-      if (abortSignal) abortSignal.removeEventListener('abort', onAbort)
-      // Flush any trailing bytes
-      if (stdoutBuf) emit && emit({ type: 'log', stage: stageName, line: stdoutBuf })
-      if (stderrBuf) emit && emit({ type: 'log', stage: stageName, line: `stderr: ${stderrBuf}` })
-      resolve({ stdout, stderr, code, signal, killed })
-    })
-  })
-}
-
-// ---------------------------------------------------------------------------
-// Manifest + stage dispatch
-// ---------------------------------------------------------------------------
-
-// Build the install.ps1 pin args (-Commit / -Branch) from the install-stamp
-// so the repository stage clones the exact SHA the .exe was tested with
-// instead of falling back to install.ps1's default ($Branch = "main").
-function buildPinArgs(installStamp) {
-  const args = []
-  if (installStamp && installStamp.commit) {
-    args.push('-Commit', installStamp.commit)
-  }
-  if (installStamp && installStamp.branch) {
-    args.push('-Branch', installStamp.branch)
-  }
-  return args
-}
-
-async function fetchManifest({ scriptPath, emit, hermesHome, installStamp }) {
-  const pinArgs = buildPinArgs(installStamp)
-  const result = await spawnPowerShell(scriptPath, ['-Manifest', ...pinArgs], {
-    emit,
-    stageName: '__manifest__',
-    hermesHome
-  })
-  if (result.code !== 0) {
-    throw new Error(`install.ps1 -Manifest failed: exit ${result.code}\n${result.stderr || result.stdout}`)
-  }
-  // The manifest is the LAST JSON line on stdout (install.ps1 may print
-  // banner / info lines first depending on Console.OutputEncoding effects).
-  // Find the last line that parses as JSON with a `stages` field.
-  const lines = result.stdout.split(/\r?\n/).filter(Boolean)
-  for (let i = lines.length - 1; i >= 0; i--) {
-    try {
-      const parsed = JSON.parse(lines[i])
-      if (parsed && Array.isArray(parsed.stages)) {
-        return parsed
-      }
-    } catch {}
-  }
-  throw new Error(`install.ps1 -Manifest produced no parseable JSON payload\n${result.stdout}`)
-}
-
-// Parse the JSON result frame from a stage run. The protocol guarantees
-// exactly one JSON line per stage in -Json or -Stage mode (post #27224 fix
-// for the double-emit bug we addressed in the install.ps1 PR).
-function parseStageResult(stdout) {
-  const lines = stdout.split(/\r?\n/).filter(Boolean)
-  for (let i = lines.length - 1; i >= 0; i--) {
-    try {
-      const parsed = JSON.parse(lines[i])
-      if (parsed && typeof parsed.ok === 'boolean' && typeof parsed.stage === 'string') {
-        return parsed
-      }
-    } catch {}
-  }
-  return null
-}
-
-async function runStage({ scriptPath, stage, emit, hermesHome, abortSignal, installStamp }) {
-  const startedAt = Date.now()
-  emit({ type: 'stage', name: stage.name, state: 'running' })
-
-  const pinArgs = buildPinArgs(installStamp)
-  const result = await spawnPowerShell(
-    scriptPath,
-    ['-Stage', stage.name, '-NonInteractive', '-Json', ...pinArgs],
-    { emit, stageName: stage.name, abortSignal, hermesHome }
-  )
-
-  const durationMs = Date.now() - startedAt
-
-  if (result.killed) {
-    const ev = { type: 'stage', name: stage.name, state: 'failed', durationMs, error: 'cancelled by user' }
-    emit(ev)
-    return ev
-  }
-
-  const json = parseStageResult(result.stdout)
-
-  if (!json) {
-    const ev = {
-      type: 'stage',
-      name: stage.name,
-      state: 'failed',
-      durationMs,
-      error: `install.ps1 -Stage ${stage.name} produced no JSON result frame (exit=${result.code})`,
-      json: null
-    }
-    emit(ev)
-    return ev
-  }
-
-  if (json.ok && json.skipped) {
-    const ev = { type: 'stage', name: stage.name, state: 'skipped', durationMs, json }
-    emit(ev)
-    return ev
-  }
-  if (json.ok) {
-    const ev = { type: 'stage', name: stage.name, state: 'succeeded', durationMs, json }
-    emit(ev)
-    return ev
-  }
-  const ev = { type: 'stage', name: stage.name, state: 'failed', durationMs, json, error: json.reason || `exit code ${result.code}` }
-  emit(ev)
-  return ev
-}
-
-// ---------------------------------------------------------------------------
-// Per-run log file
-// ---------------------------------------------------------------------------
-
-function openRunLog(logRoot) {
-  fs.mkdirSync(logRoot, { recursive: true })
-  const ts = new Date().toISOString().replace(/[:.]/g, '-')
-  const logPath = path.join(logRoot, `bootstrap-${ts}.log`)
-  const stream = fs.createWriteStream(logPath, { flags: 'a' })
-  return { path: logPath, stream }
-}
-
-// ---------------------------------------------------------------------------
-// Public entrypoint
-// ---------------------------------------------------------------------------
-
-async function runBootstrap(opts) {
-  const {
-    installStamp,
-    activeRoot,
-    sourceRepoRoot,
-    hermesHome,
-    logRoot,
-    onEvent,
-    abortSignal,
-    writeMarker // callback to write the bootstrap-complete marker; main.cjs provides
-  } = opts
-
-  const runLog = openRunLog(logRoot || path.join(hermesHome, 'logs'))
-
-  // Tee every event to the runLog AND the caller's onEvent. This gives us a
-  // forensic trail per bootstrap run AND lets the renderer subscribe live.
-  const emit = ev => {
-    try {
-      runLog.stream.write(JSON.stringify(ev) + '\n')
-    } catch {}
-    try {
-      if (typeof onEvent === 'function') onEvent(ev)
-    } catch (err) {
-      // Don't let a subscriber bug crash the bootstrap
-      runLog.stream.write(`emit error: ${err && err.message}\n`)
-    }
-  }
-
-  emit({
-    type: 'log',
-    line:
-      `[bootstrap] starting at ${new Date().toISOString()}; ` +
-      `activeRoot=${activeRoot}; ` +
-      `stamp=${installStamp ? installStamp.commit.slice(0, 12) : '<none>'}; ` +
-      `runLog=${runLog.path}`
-  })
-
-  try {
-    // 1. Resolve install.ps1
-    const scriptInfo = await resolveInstallScript({ installStamp, sourceRepoRoot, hermesHome, emit })
-
-    // 2. Fetch manifest
-    const manifest = await fetchManifest({ scriptPath: scriptInfo.path, emit, hermesHome, installStamp })
-    emit({
-      type: 'manifest',
-      stages: manifest.stages,
-      protocolVersion: manifest.protocol_version || manifest.protocolVersion || null
-    })
-
-    // 3. Iterate stages in order. Stages flagged needs_user_input are still
-    //    invoked -- install.ps1's own -NonInteractive handler in those stages
-    //    emits skipped=true. We trust the protocol rather than filtering
-    //    client-side.
-    for (const stage of manifest.stages) {
-      if (abortSignal && abortSignal.aborted) {
-        emit({ type: 'failed', error: 'bootstrap cancelled by user' })
-        return { ok: false, cancelled: true }
-      }
-      const ev = await runStage({ scriptPath: scriptInfo.path, stage, emit, hermesHome, abortSignal, installStamp })
-      if (ev.state === 'failed') {
-        emit({ type: 'failed', stage: stage.name, error: ev.error || 'stage failed' })
-        return { ok: false, failedStage: stage.name, error: ev.error }
-      }
-    }
-
-    // 4. Write the bootstrap-complete marker.
-    const markerPayload = {
-      pinnedCommit: installStamp ? installStamp.commit : null,
-      pinnedBranch: installStamp ? installStamp.branch : null
-    }
-    const marker = typeof writeMarker === 'function' ? writeMarker(markerPayload) : markerPayload
-    emit({ type: 'complete', marker })
-    return { ok: true, marker }
-  } catch (err) {
-    emit({ type: 'failed', error: err.message || String(err) })
-    return { ok: false, error: err.message || String(err) }
-  } finally {
-    try {
-      runLog.stream.end()
-    } catch {}
-  }
-}
-
-module.exports = {
-  runBootstrap,
-  // Exposed for testability
-  parseStageResult,
-  resolveLocalInstallScript,
-  cachedScriptPath
-}
@@ -1,12 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-  <key>com.apple.security.cs.allow-jit</key>
-  <true/>
-  <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
-  <true/>
-  <key>com.apple.security.cs.disable-library-validation</key>
-  <true/>
-</dict>
-</plist>
@@ -1,14 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-  <key>com.apple.security.cs.allow-jit</key>
-  <true/>
-  <key>com.apple.security.cs.allow-unsigned-executable-memory</key>
-  <true/>
-  <key>com.apple.security.cs.disable-library-validation</key>
-  <true/>
-  <key>com.apple.security.device.audio-input</key>
-  <true/>
-</dict>
-</plist>
--- a/Show More
+++ b/Show More