chore: remove vendor-specific references from app_tools

fix: use 'is not None' checks for session/session_id, remove dead _EXECUTE_STRIP_KEYS
- 'if session:' drops empty dict {} which is schema-valid
- 'if session_id:' drops empty string which shouldn't be silently eaten
- _EXECUTE_STRIP_KEYS frozenset was defined but never referenced (handler uses allowlist approach instead)
2026-05-23 22:59:01 +05:30 · 2026-05-23 22:19:24 +05:30 · 2026-05-23 22:13:08 +05:30 · 2026-05-23 21:16:42 +05:30 · 2026-05-23 21:08:04 +05:30 · 2026-05-23 20:52:08 +05:30
256 changed files with 16771 additions and 2818 deletions
@@ -27,9 +27,9 @@ on:
 permissions:
  contents: read

-# Concurrency: push/release runs are NEVER cancelled so every merge gets its
-# own SHA-tagged image; :main and :latest are guarded separately by the
-# move-main and move-latest jobs.  PR runs reuse a PR-scoped group with
+# Concurrency: push/release runs are NEVER cancelled so every merge gets
+# its own :main or release-tagged image.  :latest is guarded separately
+# by the move-latest job.  PR runs reuse a PR-scoped group with
 # cancel-in-progress: true so rapid pushes to the same PR collapse to the
 # latest commit.
 concurrency:
@@ -92,10 +92,10 @@ jobs:
      # pattern for multi-runner multi-platform builds.
      #
      # We apply the OCI revision label here (and again on arm64) because
-      # the move-main / move-latest jobs read it off the linux/amd64
-      # sub-manifest config of the floating tag to decide whether it's safe
-      # to advance.  The label must be on each per-arch image — manifest
-      # lists themselves don't carry image config labels.
+      # the move-latest job reads it off the linux/amd64 sub-manifest
+      # config of the floating tag to decide whether it's safe to advance.
+      # The label must be on each per-arch image — manifest lists themselves
+      # don't carry image config labels.
      - name: Push amd64 by digest
        id: push
        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
@@ -208,8 +208,14 @@ jobs:
  # ---------------------------------------------------------------------------
  # Stitch both per-arch digests into a single tagged multi-arch manifest.
  # This is a registry-side operation — no building, no layer re-push —
-  # so it runs in ~30 seconds.  On main pushes it produces :sha-<sha>.
-  # On releases it produces :<release_tag_name>.
+  # so it runs in ~30 seconds.  On main pushes it produces :main; on
+  # releases it produces :<release_tag_name>.
+  #
+  # For main pushes the ancestor check runs BEFORE the manifest push so
+  # we never overwrite :main with an older commit.  The top-level
+  # concurrency group (`docker-${{ github.ref }}` with
+  # `cancel-in-progress: false`) already serialises runs per ref; the
+  # ancestor check is defense-in-depth.
  # ---------------------------------------------------------------------------
  merge:
    if: github.repository == 'NousResearch/hermes-agent' && (github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release')
@@ -217,10 +223,15 @@ jobs:
    needs: [build-amd64, build-arm64]
    timeout-minutes: 10
    outputs:
-      pushed_sha_tag: ${{ steps.mark_pushed.outputs.pushed }}
      pushed_release_tag: ${{ steps.mark_release_pushed.outputs.pushed }}
      release_tag: ${{ steps.tag.outputs.tag }}
    steps:
+      - name: Checkout code
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
+        with:
+          fetch-depth: 1000
+
      - name: Download digests
        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093  # v4
        with:
@@ -237,120 +248,19 @@ jobs:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

-      # Compute the tag for this run.  Main pushes use sha-<sha> (so every
-      # commit gets its own immutable tag); releases use the release tag name.
-      - name: Compute tag
-        id: tag
-        run: |
-          if [ "${{ github.event_name }}" = "release" ]; then
-            echo "tag=${{ github.event.release.tag_name }}" >> "$GITHUB_OUTPUT"
-          else
-            echo "tag=sha-${{ github.sha }}" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Create manifest list and push
-        working-directory: /tmp/digests
-        run: |
-          set -euo pipefail
-          # Build the arg array from each digest file (filename = the digest
-          # hex, with no sha256: prefix; empty file content, only the name
-          # matters).  Using an array avoids shellcheck SC2046 and keeps
-          # every digest a single argv token even under pathological names.
-          args=()
-          for digest_file in *; do
-            args+=("${IMAGE_NAME}@sha256:${digest_file}")
-          done
-          docker buildx imagetools create \
-            -t "${IMAGE_NAME}:${TAG}" \
-            "${args[@]}"
-        env:
-          IMAGE_NAME: ${{ env.IMAGE_NAME }}
-          TAG: ${{ steps.tag.outputs.tag }}
-
-      - name: Inspect image
-        run: |
-          docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
-        env:
-          IMAGE_NAME: ${{ env.IMAGE_NAME }}
-          TAG: ${{ steps.tag.outputs.tag }}
-
-      # Signal to move-main that the SHA tag is live.  Only on main pushes;
-      # releases set pushed_release_tag instead.
-      - name: Mark SHA tag pushed
-        id: mark_pushed
-        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
-        run: echo "pushed=true" >> "$GITHUB_OUTPUT"
-
-      # Signal to move-latest that the release tag is live.
-      - name: Mark release tag pushed
-        id: mark_release_pushed
-        if: github.event_name == 'release'
-        run: echo "pushed=true" >> "$GITHUB_OUTPUT"
-
-  # ---------------------------------------------------------------------------
-  # Move :main to point at the SHA tag the merge job pushed.
-  #
-  # :main is the floating tag that tracks the tip of the main branch.  Every
-  # merge to main retags :main forward.  Users who want "latest dev build"
-  # pull :main; users who want stable releases pull :latest.
-  #
-  # The real serialization guarantee comes from the top-level concurrency
-  # group (`docker-${{ github.ref }}` with `cancel-in-progress: false`),
-  # which ensures at most one workflow run for this ref executes at a time.
-  # That means two move-main steps for the same ref cannot overlap.
-  #
-  # This job has its own concurrency group as defense-in-depth: if the
-  # top-level group is ever loosened, queued move-mains will run serially
-  # in arrival order, each one running the ancestor check below and either
-  # advancing :main or skipping.  `cancel-in-progress: false` matches the
-  # top-level setting — we don't want rapid pushes to cancel a queued
-  # move-main, because the ancestor check is the real safety mechanism
-  # and queueing is cheap (move-main is a ~30s registry op).
-  #
-  # Combined with the ancestor check, this means :main only ever moves
-  # forward in git history.
-  # ---------------------------------------------------------------------------
-  move-main:
-    if: |
-      github.repository == 'NousResearch/hermes-agent'
-      && github.event_name == 'push'
-      && github.ref == 'refs/heads/main'
-      && needs.merge.outputs.pushed_sha_tag == 'true'
-    needs: merge
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    concurrency:
-      group: docker-move-main-${{ github.ref }}
-      cancel-in-progress: false
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
-        with:
-          fetch-depth: 1000
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f  # v3
-
-      - name: Log in to Docker Hub
-        uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121  # v4.1.0
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-
      # Read the git revision label off the current :main manifest, then
-      # use `git merge-base --is-ancestor` to check whether our commit is a
-      # descendant of it.  If :main doesn't exist yet, or its label is
-      # missing, we treat that as "safe to publish".  If another run already
-      # advanced :main past us (or diverged), we skip and leave it alone.
+      # use `git merge-base --is-ancestor` to check whether our commit is
+      # a descendant of it.  If :main doesn't exist yet, or its label is
+      # missing, we treat that as "safe to publish".  If another run
+      # already advanced :main past us (or diverged), we skip and leave
+      # it alone.
      - name: Decide whether to move :main
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        id: main_check
        run: |
          set -euo pipefail
          image=nousresearch/hermes-agent

-          # Pull the JSON for the linux/amd64 sub-manifest's config and extract
-          # the OCI revision label with jq — Go template field access can't
-          # handle dots in map keys, so using json+jq is the robust route.
          image_json=$(
            docker buildx imagetools inspect "${image}:main" \
              --format '{{ json (index .Image "linux/amd64") }}' \
@@ -383,7 +293,6 @@ jobs:
            exit 0
          fi

-          # Make sure we have the :main commit locally for merge-base.
          if ! git cat-file -e "${current_sha}^{commit}" 2>/dev/null; then
            git fetch --no-tags --prune origin \
              "+refs/heads/main:refs/remotes/origin/main" \
@@ -396,7 +305,6 @@ jobs:
            exit 0
          fi

-          # Our SHA must be a descendant of the current :main to be safe.
          if git merge-base --is-ancestor "${current_sha}" "${GITHUB_SHA}"; then
            echo "Our commit is a descendant of :main — safe to advance."
            echo "push_main=true" >> "$GITHUB_OUTPUT"
@@ -405,19 +313,48 @@ jobs:
            echo "push_main=false" >> "$GITHUB_OUTPUT"
          fi

-      # Retag the already-pushed SHA manifest as :main.  This is a registry-
-      # side operation — no rebuild, no layer re-push — so it's quick and
-      # atomic per-tag.  The ancestor check above plus the cancel-in-progress
-      # concurrency on this job together guarantee we only ever move :main
-      # forward in git history.
-      - name: Move :main to this SHA
-        if: steps.main_check.outputs.push_main == 'true'
+      # Compute the tag for this run.  Main pushes tag directly as :main
+      # (no per-commit SHA tags); releases use the release tag name.
+      #
+      # The release tag name is handed to the script through an env var
+      # rather than being expanded inline with ${{ }}: inline expansion
+      # pastes the value into the shell source before bash parses it, so a
+      # tag containing quotes or $(...) would be executed as code.
+      - name: Compute tag
+        id: tag
+        run: |
+          if [ "${{ github.event_name }}" = "release" ]; then
+            echo "tag=${RELEASE_TAG}" >> "$GITHUB_OUTPUT"
+          else
+            echo "tag=main" >> "$GITHUB_OUTPUT"
+          fi
+        env:
+          RELEASE_TAG: ${{ github.event.release.tag_name }}
+
+      # Gate the manifest push on the ancestor check for main pushes.
+      # For releases there is no gate — the check doesn't even run.
+      - name: Create manifest list and push
+        if: github.event_name != 'push' || steps.main_check.outputs.push_main == 'true'
+        working-directory: /tmp/digests
        run: |
          set -euo pipefail
-          image=nousresearch/hermes-agent
+          args=()
+          for digest_file in *; do
+            args+=("${IMAGE_NAME}@sha256:${digest_file}")
+          done
          docker buildx imagetools create \
-            --tag "${image}:main" \
-            "${image}:sha-${GITHUB_SHA}"
+            -t "${IMAGE_NAME}:${TAG}" \
+            "${args[@]}"
+        env:
+          IMAGE_NAME: ${{ env.IMAGE_NAME }}
+          TAG: ${{ steps.tag.outputs.tag }}
+
+      - name: Inspect image
+        if: github.event_name != 'push' || steps.main_check.outputs.push_main == 'true'
+        run: |
+          docker buildx imagetools inspect "${IMAGE_NAME}:${TAG}"
+        env:
+          IMAGE_NAME: ${{ env.IMAGE_NAME }}
+          TAG: ${{ steps.tag.outputs.tag }}
+
+      # Signal to move-latest that the release tag is live.
+      - name: Mark release tag pushed
+        id: mark_release_pushed
+        if: github.event_name == 'release'
+        run: echo "pushed=true" >> "$GITHUB_OUTPUT"

  # ---------------------------------------------------------------------------
  # Move :latest to point at the release tag the merge job pushed.
@@ -427,10 +364,10 @@ jobs:
  #
  # We still run an ancestor check against the existing :latest so that a
  # backport release on an older branch (e.g. patching v1.1.5 after v1.2.3
-  # is out) doesn't drag :latest backwards.  The check is the same shape as
-  # move-main: read the OCI revision label off the current :latest, look up
-  # that commit in git, and only advance if our release commit is a strict
-  # descendant.
+  # is out) doesn't drag :latest backwards.  The check is the same shape
+  # as the ancestor check in the merge job for :main: read the OCI
+  # revision label off the current :latest, look up that commit in git,
+  # and only advance if our release commit is a strict descendant.
  # ---------------------------------------------------------------------------
  move-latest:
    if: |
@@ -23,13 +23,24 @@ concurrency:
 jobs:
  test:
    runs-on: ubuntu-latest
-    timeout-minutes: 30
+    timeout-minutes: 60
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      - name: Install system dependencies
-        run: sudo apt-get update && sudo apt-get install -y ripgrep
+      - name: Install ripgrep (prebuilt binary)
+        run: |
+          set -euo pipefail
+          RG_VERSION=15.1.0
+          RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
+          RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
+          curl -sSfL -o "$RG_TARBALL" \
+            "https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
+          echo "${RG_SHA256}  ${RG_TARBALL}" | sha256sum -c -
+          tar -xzf "$RG_TARBALL"
+          sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
+          rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
+          rg --version

      - name: Install uv
        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
@@ -44,9 +55,26 @@ jobs:
          uv pip install -e ".[all,dev]"

      - name: Run tests
+        # Per-file isolation via scripts/run_tests_parallel.py: discovers
+        # every test_*.py file under tests/ (excluding integration/ + e2e/),
+        # then runs `python -m pytest <file>` in a freshly-spawned subprocess
+        # with bounded parallelism. No xdist, no shared workers, no
+        # module-level state leakage between files.
+        #
+        # Why per-file (not per-test): per-test spawn cost (~250ms × 17k
+        # tests = 70min CPU minimum) blew the wall-clock budget. Per-file
+        # spawn (~250ms × ~850 files = ~3.5min) fits while still giving
+        # every file a fresh interpreter — the only isolation boundary
+        # that matters in practice (cross-file leakage was the original
+        # flake source; intra-file is the test author's responsibility).
+        #
+        # Why drop xdist entirely: xdist's persistent workers accumulate
+        # state across files, which is exactly the leakage we wanted to
+        # fix. ThreadPoolExecutor + subprocess.run is ~60 lines and does
+        # the job with cleaner semantics.
        run: |
          source .venv/bin/activate
-          python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto --timeout=30 --timeout-method=signal
+          python scripts/run_tests_parallel.py
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
@@ -60,8 +88,19 @@ jobs:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

-      - name: Install system dependencies
-        run: sudo apt-get update && sudo apt-get install -y ripgrep
+      - name: Install ripgrep (prebuilt binary)
+        run: |
+          set -euo pipefail
+          RG_VERSION=15.1.0
+          RG_SHA256=1c9297be4a084eea7ecaedf93eb03d058d6faae29bbc57ecdaf5063921491599
+          RG_TARBALL=ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl.tar.gz
+          curl -sSfL -o "$RG_TARBALL" \
+            "https://github.com/BurntSushi/ripgrep/releases/download/${RG_VERSION}/${RG_TARBALL}"
+          echo "${RG_SHA256}  ${RG_TARBALL}" | sha256sum -c -
+          tar -xzf "$RG_TARBALL"
+          sudo mv "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl/rg" /usr/local/bin/rg
+          rm -rf "$RG_TARBALL" "ripgrep-${RG_VERSION}-x86_64-unknown-linux-musl"
+          rg --version

      - name: Install uv
        uses: astral-sh/setup-uv@d4b2f3b6ecc6e67c4457f6d3e41ec42d3d0fcb86  # v5
@@ -18,6 +18,7 @@ __pycache__/web_tools.cpython-310.pyc
 logs/
 data/
 .pytest_cache/
+.pytest-cache/
 tmp/
 temp_vision_images/
 hermes-*/*
@@ -1013,17 +1013,39 @@ def profile_env(tmp_path, monkeypatch):

 **ALWAYS use `scripts/run_tests.sh`** — do not call `pytest` directly. The script enforces
 hermetic environment parity with CI (unset credential vars, TZ=UTC, LANG=C.UTF-8,
-4 xdist workers matching GHA ubuntu-latest). Direct `pytest` on a 16+ core
-developer machine with API keys set diverges from CI in ways that have caused
-multiple "works locally, fails in CI" incidents (and the reverse).
+`-n auto` xdist workers, in-tree subprocess-isolation plugin). Direct `pytest`
+on a 16+ core developer machine with API keys set diverges from CI in ways
+that have caused multiple "works locally, fails in CI" incidents (and the reverse).

 ```bash
 scripts/run_tests.sh                                  # full suite, CI-parity
 scripts/run_tests.sh tests/gateway/                   # one directory
 scripts/run_tests.sh tests/agent/test_foo.py::test_x  # one test
 scripts/run_tests.sh -v --tb=long                     # pass-through pytest flags
+scripts/run_tests.sh --no-isolate tests/foo/          # disable subprocess isolation (faster, for debugging)
 ```

+### Subprocess-per-test isolation
+
+Every test runs in a freshly-spawned Python subprocess via the in-tree plugin
+at `tests/_isolate_plugin.py`. This means module-level dicts/sets and
+ContextVars from one test cannot leak into the next — the historic
+`_reset_module_state` autouse fixture is gone.
+
+Implementation notes:
+
+- The plugin uses `multiprocessing.get_context("spawn")`, which works on
+  Linux, macOS, and Windows alike (POSIX `fork` is not used).
+- Per-test overhead is ~0.5–1.0s (Python startup + pytest collection). xdist
+  parallelism amortizes this across cores; on a 20-core box the full suite
+  finishes in roughly the same wall time as before, but flake-free.
+- `isolate_timeout` (configured in `pyproject.toml`) caps each test at 30s.
+  Hangs are killed and surfaced as a failure report.
+- Pass `--no-isolate` to disable isolation — useful when debugging a single
+  test interactively, or when you specifically want to verify state leakage.
+- The plugin disables itself in child processes (sentinel envvar
+  `HERMES_ISOLATE_CHILD=1`), so there's no fork-bomb risk.
+
 ### Why the wrapper (and why the old "just call pytest" doesn't work)

 Five real sources of local-vs-CI drift the script closes:
@@ -1034,7 +1056,7 @@ Five real sources of local-vs-CI drift the script closes:
 | HOME / `~/.hermes/` | Your real config+auth.json | Temp dir per test |
 | Timezone | Local TZ (PDT etc.) | UTC |
 | Locale | Whatever is set | C.UTF-8 |
-| xdist workers | `-n auto` = all cores (20+ on a workstation) | `-n 4` matching CI |
+| xdist workers | `-n auto` = all cores | `-n auto` (safe — subprocess isolation prevents cross-worker flakes) |

 `tests/conftest.py` also enforces points 1-4 as an autouse fixture so ANY pytest
 invocation (including IDE integrations) gets hermetic behavior — but the wrapper
@@ -1042,15 +1064,21 @@ is belt-and-suspenders.

 ### Running without the wrapper (only if you must)

-If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
-pytest directly), at minimum activate the venv and pass `-n 4`:
+If you can't use the wrapper (e.g. inside an IDE that shells pytest directly),
+at minimum activate the venv. The isolation plugin loads automatically from
+`addopts` in `pyproject.toml`, so you get the same per-test process isolation
+either way.

 ```bash
 source .venv/bin/activate   # or: source venv/bin/activate
-python -m pytest tests/ -q -n 4
+python -m pytest tests/ -q
 ```

-Worker count above 4 will surface test-ordering flakes that CI never sees.
+If you need to bypass isolation for fast feedback while debugging:
+
+```bash
+python -m pytest tests/agent/test_foo.py -q --no-isolate
+```

 Always run the full suite before pushing changes.

@@ -210,7 +210,7 @@ hermes-agent/
 | `~/.hermes/skills/` | All active skills (bundled + hub-installed + agent-created) |
 | `~/.hermes/memories/` | Persistent memory (MEMORY.md, USER.md) |
 | `~/.hermes/state.db` | SQLite session database |
-| `~/.hermes/sessions/` | JSON session logs |
+| `~/.hermes/sessions/` | Gateway routing index (`sessions.json`), request-dump breadcrumbs, gateway `*.jsonl` transcripts, and (optionally) per-session JSON snapshots when `sessions.write_json_snapshots: true` is set. The per-session snapshots are off by default; state.db is canonical. |
 | `~/.hermes/cron/` | Scheduled job data |
 | `~/.hermes/whatsapp/session/` | WhatsApp bridge credentials |

@@ -239,7 +239,7 @@ User message → AIAgent._run_agent_loop()

 - **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules.
 - **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform.
-- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. JSON logs go to `~/.hermes/sessions/`.
+- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. Per-session JSON snapshots in `~/.hermes/sessions/` were superseded by the SQLite store and are off by default; opt back in with `sessions.write_json_snapshots: true` if you have external tooling that consumes the JSON files directly.
 - **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs.
 - **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint).
 - **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests.
@@ -71,6 +71,71 @@ def _ra():
    return run_agent


+def _normalized_custom_base_url(value: Any) -> str:
+    """Canonicalise a custom-provider base URL for equality comparison.
+
+    Surrounding whitespace is removed first, then any trailing ``/``
+    characters.  Anything that is not a ``str`` normalises to ``""``.
+    """
+    if isinstance(value, str):
+        trimmed = value.strip()
+        return trimmed.rstrip("/")
+    return ""
+
+
+def _custom_provider_model_matches(agent_model: str, entry: Dict[str, Any]) -> bool:
+    """Tell whether a custom-provider entry applies to ``agent_model``.
+
+    An entry with no (or a blank) ``model`` value matches every model.
+    Otherwise the two names are compared after trimming whitespace and
+    lower-casing both sides.
+    """
+    configured = str(entry.get("model", "") or "").strip().lower()
+    # Short-circuit keeps the agent model untouched when the entry is unpinned.
+    return not configured or configured == str(agent_model or "").strip().lower()
+
+
+def _custom_provider_extra_body_for_agent(
+    *,
+    provider: str,
+    model: str,
+    base_url: str,
+    custom_providers: List[Dict[str, Any]],
+) -> Optional[Dict[str, Any]]:
+    """Pick the ``extra_body`` a ``custom_providers`` entry supplies for an agent.
+
+    Only applies when ``provider`` is ``"custom"`` (case/whitespace
+    insensitive) and ``base_url`` normalises to a non-empty string.  Entries
+    are scanned in order; those that are not dicts, whose normalised
+    ``base_url`` differs, or whose ``extra_body`` is not a non-empty dict are
+    ignored.
+
+    Returns:
+        A shallow copy of the ``extra_body`` from the first entry whose
+        ``model`` matches ``model``; failing that, a shallow copy from the
+        first eligible entry that declares no model; otherwise ``None``.
+    """
+    is_custom = (provider or "").strip().lower() == "custom"
+    wanted_url = _normalized_custom_base_url(base_url) if is_custom else ""
+    if not wanted_url:
+        return None
+
+    generic_match: Optional[Dict[str, Any]] = None
+    for candidate in custom_providers or []:
+        if not isinstance(candidate, dict):
+            continue
+        if _normalized_custom_base_url(candidate.get("base_url")) != wanted_url:
+            continue
+        body = candidate.get("extra_body")
+        if not (isinstance(body, dict) and body):
+            continue
+        pinned_model = str(candidate.get("model", "") or "").strip()
+        if not pinned_model:
+            # Model-agnostic entry: remember only the first one seen.
+            if generic_match is None:
+                generic_match = dict(body)
+        elif _custom_provider_model_matches(model, candidate):
+            # A model-specific hit wins immediately over any generic entry.
+            return dict(body)
+
+    return generic_match
+
+
+def _merge_custom_provider_extra_body(agent, custom_providers: List[Dict[str, Any]]) -> None:
+    """Fold the matching custom-provider ``extra_body`` into ``agent.request_overrides``.
+
+    The provider-level ``extra_body`` is looked up from ``agent.provider``,
+    ``agent.model`` and ``agent.base_url``.  When one is found, a new
+    overrides dict is assigned to ``agent.request_overrides`` whose
+    ``extra_body`` starts from the provider values and is then updated with
+    any ``extra_body`` dict already present, so existing keys take
+    precedence.  Nothing is touched when no provider ``extra_body`` applies.
+    """
+    provider_body = _custom_provider_extra_body_for_agent(
+        provider=agent.provider,
+        model=agent.model,
+        base_url=agent.base_url,
+        custom_providers=custom_providers,
+    )
+    if provider_body:
+        # Work on copies so neither the config entry nor the previous
+        # overrides object is mutated in place.
+        new_overrides = dict(getattr(agent, "request_overrides", {}) or {})
+        combined = dict(provider_body)
+        current = new_overrides.get("extra_body")
+        if isinstance(current, dict):
+            combined.update(current)
+        new_overrides["extra_body"] = combined
+        agent.request_overrides = new_overrides
+
+
 def init_agent(
    agent,
    base_url: str = None,
@@ -901,7 +966,19 @@ def init_agent(
    hermes_home = get_hermes_home()
    agent.logs_dir = hermes_home / "sessions"
    agent.logs_dir.mkdir(parents=True, exist_ok=True)
-    agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
+    # Per-session JSON snapshot writer (~/.hermes/sessions/session_{sid}.json)
+    # is opt-in via sessions.write_json_snapshots (default False).  state.db
+    # is canonical — the snapshot is only useful for external tooling that
+    # reads the JSON files directly.  See run_agent._save_session_log.
+    agent._session_json_enabled = False
+    try:
+        from hermes_cli.config import load_config as _load_sess_cfg
+        _sess_cfg = (_load_sess_cfg().get("sessions") or {})
+        agent._session_json_enabled = bool(_sess_cfg.get("write_json_snapshots", False))
+    except Exception:
+        pass
+    # logs_dir is retained unconditionally for request_dump_*.json (debug
+    # breadcrumb path written by agent_runtime_helpers.dump_api_request_debug).
    
    # Track conversation messages for session logging
    agent._session_messages: List[Dict[str, Any]] = []
@@ -1048,7 +1125,18 @@ def init_agent(
    # through _ra().get_tool_definitions()).  Duplicate function names cause
    # 400 errors on providers that enforce unique names (e.g. Xiaomi
    # MiMo via Nous Portal).
-    if agent._memory_manager and agent.tools is not None:
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    #   enabled_toolsets is None        → no filter, inject (backward compat)
+    #   "memory" in enabled_toolsets    → user opted in, inject
+    #   otherwise (incl. [])            → user excluded memory, skip injection
+    #
+    # Without this gate, `platform_toolsets: telegram: []` still leaks memory
+    # provider tools (fact_store, etc.) into the tool surface — a 10x latency
+    # penalty on local models and a frequent trigger of tool-call loops.
+    if agent._memory_manager and agent.tools is not None and (
+        agent.enabled_toolsets is None or "memory" in agent.enabled_toolsets
+    ):
        _existing_tool_names = {
            t.get("function", {}).get("name")
            for t in agent.tools
@@ -1201,6 +1289,7 @@ def init_agent(
    # Store for reuse by _check_compression_model_feasibility (auxiliary
    # compression model context-length detection needs the same list).
    agent._custom_providers = _custom_providers
+    _merge_custom_provider_extra_body(agent, _custom_providers)

    # Check custom_providers per-model context_length
    if _config_context_length is None and _custom_providers:
@@ -1357,8 +1446,22 @@ def init_agent(
    # errors. Even with the cache fix, dedup is the right defense
    # against plugin paths that may register the same schemas via
    # ctx.register_tool(). Mirrors the memory tools dedup above.
+    #
+    # Respect the platform's enabled_toolsets configuration (#5544):
+    # context engine tools follow the same gating pattern as memory
+    # provider tools — without the gate, `platform_toolsets: telegram: []`
+    # would still leak lcm_* tools into the tool surface and incur the
+    # same local-model latency penalty.
    agent._context_engine_tool_names: set = set()
-    if hasattr(agent, "context_compressor") and agent.context_compressor and agent.tools is not None:
+    if (
+        hasattr(agent, "context_compressor")
+        and agent.context_compressor
+        and agent.tools is not None
+        and (
+            agent.enabled_toolsets is None
+            or "context_engine" in agent.enabled_toolsets
+        )
+    ):
        _existing_tool_names = {
            t.get("function", {}).get("name")
            for t in agent.tools
@@ -1869,6 +1869,77 @@ def copy_reasoning_content_for_api(agent, source_msg: dict, api_msg: dict) -> No



+def _iter_pool_sockets(client: Any):
+    """Yield raw sockets reachable from an OpenAI/httpx client pool.
+
+    httpcore 1.x stores the concrete HTTP11/HTTP2 connection under
+    ``conn._connection``; older versions exposed stream attributes directly
+    on the pool entry. Keep the traversal defensive because these are private
+    transport internals and vary across httpx/httpcore releases.
+
+    Args:
+        client: Object expected to expose ``_client._transport._pool``.
+            If any link in that chain is missing, or looking it up raises,
+            nothing is yielded.
+
+    Yields:
+        Each distinct socket object found (deduplicated by ``id()``).
+    """
+    # Walk client -> _client -> _transport -> _pool; bail out quietly as
+    # soon as one attribute is absent.
+    try:
+        http_client = getattr(client, "_client", None)
+        if http_client is None:
+            return
+        transport = getattr(http_client, "_transport", None)
+        if transport is None:
+            return
+        pool = getattr(transport, "_pool", None)
+        if pool is None:
+            return
+        # The connection list is read from `_connections`, falling back to
+        # `_pool`, then to an empty list.
+        connections = (
+            getattr(pool, "_connections", None)
+            or getattr(pool, "_pool", None)
+            or []
+        )
+    except Exception:
+        return
+
+    # ids of sockets already yielded; the pool entry and its inner
+    # connection may resolve to the same socket object.
+    seen: set[int] = set()
+    # Iterate over a copy of the connection list.
+    for conn in list(connections):
+        # Inspect the pool entry itself and, when present, the wrapped
+        # `_connection` object.
+        candidates = [conn]
+        inner = getattr(conn, "_connection", None)
+        if inner is not None:
+            candidates.append(inner)
+        for candidate in candidates:
+            stream = (
+                getattr(candidate, "_network_stream", None)
+                or getattr(candidate, "_stream", None)
+            )
+            if stream is None:
+                continue
+            # Socket lookup, tried in order until one succeeds:
+            #   1. stream._sock
+            #   2. stream.get_extra_info("socket")
+            #   3. stream.stream._sock
+            #   4. stream._stream.extra(SocketAttribute.raw_socket)
+            sock = getattr(stream, "_sock", None)
+            if sock is None:
+                get_extra_info = getattr(stream, "get_extra_info", None)
+                if callable(get_extra_info):
+                    try:
+                        sock = get_extra_info("socket")
+                    except Exception:
+                        sock = None
+            if sock is None:
+                wrapped = getattr(stream, "stream", None)
+                if wrapped is not None:
+                    sock = getattr(wrapped, "_sock", None)
+            if sock is None:
+                # anyio-backed streams expose the raw socket through
+                # SocketAttribute.raw_socket when available.
+                wrapped = getattr(stream, "_stream", None)
+                extra = getattr(wrapped, "extra", None)
+                if callable(extra):
+                    try:
+                        # Imported lazily; an import failure is swallowed by
+                        # the except below and treated as "no socket".
+                        from anyio.abc import SocketAttribute
+                        sock = extra(SocketAttribute.raw_socket)
+                    except Exception:
+                        sock = None
+            if sock is None:
+                continue
+            marker = id(sock)
+            if marker in seen:
+                continue
+            seen.add(marker)
+            yield sock
+
+
 def cleanup_dead_connections(agent) -> bool:
    """Detect and clean up dead TCP connections on the primary client.

@@ -1882,36 +1953,8 @@ def cleanup_dead_connections(agent) -> bool:
    if client is None:
        return False
    try:
-        http_client = getattr(client, "_client", None)
-        if http_client is None:
-            return False
-        transport = getattr(http_client, "_transport", None)
-        if transport is None:
-            return False
-        pool = getattr(transport, "_pool", None)
-        if pool is None:
-            return False
-        connections = (
-            getattr(pool, "_connections", None)
-            or getattr(pool, "_pool", None)
-            or []
-        )
        dead_count = 0
-        for conn in list(connections):
-            # Check for connections that are idle but have closed sockets
-            stream = (
-                getattr(conn, "_network_stream", None)
-                or getattr(conn, "_stream", None)
-            )
-            if stream is None:
-                continue
-            sock = getattr(stream, "_sock", None)
-            if sock is None:
-                sock = getattr(stream, "stream", None)
-                if sock is not None:
-                    sock = getattr(sock, "_sock", None)
-            if sock is None:
-                continue
+        for sock in _iter_pool_sockets(client):
            # Probe socket health with a non-blocking recv peek
            import socket as _socket
            try:
@@ -2087,36 +2130,7 @@ def force_close_tcp_sockets(client: Any) -> int:

    closed = 0
    try:
-        http_client = getattr(client, "_client", None)
-        if http_client is None:
-            return 0
-        transport = getattr(http_client, "_transport", None)
-        if transport is None:
-            return 0
-        pool = getattr(transport, "_pool", None)
-        if pool is None:
-            return 0
-        # httpx uses httpcore connection pools; connections live in
-        # _connections (list) or _pool (list) depending on version.
-        connections = (
-            getattr(pool, "_connections", None)
-            or getattr(pool, "_pool", None)
-            or []
-        )
-        for conn in list(connections):
-            stream = (
-                getattr(conn, "_network_stream", None)
-                or getattr(conn, "_stream", None)
-            )
-            if stream is None:
-                continue
-            sock = getattr(stream, "_sock", None)
-            if sock is None:
-                sock = getattr(stream, "stream", None)
-                if sock is not None:
-                    sock = getattr(sock, "_sock", None)
-            if sock is None:
-                continue
+        for sock in _iter_pool_sockets(client):
            try:
                sock.shutdown(_socket.SHUT_RDWR)
            except OSError:
@@ -2154,5 +2168,6 @@ __all__ = [
    "cleanup_dead_connections",
    "extract_api_error_context",
    "apply_pending_steer_to_tool_results",
+    "_iter_pool_sockets",
    "force_close_tcp_sockets",
 ]
@@ -1606,182 +1606,155 @@ def _content_parts_to_anthropic_blocks(parts: Any) -> List[Dict[str, Any]]:
    return out


-def convert_messages_to_anthropic(
-    messages: List[Dict],
-    base_url: str | None = None,
-    model: str | None = None,
-) -> Tuple[Optional[Any], List[Dict]]:
-    """Convert OpenAI-format messages to Anthropic format.
+def _convert_assistant_message(m: Dict[str, Any]) -> Dict[str, Any]:
+    """Convert an assistant message to Anthropic content blocks.

-    Returns (system_prompt, anthropic_messages).
-    System messages are extracted since Anthropic takes them as a separate param.
-    system_prompt is a string or list of content blocks (when cache_control present).
-
-    When *base_url* is provided and points to a third-party Anthropic-compatible
-    endpoint, all thinking block signatures are stripped.  Signatures are
-    Anthropic-proprietary — third-party endpoints cannot validate them and will
-    reject them with HTTP 400 "Invalid signature in thinking block".
-
-    When *model* is provided and matches the Kimi / Moonshot family (or
-    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
-    synthesised from ``reasoning_content`` are preserved on replayed
-    assistant tool-call messages — Kimi requires the field to exist, even
-    if empty.
+    Handles thinking blocks, regular content, tool calls, and
+    reasoning_content injection for Kimi/DeepSeek endpoints.
    """
-    system = None
-    result = []
-
-    for m in messages:
-        role = m.get("role", "user")
-        content = m.get("content", "")
-
-        if role == "system":
-            if isinstance(content, list):
-                # Preserve cache_control markers on content blocks
-                has_cache = any(
-                    p.get("cache_control") for p in content if isinstance(p, dict)
-                )
-                if has_cache:
-                    system = [p for p in content if isinstance(p, dict)]
-                else:
-                    system = "\n".join(
-                        p["text"] for p in content if p.get("type") == "text"
-                    )
-            else:
-                system = content
-            continue
-
-        if role == "assistant":
-            blocks = _extract_preserved_thinking_blocks(m)
-            if content:
-                if isinstance(content, list):
-                    converted_content = _convert_content_to_anthropic(content)
-                    if isinstance(converted_content, list):
-                        blocks.extend(converted_content)
-                else:
-                    blocks.append({"type": "text", "text": str(content)})
-            for tc in m.get("tool_calls", []):
-                if not tc or not isinstance(tc, dict):
-                    continue
-                fn = tc.get("function", {})
-                args = fn.get("arguments", "{}")
-                try:
-                    parsed_args = json.loads(args) if isinstance(args, str) else args
-                except (json.JSONDecodeError, ValueError):
-                    parsed_args = {}
-                blocks.append({
-                    "type": "tool_use",
-                    "id": _sanitize_tool_id(tc.get("id", "")),
-                    "name": fn.get("name", ""),
-                    "input": parsed_args,
-                })
-            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
-            # tool-call messages to carry reasoning_content when thinking is
-            # enabled server-side.  Preserve it as a thinking block so Kimi
-            # can validate the message history.  See hermes-agent#13848.
-            #
-            # Accept empty string "" — _copy_reasoning_content_for_api()
-            # injects "" as a tier-3 fallback for Kimi tool-call messages
-            # that had no reasoning.  Kimi requires the field to exist, even
-            # if empty.
-            #
-            # Prepend (not append): Anthropic protocol requires thinking
-            # blocks before text and tool_use blocks.
-            #
-            # Guard: only add when reasoning_details didn't already contribute
-            # thinking blocks.  On native Anthropic, reasoning_details produces
-            # signed thinking blocks — adding another unsigned one from
-            # reasoning_content would create a duplicate (same text) that gets
-            # downgraded to a spurious text block on the last assistant message.
-            reasoning_content = m.get("reasoning_content")
-            _already_has_thinking = any(
-                isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
-                for b in blocks
-            )
-            if isinstance(reasoning_content, str) and not _already_has_thinking:
-                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
-            # Anthropic rejects empty assistant content
-            effective = blocks or content
-            if not effective or effective == "":
-                effective = [{"type": "text", "text": "(empty)"}]
-            result.append({"role": "assistant", "content": effective})
-            continue
-
-        if role == "tool":
-            # Sanitize tool_use_id and ensure non-empty content.
-            # Computer-use (and other multimodal) tool results arrive as
-            # either a list of OpenAI-style content parts, or a dict
-            # marked `_multimodal` with an embedded `content` list. Convert
-            # both into Anthropic `tool_result` inner blocks (text + image).
-            multimodal_blocks: Optional[List[Dict[str, Any]]] = None
-            if isinstance(content, dict) and content.get("_multimodal"):
-                multimodal_blocks = _content_parts_to_anthropic_blocks(
-                    content.get("content") or []
-                )
-                # Fallback text if the conversion produced nothing usable.
-                if not multimodal_blocks and content.get("text_summary"):
-                    multimodal_blocks = [
-                        {"type": "text", "text": str(content["text_summary"])}
-                    ]
-            elif isinstance(content, list):
-                converted = _content_parts_to_anthropic_blocks(content)
-                if any(b.get("type") == "image" for b in converted):
-                    multimodal_blocks = converted
-            # Back-compat: some callers stash blocks under a private key.
-            if multimodal_blocks is None:
-                stashed = m.get("_anthropic_content_blocks")
-                if isinstance(stashed, list) and stashed:
-                    text_content = content if isinstance(content, str) and content.strip() else None
-                    multimodal_blocks = (
-                        [{"type": "text", "text": text_content}] + stashed
-                        if text_content else list(stashed)
-                    )
-
-            if multimodal_blocks:
-                result_content: Any = multimodal_blocks
-            elif isinstance(content, str):
-                result_content = content
-            else:
-                result_content = json.dumps(content) if content else "(no output)"
-            if not result_content:
-                result_content = "(no output)"
-            tool_result = {
-                "type": "tool_result",
-                "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
-                "content": result_content,
-            }
-            if isinstance(m.get("cache_control"), dict):
-                tool_result["cache_control"] = dict(m["cache_control"])
-            # Merge consecutive tool results into one user message
-            if (
-                result
-                and result[-1]["role"] == "user"
-                and isinstance(result[-1]["content"], list)
-                and result[-1]["content"]
-                and result[-1]["content"][0].get("type") == "tool_result"
-            ):
-                result[-1]["content"].append(tool_result)
-            else:
-                result.append({"role": "user", "content": [tool_result]})
-            continue
-
-        # Regular user message — validate non-empty content (Anthropic rejects empty)
+    content = m.get("content", "")
+    blocks = _extract_preserved_thinking_blocks(m)
+    if content:
        if isinstance(content, list):
-            converted_blocks = _convert_content_to_anthropic(content)
-            # Check if all text blocks are empty
-            if not converted_blocks or all(
-                b.get("text", "").strip() == ""
-                for b in converted_blocks
-                if isinstance(b, dict) and b.get("type") == "text"
-            ):
-                converted_blocks = [{"type": "text", "text": "(empty message)"}]
-            result.append({"role": "user", "content": converted_blocks})
+            converted_content = _convert_content_to_anthropic(content)
+            if isinstance(converted_content, list):
+                blocks.extend(converted_content)
        else:
-            # Validate string content is non-empty
-            if not content or (isinstance(content, str) and not content.strip()):
-                content = "(empty message)"
-            result.append({"role": "user", "content": content})
+            blocks.append({"type": "text", "text": str(content)})
+    for tc in m.get("tool_calls", []):
+        if not tc or not isinstance(tc, dict):
+            continue
+        fn = tc.get("function", {})
+        args = fn.get("arguments", "{}")
+        try:
+            parsed_args = json.loads(args) if isinstance(args, str) else args
+        except (json.JSONDecodeError, ValueError):
+            parsed_args = {}
+        blocks.append({
+            "type": "tool_use",
+            "id": _sanitize_tool_id(tc.get("id", "")),
+            "name": fn.get("name", ""),
+            "input": parsed_args,
+        })
+    # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+    # tool-call messages to carry reasoning_content when thinking is
+    # enabled server-side.  Preserve it as a thinking block so Kimi
+    # can validate the message history.  See hermes-agent#13848.
+    #
+    # Accept empty string "" — _copy_reasoning_content_for_api()
+    # injects "" as a tier-3 fallback for Kimi tool-call messages
+    # that had no reasoning.  Kimi requires the field to exist, even
+    # if empty.
+    #
+    # Prepend (not append): Anthropic protocol requires thinking
+    # blocks before text and tool_use blocks.
+    #
+    # Guard: only add when reasoning_details didn't already contribute
+    # thinking blocks.  On native Anthropic, reasoning_details produces
+    # signed thinking blocks — adding another unsigned one from
+    # reasoning_content would create a duplicate (same text) that gets
+    # downgraded to a spurious text block on the last assistant message.
+    reasoning_content = m.get("reasoning_content")
+    _already_has_thinking = any(
+        isinstance(b, dict) and b.get("type") in {"thinking", "redacted_thinking"}
+        for b in blocks
+    )
+    if isinstance(reasoning_content, str) and not _already_has_thinking:
+        blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
+    # Anthropic rejects empty assistant content
+    effective = blocks or content
+    if not effective or effective == "":
+        effective = [{"type": "text", "text": "(empty)"}]
+    return {"role": "assistant", "content": effective}

+
+def _convert_tool_message_to_result(
+    result: List[Dict[str, Any]], m: Dict[str, Any]
+) -> None:
+    """Append tool message ``m`` to ``result`` as an Anthropic ``tool_result``.
+
+    ``result`` is modified in place.  The block joins the trailing user
+    message when that message already starts with a ``tool_result``;
+    otherwise it is wrapped in a new user message.  Content that is empty
+    after conversion is replaced with "(no output)".
+    """
+    payload = m.get("content", "")
+    inner: Optional[List[Dict[str, Any]]] = None
+    if isinstance(payload, dict) and payload.get("_multimodal"):
+        inner = _content_parts_to_anthropic_blocks(payload.get("content") or [])
+        summary = payload.get("text_summary")
+        if not inner and summary:
+            # Nothing usable came out of the conversion: send the summary.
+            inner = [{"type": "text", "text": str(summary)}]
+    elif isinstance(payload, list):
+        parts = _content_parts_to_anthropic_blocks(payload)
+        # A part list is sent as blocks only when it carries an image.
+        if any(part.get("type") == "image" for part in parts):
+            inner = parts
+    if inner is None:
+        # Back-compat: some callers stash blocks under a private key.
+        extra = m.get("_anthropic_content_blocks")
+        if isinstance(extra, list) and extra:
+            lead = payload if isinstance(payload, str) and payload.strip() else None
+            inner = [{"type": "text", "text": lead}] + extra if lead else list(extra)
+
+    # Choose what to send: blocks, the raw string, a JSON dump, or a placeholder.
+    if inner:
+        body: Any = inner
+    elif isinstance(payload, str):
+        body = payload
+    elif payload:
+        body = json.dumps(payload)
+    else:
+        body = "(no output)"
+    body = body or "(no output)"
+    block = {
+        "type": "tool_result",
+        "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
+        "content": body,
+    }
+    marker = m.get("cache_control")
+    if isinstance(marker, dict):
+        block["cache_control"] = dict(marker)
+    # Merge consecutive tool results into one user message
+    last = result[-1] if result else None
+    if (
+        last is not None
+        and last["role"] == "user"
+        and isinstance(last["content"], list)
+        and last["content"]
+        and last["content"][0].get("type") == "tool_result"
+    ):
+        last["content"].append(block)
+    else:
+        result.append({"role": "user", "content": [block]})
+
+
+def _convert_user_message(content: Any) -> Dict[str, Any]:
+    """Convert user content to Anthropic format; blank content becomes
+    "(empty message)", while non-text blocks (e.g. images) are preserved.
+    """
+    if not isinstance(content, list):
+        if not content or (isinstance(content, str) and not content.strip()):
+            content = "(empty message)"
+        return {"role": "user", "content": content}
+    blocks = _convert_content_to_anthropic(content) or []
+    texts = [b for b in blocks if isinstance(b, dict) and b.get("type") == "text"]
+    if not any((b.get("text") or "").strip() for b in texts):
+        # No usable text: keep images etc., drop only the blank text blocks.
+        blocks = [b for b in blocks if isinstance(b, dict) and b.get("type") != "text"]
+        blocks = blocks or [{"type": "text", "text": "(empty message)"}]
+    return {"role": "user", "content": blocks}
+
+
+def _strip_orphaned_tool_blocks(result: List[Dict[str, Any]]) -> None:
+    """Strip tool_use blocks with no matching tool_result, and vice versa.
+
+    Context compression or session truncation can remove either side of a
+    tool-call pair.  Anthropic rejects both orphans with HTTP 400.
+
+    Mutates ``result`` in place.
+    """
    # Strip orphaned tool_use blocks (no matching tool_result follows)
    tool_result_ids = set()
    for m in result:
@@ -1799,10 +1772,7 @@ def convert_messages_to_anthropic(
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool call removed)"}]

-    # Strip orphaned tool_result blocks (no matching tool_use precedes them).
-    # This is the mirror of the above: context compression or session truncation
-    # can remove an assistant message containing a tool_use while leaving the
-    # subsequent tool_result intact.  Anthropic rejects these with a 400.
+    # Strip orphaned tool_result blocks (no matching tool_use precedes them)
    tool_use_ids = set()
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
@@ -1819,12 +1789,16 @@ def convert_messages_to_anthropic(
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool result removed)"}]

-    # Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
+
+def _merge_consecutive_roles(result: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Merge consecutive same-role messages to enforce Anthropic alternation.
+
+    Returns a new list (caller must rebind ``result``).
+    """
    fixed = []
    for m in result:
        if fixed and fixed[-1]["role"] == m["role"]:
            if m["role"] == "user":
-                # Merge consecutive user messages
                prev_content = fixed[-1]["content"]
                curr_content = m["content"]
                if isinstance(prev_content, str) and isinstance(curr_content, str):
@@ -1832,7 +1806,6 @@ def convert_messages_to_anthropic(
                elif isinstance(prev_content, list) and isinstance(curr_content, list):
                    fixed[-1]["content"] = prev_content + curr_content
                else:
-                    # Mixed types — wrap string in list
                    if isinstance(prev_content, str):
                        prev_content = [{"type": "text", "text": prev_content}]
                    if isinstance(curr_content, str):
@@ -1855,7 +1828,6 @@ def convert_messages_to_anthropic(
                elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
                    fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
                else:
-                    # Mixed types — normalize both to list and merge
                    if isinstance(prev_blocks, str):
                        prev_blocks = [{"type": "text", "text": prev_blocks}]
                    if isinstance(curr_blocks, str):
@@ -1863,37 +1835,34 @@ def convert_messages_to_anthropic(
                    fixed[-1]["content"] = prev_blocks + curr_blocks
        else:
            fixed.append(m)
-    result = fixed
+    return fixed

-    # ── Thinking block signature management ──────────────────────────
-    # Anthropic signs thinking blocks against the full turn content.
-    # Any upstream mutation (context compression, session truncation,
-    # orphan stripping, message merging) invalidates the signature,
-    # causing HTTP 400 "Invalid signature in thinking block".
-    #
-    # Signatures are Anthropic-proprietary.  Third-party endpoints
-    # (MiniMax, Microsoft Foundry, self-hosted proxies) cannot validate
-    # them and will reject them outright.  When targeting a third-party
-    # endpoint, strip ALL thinking/redacted_thinking blocks from every
-    # assistant message — the third-party will generate its own
-    # thinking blocks if it supports extended thinking.
-    #
-    # For direct Anthropic (strategy following clawdbot/OpenClaw):
-    # 1. Strip thinking/redacted_thinking from all assistant messages
-    #    EXCEPT the last one — preserves reasoning continuity on the
-    #    current tool-use chain while avoiding stale signature errors.
-    # 2. Downgrade unsigned thinking blocks (no signature) to text —
-    #    Anthropic can't validate them and will reject them.
-    # 3. Strip cache_control from thinking/redacted_thinking blocks —
-    #    cache markers can interfere with signature validation.
+
+def _manage_thinking_signatures(
+    result: List[Dict[str, Any]], base_url: str | None, model: str | None
+) -> None:
+    """Strip or preserve thinking blocks based on endpoint type.
+
+    Anthropic signs thinking blocks against the full turn content.
+    Any upstream mutation (context compression, session truncation, orphan
+    stripping, message merging) invalidates the signature, causing HTTP 400
+    "Invalid signature in thinking block".
+
+    Signatures are Anthropic-proprietary.  Third-party endpoints (MiniMax,
+    Azure AI Foundry, AWS Bedrock, self-hosted proxies) cannot validate them
+    and will reject them outright.  Kimi's /coding and DeepSeek's /anthropic
+    endpoints speak the Anthropic protocol upstream but require unsigned
+    thinking blocks (synthesised from ``reasoning_content``) to round-trip on
+    replayed assistant tool-call messages.  See hermes-agent#13848 (Kimi) and
+    hermes-agent#16748 (DeepSeek).
+
+    Mutates ``result`` in place.
+    """
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
-    # Anthropic Messages protocol upstream but require that thinking blocks
-    # synthesised from reasoning_content round-trip on subsequent turns when
-    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
-    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
-    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
+    # Kimi / DeepSeek share a contract: strip signed Anthropic blocks
+    # (neither upstream can validate Anthropic signatures), preserve unsigned
+    # ones synthesised from reasoning_content.  See #13848, #16748.
    _preserve_unsigned_thinking = (
        _is_kimi_family_endpoint(base_url, model)
        or _is_deepseek_anthropic_endpoint(base_url)
@@ -1910,26 +1879,19 @@ def convert_messages_to_anthropic(
            continue

        if _preserve_unsigned_thinking:
-            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
-            # thinking server-side and require unsigned thinking blocks on
-            # replayed assistant tool-call messages.  Strip signed Anthropic
-            # blocks (neither upstream can validate Anthropic signatures) but
-            # preserve the unsigned ones we synthesised from reasoning_content.
+            # Kimi / DeepSeek: strip signed, preserve unsigned.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — upstream can't validate, strip
+                    # Signed (or redacted-with-data) — upstream can't validate, strip.
                    continue
-                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
-            # Third-party endpoint: strip ALL thinking blocks from every
-            # assistant message — signatures are Anthropic-proprietary.
+            # Third-party: strip ALL thinking blocks (signatures are proprietary).
            # Direct Anthropic: strip from non-latest assistant messages only.
            stripped = [
                b for b in m["content"]
@@ -1937,24 +1899,21 @@ def convert_messages_to_anthropic(
            ]
            m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
        else:
-            # Latest assistant on direct Anthropic: keep signed thinking
-            # blocks for reasoning continuity; downgrade unsigned ones to
-            # plain text.
+            # Latest assistant on direct Anthropic: keep signed, downgrade unsigned
+            # to text so the reasoning isn't lost.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("type") == "redacted_thinking":
-                    # Redacted blocks use 'data' for the signature payload
+                    # Redacted blocks use 'data' for the signature payload —
+                    # drop the block when 'data' is missing (can't be validated).
                    if b.get("data"):
                        new_content.append(b)
-                    # else: drop — no data means it can't be validated
                elif b.get("signature"):
-                    # Signed thinking block — keep it
                    new_content.append(b)
                else:
-                    # Unsigned thinking — downgrade to text so it's not lost
                    thinking_text = b.get("thinking", "")
                    if thinking_text:
                        new_content.append({"type": "text", "text": thinking_text})
@@ -1966,12 +1925,15 @@ def convert_messages_to_anthropic(
            if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
                b.pop("cache_control", None)

-    # ── Image eviction: keep only the most recent N screenshots ─────
-    # computer_use screenshots (base64 images) sit inside tool_result
-    # blocks: they accumulate and are sent with every API call. Each
-    # costs ~1,465 tokens; after 10+ the conversation becomes slow
-    # even for simple text queries. Walk backward, keep the most recent
-    # _MAX_KEEP_IMAGES, replace older ones with a text placeholder.
+
+def _evict_old_screenshots(result: List[Dict[str, Any]]) -> None:
+    """Keep only the most recent ``_MAX_KEEP_IMAGES`` computer-use screenshots.
+
+    Base64 images cost ~1,465 tokens each and accumulate across tool calls.
+    Walk backward, keep the most recent N, replace older ones with a placeholder.
+
+    Mutates ``result`` in place.
+    """
    _MAX_KEEP_IMAGES = 3
    _image_count = 0
    for msg in reversed(result):
@@ -1998,6 +1960,68 @@ def convert_messages_to_anthropic(
                    for b in inner
                ]

+
+def convert_messages_to_anthropic(
+    messages: List[Dict],
+    base_url: str | None = None,
+    model: str | None = None,
+) -> Tuple[Optional[Any], List[Dict]]:
+    """Convert OpenAI-format messages to Anthropic format.
+
+    Returns (system_prompt, anthropic_messages).
+    System messages are extracted since Anthropic takes them as a separate param.
+    system_prompt is a string or list of content blocks (when cache_control present).
+
+    When *base_url* is provided and points to a third-party Anthropic-compatible
+    endpoint, all thinking block signatures are stripped.  Signatures are
+    Anthropic-proprietary — third-party endpoints cannot validate them and will
+    reject them with HTTP 400 "Invalid signature in thinking block".
+
+    When the target is a Kimi / Moonshot endpoint (matched by *model* or
+    *base_url*) or DeepSeek's /anthropic endpoint, unsigned thinking blocks
+    synthesised from ``reasoning_content`` are preserved on replayed
+    assistant tool-call messages; signed blocks are still stripped there.
+    Non-dict entries of a list-valued system message are skipped.
+    """
+    system = None
+    result: List[Dict[str, Any]] = []
+
+    for m in messages:
+        role = m.get("role", "user")
+        content = m.get("content", "")
+
+        if role == "system":
+            if isinstance(content, list):
+                # Only dict parts are usable; others would raise on .get().
+                parts = [p for p in content if isinstance(p, dict)]
+                # Preserve cache_control markers on content blocks
+                if any(p.get("cache_control") for p in parts):
+                    system = parts
+                else:
+                    system = "\n".join(
+                        p["text"] for p in parts
+                        if p.get("type") == "text" and isinstance(p.get("text"), str)
+                    )
+            else:
+                system = content
+            continue
+
+        if role == "assistant":
+            result.append(_convert_assistant_message(m))
+            continue
+
+        if role == "tool":
+            _convert_tool_message_to_result(result, m)
+            continue
+
+        # Regular user message
+        result.append(_convert_user_message(content))
+
+    _strip_orphaned_tool_blocks(result)
+    result = _merge_consecutive_roles(result)
+    _manage_thinking_signatures(result, base_url, model)
+    _evict_old_screenshots(result)
+
     return system, result


@@ -390,6 +390,9 @@ def _run_review_in_thread(
            # parent below so memory(action="add") writes from
            # the review still land on disk; the review just
            # has zero side effects on external providers.
+            # Match parent's toolset config so ``tools[]`` is byte-identical
+            # in the request body — Anthropic's cache key includes it.
+            # (The runtime whitelist below still restricts dispatch.)
            review_agent = AIAgent(
                model=agent.model,
                max_iterations=16,
@@ -401,6 +404,8 @@ def _run_review_in_thread(
                api_key=_parent_runtime.get("api_key") or None,
                credential_pool=getattr(agent, "_credential_pool", None),
                parent_session_id=agent.session_id,
+                enabled_toolsets=getattr(agent, "enabled_toolsets", None),
+                disabled_toolsets=getattr(agent, "disabled_toolsets", None),
                skip_memory=True,
            )
            review_agent._memory_write_origin = "background_review"
@@ -92,17 +92,36 @@ def interruptible_api_call(agent, api_kwargs: dict):
    """
    result = {"response": None, "error": None}
    request_client_holder = {"client": None}
+    request_client_lock = threading.Lock()
+
+    def _set_request_client(client):
+        """Record ``client`` as the in-flight request client and return it.
+
+        The holder is written while ``request_client_lock`` is held; the same
+        object is returned so the caller can keep using a local reference.
+        """
+        with request_client_lock:
+            request_client_holder["client"] = client
+        return client
+
+    def _take_request_client():
+        """Remove and return the held request client, or None if there is none."""
+        with request_client_lock:
+            client = request_client_holder.get("client")
+            # Clear the slot inside the same critical section so a later
+            # caller gets None instead of the same client.
+            request_client_holder["client"] = None
+            return client
+
+    def _close_request_client_once(reason: str) -> None:
+        """Close the held request client if one is still registered.
+
+        The client is taken out of the holder first, so once one caller has
+        closed it, later calls find nothing to close.  ``reason`` is passed
+        through to ``agent._close_request_openai_client``.
+        """
+        request_client = _take_request_client()
+        if request_client is not None:
+            agent._close_request_openai_client(request_client, reason=reason)

    def _call():
        try:
            if agent.api_mode == "codex_responses":
-                request_client_holder["client"] = agent._create_request_openai_client(
-                    reason="codex_stream_request",
-                    api_kwargs=api_kwargs,
+                request_client = _set_request_client(
+                    agent._create_request_openai_client(
+                        reason="codex_stream_request",
+                        api_kwargs=api_kwargs,
+                    )
                )
                result["response"] = agent._run_codex_stream(
                    api_kwargs,
-                    client=request_client_holder["client"],
+                    client=request_client,
                    on_first_delta=getattr(agent, "_codex_on_first_delta", None),
                )
            elif agent.api_mode == "anthropic_messages":
@@ -131,17 +150,17 @@ def interruptible_api_call(agent, api_kwargs: dict):
                    raise
                result["response"] = normalize_converse_response(raw_response)
            else:
-                request_client_holder["client"] = agent._create_request_openai_client(
-                    reason="chat_completion_request",
-                    api_kwargs=api_kwargs,
+                request_client = _set_request_client(
+                    agent._create_request_openai_client(
+                        reason="chat_completion_request",
+                        api_kwargs=api_kwargs,
+                    )
                )
-                result["response"] = request_client_holder["client"].chat.completions.create(**api_kwargs)
+                result["response"] = request_client.chat.completions.create(**api_kwargs)
        except Exception as e:
            result["error"] = e
        finally:
-            request_client = request_client_holder.get("client")
-            if request_client is not None:
-                agent._close_request_openai_client(request_client, reason="request_complete")
+            _close_request_client_once("request_complete")

    # ── Stale-call timeout (mirrors streaming stale detector) ────────
    # Non-streaming calls return nothing until the full response is
@@ -192,9 +211,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
                    agent._anthropic_client.close()
                    agent._rebuild_anthropic_client()
                else:
-                    rc = request_client_holder.get("client")
-                    if rc is not None:
-                        agent._close_request_openai_client(rc, reason="stale_call_kill")
+                    _close_request_client_once("stale_call_kill")
            except Exception:
                pass
            agent._touch_activity(
@@ -218,9 +235,7 @@ def interruptible_api_call(agent, api_kwargs: dict):
                    agent._anthropic_client.close()
                    agent._rebuild_anthropic_client()
                else:
-                    request_client = request_client_holder.get("client")
-                    if request_client is not None:
-                        agent._close_request_openai_client(request_client, reason="interrupt_abort")
+                    _close_request_client_once("interrupt_abort")
            except Exception:
                pass
            raise InterruptedError("Agent interrupted during API call")
@@ -1257,6 +1272,24 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=

    result = {"response": None, "error": None, "partial_tool_names": []}
    request_client_holder = {"client": None, "diag": None}
+    request_client_lock = threading.Lock()
+
+    def _set_request_client(client):
+        """Record ``client`` as the in-flight request client and return it.
+
+        The holder is written while ``request_client_lock`` is held; the same
+        object is returned so the caller can keep using a local reference.
+        """
+        with request_client_lock:
+            request_client_holder["client"] = client
+        return client
+
+    def _take_request_client():
+        """Remove and return the held request client, or None if there is none."""
+        with request_client_lock:
+            client = request_client_holder.get("client")
+            # Clear the slot inside the same critical section so a later
+            # caller gets None instead of the same client.
+            request_client_holder["client"] = None
+            return client
+
+    def _close_request_client_once(reason: str) -> None:
+        """Close the held request client if one is still registered.
+
+        The client is taken out of the holder first, so once one caller has
+        closed it, later calls find nothing to close.  ``reason`` is passed
+        through to ``agent._close_request_openai_client``.
+        """
+        request_client = _take_request_client()
+        if request_client is not None:
+            agent._close_request_openai_client(request_client, reason=reason)
+
    first_delta_fired = {"done": False}
    deltas_were_sent = {"yes": False}  # Track if any deltas were fired (for fallback)
    # Wall-clock timestamp of the last real streaming chunk.  The outer
@@ -1313,9 +1346,11 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                pool=_conn_cap,
            ),
        }
-        request_client_holder["client"] = agent._create_request_openai_client(
-            reason="chat_completion_stream_request",
-            api_kwargs=stream_kwargs,
+        request_client = _set_request_client(
+            agent._create_request_openai_client(
+                reason="chat_completion_stream_request",
+                api_kwargs=stream_kwargs,
+            )
        )
        # Reset stale-stream timer so the detector measures from this
        # attempt's start, not a previous attempt's last chunk.
@@ -1326,7 +1361,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
        # ``request_client_holder["diag"]`` for closure access.
        _diag = agent._stream_diag_init()
        request_client_holder["diag"] = _diag
-        stream = request_client_holder["client"].chat.completions.create(**stream_kwargs)
+        stream = request_client.chat.completions.create(**stream_kwargs)

        # Capture rate limit headers from the initial HTTP response.
        # The OpenAI SDK Stream object exposes the underlying httpx
@@ -1765,12 +1800,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                            mid_tool_call=True,
                            diag=request_client_holder.get("diag"),
                        )
-                        stale = request_client_holder.get("client")
-                        if stale is not None:
-                            agent._close_request_openai_client(
-                                stale, reason="stream_mid_tool_retry_cleanup"
-                            )
-                            request_client_holder["client"] = None
+                        _close_request_client_once("stream_mid_tool_retry_cleanup")
                        try:
                            agent._replace_primary_openai_client(
                                reason="stream_mid_tool_retry_pool_cleanup"
@@ -1821,12 +1851,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                                diag=request_client_holder.get("diag"),
                            )
                            # Close the stale request client before retry
-                            stale = request_client_holder.get("client")
-                            if stale is not None:
-                                agent._close_request_openai_client(
-                                    stale, reason="stream_retry_cleanup"
-                                )
-                                request_client_holder["client"] = None
+                            _close_request_client_once("stream_retry_cleanup")
                            # Also rebuild the primary client to purge
                            # any dead connections from the pool.
                            try:
@@ -1894,9 +1919,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
            result["error"] = e
            return
        finally:
-            request_client = request_client_holder.get("client")
-            if request_client is not None:
-                agent._close_request_openai_client(request_client, reason="stream_request_complete")
+            _close_request_client_once("stream_request_complete")

    # Provider-configured stale timeout takes priority over env default.
    _cfg_stale = get_provider_stale_timeout(agent.provider, agent.model)
@@ -1966,9 +1989,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                f"Reconnecting..."
            )
            try:
-                rc = request_client_holder.get("client")
-                if rc is not None:
-                    agent._close_request_openai_client(rc, reason="stale_stream_kill")
+                _close_request_client_once("stale_stream_kill")
            except Exception:
                pass
            # Rebuild the primary client too — its connection pool
@@ -1990,9 +2011,7 @@ def interruptible_streaming_api_call(agent, api_kwargs: dict, *, on_first_delta=
                    agent._anthropic_client.close()
                    agent._rebuild_anthropic_client()
                else:
-                    request_client = request_client_holder.get("client")
-                    if request_client is not None:
-                        agent._close_request_openai_client(request_client, reason="stream_interrupt_abort")
+                    _close_request_client_once("stream_interrupt_abort")
            except Exception:
                pass
            raise InterruptedError("Agent interrupted during streaming API call")
@@ -251,13 +251,16 @@ def _chat_messages_to_responses_input(
 ) -> List[Dict[str, Any]]:
    """Convert internal chat-style messages to Responses input items.

-    ``is_xai_responses=True`` strips ``encrypted_content`` from replayed
-    reasoning items.  xAI's OAuth/SuperGrok ``/v1/responses`` surface
-    rejects encrypted reasoning blobs minted by prior turns: the request
-    streams an ``error`` SSE frame before ``response.created`` and the
-    OpenAI SDK collapses it into a generic stream-ordering error.  Native
-    Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
-    — keep the default off.
+    ``is_xai_responses`` is kept for transport signature compatibility but
+    no longer suppresses encrypted reasoning replay.  Earlier (PR #26644,
+    May 2026) we believed xAI's OAuth/SuperGrok ``/v1/responses`` surface
+    rejected replayed ``encrypted_content`` reasoning items minted by
+    prior turns, and we stripped them.  That decision was wrong — xAI
+    explicitly relies on Hermes threading encrypted reasoning back across
+    turns for cross-turn coherence (the whole point of their partnership
+    integration).  We now replay encrypted reasoning on every Responses
+    transport (xAI, native Codex, custom relays) and let xAI tell us
+    explicitly if a specific surface ever rejects a payload.
    """
    items: List[Dict[str, Any]] = []
    seen_item_ids: set = set()
@@ -284,17 +287,12 @@ def _chat_messages_to_responses_input(
            if role == "assistant":
                # Replay encrypted reasoning items from previous turns
                # so the API can maintain coherent reasoning chains.
-                #
-                # xAI OAuth (SuperGrok/Premium) rejects replayed
-                # ``encrypted_content`` reasoning items minted by prior
-                # turns — see _chat_messages_to_responses_input docstring.
-                # When ``is_xai_responses`` is set we drop the replay
-                # entirely; Grok still reasons on each turn server-side,
-                # we just don't try to thread the prior turn's encrypted
-                # blob back in.
+                # This applies to every Responses transport including
+                # xAI — see _chat_messages_to_responses_input docstring
+                # for the May 2026 reversal of the earlier xAI gate.
                codex_reasoning = msg.get("codex_reasoning_items")
                has_codex_reasoning = False
-                if isinstance(codex_reasoning, list) and not is_xai_responses:
+                if isinstance(codex_reasoning, list):
                    for ri in codex_reasoning:
                        if isinstance(ri, dict) and ri.get("encrypted_content"):
                            item_id = ri.get("id")
@@ -387,8 +387,6 @@ def compress_context(
                _SESSION_ID.set(agent.session_id)
            except Exception:
                pass
-            # Update session_log_file to point to the new session's JSON file
-            agent.session_log_file = agent.logs_dir / f"session_{agent.session_id}.json"
            agent._session_db_created = False
            agent._session_db.create_session(
                session_id=agent.session_id,
@@ -46,6 +46,7 @@ from agent.message_sanitization import (
    _strip_non_ascii,
 )
 from agent.model_metadata import (
+    MINIMUM_CONTEXT_LENGTH,
    estimate_messages_tokens_rough,
    estimate_request_tokens_rough,
    get_next_probe_tier,
@@ -73,6 +74,50 @@ from utils import base_url_host_matches, env_var_enabled
 logger = logging.getLogger(__name__)


+def _ollama_context_limit_error(agent: Any, request_tokens: int) -> Optional[str]:
+    """Return a user-facing error when Ollama is loaded with too little context.
+
+    Args:
+        agent: Object whose ``tools``, ``_ollama_num_ctx``, ``model``,
+            ``base_url``, ``provider`` and ``session_id`` attributes are read
+            with ``getattr``; missing attributes are tolerated.
+        request_tokens: Estimated request size.  It is only included in the
+            warning log and does not influence the decision.
+
+    Returns:
+        ``None`` when the agent has no tools, when ``_ollama_num_ctx`` is not
+        a positive int, or when it is at least ``MINIMUM_CONTEXT_LENGTH``.
+        Otherwise a multi-line message telling the user how to raise the
+        Ollama context; a warning is logged before the message is returned.
+    """
+    # The check only applies to agents that have tools configured.
+    if not getattr(agent, "tools", None):
+        return None
+
+    # No usable runtime context value recorded on the agent -> nothing to report.
+    runtime_ctx = getattr(agent, "_ollama_num_ctx", None)
+    if not isinstance(runtime_ctx, int) or runtime_ctx <= 0:
+        return None
+    if runtime_ctx >= MINIMUM_CONTEXT_LENGTH:
+        return None
+
+    # Placeholders keep the log line and message readable when attributes are unset.
+    model = getattr(agent, "model", "") or "the selected model"
+    base_url = getattr(agent, "base_url", "") or "unknown base URL"
+    provider = getattr(agent, "provider", "") or "unknown"
+    tool_count = len(getattr(agent, "tools", None) or [])
+
+    logger.warning(
+        "Ollama runtime context too small for Hermes tool use: "
+        "model=%s provider=%s base_url=%s runtime_context=%d "
+        "minimum_context=%d estimated_request_tokens=%d tool_count=%d "
+        "session=%s",
+        model,
+        provider,
+        base_url,
+        runtime_ctx,
+        MINIMUM_CONTEXT_LENGTH,
+        request_tokens,
+        tool_count,
+        getattr(agent, "session_id", None) or "none",
+    )
+
+    return (
+        f"Ollama loaded `{model}` with only {runtime_ctx:,} tokens of runtime "
+        f"context, but Hermes needs at least {MINIMUM_CONTEXT_LENGTH:,} tokens "
+        "for reliable tool use.\n\n"
+        "Increase the Ollama context for this model and restart/reload the "
+        "model before trying again. A known-good starting point is 65,536 "
+        "tokens. In Hermes config, set `model.ollama_num_ctx: 65536` "
+        "(and `model.context_length: 65536` if you also override the displayed "
+        "model context). If you manage the model through an Ollama Modelfile, "
+        "set `PARAMETER num_ctx 65536` there instead."
+    )
+
+
 def _ra():
    """Lazy reference to ``run_agent`` so callers can patch
    ``run_agent.handle_function_call`` / ``run_agent._set_interrupt`` /
@@ -527,6 +572,7 @@ def run_conversation(
    api_call_count = 0
    final_response = None
    interrupted = False
+    failed = False
    codex_ack_continuations = 0
    length_continue_retries = 0
    truncated_tool_call_retries = 0
@@ -883,6 +929,26 @@ def run_conversation(
        # Calculate approximate request size for logging
        total_chars = sum(len(str(msg)) for msg in api_messages)
        approx_tokens = estimate_messages_tokens_rough(api_messages)
+        approx_request_tokens = estimate_request_tokens_rough(
+            api_messages, tools=agent.tools or None
+        )
+
+        _runtime_context_error = _ollama_context_limit_error(
+            agent, approx_request_tokens
+        )
+        if _runtime_context_error:
+            final_response = _runtime_context_error
+            failed = True
+            _turn_exit_reason = "ollama_runtime_context_too_small"
+            messages.append({"role": "assistant", "content": final_response})
+            agent._emit_status("❌ Ollama runtime context is too small for Hermes tool use")
+            api_call_count -= 1
+            agent._api_call_count = api_call_count
+            try:
+                agent.iteration_budget.refund()
+            except Exception:
+                pass
+            break
        
        # Thinking spinner for quiet mode (animated during API call)
        thinking_spinner = None
@@ -923,6 +989,7 @@ def run_conversation(
        copilot_auth_retry_attempted=False
        thinking_sig_retry_attempted = False
        image_shrink_retry_attempted = False
+        multimodal_tool_content_retry_attempted = False
        oauth_1m_beta_retry_attempted = False
        llama_cpp_grammar_retry_attempted = False
        has_retried_429 = False
@@ -1454,7 +1521,6 @@ def run_conversation(
                                }
                                messages.append(continue_msg)
                                agent._session_messages = messages
-                                agent._save_session_log(messages)
                                restart_with_length_continuation = True
                                break

@@ -1995,6 +2061,31 @@ def run_conversation(
                            "or shrink didn't reduce size; surfacing original error."
                        )

+                # Multimodal-tool-content recovery: providers that follow
+                # the OpenAI spec strictly (tool message content must be a
+                # string) reject our list-type content with a 400.  Strip
+                # image parts from any list-type tool messages, mark the
+                # (provider, model) as no-list-tool-content for the rest
+                # of this session so future tool results preemptively
+                # downgrade, and retry once.  See issue #27344.
+                if (
+                    classified.reason == FailoverReason.multimodal_tool_content_unsupported
+                    and not multimodal_tool_content_retry_attempted
+                ):
+                    multimodal_tool_content_retry_attempted = True
+                    if agent._try_strip_image_parts_from_tool_messages(api_messages):
+                        agent._vprint(
+                            f"{agent.log_prefix}📐 Provider rejected list-type tool content — "
+                            f"downgraded screenshots to text and retrying...",
+                            force=True,
+                        )
+                        continue
+                    else:
+                        logger.info(
+                            "multimodal-tool-content recovery: no list-type tool "
+                            "messages with image parts found; surfacing original error."
+                        )
+
                # Anthropic OAuth subscription rejected the 1M-context beta
                # header ("long context beta is not yet available for this
                # subscription"). Disable the beta for the rest of this
@@ -3086,7 +3177,6 @@ def run_conversation(
                    if not agent.quiet_mode:
                        agent._vprint(f"{agent.log_prefix}↻ Codex response incomplete; continuing turn ({agent._codex_incomplete_retries}/3)")
                    agent._session_messages = messages
-                    agent._save_session_log(messages)
                    continue

                agent._codex_incomplete_retries = 0
@@ -3411,7 +3501,6 @@ def run_conversation(
                
                # Keep agent._session_messages pointing at the latest message list
                agent._session_messages = messages
-                agent._save_session_log(messages)
                
                # Continue loop for next response
                continue
@@ -3578,7 +3667,6 @@ def run_conversation(
                        interim_msg["_thinking_prefill"] = True
                        messages.append(interim_msg)
                        agent._session_messages = messages
-                        agent._save_session_log(messages)
                        continue

                    # ── Empty response retry ──────────────────────
@@ -3712,7 +3800,6 @@ def run_conversation(
                    }
                    messages.append(continue_msg)
                    agent._session_messages = messages
-                    agent._save_session_log(messages)
                    continue

                codex_ack_continuations = 0
@@ -3853,7 +3940,11 @@ def run_conversation(
                )

    # Determine if conversation completed successfully
-    completed = final_response is not None and api_call_count < agent.max_iterations
+    completed = (
+        final_response is not None
+        and api_call_count < agent.max_iterations
+        and not failed
+    )

    # Save trajectory if enabled.  ``user_message`` may be a multimodal
    # list of parts; the trajectory format wants a plain string.
@@ -4003,6 +4094,7 @@ def run_conversation(
        "api_calls": api_call_count,
        "completed": completed,
        "turn_exit_reason": _turn_exit_reason,
+        "failed": failed,
        "partial": False,  # True only when stopped due to invalid tool calls
        "interrupted": interrupted,
        "response_previewed": getattr(agent, "_response_was_previewed", False),
@@ -50,6 +50,7 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

 from hermes_constants import get_hermes_home
+from agent.skill_utils import is_excluded_skill_path

 logger = logging.getLogger(__name__)

@@ -176,7 +177,9 @@ def get_keep() -> int:

 def _count_skill_files(base: Path) -> int:
    try:
-        return sum(1 for _ in base.rglob("SKILL.md"))
+        return sum(
+            1 for p in base.rglob("SKILL.md") if not is_excluded_skill_path(p)
+        )
    except OSError:
        return 0

@@ -50,6 +50,7 @@ class FailoverReason(enum.Enum):

    # Request format
    format_error = "format_error"        # 400 bad request — abort or strip + retry
+    multimodal_tool_content_unsupported = "multimodal_tool_content_unsupported"  # Provider rejected list-type content in tool messages (e.g. Xiaomi MiMo) — downgrade to text and retry

    # Provider-specific
    thinking_signature = "thinking_signature"  # Anthropic thinking block sig invalid
@@ -165,6 +166,32 @@ _IMAGE_TOO_LARGE_PATTERNS = [
    # the likely culprit; we still try the shrink path before giving up.
 ]

+# Providers that follow the OpenAI spec strictly require tool message
+# ``content`` to be a string.  Some (Anthropic native, Codex Responses,
+# Gemini native, first-party OpenAI) extend this to accept a content-parts
+# list (text + image_url) so screenshots from computer_use survive.  Others
+# (Xiaomi MiMo, some Alibaba endpoints, a long tail of OpenAI-compatible
+# providers) reject the list with a 400 — the patterns below are the most
+# common error shapes we see.  Recovery: strip image parts from tool
+# messages in-place, record the (provider, model) for the rest of the
+# session so we don't waste another call learning the same lesson, retry.
+#
+# See: https://github.com/NousResearch/hermes-agent/issues/27344
+_MULTIMODAL_TOOL_CONTENT_PATTERNS = [
+    # Entries are tested as plain substrings of the error message
+    # (``p in error_msg``).  All entries are lowercase — presumably the
+    # message is lowercased before matching; TODO confirm at the call sites.
+    # Xiaomi MiMo: {"error":{"code":"400","message":"Param Incorrect","param":"text is not set"}}
+    "text is not set",
+    # Generic "tool message must be string" shapes
+    "tool message content must be a string",
+    "tool content must be a string",
+    "tool message must be a string",
+    # OpenAI-compat servers that reject list-type tool content with a
+    # schema-validation message
+    "expected string, got list",
+    "expected string, got array",
+    # Alibaba/DashScope variant
+    "tool_call.content must be string",
+]
+
 # Context overflow patterns
 _CONTEXT_OVERFLOW_PATTERNS = [
    "context length",
@@ -781,6 +808,19 @@ def _classify_400(
 ) -> ClassifiedError:
    """Classify 400 Bad Request — context overflow, format error, or generic."""

+    # Multimodal tool content rejected from 400.  Must be checked BEFORE
+    # image_too_large because the recovery is different (strip image parts
+    # from tool messages, mark the model as no-list-tool-content for the
+    # rest of the session) and BEFORE context_overflow because some of the
+    # patterns ("text is not set") are ambiguous in isolation but become
+    # specific when combined with a 400 on a request known to contain
+    # multimodal tool content.
+    if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
+        return result_fn(
+            FailoverReason.multimodal_tool_content_unsupported,
+            retryable=True,
+        )
+
    # Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
    # Must be checked BEFORE context_overflow because messages can trip both
    # patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
@@ -922,6 +962,13 @@ def _classify_by_message(
            should_compress=True,
        )

+    # Multimodal tool content patterns (from message text when no status_code)
+    if any(p in error_msg for p in _MULTIMODAL_TOOL_CONTENT_PATTERNS):
+        return result_fn(
+            FailoverReason.multimodal_tool_content_unsupported,
+            retryable=True,
+        )
+
    # Image-too-large patterns (from message text when no status_code)
    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
        return result_fn(
@@ -16,9 +16,19 @@ def _hermes_home_path() -> Path:
        return Path(os.path.expanduser("~/.hermes"))


+def _hermes_root_path() -> Path:
+    """Resolve the Hermes root dir (always the parent of any profile, never per-profile)."""
+    try:
+        from hermes_constants import get_default_hermes_root  # local import to avoid cycles
+        return get_default_hermes_root()
+    except Exception:
+        # Import or lookup failed: fall back to the expanded ``~/.hermes``.
+        return Path(os.path.expanduser("~/.hermes"))
+
+
 def build_write_denied_paths(home: str) -> set[str]:
    """Return exact sensitive paths that must never be written."""
    hermes_home = _hermes_home_path()
+    hermes_root = _hermes_root_path()
    return {
        os.path.realpath(p)
        for p in [
@@ -26,7 +36,11 @@ def build_write_denied_paths(home: str) -> set[str]:
            os.path.join(home, ".ssh", "id_rsa"),
            os.path.join(home, ".ssh", "id_ed25519"),
            os.path.join(home, ".ssh", "config"),
+            # Active profile .env (or top-level .env when not in profile mode).
            str(hermes_home / ".env"),
+            # Top-level .env, even when running under a profile — overwriting it
+            # leaks credentials across every profile that inherits from root (#15981).
+            str(hermes_root / ".env"),
            os.path.join(home, ".bashrc"),
            os.path.join(home, ".zshrc"),
            os.path.join(home, ".profile"),
@@ -59,7 +59,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple

-from hermes_constants import get_hermes_home
+from hermes_constants import get_hermes_home, secure_parent_dir

 logger = logging.getLogger(__name__)

@@ -491,10 +491,8 @@ def save_credentials(creds: GoogleCredentials) -> Path:
    path.parent.mkdir(parents=True, exist_ok=True)
    # Tighten parent dir to 0o700 so siblings can't traverse to the creds file.
    # On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures.
-    try:
-        os.chmod(path.parent, 0o700)
-    except OSError:
-        pass
+    # secure_parent_dir refuses to chmod / or top-level dirs (#25821).
+    secure_parent_dir(path)
    payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n"

    with _credentials_lock():
@@ -46,6 +46,84 @@ logger = logging.getLogger(__name__)
 _VALID_MODES = frozenset({"auto", "native", "text"})


+# Strict YAML/JSON boolean coercion for capability overrides.
+#
+# ``bool("false")`` is True in Python because non-empty strings are truthy, so
+# a user writing ``supports_vision: "false"`` (quoted — a common YAML mistake)
+# would silently enable native vision routing on a model that can't actually
+# handle it. Accept only the values YAML 1.1 / 1.2 treat as booleans, plus
+# real ``bool`` and integer 0/1. Anything else returns None so the caller
+# falls through to models.dev rather than honouring garbage.
+_TRUE_TOKENS = frozenset({"true", "yes", "on", "1"})
+_FALSE_TOKENS = frozenset({"false", "no", "off", "0"})
+
+
+def _coerce_capability_bool(raw: Any) -> Optional[bool]:
+    """Return True/False for recognised boolean values, None otherwise.
+
+    Accepts real ``bool`` values, the integers 0 and 1, and strings that
+    match ``_TRUE_TOKENS`` / ``_FALSE_TOKENS`` after stripping surrounding
+    whitespace and lower-casing.  Any other value (other integers,
+    unrecognised strings, other types) yields ``None``.
+    """
+    # bool is a subclass of int; real booleans are returned unchanged here.
+    if isinstance(raw, bool):
+        return raw
+    if isinstance(raw, int):
+        if raw in (0, 1):
+            return bool(raw)
+        return None
+    if isinstance(raw, str):
+        s = raw.strip().lower()
+        if s in _TRUE_TOKENS:
+            return True
+        if s in _FALSE_TOKENS:
+            return False
+    return None
+
+
+def _supports_vision_override(
+    cfg: Optional[Dict[str, Any]],
+    provider: str,
+    model: str,
+) -> Optional[bool]:
+    """Resolve user-declared vision capability from config.yaml.
+
+    Resolution order, first hit wins:
+      1. ``model.supports_vision`` (top-level shortcut for the active model)
+      2. ``providers.<provider>.models.<model>.supports_vision``
+         (named custom providers — ``provider`` may be the runtime-resolved
+         value ``"custom"`` and/or the user-declared name under
+         ``model.provider``; both are tried)
+
+    Returns None when no override is set, so the caller falls through to
+    models.dev. Returns False explicitly only when the user wrote a
+    recognised boolean false token.
+    """
+    if not isinstance(cfg, dict):
+        return None
+
+    # 1. Top-level shortcut
+    model_cfg_raw = cfg.get("model")
+    model_cfg: Dict[str, Any] = model_cfg_raw if isinstance(model_cfg_raw, dict) else {}
+    top = _coerce_capability_bool(model_cfg.get("supports_vision"))
+    if top is not None:
+        return top
+
+    # 2. Per-provider, per-model. Named custom providers (e.g. "my-vllm")
+    # get rewritten to provider="custom" at runtime
+    # (hermes_cli/runtime_provider.py:_resolve_named_custom_runtime), so the
+    # config still holds the user-declared name under model.provider. Try
+    # both as candidate provider keys.
+    config_provider = str(model_cfg.get("provider") or "").strip()
+    providers_raw = cfg.get("providers")
+    providers_cfg: Dict[str, Any] = providers_raw if isinstance(providers_raw, dict) else {}
+    # filter(None, ...) drops empty names; dict.fromkeys removes a duplicate
+    # while keeping the runtime provider ahead of the configured one.
+    for p in dict.fromkeys(filter(None, (provider, config_provider))):
+        # Every level falls back to {} when absent or not a mapping, so a
+        # malformed config yields "no override" rather than an exception.
+        entry_raw = providers_cfg.get(p)
+        entry: Dict[str, Any] = entry_raw if isinstance(entry_raw, dict) else {}
+        models_raw = entry.get("models")
+        models_cfg: Dict[str, Any] = models_raw if isinstance(models_raw, dict) else {}
+        per_model_raw = models_cfg.get(model)
+        per_model: Dict[str, Any] = per_model_raw if isinstance(per_model_raw, dict) else {}
+        coerced = _coerce_capability_bool(per_model.get("supports_vision"))
+        if coerced is not None:
+            return coerced
+    return None
+
+
 def _coerce_mode(raw: Any) -> str:
    """Normalize a config value into one of the valid modes."""
    if not isinstance(raw, str):
@@ -81,8 +159,20 @@ def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
    return True


-def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
-    """Return True/False if we can resolve caps, None if unknown."""
+def _lookup_supports_vision(
+    provider: str,
+    model: str,
+    cfg: Optional[Dict[str, Any]] = None,
+) -> Optional[bool]:
+    """Return True/False if we can resolve caps, None if unknown.
+
+    Consults the user's ``supports_vision`` override in config.yaml first
+    (so custom/local models declared as vision-capable don't fall through to
+    text routing in ``auto`` mode), then falls back to models.dev.
+    """
+    override = _supports_vision_override(cfg, provider, model)
+    if override is not None:
+        return override
    if not provider or not model:
        return None
    try:
@@ -123,7 +213,7 @@ def decide_image_input_mode(
    if _explicit_aux_vision_override(cfg):
        return "text"

-    supports = _lookup_supports_vision(provider, model)
+    supports = _lookup_supports_vision(provider, model, cfg)
    if supports is True:
        return "native"
    return "text"
@@ -1258,6 +1258,10 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
        "terminal",
        "process",
        "execute_code",
+        "app_search_tools",
+        "app_tool_schemas",
+        "app_execute_tools",
+        "app_manage_connections",
    }

    if valid_names and not (valid_names & relevant_tool_names):
@@ -1279,7 +1283,7 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -

    lines = [
        "# Nous Subscription",
-        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.",
+        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, browser automation (Browser Use), and app integrations (500+ apps) by default. Modal execution is optional.",
        "Current capability status:",
    ]
    lines.extend(_status_line(feature) for feature in features.items())
@@ -1294,6 +1298,39 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
    return "\n".join(lines)


+# =========================================================================
+# App tools (500+ external integrations) behavioural prompt
+# =========================================================================
+
+# Behavioural guidance text returned by build_app_tools_prompt() when
+# "app_search_tools" is among the valid tool names.  The backslash after the
+# opening quotes suppresses the leading newline of the literal.
+_APP_TOOLS_PROMPT = """\
+## App Tools (500+ External Integrations)
+
+You have app_search_tools, app_tool_schemas, app_execute_tools, and app_manage_connections available RIGHT NOW as callable tools. They are already configured and connected to the Nous tool gateway — no SDK installation, no API keys, no plugin setup needed. Just call them.
+
+**When to use:** When a user asks to interact with ANY external app or service — Gmail, Slack, GitHub, Jira, Notion, Google Sheets, Linear, HubSpot, Figma, Salesforce, or any of 500+ other apps. ALWAYS prefer these tools over loading skills about the same service (e.g. do NOT load the 'linear', 'airtable', 'google-workspace', 'notion', or any similar skill — use app_search_tools instead). Do NOT suggest installing SDKs, CLI tools, MCP servers, or API keys for external services — call app_search_tools directly.
+
+**Workflow:**
+1. Call app_search_tools with a clear use_case description to discover available tools
+2. Check the response for connection status — if no active connection, call app_manage_connections and share the auth link with the user
+3. Review the execution plan and pitfalls in the search response before executing
+4. If a tool has schemaRef instead of input_schema, call app_tool_schemas to get the full schema
+5. Execute tools via app_execute_tools with schema-compliant arguments
+
+**Session tracking:** Pass session: {generate_id: true} on your first app_search_tools call. Reuse the returned session.id in all subsequent calls. Generate a new session when the user pivots to a different task.
+
+**Important:** Never fabricate tool slugs or argument field names. Only use slugs and schemas returned by app_search_tools or app_tool_schemas."""
+
+
+def build_app_tools_prompt(valid_tool_names: "set[str] | None" = None) -> str:
+    """Return the app-tools guidance block, or ``""`` when it does not apply.
+
+    The prompt is emitted only when ``valid_tool_names`` is non-empty and
+    contains ``app_search_tools``.  An unknown or empty tool set yields an
+    empty string (conservative default).
+    """
+    tools_known = bool(valid_tool_names)
+    if tools_known and "app_search_tools" in valid_tool_names:
+        return _APP_TOOLS_PROMPT
+    return ""
+
+
 # =========================================================================
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================
@@ -0,0 +1,13 @@
+"""External secret source integrations.
+
+A secret source is anything that can supply environment-variable-shaped
+credentials at process startup, _after_ ~/.hermes/.env has loaded.  By
+default sources are non-destructive: they only set values for env vars
+that aren't already present, so .env and shell exports continue to win.
+
+Currently shipped:
+
+  - ``bitwarden`` — Bitwarden Secrets Manager (`bws` CLI).  See
+    ``agent.secret_sources.bitwarden`` for the integration and
+    ``hermes_cli.secrets_cli`` for the user-facing setup wizard.
+"""
@@ -0,0 +1,515 @@
+"""Bitwarden Secrets Manager (`bws` CLI) integration.
+
+Hermes pulls API keys from Bitwarden Secrets Manager at process startup
+so they don't have to live in plaintext in ``~/.hermes/.env``.
+
+Design summary
+--------------
+
+* The ``bws`` binary is auto-installed into ``<hermes_home>/bin/bws`` on
+  first use.  Hermes pins one version (``_BWS_VERSION``) and downloads
+  the matching asset from the official GitHub Releases page, verifying
+  the SHA-256 against the release's published checksum file.
+* The access token is stored in ``~/.hermes/.env`` as
+  ``BWS_ACCESS_TOKEN`` (or whatever name the user picked in
+  ``secrets.bitwarden.access_token_env``).  This is the one
+  bootstrap secret — every other provider key can live in Bitwarden.
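+* Pulling secrets is a single ``bws secret list <project_id>
+  --output json`` call.  We cache the result in-process for
+  ``cache_ttl_seconds`` so repeated loads within one process (CLI
+  startup, gateway hot-reload, test suites) don't hammer the API.
+  The cache does not persist across separate ``hermes`` processes.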
+* Failures NEVER block Hermes startup.  Missing binary, no network,
+  expired token, etc. all emit a one-line warning and continue with
+  whatever credentials ``.env`` already had.
+
+The module is intentionally subprocess-driven rather than going through
+the ``bitwarden-sdk-secrets`` Python package: one cross-platform binary
+is easier to lazy-install than a wheels-with-Rust-extension dependency.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import os
+import platform
+import shutil
+import stat
+import subprocess
+import sys
+import tempfile
+import time
+import urllib.error
+import urllib.request
+import zipfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Configuration constants
+# ---------------------------------------------------------------------------
+
+# Pinned upstream version.  Bump in a follow-up PR — never auto-resolve
+# "latest" because upstream release shape (asset names, CLI flags) is
+# allowed to change between majors and we want updates to be deliberate.
+_BWS_VERSION = "2.0.0"
+
+# Release download URL prefix for the pinned version; install_bws() appends
+# the asset filename and the checksum filename to it.
+_BWS_RELEASE_BASE = (
+    f"https://github.com/bitwarden/sdk-sm/releases/download/bws-v{_BWS_VERSION}"
+)
+# Filename of the release's SHA-256 checksum list for the pinned version.
+_BWS_CHECKSUM_NAME = f"bws-sha256-checksums-{_BWS_VERSION}.txt"
+
+# How long to wait for bws subprocesses and HTTP downloads, in seconds.
+_BWS_DOWNLOAD_TIMEOUT = 60
+_BWS_RUN_TIMEOUT = 30
+
+# In-process cache so repeated load_hermes_dotenv() calls (CLI startup,
+# gateway hot-reload, test suites) don't re-fetch from BSM.
+_CacheKey = Tuple[str, str]  # (access_token_fingerprint, project_id)
+# Most recent successful fetch per cache key; written by
+# fetch_bitwarden_secrets() and emptied by _reset_cache_for_tests().
+_CACHE: Dict[_CacheKey, "_CachedFetch"] = {}
+
+
+@dataclass
+class _CachedFetch:
+    """One cached BSM pull plus the wall-clock time it was taken."""
+
+    secrets: Dict[str, str]
+    fetched_at: float
+
+    def is_fresh(self, ttl_seconds: float) -> bool:
+        """Return True while the entry is younger than ``ttl_seconds``.
+
+        A non-positive TTL means the entry is never considered fresh.
+        """
+        age = time.time() - self.fetched_at
+        return ttl_seconds > 0 and age < ttl_seconds
+
+
+# ---------------------------------------------------------------------------
+# Public dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class FetchResult:
+    """Outcome of a single BSM pull."""
+
+    # Secrets returned by the fetch, keyed by env-var name.
+    secrets: Dict[str, str] = field(default_factory=dict)
+    # Keys that were set into os.environ.
+    applied: List[str] = field(default_factory=list)
+    # Keys that were not written (already set and not overridden, or the
+    # access-token variable itself).
+    skipped: List[str] = field(default_factory=list)
+    # Non-fatal issues.
+    warnings: List[str] = field(default_factory=list)
+    # Fatal error message; when set, nothing was fetched.
+    error: Optional[str] = None
+    # The bws binary that was resolved for this pull, if any.
+    binary_path: Optional[Path] = None
+
+    @property
+    def ok(self) -> bool:
+        """True when no fatal error was recorded."""
+        return self.error is None
+
+
+# ---------------------------------------------------------------------------
+# Binary discovery + lazy install
+# ---------------------------------------------------------------------------
+
+
+def _hermes_bin_dir() -> Path:
+    """Return the ``bin`` directory under the Hermes home directory.
+
+    This is where the managed ``bws`` binary is looked up and installed.
+    """
+    from hermes_constants import get_hermes_home
+
+    hermes_home = get_hermes_home()
+    return hermes_home / "bin"
+
+
+def find_bws(*, install_if_missing: bool = False) -> Optional[Path]:
+    """Return a path to a usable ``bws`` binary, or None.
+
+    Resolution order:
+      1. ``<hermes_home>/bin/bws``  (our managed copy — preferred)
+      2. ``shutil.which("bws")``    (system PATH)
+
+    When ``install_if_missing`` is True and neither resolves, this calls
+    :func:`install_bws` to download and verify the pinned version.  Any
+    exception from the install is logged as a warning and turned into a
+    ``None`` return so callers on the startup path are never blocked.
+    """
+    managed = _hermes_bin_dir() / _platform_binary_name()
+    # is_file() rather than exists(): a directory also passes the X_OK
+    # check and must not be mistaken for the binary.
+    if managed.is_file() and os.access(managed, os.X_OK):
+        return managed
+
+    system = shutil.which("bws")
+    if system:
+        return Path(system)
+
+    if not install_if_missing:
+        return None
+
+    try:
+        # If something already sits at the managed path it just failed the
+        # usability check above; install_bws() would return it untouched
+        # unless forced, so force a fresh install in that case.
+        return install_bws(force=managed.exists())
+    except Exception as exc:  # noqa: BLE001 — never block startup
+        logger.warning("bws auto-install failed: %s", exc)
+        return None
+
+
+def _platform_binary_name() -> str:
+    """Return the executable filename for this OS (``bws.exe`` on Windows)."""
+    if platform.system() == "Windows":
+        return "bws.exe"
+    return "bws"
+
+
+def _bws_asset_arch(machine: str) -> str:
+    """Map a lower-cased ``platform.machine()`` value to an asset arch."""
+    if machine in ("arm64", "aarch64"):
+        return "aarch64"
+    # An empty string means the machine type could not be determined; keep
+    # the historical x86_64 default for that case only.
+    if machine in ("x86_64", "amd64", "x64", ""):
+        return "x86_64"
+    raise RuntimeError(
+        f"Unsupported architecture for bws auto-install: {machine}"
+    )
+
+
+def _platform_asset_name() -> str:
+    """Map (uname, arch, libc) → the upstream asset filename.
+
+    Asset names follow Rust's target triple convention.  Linux defaults
+    to gnu (glibc); we switch to musl only if ldd --version says so.
+
+    Raises:
+        RuntimeError: If the operating system or CPU architecture has no
+            matching asset.  Previously an unrecognised architecture was
+            silently treated as x86_64, which downloaded a binary that
+            could not run.
+    """
+    system = platform.system()
+    machine = platform.machine().lower()
+
+    if system == "Darwin":
+        # Universal binary works on both Intel and Apple Silicon — no
+        # need to pick a per-arch asset.
+        return f"bws-macos-universal-{_BWS_VERSION}.zip"
+
+    if system == "Windows":
+        arch = _bws_asset_arch(machine)
+        return f"bws-{arch}-pc-windows-msvc-{_BWS_VERSION}.zip"
+
+    if system == "Linux":
+        arch = _bws_asset_arch(machine)
+        libc = "gnu"
+        # glibc's ldd prints its version banner to stdout, musl's prints
+        # it to stderr, so both streams are inspected.  We don't need
+        # bullet-proof detection — getting it wrong falls back to a clear
+        # error from the binary loader, which we catch.
+        try:
+            res = subprocess.run(
+                ["ldd", "--version"],
+                capture_output=True,
+                text=True,
+                timeout=2,
+            )
+            if "musl" in (res.stdout + res.stderr).lower():
+                libc = "musl"
+        except (OSError, subprocess.TimeoutExpired):
+            pass
+        return f"bws-{arch}-unknown-linux-{libc}-{_BWS_VERSION}.zip"
+
+    raise RuntimeError(
+        f"Unsupported platform for bws auto-install: {system} {machine}"
+    )
+
+
+def install_bws(*, force: bool = False) -> Path:
+    """Download, verify, and install the pinned ``bws`` binary.
+
+    Args:
+        force: Re-download and replace the managed binary even when a
+            file already exists at the target path.
+
+    Returns:
+        The path to the installed executable.
+
+    Raises on any failure (network, checksum, extraction) — callers in
+    the auto-install path catch these; the user-facing
+    ``hermes secrets bitwarden setup`` surface lets them propagate so the
+    wizard can show a clear error.
+    """
+    bin_dir = _hermes_bin_dir()
+    bin_dir.mkdir(parents=True, exist_ok=True)
+    target = bin_dir / _platform_binary_name()
+
+    if target.exists() and not force:
+        return target
+
+    asset_name = _platform_asset_name()
+    asset_url = f"{_BWS_RELEASE_BASE}/{asset_name}"
+    checksum_url = f"{_BWS_RELEASE_BASE}/{_BWS_CHECKSUM_NAME}"
+
+    with tempfile.TemporaryDirectory(prefix="hermes-bws-") as tmpdir:
+        tmp = Path(tmpdir)
+        zip_path = tmp / asset_name
+        checksum_path = tmp / _BWS_CHECKSUM_NAME
+
+        logger.info("Downloading %s", asset_url)
+        _http_download(asset_url, zip_path)
+        _http_download(checksum_url, checksum_path)
+
+        # Verify before extracting anything from the archive.
+        expected = _expected_sha256(checksum_path, asset_name)
+        actual = _sha256_file(zip_path)
+        if expected.lower() != actual.lower():
+            raise RuntimeError(
+                f"Checksum mismatch for {asset_name}: "
+                f"expected {expected}, got {actual}"
+            )
+
+        with zipfile.ZipFile(zip_path) as zf:
+            member = _pick_zip_member(zf, _platform_binary_name())
+            # extract() sanitises the member name (drops absolute prefixes
+            # and ".." components), so use the path it reports instead of
+            # re-deriving it from the raw member name.
+            extracted = Path(zf.extract(member, tmp))
+
+        # Move into place atomically.  We write to a sibling tempfile in
+        # the final directory so the rename can't cross filesystems.
+        fd, staged = tempfile.mkstemp(dir=str(bin_dir), prefix=".bws_")
+        os.close(fd)
+        try:
+            shutil.copy2(extracted, staged)
+            os.chmod(
+                staged,
+                stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR
+                | stat.S_IRGRP | stat.S_IXGRP
+                | stat.S_IROTH | stat.S_IXOTH,
+            )
+            os.replace(staged, target)
+        except BaseException:
+            # Don't leave a stray .bws_* file behind in bin_dir when the
+            # copy, chmod, or rename fails.
+            try:
+                os.unlink(staged)
+            except OSError:
+                pass
+            raise
+
+    logger.info("Installed bws %s at %s", _BWS_VERSION, target)
+    return target
+
+
+def _http_download(url: str, dest: Path) -> None:
+    """Stream ``url`` into the file at ``dest``.
+
+    Raises:
+        RuntimeError: If the request or the transfer fails.  ``URLError``
+            is a subclass of ``OSError``, so catching ``OSError`` also
+            covers read timeouts and connection resets that occur after
+            the response has started.
+    """
+    req = urllib.request.Request(url, headers={"User-Agent": "hermes-agent"})
+    try:
+        with urllib.request.urlopen(req, timeout=_BWS_DOWNLOAD_TIMEOUT) as resp:  # noqa: S310
+            with open(dest, "wb") as f:
+                shutil.copyfileobj(resp, f)
+    except OSError as exc:
+        raise RuntimeError(f"Failed to download {url}: {exc}") from exc
+
+
+def _expected_sha256(checksum_file: Path, asset_name: str) -> str:
+    """Parse the upstream ``bws-sha256-checksums-X.Y.Z.txt`` file.
+
+    Format is the standard ``sha256sum`` output, one entry per line:
+    ``<hex>  <filename>`` (text mode) or ``<hex> *<filename>`` (binary
+    mode).  Both forms are accepted.
+
+    Args:
+        checksum_file: Path to the downloaded checksum list.
+        asset_name: Exact asset filename to look up.
+
+    Returns:
+        The hex digest recorded for ``asset_name``.
+
+    Raises:
+        RuntimeError: If no line names ``asset_name``.
+    """
+    text = checksum_file.read_text(encoding="utf-8", errors="replace")
+    for line in text.splitlines():
+        parts = line.split()
+        if len(parts) < 2:
+            continue
+        listed = parts[-1]
+        # sha256sum prefixes the filename with "*" in binary mode.
+        if listed.startswith("*"):
+            listed = listed[1:]
+        if listed == asset_name:
+            return parts[0]
+    raise RuntimeError(
+        f"No checksum entry for {asset_name} in {checksum_file.name}"
+    )
+
+
+def _sha256_file(path: Path) -> str:
+    """Return the hex SHA-256 digest of the file at ``path``.
+
+    The file is read in 64 KiB blocks so large archives are not loaded
+    into memory at once.
+    """
+    digest = hashlib.sha256()
+    with open(path, "rb") as stream:
+        while True:
+            block = stream.read(65536)
+            if not block:
+                break
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def _pick_zip_member(zf: zipfile.ZipFile, binary_name: str) -> str:
+    """Locate ``binary_name`` among the members of the upstream zip.
+
+    The archive has historically been flat (``bws`` at the root), but a
+    member nested under directories is accepted too.  When several
+    members match, the one with the shortest path wins (root over
+    nested), which keeps the choice deterministic.
+
+    Raises:
+        RuntimeError: If no member's final path component equals
+            ``binary_name``.
+    """
+    names = zf.namelist()
+    matches = [name for name in names if name.rsplit("/", 1)[-1] == binary_name]
+    if not matches:
+        raise RuntimeError(
+            f"Could not find {binary_name} inside downloaded archive "
+            f"(members: {names[:5]}...)"
+        )
+    return min(matches, key=len)
+
+
+# ---------------------------------------------------------------------------
+# Secret fetch + apply
+# ---------------------------------------------------------------------------
+
+
+def _token_fingerprint(token: str) -> str:
+    """Return the first 16 hex chars of the token's SHA-256 digest.
+
+    Used as part of the cache key so the raw access token is never stored
+    in the cache mapping.
+    """
+    digest = hashlib.sha256(token.encode("utf-8"))
+    return digest.hexdigest()[:16]
+
+
+def fetch_bitwarden_secrets(
+    *,
+    access_token: str,
+    project_id: str,
+    binary: Optional[Path] = None,
+    cache_ttl_seconds: float = 300,
+    use_cache: bool = True,
+) -> Tuple[Dict[str, str], List[str]]:
+    """Pull the secrets for ``project_id`` from Bitwarden Secrets Manager.
+
+    Args:
+        access_token: BSM access token; must be non-empty.
+        project_id: BSM project to list; must be non-empty.
+        binary: Path to a ``bws`` executable.  When omitted, one is
+            resolved (and auto-installed if needed) via :func:`find_bws`.
+        cache_ttl_seconds: Maximum age of a cached result that may be
+            reused.  A non-positive value never reuses the cache.
+        use_cache: When False, skip the cache lookup.  The fresh result
+            is still stored.
+
+    Returns ``(secrets_dict, warnings_list)``.  The dict is a private
+    copy: mutating it does not affect the in-process cache.  A cache hit
+    returns an empty warnings list.
+
+    Raises :class:`RuntimeError` for fatal conditions (missing binary,
+    auth failure, unparseable output).  Callers in the env_loader path
+    catch this and emit a single warning; callers in the user-facing
+    setup wizard let it propagate.
+    """
+    if not access_token:
+        raise RuntimeError("Bitwarden access token is empty")
+    if not project_id:
+        raise RuntimeError("Bitwarden project_id is empty")
+
+    cache_key = (_token_fingerprint(access_token), project_id)
+    if use_cache:
+        cached = _CACHE.get(cache_key)
+        if cached and cached.is_fresh(cache_ttl_seconds):
+            # Hand out a copy so callers cannot corrupt the cached entry.
+            return dict(cached.secrets), []
+
+    bws = binary or find_bws(install_if_missing=True)
+    if bws is None:
+        raise RuntimeError(
+            "bws binary not available — auto-install failed and `bws` is "
+            "not on PATH.  Install manually from "
+            "https://github.com/bitwarden/sdk-sm/releases or re-run "
+            "`hermes secrets bitwarden setup`."
+        )
+
+    secrets, warnings = _run_bws_list(bws, access_token, project_id)
+    # Cache a separate copy from the one returned, for the same reason.
+    _CACHE[cache_key] = _CachedFetch(secrets=dict(secrets), fetched_at=time.time())
+    return secrets, warnings
+
+
+def _run_bws_list(
+    bws: Path, access_token: str, project_id: str
+) -> Tuple[Dict[str, str], List[str]]:
+    """Run ``bws secret list <project_id> --output json`` and parse it.
+
+    The access token is passed to the subprocess through the
+    ``BWS_ACCESS_TOKEN`` environment variable, not on the command line.
+
+    Returns:
+        ``(secrets, warnings)``.  Entries that are not dicts, or whose
+        ``key``/``value`` are not strings, are dropped silently; entries
+        whose key is not a valid env-var name are dropped with a warning.
+
+    Raises:
+        RuntimeError: On timeout, failure to launch, a non-zero exit
+            code, non-JSON output, or a JSON payload that is not a list.
+    """
+    import re
+
+    cmd = [str(bws), "secret", "list", project_id, "--output", "json"]
+    env = os.environ.copy()
+    env["BWS_ACCESS_TOKEN"] = access_token
+    # Make sure we're not echoing telemetry / colour codes into json.
+    env.setdefault("NO_COLOR", "1")
+
+    try:
+        proc = subprocess.run(  # noqa: S603 — bws path is trusted
+            cmd,
+            env=env,
+            capture_output=True,
+            text=True,
+            timeout=_BWS_RUN_TIMEOUT,
+        )
+    except subprocess.TimeoutExpired as exc:
+        raise RuntimeError(
+            f"bws timed out after {_BWS_RUN_TIMEOUT}s fetching secrets"
+        ) from exc
+    except OSError as exc:
+        raise RuntimeError(f"failed to invoke bws: {exc}") from exc
+
+    if proc.returncode != 0:
+        # bws writes auth/network errors to stderr in plain English.
+        # Strip ANSI just in case and surface the first 200 chars.  The
+        # pattern removes whole CSI sequences (ESC [ ... final byte) as
+        # well as any bare ESC, so no "[31m" fragments are left behind.
+        err = (proc.stderr or proc.stdout or "").strip()
+        err = re.sub(r"\x1b(?:\[[0-?]*[ -/]*[@-~])?", "", err)
+        raise RuntimeError(
+            f"bws exited {proc.returncode}: {err[:200]}"
+        )
+
+    raw = proc.stdout.strip()
+    if not raw:
+        return {}, ["bws returned no output (empty project?)"]
+
+    try:
+        payload = json.loads(raw)
+    except json.JSONDecodeError as exc:
+        raise RuntimeError(f"bws returned non-JSON output: {exc}") from exc
+
+    if not isinstance(payload, list):
+        raise RuntimeError(
+            f"bws returned unexpected shape: {type(payload).__name__}"
+        )
+
+    secrets: Dict[str, str] = {}
+    warnings: List[str] = []
+    for item in payload:
+        if not isinstance(item, dict):
+            continue
+        key = item.get("key")
+        value = item.get("value")
+        if not isinstance(key, str) or not isinstance(value, str):
+            continue
+        if not _is_valid_env_name(key):
+            warnings.append(
+                f"Skipping secret {key!r}: not a valid env-var name"
+            )
+            continue
+        secrets[key] = value
+    return secrets, warnings
+
+
+def _is_valid_env_name(name: str) -> bool:
+    """Return True if ``name`` matches ``[A-Za-z_][A-Za-z0-9_]*``.
+
+    ``str.isalpha()`` and ``str.isalnum()`` also accept non-ASCII letters
+    and digits (e.g. ``"é"``, ``"²"``), so the name is required to be
+    pure ASCII before those checks run.
+    """
+    if not name or not name.isascii():
+        return False
+    if not (name[0].isalpha() or name[0] == "_"):
+        return False
+    return all(c.isalnum() or c == "_" for c in name)
+
+
+# ---------------------------------------------------------------------------
+# Public entry point — called from hermes_cli.env_loader
+# ---------------------------------------------------------------------------
+
+
+def apply_bitwarden_secrets(
+    *,
+    enabled: bool,
+    access_token_env: str = "BWS_ACCESS_TOKEN",
+    project_id: str = "",
+    override_existing: bool = False,
+    cache_ttl_seconds: float = 300,
+    auto_install: bool = True,
+) -> FetchResult:
+    """Pull secrets from BSM and set them on ``os.environ``.
+
+    This is the function ``load_hermes_dotenv()`` calls after the .env
+    files have loaded.  It is intentionally defensive — any failure
+    returns a :class:`FetchResult` with ``error`` set; it never raises.
+
+    Parameters mirror the ``secrets.bitwarden.*`` config keys so the
+    caller can just splat the dict in.
+
+    Args:
+        enabled: When False, return an empty result without doing anything.
+        access_token_env: Name of the env var holding the access token.
+        project_id: BSM project whose secrets are pulled.
+        override_existing: When True, overwrite env vars that already
+            hold a non-empty value.
+        cache_ttl_seconds: Passed through to the fetch cache.
+        auto_install: Allow the ``bws`` binary to be downloaded if it
+            cannot be found.
+
+    Returns:
+        A :class:`FetchResult` listing fetched, applied, and skipped keys,
+        or carrying ``error`` when nothing could be fetched.
+    """
+    result = FetchResult()
+
+    if not enabled:
+        return result
+
+    access_token = os.environ.get(access_token_env, "").strip()
+    if not access_token:
+        result.error = (
+            f"secrets.bitwarden.enabled is true but {access_token_env} is "
+            "not set.  Run `hermes secrets bitwarden setup`."
+        )
+        return result
+
+    if not project_id:
+        result.error = (
+            "secrets.bitwarden.project_id is empty.  "
+            "Run `hermes secrets bitwarden setup`."
+        )
+        return result
+
+    binary = find_bws(install_if_missing=auto_install)
+    result.binary_path = binary
+    if binary is None:
+        # Report the actual cause: with auto_install on, reaching this
+        # point means the install attempt itself failed.
+        if auto_install:
+            result.error = (
+                "bws binary not available — auto-install failed and `bws` "
+                "is not on PATH.  Run `hermes secrets bitwarden setup` to "
+                "install."
+            )
+        else:
+            result.error = (
+                "bws binary not available and auto-install is disabled.  "
+                "Run `hermes secrets bitwarden setup` to install."
+            )
+        return result
+
+    try:
+        secrets, warnings = fetch_bitwarden_secrets(
+            access_token=access_token,
+            project_id=project_id,
+            binary=binary,
+            cache_ttl_seconds=cache_ttl_seconds,
+        )
+    except RuntimeError as exc:
+        result.error = str(exc)
+        return result
+    except Exception as exc:  # noqa: BLE001 — honour the never-raises contract
+        result.error = f"unexpected {type(exc).__name__}: {exc}"
+        return result
+
+    result.secrets = secrets
+    result.warnings.extend(warnings)
+
+    for key, value in secrets.items():
+        if key == access_token_env:
+            # Don't let BSM clobber the very token we used to fetch
+            # itself — that would be a footgun if someone stored the
+            # token as a BSM secret too.
+            result.skipped.append(key)
+            continue
+        # An env var set to the empty string counts as unset here.
+        if not override_existing and os.environ.get(key):
+            result.skipped.append(key)
+            continue
+        os.environ[key] = value
+        result.applied.append(key)
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Test hook — used by hermetic tests to flush the cache between cases.
+# ---------------------------------------------------------------------------
+
+
+def _reset_cache_for_tests() -> None:
+    """Empty the module-level fetch cache so each test starts cold."""
+    _CACHE.clear()
@@ -12,7 +12,7 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple

-from hermes_constants import get_config_path, get_skills_dir
+from hermes_constants import get_config_path, get_skills_dir, is_termux

 logger = logging.getLogger(__name__)

@@ -24,7 +24,43 @@ PLATFORM_MAP = {
    "windows": "win32",
 }

-EXCLUDED_SKILL_DIRS = frozenset((".git", ".github", ".hub", ".archive"))
+# Directory names that are pruned from skill scans: Hermes metadata, VCS,
+# virtualenv/dependency, and tool-cache directories.  Matching is by exact
+# path-component name (see is_excluded_skill_path).
+EXCLUDED_SKILL_DIRS = frozenset(
+    (
+        ".git",
+        ".github",
+        ".hub",
+        ".archive",
+        ".venv",
+        "venv",
+        "node_modules",
+        "site-packages",
+        "__pycache__",
+        ".tox",
+        ".nox",
+        ".pytest_cache",
+        ".mypy_cache",
+        ".ruff_cache",
+    )
+)
+
+
+def is_excluded_skill_path(path) -> bool:
+    """Report whether *path* passes through an excluded directory.
+
+    Returns True when at least one component of *path* is a member of
+    EXCLUDED_SKILL_DIRS.  Intended for every SKILL.md path produced by
+    ``rglob`` so that dependency, virtualenv, VCS, and cache directories
+    are pruned, keeping all skill-scanning sites in sync with the shared
+    exclusion set.
+
+    *path* may be a Path-like object exposing ``.parts`` or a plain
+    string.
+    """
+    if hasattr(path, "parts"):
+        components = path.parts
+    else:
+        from pathlib import PurePath
+        components = PurePath(str(path)).parts
+    return not EXCLUDED_SKILL_DIRS.isdisjoint(components)
+

 # ── Lazy YAML loader ─────────────────────────────────────────────────────

@@ -100,6 +136,14 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:

    If the field is absent or empty the skill is compatible with **all**
    platforms (backward-compatible default).
+
+    Termux note: on Termux/Android, ``sys.platform`` is ``"linux"`` on
+    older Pythons but became ``"android"`` on Python 3.13+. Termux is a
+    Linux userland riding on the Android kernel, so skills tagged
+    ``linux`` are treated as compatible in Termux regardless of which
+    ``sys.platform`` value Python reports. Individual Linux commands
+    inside a skill may still misbehave (no systemd, BusyBox utils, no
+    apt/dnf, etc.) but that is on the skill, not on platform gating.
    """
    platforms = frontmatter.get("platforms")
    if not platforms:
@@ -107,11 +151,21 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
    if not isinstance(platforms, list):
        platforms = [platforms]
    current = sys.platform
+    running_in_termux = is_termux()
    for platform in platforms:
        normalized = str(platform).lower().strip()
        mapped = PLATFORM_MAP.get(normalized, normalized)
        if current.startswith(mapped):
            return True
+        # Termux runs a Linux userland on Android. Accept linux-tagged
+        # skills regardless of whether sys.platform is "linux" (pre-3.13
+        # Termux) or "android" (Python 3.13+ Termux, and any other
+        # Android runtime).
+        if running_in_termux and mapped == "linux":
+            return True
+        # Explicit termux/android tags match a Termux session too.
+        if running_in_termux and mapped in ("termux", "android"):
+            return True
    return False


@@ -478,7 +532,8 @@ def extract_skill_description(frontmatter: Dict[str, Any]) -> str:
 def iter_skill_index_files(skills_dir: Path, filename: str):
    """Walk skills_dir yielding sorted paths matching *filename*.

-    Excludes ``.git``, ``.github``, ``.hub``, ``.archive`` directories.
+    Excludes Hermes metadata, VCS, virtualenv/dependency, and cache
+    directories so dependencies cannot register nested skills.
    """
    matches = []
    for root, dirs, files in os.walk(skills_dir, followlinks=True):
@@ -130,6 +130,12 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
    nous_subscription_prompt = _r.build_nous_subscription_prompt(agent.valid_tool_names)
    if nous_subscription_prompt:
        stable_parts.append(nous_subscription_prompt)
+
+    # App tools (500+ external integrations) behavioural guidance
+    app_tools_prompt = _r.build_app_tools_prompt(agent.valid_tool_names)
+    if app_tools_prompt:
+        stable_parts.append(app_tools_prompt)
+
    # Tool-use enforcement: tells the model to actually call tools instead
    # of describing intended actions.  Controlled by config.yaml
    # agent.tool_use_enforcement:
@@ -112,17 +112,31 @@ class ChatCompletionsTransport(ProviderTransport):
    def convert_messages(
        self, messages: list[dict[str, Any]], **kwargs
    ) -> list[dict[str, Any]]:
-        """Messages are already in OpenAI format — sanitize Codex leaks only.
+        """Messages are already in OpenAI format — strip internal fields
+        that strict chat-completions providers reject with HTTP 400/422.

-        Strips Codex Responses API fields (``codex_reasoning_items`` /
-        ``codex_message_items`` on the message, ``call_id``/``response_item_id``
-        on tool_calls) that strict chat-completions providers reject with 400/422.
+        Strips:
+
+        - Codex Responses API fields: ``codex_reasoning_items`` /
+          ``codex_message_items`` on the message, ``call_id`` /
+          ``response_item_id`` on ``tool_calls`` entries.
+        - ``tool_name`` on tool-result messages — written by
+          ``make_tool_result_message()`` for the SQLite FTS index, but not
+          part of the Chat Completions schema. Strict providers (Fireworks,
+          Moonshot/Kimi) reject any payload containing it with
+          ``Extra inputs are not permitted, field: 'messages[N].tool_name'``.
+          Permissive providers (OpenRouter, MiniMax) silently ignore the
+          field, which masked the bug for months.
        """
        needs_sanitize = False
        for msg in messages:
            if not isinstance(msg, dict):
                continue
-            if "codex_reasoning_items" in msg or "codex_message_items" in msg:
+            if (
+                "codex_reasoning_items" in msg
+                or "codex_message_items" in msg
+                or "tool_name" in msg
+            ):
                needs_sanitize = True
                break
            tool_calls = msg.get("tool_calls")
@@ -145,6 +159,7 @@ class ChatCompletionsTransport(ProviderTransport):
                continue
            msg.pop("codex_reasoning_items", None)
            msg.pop("codex_message_items", None)
+            msg.pop("tool_name", None)
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
@@ -116,14 +116,11 @@ class ResponsesApiTransport(ProviderTransport):
        if reasoning_enabled and is_xai_responses:
            from agent.model_metadata import grok_supports_reasoning_effort

-            # NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
-            # any more.  xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
-            # replayed encrypted reasoning items on turn 2+ — see
-            # _chat_messages_to_responses_input docstring.  Requesting the field
-            # back would just have us cache something we then must strip.  Grok
-            # still reasons natively each turn; coherence across turns rides on
-            # the visible message text alone.
-            kwargs["include"] = []
+            # Ask xAI to echo back encrypted reasoning items so we can
+            # replay them on subsequent turns for cross-turn coherence.
+            # See agent/codex_responses_adapter._chat_messages_to_responses_input
+            # for the May 2026 reversal of the earlier suppression gate.
+            kwargs["include"] = ["reasoning.encrypted_content"]
            # xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
            # / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
            # those models reason natively. Only send the effort dial when
@@ -6501,12 +6501,6 @@ class HermesCLI:
        if self.agent:
            self.agent.session_id = new_session_id
            self.agent.session_start = now
-            # Redirect the JSON session log to the new branch session file so
-            # messages written after branching land in the correct file.
-            if hasattr(self.agent, "session_log_file") and hasattr(self.agent, "logs_dir"):
-                self.agent.session_log_file = (
-                    self.agent.logs_dir / f"session_{new_session_id}.json"
-                )
            self.agent.reset_session_state()
            if hasattr(self.agent, "_last_flushed_db_idx"):
                self.agent._last_flushed_db_idx = len(self.conversation_history)
@@ -10227,6 +10221,7 @@ class HermesCLI:
            self._voice_processing = True

        submitted = False
+        transcription_failed = False
        wav_path = None
        try:
            if self._voice_recorder is None:
@@ -10275,18 +10270,24 @@ class HermesCLI:
            else:
                error = result.get("error", "Unknown error")
                _cprint(f"\n{_DIM}Transcription failed: {error}{_RST}")
+                transcription_failed = True

        except Exception as e:
            _cprint(f"\n{_DIM}Voice processing error: {e}{_RST}")
+            transcription_failed = wav_path is not None
        finally:
            with self._voice_lock:
                self._voice_processing = False
            if hasattr(self, '_app') and self._app:
                self._app.invalidate()
-            # Clean up temp file
+            # Clean up temp file unless transcription failed. On failure, keep
+            # the source recording so long dictation is not lost.
            try:
                if wav_path and os.path.isfile(wav_path):
-                    os.unlink(wav_path)
+                    if transcription_failed:
+                        _cprint(f"{_DIM}Recording preserved at: {wav_path}{_RST}")
+                    else:
+                        os.unlink(wav_path)
            except Exception:
                pass

@@ -14429,13 +14430,54 @@ def main(
            # Only print the final response and parseable session info.
            cli.tool_progress_mode = "off"
            if cli._ensure_runtime_credentials():
-                effective_query = query
+                effective_query: Any = query
                if single_query_images:
-                    effective_query = cli._preprocess_images_with_vision(
-                        query,
-                        single_query_images,
-                        announce=False,
-                    )
+                    # Honour the same image-routing decision used by the
+                    # interactive path. With a vision-capable model (incl.
+                    # custom-provider models declared via
+                    # `model.supports_vision: true`), attach images natively
+                    # as image_url content parts. Otherwise fall back to the
+                    # text-pipeline (vision_analyze pre-description).
+                    _img_mode = "text"
+                    _build_parts = None
+                    try:
+                        from agent.image_routing import (
+                            build_native_content_parts as _build_parts,  # noqa: F811
+                        )
+                        from agent.image_routing import decide_image_input_mode
+                        from hermes_cli.config import load_config
+
+                        _img_mode = decide_image_input_mode(
+                            (cli.provider or "").strip(),
+                            (cli.model or "").strip(),
+                            load_config(),
+                        )
+                    except Exception:
+                        _img_mode = "text"
+
+                    if _img_mode == "native" and _build_parts is not None:
+                        try:
+                            _parts, _skipped = _build_parts(
+                                query if isinstance(query, str) else "",
+                                [str(p) for p in single_query_images],
+                            )
+                            if any(p.get("type") == "image_url" for p in _parts):
+                                effective_query = _parts
+                            else:
+                                # All images unreadable — text fallback.
+                                effective_query = cli._preprocess_images_with_vision(
+                                    query, single_query_images, announce=False,
+                                )
+                        except Exception:
+                            effective_query = cli._preprocess_images_with_vision(
+                                query, single_query_images, announce=False,
+                            )
+                    else:
+                        effective_query = cli._preprocess_images_with_vision(
+                            query,
+                            single_query_images,
+                            announce=False,
+                        )
                turn_route = cli._resolve_turn_agent_config(effective_query)
                if turn_route["signature"] != cli._active_agent_route_signature:
                    cli.agent = None
@@ -830,6 +830,8 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["require_mention"] = platform_cfg["require_mention"]
                if plat == Platform.TELEGRAM and "allowed_chats" in platform_cfg:
                    bridged["allowed_chats"] = platform_cfg["allowed_chats"]
+                if plat == Platform.TELEGRAM and "group_allowed_chats" in platform_cfg:
+                    bridged["group_allowed_chats"] = platform_cfg["group_allowed_chats"]
                if plat == Platform.TELEGRAM and "allowed_topics" in platform_cfg:
                    bridged["allowed_topics"] = platform_cfg["allowed_topics"]
                if "free_response_channels" in platform_cfg:
@@ -838,6 +840,8 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
                if "exclusive_bot_mentions" in platform_cfg:
                    bridged["exclusive_bot_mentions"] = platform_cfg["exclusive_bot_mentions"]
+                if plat == Platform.TELEGRAM and "observe_unmentioned_group_messages" in platform_cfg:
+                    bridged["observe_unmentioned_group_messages"] = platform_cfg["observe_unmentioned_group_messages"]
                if "dm_policy" in platform_cfg:
                    bridged["dm_policy"] = platform_cfg["dm_policy"]
                if "allow_from" in platform_cfg:
@@ -1024,6 +1028,8 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_EXCLUSIVE_BOT_MENTIONS"] = str(telegram_cfg["exclusive_bot_mentions"]).lower()
                if "guest_mode" in telegram_cfg and not os.getenv("TELEGRAM_GUEST_MODE"):
                    os.environ["TELEGRAM_GUEST_MODE"] = str(telegram_cfg["guest_mode"]).lower()
+                if "observe_unmentioned_group_messages" in telegram_cfg and not os.getenv("TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES"):
+                    os.environ["TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES"] = str(telegram_cfg["observe_unmentioned_group_messages"]).lower()
                frc = telegram_cfg.get("free_response_chats")
                if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
                    if isinstance(frc, list):
@@ -1074,7 +1080,7 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(group_allowed_chats, list):
                        group_allowed_chats = ",".join(str(v) for v in group_allowed_chats)
                    os.environ["TELEGRAM_GROUP_ALLOWED_CHATS"] = str(group_allowed_chats)
-                for _telegram_extra_key in ("guest_mode", "disable_link_previews"):
+                for _telegram_extra_key in ("guest_mode", "disable_link_previews", "observe_unmentioned_group_messages"):
                    if _telegram_extra_key in telegram_cfg:
                        plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                        if not isinstance(plat_data, dict):
@@ -18,6 +18,7 @@ Security features (based on OWASP + NIST SP 800-63-4 guidance):
 Storage: ~/.hermes/pairing/
 """

+import hashlib
 import json
 import os
 import secrets
@@ -148,6 +149,11 @@ class PairingStore:

    # ----- Pending codes -----

+    @staticmethod
+    def _hash_code(code: str, salt: bytes) -> str:
+        """Hash a pairing code with the given salt using SHA-256.
+
+        Args:
+            code: The pairing code as text; it is UTF-8 encoded before hashing.
+            salt: Raw salt bytes, prepended to the encoded code.
+
+        Returns:
+            The hexadecimal SHA-256 digest of ``salt + code``.
+        """
+        return hashlib.sha256(salt + code.encode("utf-8")).hexdigest()
+
    def generate_code(
        self, platform: str, user_id: str, user_name: str = ""
    ) -> Optional[str]:
@@ -158,6 +164,9 @@ class PairingStore:
          - User is rate-limited (too recent request)
          - Max pending codes reached for this platform
          - User/platform is in lockout due to failed attempts
+
+        The code is NOT stored in plaintext.  Only a salted SHA-256 hash is
+        persisted so that reading the pending file does not reveal codes.
        """
        with self._lock:
            self._cleanup_expired(platform)
@@ -178,8 +187,17 @@ class PairingStore:
            # Generate cryptographically random code
            code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH))

-            # Store pending request
-            pending[code] = {
+            # Hash the code with a random salt before storing
+            salt = os.urandom(16)
+            code_hash = self._hash_code(code, salt)
+
+            # Use a unique entry id as the key (not the code itself)
+            entry_id = secrets.token_hex(8)
+
+            # Store pending request with hashed code
+            pending[entry_id] = {
+                "hash": code_hash,
+                "salt": salt.hex(),
                "user_id": user_id,
                "user_name": user_name,
                "created_at": time.time(),
@@ -195,10 +213,16 @@ class PairingStore:
        """
        Approve a pairing code. Adds the user to the approved list.

-        Returns {user_id, user_name} on success, None if code is
+        Returns ``{user_id, user_name}`` on success, ``None`` if the code is
        invalid/expired OR the platform is currently locked out after
        ``MAX_FAILED_ATTEMPTS`` failed approvals (#10195). Callers can
        disambiguate with ``_is_locked_out(platform)``.
+
+        Verification: the user-provided code is hashed with each stored
+        entry's salt and compared to the stored hash using constant-time
+        comparison. Pre-hash entries (legacy plaintext-key format from
+        pre-upgrade pending.json files) are silently ignored — they get
+        pruned at TTL by ``_cleanup_expired``.
        """
        with self._lock:
            self._cleanup_expired(platform)
@@ -213,34 +237,73 @@ class PairingStore:
                return None

            pending = self._load_json(self._pending_path(platform))
-            if code not in pending:
+
+            # Find the entry whose hash matches the provided code.
+            # Tolerate legacy plaintext-key entries (no salt/hash) and
+            # malformed entries — skip them rather than KeyError, so an
+            # in-place upgrade across an existing pending.json doesn't
+            # crash on the first approve call. Legacy entries get pruned
+            # at their TTL by _cleanup_expired.
+            matched_key = None
+            matched_entry = None
+            for entry_id, entry in pending.items():
+                if not isinstance(entry, dict):
+                    continue
+                stored_salt = entry.get("salt")
+                stored_hash = entry.get("hash")
+                # Legacy entries have no salt/hash at all; hand-edited or
+                # corrupted files may carry non-string values.  Both would
+                # raise TypeError from bytes.fromhex / compare_digest, so
+                # skip them like any other malformed entry.
+                if not isinstance(stored_salt, str) or not isinstance(stored_hash, str):
+                    continue
+                # compare_digest only accepts ASCII-only str operands; a
+                # non-ASCII stored hash can never match a hex digest anyway.
+                if not stored_hash.isascii():
+                    continue
+                try:
+                    salt = bytes.fromhex(stored_salt)
+                except ValueError:
+                    continue
+                candidate_hash = self._hash_code(code, salt)
+                if secrets.compare_digest(candidate_hash, stored_hash):
+                    matched_key = entry_id
+                    matched_entry = entry
+                    break
+
+            if matched_key is None:
                self._record_failed_attempt(platform)
                return None

-            entry = pending.pop(code)
+            del pending[matched_key]
            self._save_json(self._pending_path(platform), pending)

            # Add to approved list
-            self._approve_user(platform, entry["user_id"], entry.get("user_name", ""))
+            self._approve_user(platform, matched_entry["user_id"],
+                               matched_entry.get("user_name", ""))

            return {
-                "user_id": entry["user_id"],
-                "user_name": entry.get("user_name", ""),
+                "user_id": matched_entry["user_id"],
+                "user_name": matched_entry.get("user_name", ""),
            }

    def list_pending(self, platform: str = None) -> list:
-        """List pending pairing requests, optionally filtered by platform."""
+        """List pending pairing requests, optionally filtered by platform.
+
+        Codes are stored hashed — the ``code`` field is replaced with the
+        first 8 hex characters of the hash so admins can distinguish entries
+        without revealing the original code. Legacy plaintext-key entries
+        (pre-hash format) are shown with a "legacy" placeholder so admins
+        can see them age out without crashing on a missing ``hash`` field.
+        """
        results = []
        platforms = [platform] if platform else self._all_platforms("pending")
        for p in platforms:
            self._cleanup_expired(p)
            pending = self._load_json(self._pending_path(p))
-            for code, info in pending.items():
-                age_min = int((time.time() - info["created_at"]) / 60)
+            for entry_id, info in pending.items():
+                if not isinstance(info, dict):
+                    continue
+                created_at = info.get("created_at")
+                if not isinstance(created_at, (int, float)):
+                    continue
+                age_min = int((time.time() - created_at) / 60)
+                hash_val = info.get("hash")
+                code_display = hash_val[:8] if isinstance(hash_val, str) else "legacy"
                results.append({
                    "platform": p,
-                    "code": code,
-                    "user_id": info["user_id"],
+                    "code": code_display,
+                    "user_id": info.get("user_id", ""),
                    "user_name": info.get("user_name", ""),
                    "age_minutes": age_min,
                })
@@ -297,17 +360,29 @@ class PairingStore:
    # ----- Cleanup -----

    def _cleanup_expired(self, platform: str) -> None:
-        """Remove expired pending codes."""
+        """Remove expired pending codes.
+
+        Tolerant of malformed / legacy entries — anything without a numeric
+        ``created_at`` is treated as expired (it's effectively unusable
+        with the new hash-keyed schema anyway).
+        """
        path = self._pending_path(platform)
        pending = self._load_json(path)
        now = time.time()
-        expired = [
-            code for code, info in pending.items()
-            if (now - info["created_at"]) > CODE_TTL_SECONDS
-        ]
+        expired = []
+        for entry_id, info in pending.items():
+            if not isinstance(info, dict):
+                expired.append(entry_id)
+                continue
+            created_at = info.get("created_at")
+            if not isinstance(created_at, (int, float)):
+                expired.append(entry_id)
+                continue
+            if (now - created_at) > CODE_TTL_SECONDS:
+                expired.append(entry_id)
        if expired:
-            for code in expired:
-                del pending[code]
+            for entry_id in expired:
+                del pending[entry_id]
            self._save_json(path, pending)

    def _all_platforms(self, suffix: str) -> list:
@@ -2706,8 +2706,13 @@ class DiscordAdapter(BasePlatformAdapter):

        Discord's TYPING_START gateway event is unreliable in DMs for bots.
        Instead, start a background loop that hits the typing endpoint every
-        8 seconds (typing indicator lasts ~10s).  The loop is cancelled when
+        12 seconds (typing indicator lasts ~10s, so it may lapse briefly
+        between refreshes).  The loop is cancelled when
        stop_typing() is called (after the response is sent).
+
+        Rate-limit handling: if a 429 is encountered, the loop logs a
+        warning, sleeps for the ``retry_after`` duration reported by
+        ``_extract_discord_retry_after``, and continues — it does NOT die
+        on a single rate-limit hit.  The loop stops on CancelledError (from
+        stop_typing) or on any other error that carries no retry-after value.
        """
        if not self._client:
            return
@@ -2727,9 +2732,22 @@ class DiscordAdapter(BasePlatformAdapter):
                    except asyncio.CancelledError:
                        return
                    except Exception as e:
-                        logger.debug("Discord typing indicator failed for %s: %s", chat_id, e)
-                        return
-                    await asyncio.sleep(8)
+                        # Don't die on 429 — backoff and continue
+                        retry_after = self._extract_discord_retry_after(e)
+                        if retry_after is not None:
+                            logger.warning(
+                                "Typing indicator rate-limited for %s; retrying in %.1fs",
+                                chat_id, retry_after,
+                            )
+                        else:
+                            logger.debug(
+                                "Discord typing indicator failed for %s: %s",
+                                chat_id, e,
+                            )
+                            return
+                        await asyncio.sleep(retry_after)
+                        continue
+                    await asyncio.sleep(12)
            except asyncio.CancelledError:
                pass
            finally:
@@ -8,12 +8,14 @@ Uses python-telegram-bot library for:
 """

 import asyncio
+import dataclasses
 import json
 import logging
 import os
 import tempfile
 import html as _html
 import re
+from datetime import datetime, timezone
 from typing import Dict, List, Optional, Any

 logger = logging.getLogger(__name__)
@@ -4178,6 +4180,23 @@ class TelegramAdapter(BasePlatformAdapter):
            return bool(configured)
        return os.getenv("TELEGRAM_REQUIRE_MENTION", "false").lower() in {"true", "1", "yes", "on"}

+    def _telegram_observe_unmentioned_group_messages(self) -> bool:
+        """Return whether skipped unmentioned group messages are stored as context.
+
+        When enabled with ``require_mention``, Telegram matches the Yuanbao /
+        OpenClaw-style group UX: observe ordinary group chatter in the session
+        transcript, but only dispatch the agent when the bot is explicitly
+        addressed.
+
+        Resolution order: ``config.extra["observe_unmentioned_group_messages"]``,
+        then ``config.extra["ingest_unmentioned_group_messages"]``, then the
+        ``TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES`` environment variable
+        (default ``"false"``).  String values are true only when they equal
+        ``true``/``1``/``yes``/``on`` case-insensitively; any other configured
+        value is coerced with ``bool()``.
+        """
+        configured = self.config.extra.get("observe_unmentioned_group_messages")
+        if configured is None:
+            # Alternate key, consulted only when the primary key is unset.
+            configured = self.config.extra.get("ingest_unmentioned_group_messages")
+        if configured is not None:
+            if isinstance(configured, str):
+                return configured.lower() in {"true", "1", "yes", "on"}
+            return bool(configured)
+        # Neither config key is set: fall back to the environment variable.
+        return os.getenv("TELEGRAM_OBSERVE_UNMENTIONED_GROUP_MESSAGES", "false").lower() in {"true", "1", "yes", "on"}
+
    def _telegram_guest_mode(self) -> bool:
        """Return whether non-allowlisted groups may trigger via direct @mention."""
        configured = self.config.extra.get("guest_mode")
@@ -4219,6 +4238,30 @@ class TelegramAdapter(BasePlatformAdapter):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

+    def _telegram_group_allowed_chats(self) -> set[str]:
+        """Return Telegram chats authorized at group scope.
+
+        Reads ``config.extra["group_allowed_chats"]``; when that key is unset
+        the ``TELEGRAM_GROUP_ALLOWED_CHATS`` environment variable is used
+        (default empty).  A list is taken element by element; any other value
+        is stringified and split on commas.  Entries are stripped and blank
+        entries dropped, so the result is a set of non-empty strings.
+        """
+        raw = self.config.extra.get("group_allowed_chats")
+        if raw is None:
+            raw = os.getenv("TELEGRAM_GROUP_ALLOWED_CHATS", "")
+        if isinstance(raw, list):
+            return {str(part).strip() for part in raw if str(part).strip()}
+        # Scalar or comma-separated string form.
+        return {part.strip() for part in str(raw).split(",") if part.strip()}
+
+    def _telegram_observe_allowed_chats(self) -> set[str]:
+        """Chats where observed group context may use a shared source.
+
+        ``group_allowed_chats`` is the gateway authorization allowlist for
+        user-less group sources.  ``allowed_chats`` remains an optional response
+        gate; when set, observed context must satisfy both lists.
+
+        Returns:
+            An empty set when ``group_allowed_chats`` is empty; otherwise the
+            group allowlist, intersected with ``allowed_chats`` when that list
+            is non-empty.
+        """
+        group_allowed = self._telegram_group_allowed_chats()
+        if not group_allowed:
+            # No group allowlist means no chat qualifies for observation.
+            return set()
+        response_allowed = self._telegram_allowed_chats()
+        if response_allowed:
+            return group_allowed & response_allowed
+        return group_allowed
+
    def _telegram_allowed_topics(self) -> set[str]:
        """Return the whitelist of Telegram forum topic IDs this bot handles.

@@ -4466,6 +4509,126 @@ class TelegramAdapter(BasePlatformAdapter):
        cleaned = re.sub(rf"(?i)@{username}\b[,:\-]*\s*", "", text).strip()
        return cleaned or text

+    def _should_observe_unmentioned_group_message(self, message: Message) -> bool:
+        """Return True when a group message should be stored but not dispatched.
+
+        Every gate below must pass: the observe feature is enabled, the
+        message is from a group chat, its topic/thread is neither outside the
+        topic allowlist nor ignored, it is not rejected by the exclusive
+        bot-mention check, the chat is in the observe allowlist, and the
+        message is one that the ``require_mention`` gate skipped (no
+        free-response chat, no reply to the bot, no mention, no
+        mention-pattern match).
+        """
+        if not self._telegram_observe_unmentioned_group_messages():
+            return False
+        if not self._is_group_chat(message):
+            return False
+
+        thread_id = getattr(message, "message_thread_id", None)
+        allowed_topics = self._telegram_allowed_topics()
+        if allowed_topics:
+            # Messages without a thread id are checked against the General
+            # topic id constant.
+            topic_id = str(thread_id) if thread_id is not None else self._GENERAL_TOPIC_THREAD_ID
+            if topic_id not in allowed_topics:
+                return False
+
+        if thread_id is not None:
+            try:
+                if int(thread_id) in self._telegram_ignored_threads():
+                    return False
+            except (TypeError, ValueError):
+                # A thread id that cannot be read as an integer is not observed.
+                return False
+
+        chat_id_str = str(getattr(getattr(message, "chat", None), "id", ""))
+        if self._telegram_exclusive_bot_mentions() and self._explicit_bot_mentions_exclude_self(message):
+            return False
+
+        allowed = self._telegram_observe_allowed_chats()
+        # Observed context is shared at chat/topic scope so a later trigger from
+        # another user can see it.  Require an explicit chat allowlist; that
+        # keeps shared observed history limited to operator-approved groups and
+        # lets gateway authorization pass even after the shared session source
+        # drops the per-sender user_id.
+        if not allowed or chat_id_str not in allowed:
+            return False
+
+        # Only observe messages skipped by the require_mention gate.  If the
+        # message would be processed normally, let the dispatcher handle it;
+        # if require_mention is disabled, every group message is a request.
+        if chat_id_str in self._telegram_free_response_chats():
+            return False
+        if not self._telegram_require_mention():
+            return False
+        if self._is_reply_to_bot(message):
+            return False
+        if self._message_mentions_bot(message):
+            return False
+        if self._message_matches_mention_patterns(message):
+            return False
+        return True
+
+    def _telegram_group_observe_shared_source(self, source):
+        """Return a chat/topic-scoped source for observed Telegram group context.
+
+        Produces a copy of ``source`` (via ``dataclasses.replace``) with
+        ``user_id``, ``user_name`` and ``user_id_alt`` cleared; all other
+        fields are carried over unchanged.
+        """
+        return dataclasses.replace(source, user_id=None, user_name=None, user_id_alt=None)
+
+    def _telegram_group_observe_attributed_text(self, event: MessageEvent) -> str:
+        """Prefix the event text with a ``[sender|user_id]`` header line.
+
+        ``user_id`` falls back to ``"unknown"`` when missing; ``sender`` is the
+        source's ``user_name``, falling back to that ``user_id`` value.  A
+        missing text is rendered as an empty string after the header.
+        """
+        user_id = event.source.user_id or "unknown"
+        sender = event.source.user_name or user_id
+        return f"[{sender}|{user_id}]\n{event.text or ''}"
+
+    def _telegram_group_observe_channel_prompt(self) -> str:
+        """Build the prompt text that explains observed-group attribution.
+
+        The bot's ``id`` and ``username`` are read from ``self._bot``; each
+        falls back to ``"unknown"`` when the bot object or the attribute is
+        missing or falsy.  The returned text tells the model that history
+        lines prefixed with ``[nickname|user_id]`` are observed context and
+        that only the current message is addressed to it.
+        """
+        username = getattr(getattr(self, "_bot", None), "username", None) or "unknown"
+        bot_id = getattr(getattr(self, "_bot", None), "id", None) or "unknown"
+        return (
+            "You are handling a Telegram group chat message.\n"
+            f"- Your identity: user_id={bot_id}, @-mention name in this group=@{username}\n"
+            "- Lines in history prefixed with `[nickname|user_id]` are observed Telegram group context "
+            "and are not necessarily addressed to you.\n"
+            "- Treat only the current new message as a request explicitly directed at you, "
+            "and answer it directly."
+        )
+
+    def _apply_telegram_group_observe_attribution(self, event: MessageEvent) -> MessageEvent:
+        """Align triggered group turns with observed-history attribution.
+
+        Returns ``event`` unchanged unless the observe feature is enabled, the
+        event carries a group-chat ``raw_message``, and that chat is in the
+        observe allowlist.  Otherwise returns a copy whose text carries the
+        ``[sender|user_id]`` header, whose source has the sender identity
+        cleared, and whose channel prompt has the observe prompt appended
+        (or set, when the event had none).
+        """
+        if not self._telegram_observe_unmentioned_group_messages():
+            return event
+        raw_message = getattr(event, "raw_message", None)
+        if not raw_message or not self._is_group_chat(raw_message):
+            return event
+        chat_id_str = str(getattr(getattr(raw_message, "chat", None), "id", ""))
+        allowed = self._telegram_observe_allowed_chats()
+        if not allowed or chat_id_str not in allowed:
+            return event
+        shared_source = self._telegram_group_observe_shared_source(event.source)
+        observe_prompt = self._telegram_group_observe_channel_prompt()
+        channel_prompt = f"{event.channel_prompt}\n\n{observe_prompt}" if event.channel_prompt else observe_prompt
+        # The attributed text is computed from the original event, so the
+        # header still names the real sender even though the replaced source
+        # no longer does.
+        # NOTE(review): after this rewrite the text starts with "[" rather than
+        # the original first character.  COMMAND events are routed through
+        # this method too — confirm downstream command parsing does not rely
+        # on the text starting with "/".
+        return dataclasses.replace(
+            event,
+            text=self._telegram_group_observe_attributed_text(event),
+            source=shared_source,
+            channel_prompt=channel_prompt,
+        )
+
+    def _observe_unmentioned_group_message(self, message: Message, msg_type: MessageType, update_id: Optional[int] = None) -> None:
+        """Append skipped group chatter to the target session without dispatching.
+
+        Builds the regular message event, clears the sender identity from its
+        source to resolve the shared session, and appends one ``role="user"``
+        transcript entry flagged ``observed``.  Does nothing when no
+        ``_session_store`` is attached.  Any ``Exception`` raised along the
+        way is caught and logged as a warning rather than propagated.
+
+        Args:
+            message: The Telegram message that was not dispatched.
+            msg_type: Message type passed through to ``_build_message_event``.
+            update_id: Optional update id passed through to
+                ``_build_message_event``.
+        """
+        store = getattr(self, "_session_store", None)
+        if not store:
+            return
+        try:
+            event = self._build_message_event(message, msg_type, update_id=update_id)
+            shared_source = self._telegram_group_observe_shared_source(event.source)
+            session_entry = store.get_or_create_session(shared_source)
+            entry = {
+                "role": "user",
+                "content": self._telegram_group_observe_attributed_text(event),
+                "timestamp": datetime.now(tz=timezone.utc).isoformat(),
+                "observed": True,
+            }
+            # The platform message id is recorded only when the event has one.
+            if event.message_id:
+                entry["message_id"] = str(event.message_id)
+            store.append_to_transcript(session_entry.session_id, entry)
+            adapter_name = getattr(self, "name", "telegram")
+            logger.info(
+                "[%s] Telegram group message observed (no bot trigger): chat=%s from=%s",
+                adapter_name,
+                getattr(getattr(message, "chat", None), "id", "unknown"),
+                event.source.user_id or "unknown",
+            )
+        except Exception as exc:
+            adapter_name = getattr(self, "name", "telegram")
+            logger.warning("[%s] Failed to observe Telegram group message: %s", adapter_name, exc)
+
    def _should_process_message(self, message: Message, *, is_command: bool = False) -> bool:
        """Apply Telegram group trigger rules.

@@ -4590,11 +4753,14 @@ class TelegramAdapter(BasePlatformAdapter):
        if not msg or not msg.text:
            return
        if not self._should_process_message(msg):
+            if self._should_observe_unmentioned_group_message(msg):
+                self._observe_unmentioned_group_message(msg, MessageType.TEXT, update_id=update.update_id)
            return
        await self._ensure_forum_commands(update.message)

        event = self._build_message_event(msg, MessageType.TEXT, update_id=update.update_id)
        event.text = self._clean_bot_trigger_text(event.text)
+        event = self._apply_telegram_group_observe_attribution(event)
        self._enqueue_text_event(event)

    async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -4607,6 +4773,8 @@ class TelegramAdapter(BasePlatformAdapter):
        await self._ensure_forum_commands(msg)

        event = self._build_message_event(msg, MessageType.COMMAND, update_id=update.update_id)
+        event.text = self._clean_bot_trigger_text(event.text)
+        event = self._apply_telegram_group_observe_attribution(event)
        await self.handle_message(event)

    async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
@@ -4615,6 +4783,8 @@ class TelegramAdapter(BasePlatformAdapter):
        if not msg:
            return
        if not self._should_process_message(msg):
+            if self._should_observe_unmentioned_group_message(msg):
+                self._observe_unmentioned_group_message(msg, MessageType.LOCATION, update_id=update.update_id)
            return

        venue = getattr(msg, "venue", None)
@@ -4644,6 +4814,7 @@ class TelegramAdapter(BasePlatformAdapter):

        event = self._build_message_event(msg, MessageType.LOCATION, update_id=update.update_id)
        event.text = "\n".join(parts)
+        event = self._apply_telegram_group_observe_attribution(event)
        await self.handle_message(event)

    # ------------------------------------------------------------------
@@ -4788,8 +4959,23 @@ class TelegramAdapter(BasePlatformAdapter):
        if not update.message:
            return
        if not self._should_process_message(update.message):
+            if self._should_observe_unmentioned_group_message(update.message):
+                _m = update.message
+                if _m.sticker:
+                    _observe_type = MessageType.STICKER
+                elif _m.photo:
+                    _observe_type = MessageType.PHOTO
+                elif _m.video:
+                    _observe_type = MessageType.VIDEO
+                elif _m.audio:
+                    _observe_type = MessageType.AUDIO
+                elif _m.voice:
+                    _observe_type = MessageType.VOICE
+                else:
+                    _observe_type = MessageType.DOCUMENT
+                self._observe_unmentioned_group_message(_m, _observe_type, update_id=update.update_id)
            return
-        
+
        msg = update.message
        
        # Determine media type
@@ -4817,9 +5003,14 @@ class TelegramAdapter(BasePlatformAdapter):
        # Handle stickers: describe via vision tool with caching
        if msg.sticker:
            await self._handle_sticker(msg, event)
+            event = self._apply_telegram_group_observe_attribution(event)
            await self.handle_message(event)
            return
-        
+
+        # Apply observe attribution after caption is set; sticker is handled above
+        # because _handle_sticker overwrites event.text with its vision description.
+        event = self._apply_telegram_group_observe_attribution(event)
+
        # Download photo to local image cache so the vision tool can access it
        # even after Telegram's ephemeral file URLs expire (~1 hour).
        if msg.photo:
@@ -308,11 +308,26 @@ class WebhookAdapter(BasePlatformAdapter):
            data = json.loads(subs_path.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                return
-            # Merge: static routes take precedence over dynamic ones
-            self._dynamic_routes = {
-                k: v for k, v in data.items()
-                if k not in self._static_routes
-            }
+            # Merge: static routes take precedence over dynamic ones.
+            # Reject any dynamic route whose effective secret is empty —
+            # an empty secret would cause _handle_webhook to skip HMAC
+            # validation entirely, letting unauthenticated callers in.
+            new_dynamic: Dict[str, dict] = {}
+            for k, v in data.items():
+                if k in self._static_routes:
+                    continue
+                # A route entry must be a mapping; anything else (string,
+                # list, null) cannot carry a secret and would raise
+                # AttributeError on .get(), aborting the whole reload
+                # instead of skipping just the one bad route.
+                if not isinstance(v, dict):
+                    logger.warning(
+                        "[webhook] Dynamic route '%s' skipped: expected a "
+                        "mapping, got %s.",
+                        k,
+                        type(v).__name__,
+                    )
+                    continue
+                # A route-level "secret" overrides the global one; an
+                # explicitly empty value is treated the same as missing.
+                effective_secret = v.get("secret", self._global_secret)
+                if not effective_secret:
+                    logger.warning(
+                        "[webhook] Dynamic route '%s' skipped: 'secret' is "
+                        "missing or empty. Set a valid HMAC secret, or use "
+                        "'%s' to explicitly disable auth (testing only).",
+                        k,
+                        _INSECURE_NO_AUTH,
+                    )
+                    continue
+                new_dynamic[k] = v
+            self._dynamic_routes = new_dynamic
            self._routes = {**self._dynamic_routes, **self._static_routes}
            self._dynamic_routes_mtime = mtime
            logger.info(
@@ -1410,33 +1410,43 @@ class RecallGuardMiddleware(InboundMiddleware):
            logger.warning("[%s] Recall: failed to resolve session: %s", adapter.name, exc)
            return

-        # Load transcript from canonical store (state.db). See Branch A below
-        # for why we can no longer match by platform `message_id`.
+        # Load transcript from canonical store (state.db).  Since PR #29278
+        # added a ``platform_message_id`` column to the messages table and
+        # ``append_to_transcript`` wires the incoming dict's ``message_id``
+        # into it, ``load_transcript`` returns rows with ``message_id`` set
+        # for any message that was observed with one — Branch A1 (exact id
+        # match) is the canonical path again.
        try:
            transcript = store.load_transcript(sid)
        except Exception as exc:
            logger.warning("[%s] Recall: failed to load transcript: %s", adapter.name, exc)
            return

-        # Branch A: content-match redaction. state.db does NOT preserve the
-        # platform `message_id` (only its own autoincrement primary key), so we
-        # cannot redact by exact id. Match by content instead. Most yuanbao
-        # recalls carry the recalled text via `recalled_content`, which is
-        # sufficient for any non-duplicate message.
-        #
-        # TODO: add a `platform_message_id` column to state.db messages to
-        # restore exact-id matching. Tracked separately.
+        # Branch A1: exact platform message_id match. Authoritative when the
+        # row was persisted with a platform_message_id (observed group
+        # messages and any inbound message whose adapter carried a msg_id).
        target = None
-        if recalled_content:
+        branch_label = ""
+        for entry in transcript:
+            if entry.get("message_id") == recalled_id:
+                target = entry
+                branch_label = "branch A1: id match"
+                break
+        # Branch A2: content-match fallback for messages that lack an exact
+        # platform id on the row — e.g. agent-processed @bot messages
+        # (run.py doesn't carry msg_id through) or older rows persisted
+        # before the platform_message_id column existed.
+        if target is None and recalled_content:
            for entry in transcript:
                if entry.get("role") == "user" and entry.get("content") == recalled_content:
                    target = entry
+                    branch_label = "branch A2: content match"
                    break
        if target is not None:
            target["content"] = cls._REDACTED
            try:
                store.rewrite_transcript(sid, transcript)
-                logger.info("[%s] Recall: redacted msg_id=%s (branch A: content match)", adapter.name, recalled_id)
+                logger.info("[%s] Recall: redacted msg_id=%s (%s)", adapter.name, recalled_id, branch_label)
            except Exception as exc:
                logger.warning("[%s] Recall: rewrite_transcript failed: %s", adapter.name, exc)
            return
@@ -1109,7 +1109,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
    normalized = command_name.lower().replace("_", "-")
    try:
        from tools.skills_tool import _get_disabled_skill_names
-        from agent.skill_utils import get_all_skills_dirs
+        from agent.skill_utils import get_all_skills_dirs, is_excluded_skill_path
        disabled = _get_disabled_skill_names()

        # Check disabled skills across all dirs (local + external)
@@ -1117,7 +1117,7 @@ def _check_unavailable_skill(command_name: str) -> str | None:
            if not skills_dir.exists():
                continue
            for skill_md in skills_dir.rglob("SKILL.md"):
-                if any(part in {'.git', '.github', '.hub', '.archive'} for part in skill_md.parts):
+                if is_excluded_skill_path(skill_md):
                    continue
                slug, declared_name = _skill_slug_from_frontmatter(skill_md)
                if not slug or not declared_name:
@@ -1136,6 +1136,8 @@ def _check_unavailable_skill(command_name: str) -> str | None:
        optional_dir = get_optional_skills_dir(repo_root / "optional-skills")
        if optional_dir.exists():
            for skill_md in optional_dir.rglob("SKILL.md"):
+                if is_excluded_skill_path(skill_md):
+                    continue
                slug, _declared = _skill_slug_from_frontmatter(skill_md)
                if not slug:
                    continue
@@ -1271,6 +1271,12 @@ class SessionStore:
                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
                    codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
+                    # Platform-side message id (yuanbao msg_id, telegram update_id, …).
+                    # Accept either explicit ``platform_message_id`` or the legacy
+                    # ``message_id`` key the JSONL transcript used.
+                    platform_message_id=(
+                        message.get("platform_message_id") or message.get("message_id")
+                    ),
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -48,7 +48,7 @@ import httpx
 import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
-from hermes_constants import OPENROUTER_BASE_URL
+from hermes_constants import OPENROUTER_BASE_URL, secure_parent_dir
 from utils import atomic_replace, atomic_yaml_write, is_truthy_value

 logger = logging.getLogger(__name__)
@@ -1030,10 +1030,8 @@ def _save_auth_store(auth_store: Dict[str, Any]) -> Path:
    auth_file.parent.mkdir(parents=True, exist_ok=True)
    # Tighten parent dir to 0o700 so siblings can't traverse to creds.
    # No-op on Windows (POSIX mode bits not enforced); ignore failures.
-    try:
-        os.chmod(auth_file.parent, 0o700)
-    except OSError:
-        pass
+    # secure_parent_dir refuses to chmod / or top-level dirs (#25821).
+    secure_parent_dir(auth_file)
    auth_store["version"] = AUTH_STORE_VERSION
    auth_store["updated_at"] = datetime.now(timezone.utc).isoformat()
    payload = json.dumps(auth_store, indent=2) + "\n"
@@ -1863,10 +1861,8 @@ def _read_qwen_cli_tokens() -> Dict[str, Any]:
 def _save_qwen_cli_tokens(tokens: Dict[str, Any]) -> Path:
    auth_path = _qwen_cli_auth_path()
    auth_path.parent.mkdir(parents=True, exist_ok=True)
-    try:
-        os.chmod(auth_path.parent, 0o700)
-    except OSError:
-        pass
+    # secure_parent_dir refuses to chmod / or top-level dirs (#25821).
+    secure_parent_dir(auth_path)
    # Per-process random temp suffix avoids collisions between concurrent
    # writers and stale leftovers from a crashed prior write.
    tmp_path = auth_path.with_name(f"{auth_path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
@@ -4168,10 +4164,8 @@ def _write_shared_nous_state(state: Dict[str, Any]) -> None:
        with _nous_shared_store_lock():
            path = _nous_shared_store_path()
            path.parent.mkdir(parents=True, exist_ok=True)
-            try:
-                os.chmod(path.parent, 0o700)
-            except OSError:
-                pass
+            # secure_parent_dir refuses to chmod / or top-level dirs (#25821).
+            secure_parent_dir(path)
            tmp = path.with_name(f"{path.name}.tmp.{os.getpid()}.{uuid.uuid4().hex}")
            # Create with 0o600 atomically via os.open(O_EXCL) — closes the TOCTOU
            # window where write_text() + post-write chmod briefly exposed Nous
@@ -508,6 +508,68 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
    return result


+_TELEGRAM_MENU_PRIORITY = (
+    # Most-typed everyday commands first.
+    "help",
+    "new",
+    "stop",
+    "status",
+    "resume",
+    "sessions",
+    "model",
+    # Maintenance / diagnostics — the ones that prompted this priority list.
+    "debug",
+    "restart",
+    "update",
+    "verbose",
+    "commands",
+    # Mid-turn session control.
+    "approve",
+    "deny",
+    "queue",
+    "steer",
+    "background",
+    # Lower-priority but still useful operational built-ins.
+    "reasoning",
+    "usage",
+    "platforms",
+    "platform",
+    "profile",
+    "whoami",
+)
+"""Built-in commands that should stay visible in Telegram's capped menu.
+
+Telegram only displays a small BotCommand menu in practice.  The full Hermes
+registry is still dispatchable when typed manually, but operational commands
+need to survive the visible menu cap ahead of lower-priority built-ins.
+"""
+
+
+def _prioritize_telegram_menu_commands(
+    commands: list[tuple[str, str]],
+) -> list[tuple[str, str]]:
+    """Move prioritized built-ins to the front of a Telegram command list.
+
+    Commands whose name matches a sanitized ``_TELEGRAM_MENU_PRIORITY``
+    entry come first, ordered by their position in that tuple; every other
+    command follows in its original relative order.
+    """
+    rank: dict[str, int] = {}
+    for position, raw_name in enumerate(_TELEGRAM_MENU_PRIORITY):
+        rank[_sanitize_telegram_name(raw_name)] = position
+
+    def _menu_rank(indexed):
+        # ``indexed`` is an (original_index, command) pair from enumerate().
+        original_index, entry = indexed
+        name = entry[0]
+        if name in rank:
+            return (0, rank[name], original_index)
+        return (1, original_index)
+
+    ordered = sorted(enumerate(commands), key=_menu_rank)
+    return [entry for _original_index, entry in ordered]
+
+
 _CMD_NAME_LIMIT = 32
 """Max command name length shared by Telegram and Discord."""

@@ -721,11 +783,12 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str

    Returns:
        (menu_commands, hidden_count) where hidden_count is the number of
-        skill commands omitted due to the cap.
+        commands omitted due to the cap.
    """
-    core_commands = list(telegram_bot_commands())
+    core_commands = _prioritize_telegram_menu_commands(list(telegram_bot_commands()))
    reserved_names = {n for n, _ in core_commands}
    all_commands = list(core_commands)
+    hidden_core_count = max(0, len(all_commands) - max_commands)

    remaining_slots = max(0, max_commands - len(all_commands))
    entries, hidden_count = _collect_gateway_skill_entries(
@@ -737,7 +800,7 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
    )
    # Drop the cmd_key — Telegram only needs (name, desc) pairs.
    all_commands.extend((n, d) for n, d, _k in entries)
-    return all_commands[:max_commands], hidden_count
+    return all_commands[:max_commands], hidden_count + hidden_core_count


 def discord_skill_commands(
@@ -1648,6 +1648,15 @@ DEFAULT_CONFIG = {
        # the sweep on every CLI invocation).  Tracked via state_meta in
        # state.db itself, so it's shared across all processes.
        "min_interval_hours": 24,
+        # Legacy per-session JSON snapshot writer.  When true, the agent
+        # rewrites ``~/.hermes/sessions/session_{sid}.json`` on every turn
+        # boundary with the full message list.  state.db is canonical and
+        # has every field the snapshot stored (plus per-message timestamps
+        # and token counts), so this is off by default — the snapshots had
+        # no consumer outside their own overwrite guard and accumulated
+        # GBs of disk on heavy users.  Opt in only if you have an external
+        # tool that consumes the JSON files directly.
+        "write_json_snapshots": False,
    },

    # Contextual first-touch onboarding hints (see agent/onboarding.py).
@@ -1738,8 +1747,48 @@ DEFAULT_CONFIG = {
        "retries": 2,
    },

+    # =========================================================================
+    # External secret sources
+    # =========================================================================
+    # Pull credentials from external secret managers at process startup
+    # rather than storing them in ~/.hermes/.env.
+    "secrets": {
+        "bitwarden": {
+            # Master switch.  When false, BSM is never contacted and the
+            # bws binary is never auto-installed — same as not having
+            # this section at all.
+            "enabled": False,
+            # Name of the env var that holds the Bitwarden machine-account
+            # access token.  This is the one bootstrap secret; it lives
+            # in ~/.hermes/.env (or your shell) and never in config.yaml.
+            "access_token_env": "BWS_ACCESS_TOKEN",
+            # UUID of the BSM project to sync from.
+            "project_id": "",
+            # Seconds to cache fetched secrets in-process.  0 disables.
+            "cache_ttl_seconds": 300,
+            # When True, BSM values overwrite existing env vars.  Default
+            # True because the point of using BSM is centralized rotation —
+            # if .env had the final say, rotating in Bitwarden wouldn't
+            # take effect until you also cleared the matching .env line.
+            "override_existing": True,
+            # When True, the bws binary is auto-downloaded into
+            # ~/.hermes/bin/ on first use.  When False you must install
+            # bws yourself and have it on PATH.
+            "auto_install": True,
+        },
+    },
+
+    # ── Nous Portal feature flags ──────────────────────────────────────
+    "portal": {
+        # App tools: 500+ external app integrations (Gmail, Slack, GitHub,
+        # Notion, etc.) via the Nous tool gateway.  Requires an active Nous
+        # subscription.  Set to False to hide the app_tools toolset even
+        # when a subscription is present.
+        "app_tools": True,
+    },
+
    # Config schema version - bump this when adding new required fields
-    "_config_version": 23,
+    "_config_version": 24,
 }

 # =============================================================================
@@ -2227,6 +2276,22 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
        "advanced": True,
    },
+    "TOOLS_GATEWAY_URL": {
+        "description": "Explicit URL for the tools-gateway (app integrations). Overrides the auto-derived tools-gateway.nousresearch.com",
+        "prompt": "Tools-gateway URL",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
+    "PORTAL_APP_TOOLS": {
+        "description": "Enable app integration tools (500+ apps via Nous tool gateway). Requires Nous subscription.",
+        "prompt": "Enable app tools (500+ apps)",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
    "TAVILY_API_KEY": {
        "description": "Tavily API key for AI-native web search, extract, and crawl",
        "prompt": "Tavily API key",
@@ -3008,7 +3073,7 @@ def _normalize_custom_provider_entry(
        "api_mode", "transport", "model", "default_model", "models",
        "context_length", "rate_limit_delay",
        "request_timeout_seconds", "stale_timeout_seconds",
-        "discover_models",
+        "discover_models", "extra_body",
    }
    for camel, snake in _CAMEL_ALIASES.items():
        if camel in entry and snake not in entry:
@@ -3103,6 +3168,10 @@ def _normalize_custom_provider_entry(
    if isinstance(discover_models, bool):
        normalized["discover_models"] = discover_models

+    extra_body = entry.get("extra_body")
+    if isinstance(extra_body, dict):
+        normalized["extra_body"] = dict(extra_body)
+
    return normalized


@@ -3257,13 +3326,13 @@ _KNOWN_ROOT_KEYS = {
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
    "auxiliary", "custom_providers", "context", "memory", "gateway",
-    "sessions",
+    "sessions", "portal",
 }

 # Valid fields inside a custom_providers list entry
 _VALID_CUSTOM_PROVIDER_FIELDS = {
    "name", "base_url", "api_key", "api_mode", "model", "models",
-    "context_length", "rate_limit_delay",
+    "context_length", "rate_limit_delay", "extra_body",
    # key_env is read at runtime by runtime_provider.py and auxiliary_client.py
    # — include it here so the set accurately describes the supported schema.
    "key_env",
@@ -3920,6 +3989,26 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                        f"{', '.join(added_aux)}"
                    )

+    # ── Version 23 → 24: inject app_tools into saved platform_toolsets ──
+    # The portal.app_tools config flag is handled by deep-merge (DEFAULT_CONFIG
+    # has it, so load_config() always includes it). But platform_toolsets are
+    # user-owned lists that deep-merge can't append to — existing users who
+    # ran `hermes tools` have a saved list that won't include app_tools.
+    if current_ver < 24:
+        config = read_raw_config()
+        pt = config.get("platform_toolsets")
+        if isinstance(pt, dict):
+            patched = False
+            for plat_key, ts_list in pt.items():
+                if isinstance(ts_list, list) and "app_tools" not in ts_list:
+                    ts_list.append("app_tools")
+                    patched = True
+            if patched:
+                save_config(config)
+                results["config_added"].append("app_tools added to platform_toolsets")
+                if not quiet:
+                    print("  ✓ Added app_tools to saved platform toolset lists")
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -71,7 +71,7 @@ def curses_checklist(
                curses.use_default_colors()
                curses.init_pair(1, curses.COLOR_GREEN, -1)
                curses.init_pair(2, curses.COLOR_YELLOW, -1)
-                curses.init_pair(3, 8, -1)  # dim gray
+                curses.init_pair(3, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim gray
            cursor = 0
            scroll_offset = 0

@@ -777,7 +777,33 @@ def run_doctor(args):
        except Exception:
            pass

+    _section("xAI Model Retirement (May 15, 2026)")
+
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.xai_retirement import (
+            MIGRATION_GUIDE_URL,
+            find_retired_xai_refs,
+            format_issue,
+        )
+
+        _xai_cfg = load_config()
+        retired_refs = find_retired_xai_refs(_xai_cfg)
+        if not retired_refs:
+            check_ok("No retired xAI models in config")
+        else:
+            for ref in retired_refs:
+                check_warn(format_issue(ref))
+            check_info(f"Migration guide: {MIGRATION_GUIDE_URL}")
+            manual_issues.append(
+                f"Update {len(retired_refs)} retired xAI model reference(s) "
+                f"in config.yaml — see {MIGRATION_GUIDE_URL}"
+            )
+    except Exception as _xai_check_err:
+        check_warn("xAI retirement check skipped", f"({_xai_check_err})")
+
    _section("Auth Providers")
+
    try:
        from hermes_cli.auth import (
            get_nous_auth_status,
@@ -16,6 +16,7 @@ from pathlib import Path
 from hermes_cli.config import get_hermes_home, get_env_path, get_project_root, load_config
 from hermes_cli.env_loader import load_hermes_dotenv
 from hermes_constants import display_hermes_home
+from agent.skill_utils import is_excluded_skill_path


 def _get_git_commit(project_root: Path) -> str:
@@ -69,6 +70,8 @@ def _count_skills(hermes_home: Path) -> int:
        return 0
    count = 0
    for item in skills_dir.rglob("SKILL.md"):
+        if is_excluded_skill_path(item):
+            continue
        count += 1
    return count

@@ -21,6 +21,44 @@ _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")
 # tests) don't spam the same warning multiple times.
 _WARNED_KEYS: set[str] = set()

+# Map of env-var name → source label ("bitwarden", etc.) for credentials
+# that were injected by an external secret source during load_hermes_dotenv().
+# Used by setup / `hermes model` flows to label detected credentials so
+# users understand WHERE a key came from when their .env doesn't contain it
+# directly (otherwise the "credentials detected ✓" line looks identical to
+# the .env case and they don't know Bitwarden is wired up).
+_SECRET_SOURCES: dict[str, str] = {}
+
+
+def get_secret_source(env_var: str) -> str | None:
+    """Look up which external secret source supplied ``env_var``.
+
+    The answer comes from ``_SECRET_SOURCES``, which is filled in while
+    ``load_hermes_dotenv()`` applies external sources (``"bitwarden"`` for
+    Bitwarden Secrets Manager).  ``None`` means the variable is untracked —
+    it came from ``.env``, the shell environment, or nowhere at all.
+    """
+    try:
+        return _SECRET_SOURCES[env_var]
+    except KeyError:
+        return None
+
+
+def format_secret_source_suffix(env_var: str) -> str:
+    """Build the ``" (from <source>)"`` label for a detected credential.
+
+    Returns ``""`` when ``env_var`` has no tracked external source (the
+    ``.env`` / shell cases users already understand), ``" (from Bitwarden)"``
+    for Bitwarden-supplied values, and ``" (from <label>)"`` for any other
+    tracked source so new secret backends need no call-site changes.
+    """
+    label = get_secret_source(env_var)
+    if label == "bitwarden":
+        return " (from Bitwarden)"
+    # Generic fallback for future sources (e.g. 1Password, HashiCorp Vault).
+    return f" (from {label})" if label else ""
+

 def _format_offending_chars(value: str, limit: int = 3) -> str:
    """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
@@ -172,4 +210,87 @@ def load_hermes_dotenv(
        _load_dotenv_with_fallback(project_env_path, override=not loaded)
        loaded.append(project_env_path)

+    _apply_external_secret_sources(home_path)
+
    return loaded
+
+
+def _apply_external_secret_sources(home_path: Path) -> None:
+    """Pull secrets from external sources (currently Bitwarden) into env.
+
+    Runs AFTER dotenv loads so .env values are visible (we use them to
+    locate the access token) but BEFORE the rest of Hermes reads
+    ``os.environ`` for credentials.  Any failure here is reported on stderr
+    and swallowed — external secret sources must never block startup.
+
+    The ``secrets:`` section is read from the raw ``config.yaml`` (it is not
+    merged with ``DEFAULT_CONFIG``), so the fallbacks below must mirror the
+    documented defaults: ``override_existing`` and ``auto_install`` default
+    to ``True`` and ``cache_ttl_seconds`` to ``300``.
+
+    Args:
+        home_path: Hermes home directory containing ``config.yaml``.
+    """
+    try:
+        cfg = _load_secrets_config(home_path)
+    except Exception:  # noqa: BLE001 — config errors must not block startup
+        return
+
+    # Hand-edited YAML may put a scalar or list where a mapping is expected;
+    # treat that the same as "section absent" instead of raising.
+    if not isinstance(cfg, dict):
+        return
+    bw_cfg = cfg.get("bitwarden")
+    if not isinstance(bw_cfg, dict) or not bw_cfg.get("enabled"):
+        return
+
+    try:
+        from agent.secret_sources.bitwarden import apply_bitwarden_secrets
+    except ImportError:
+        return
+
+    # A null or non-numeric TTL must not abort startup; use the default.
+    try:
+        cache_ttl_seconds = float(bw_cfg.get("cache_ttl_seconds", 300))
+    except (TypeError, ValueError):
+        cache_ttl_seconds = 300.0
+
+    try:
+        result = apply_bitwarden_secrets(
+            enabled=True,
+            access_token_env=bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN"),
+            project_id=bw_cfg.get("project_id", ""),
+            # Documented default is True (centralized rotation must win
+            # over stale .env lines), so an omitted key means "override".
+            override_existing=bool(bw_cfg.get("override_existing", True)),
+            cache_ttl_seconds=cache_ttl_seconds,
+            auto_install=bool(bw_cfg.get("auto_install", True)),
+        )
+    except Exception as exc:  # noqa: BLE001 — must never block startup
+        print(
+            f"  Bitwarden Secrets Manager: {exc}",
+            file=sys.stderr,
+        )
+        return
+
+    if result.applied:
+        # Re-run the ASCII sanitization pass: BSM values are user-supplied
+        # and might have the same copy-paste corruption as a manually
+        # edited .env (see #6843).
+        _sanitize_loaded_credentials()
+        # Remember where these came from so the setup / `hermes model`
+        # flows can label detected credentials with "(from Bitwarden)" —
+        # otherwise users see "credentials ✓" with no hint that the value
+        # came from BSM rather than .env.
+        for name in result.applied:
+            _SECRET_SOURCES[name] = "bitwarden"
+        print(
+            f"  Bitwarden Secrets Manager: applied {len(result.applied)} "
+            f"secret{'s' if len(result.applied) != 1 else ''} "
+            f"({', '.join(sorted(result.applied))})",
+            file=sys.stderr,
+        )
+    if result.error:
+        print(
+            f"  Bitwarden Secrets Manager: {result.error}",
+            file=sys.stderr,
+        )
+    for warn in result.warnings:
+        print(
+            f"  Bitwarden Secrets Manager: {warn}",
+            file=sys.stderr,
+        )
+
+
+def _load_secrets_config(home_path: Path) -> dict:
+    """Read just the ``secrets:`` section out of config.yaml.
+
+    Imported lazily and isolated from the main config loader so a
+    malformed config can't take down dotenv loading entirely.
+
+    Args:
+        home_path: Hermes home directory expected to contain ``config.yaml``.
+
+    Returns:
+        The ``secrets`` mapping, or ``{}`` when the file is missing, PyYAML
+        is unavailable, the file cannot be read or parsed, or either the
+        document root or the ``secrets`` value is not a mapping.
+    """
+    config_path = home_path / "config.yaml"
+    if not config_path.exists():
+        return {}
+    try:
+        import yaml  # type: ignore
+    except ImportError:
+        return {}
+    try:
+        with open(config_path, "r", encoding="utf-8") as f:
+            data = yaml.safe_load(f)
+    except Exception:  # noqa: BLE001
+        return {}
+    # safe_load can yield a list/scalar/None for a malformed or empty file;
+    # only a mapping root can carry a ``secrets`` key.
+    if not isinstance(data, dict):
+        return {}
+    secrets = data.get("secrets")
+    # Honour the ``-> dict`` contract even if ``secrets:`` holds a scalar.
+    return secrets if isinstance(secrets, dict) else {}
@@ -951,6 +951,58 @@ CREATE INDEX IF NOT EXISTS idx_notify_task           ON kanban_notify_subs(task_

 _INITIALIZED_PATHS: set[str] = set()
 _INIT_LOCK = threading.RLock()
+_SQLITE_HEADER = b"SQLite format 3\x00"
+
+
+def _looks_like_tls_record_at(data: bytes, offset: int) -> bool:
+    """Report whether ``data[offset:]`` begins with a plausible TLS record header.
+
+    A match needs five bytes: a content type in 0x14-0x17, major version
+    0x03, minor version 0x00-0x04, and a big-endian length in 1..18432.
+    """
+    if offset + 5 > len(data):
+        return False
+    record_type = data[offset]
+    version_major = data[offset + 1]
+    version_minor = data[offset + 2]
+    payload_length = int.from_bytes(data[offset + 3:offset + 5], "big")
+    if record_type not in (0x14, 0x15, 0x16, 0x17):
+        return False
+    if version_major != 0x03 or version_minor not in (0x00, 0x01, 0x02, 0x03, 0x04):
+        return False
+    return 0 < payload_length <= 18432
+
+
+def _validate_sqlite_header(path: Path) -> None:
+    """Reject an existing, non-empty Kanban DB file that lacks the SQLite magic.
+
+    Missing and zero-byte files pass because ``sqlite3.connect()`` creates
+    them; files that cannot be stat'ed or read also pass and are left for
+    SQLite itself to report.  Anything else must start with the SQLite
+    header, otherwise a ``sqlite3.DatabaseError`` is raised that names the
+    path, dumps the first 32 bytes in hex, and notes a TLS record header
+    when one is recognised at offset 0 or right after a ``SQLit`` prefix.
+    Failing here keeps a corrupted page 0 from surfacing as a generic
+    PRAGMA error and lets the gateway's corrupt board handling identify
+    the board by fingerprint.
+    """
+    try:
+        if path.stat().st_size == 0:
+            return
+        with path.open("rb") as stream:
+            prefix = stream.read(64)
+    except OSError:
+        # Covers a missing file as well as any other stat/open/read failure.
+        return
+    if prefix.startswith(_SQLITE_HEADER):
+        return
+    if prefix.startswith(b"SQLit") and _looks_like_tls_record_at(prefix, 5):
+        tls_note = " (TLS record header detected at byte offset 5)"
+    elif _looks_like_tls_record_at(prefix, 0):
+        tls_note = " (TLS record header detected at byte offset 0)"
+    else:
+        tls_note = ""
+    raise sqlite3.DatabaseError(
+        "file is not a database: invalid SQLite header for "
+        f"{path}{tls_note}; first_32={prefix[:32].hex(' ')}"
+    )


 def connect(
@@ -981,6 +1033,7 @@ def connect(
    else:
        path = kanban_db_path(board=board)
    path.parent.mkdir(parents=True, exist_ok=True)
+    _validate_sqlite_header(path)
    resolved = str(path.resolve())
    conn = sqlite3.connect(str(path), isolation_level=None, timeout=30)
    try:
@@ -261,11 +261,147 @@ import time as _time
 from datetime import datetime

 from hermes_cli import __version__, __release_date__
-from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL
-
 logger = logging.getLogger(__name__)


+def _is_termux_startup_environment(env: dict[str, str] | None = None) -> bool:
+    """Import-safe Termux check for cold-start-sensitive CLI paths.
+
+    Args:
+        env: Mapping to inspect instead of the process environment.  Only
+            ``None`` falls back to ``os.environ``; an explicitly passed
+            empty mapping is honoured and therefore reports ``False``.
+
+    Returns:
+        True when ``TERMUX_VERSION`` is set (non-empty) or ``PREFIX`` points
+        into the Termux install tree.
+    """
+    # ``env or os.environ`` would treat an explicit ``{}`` as "not given"
+    # and leak the real process environment into the check.
+    check = os.environ if env is None else env
+    prefix = str(check.get("PREFIX", ""))
+    return bool(
+        check.get("TERMUX_VERSION")
+        or "com.termux/files/usr" in prefix
+        or prefix.startswith("/data/data/com.termux/")
+    )
+
+
+def _read_packed_ref(common_dir: Path, ref: str) -> str | None:
+    """Resolve ``ref`` from ``<common_dir>/packed-refs`` without running git.
+
+    Each data line has the form ``<sha> <ref>``.  Blank lines, ``#`` comment
+    lines (including the ``# pack-refs with:`` header) and ``^<sha>`` peel
+    lines are ignored.  Returns the sha of the first matching line, or
+    ``None`` when the file is unreadable or lists no such ref.
+    """
+    try:
+        contents = (common_dir / "packed-refs").read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return None
+    for entry in contents.splitlines():
+        # An empty slice means a blank line; "#" / "^" mark non-ref lines.
+        if entry[:1] in ("", "#", "^"):
+            continue
+        sha, separator, name = entry.partition(" ")
+        if separator and name.strip() == ref:
+            return sha.strip()
+    return None
+
+
+def _read_git_revision_fingerprint(repo_root: Path) -> str | None:
+    """Return a cheap checkout fingerprint without spawning git.
+
+    Reads ``HEAD`` (following a ``.git`` pointer file and ``commondir`` when
+    present) and resolves the ref it names from a loose ref file or, failing
+    that, from ``packed-refs``.
+
+    Args:
+        repo_root: Directory expected to contain ``.git``.
+
+    Returns:
+        ``git:<ref>:<sha>`` when HEAD names a ref that resolves,
+        ``git:<ref>:unresolved`` when it names a ref that cannot be found,
+        ``git:HEAD:<contents>`` when HEAD does not start with ``ref:``, or
+        ``None`` when any of the reads raises ``OSError``.
+    """
+    git_dir = repo_root / ".git"
+    try:
+        # ``.git`` as a regular file is a pointer: follow its ``gitdir:``
+        # line (resolved relative to repo_root) to the real git directory.
+        if git_dir.is_file():
+            for line in git_dir.read_text(encoding="utf-8", errors="replace").splitlines():
+                key, _, value = line.partition(":")
+                if key.strip() == "gitdir" and value.strip():
+                    git_dir = (repo_root / value.strip()).resolve()
+                    break
+        # Worktrees point HEAD at a per-worktree gitdir but pack their refs
+        # in the main repo's gitdir (referenced via ``commondir``). Resolve
+        # that up front so packed-refs lookups hit the right file.
+        common_dir = git_dir
+        commondir_file = git_dir / "commondir"
+        if commondir_file.exists():
+            try:
+                rel = commondir_file.read_text(encoding="utf-8", errors="replace").strip()
+                if rel:
+                    common_dir = (git_dir / rel).resolve()
+            except OSError:
+                # Unreadable commondir: keep using git_dir as the common dir.
+                pass
+        head_file = git_dir / "HEAD"
+        head = head_file.read_text(encoding="utf-8", errors="replace").strip()
+        if head.startswith("ref:"):
+            ref = head.split(":", 1)[1].strip()
+            # Loose refs may live in the worktree gitdir OR the common dir
+            # (branches created via `git worktree add` typically live in the
+            # common dir's refs/heads/).
+            for candidate in (git_dir, common_dir):
+                ref_file = candidate / ref
+                if ref_file.exists():
+                    return f"git:{ref}:{ref_file.read_text(encoding='utf-8', errors='replace').strip()}"
+            packed_sha = _read_packed_ref(common_dir, ref)
+            if packed_sha:
+                return f"git:{ref}:{packed_sha}"
+            # Ref name is known but unresolved — still stable across launches,
+            # and the version/release fallback in the caller will invalidate
+            # after `hermes update`.
+            return f"git:{ref}:unresolved"
+        # HEAD holds something other than a ``ref:`` pointer; use it verbatim.
+        return f"git:HEAD:{head}"
+    except OSError:
+        return None
+
+
+def _termux_bundled_skills_fingerprint() -> str:
+    """Compute the invalidation key for the Termux bundled-skill startup sync.
+
+    Uses the git revision fingerprint of ``PROJECT_ROOT`` when one is
+    available.  Otherwise falls back to the package version and release date
+    combined with the ``skills`` directory's mtime and size, or a
+    ``:missing`` marker when that directory cannot be stat'ed.
+    """
+    revision = _read_git_revision_fingerprint(PROJECT_ROOT)
+    if revision:
+        return revision
+    try:
+        info = (PROJECT_ROOT / "skills").stat()
+    except OSError:
+        return f"skills:{__version__}:{__release_date__}:missing"
+    return f"skills:{__version__}:{__release_date__}:{info.st_mtime_ns}:{info.st_size}"
+
+
+def _termux_bundled_skills_stamp_path() -> Path:
+    """Location of the stamp file recording the last synced fingerprint."""
+    return get_hermes_home().joinpath("skills", ".termux_bundled_sync_stamp")
+
+
+def _termux_bundled_skills_sync_needed() -> bool:
+    """Decide whether bundled skills must be re-synced on this start.
+
+    Always True outside Termux and when
+    ``HERMES_TERMUX_FORCE_SKILLS_SYNC=1`` is set.  On Termux the answer is
+    True when the stamp file is unreadable or its contents differ from the
+    current fingerprint.
+    """
+    outside_termux = not _is_termux_startup_environment()
+    forced = os.environ.get("HERMES_TERMUX_FORCE_SKILLS_SYNC") == "1"
+    if outside_termux or forced:
+        return True
+    try:
+        recorded = _termux_bundled_skills_stamp_path().read_text(encoding="utf-8").strip()
+        return recorded != _termux_bundled_skills_fingerprint()
+    except OSError:
+        return True
+
+
+def _mark_termux_bundled_skills_synced() -> None:
+    """Record the current fingerprint in the stamp file (Termux only).
+
+    Best effort: a failure to create the directory or write the stamp is
+    ignored, which simply means the next start syncs again.
+    """
+    if _is_termux_startup_environment():
+        try:
+            marker = _termux_bundled_skills_stamp_path()
+            marker.parent.mkdir(parents=True, exist_ok=True)
+            marker.write_text(f"{_termux_bundled_skills_fingerprint()}\n", encoding="utf-8")
+        except OSError:
+            pass
+
+
+def _sync_bundled_skills_for_startup() -> bool:
+    """Run the bundled-skill sync unless an unchanged Termux checkout allows skipping.
+
+    Hashing every bundled skill is safe but expensive on older Android
+    storage, so on Termux the sync is skipped while the stored stamp still
+    matches the checkout fingerprint.  A changed revision forces one real
+    sync, after which later starts skip again.
+
+    Returns:
+        True when a sync was performed, False when it was skipped.
+    """
+    skip = _is_termux_startup_environment() and not _termux_bundled_skills_sync_needed()
+    if not skip:
+        # Imported lazily so the skip path never pays for the import.
+        from tools.skills_sync import sync_skills
+
+        sync_skills(quiet=True)
+        _mark_termux_bundled_skills_synced()
+    return not skip
+
+
+def _termux_should_prefetch_update_check() -> bool:
+    """True outside Termux; on Termux only with ``HERMES_TERMUX_PREFETCH_UPDATES=1``."""
+    return (
+        not _is_termux_startup_environment()
+        or os.environ.get("HERMES_TERMUX_PREFETCH_UPDATES") == "1"
+    )
+
+
 def _relative_time(ts) -> str:
    """Format a timestamp as relative time (e.g., '2h ago', 'yesterday')."""
    if not ts:
@@ -455,7 +591,7 @@ def _session_browse_picker(sessions: list) -> Optional[str]:
                curses.init_pair(1, curses.COLOR_GREEN, -1)  # selected
                curses.init_pair(2, curses.COLOR_YELLOW, -1)  # header
                curses.init_pair(3, curses.COLOR_CYAN, -1)  # search
-                curses.init_pair(4, 8, -1)  # dim
+                curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim

            cursor = 0
            scroll_offset = 0
@@ -967,6 +1103,72 @@ def _tui_need_npm_install(root: Path) -> bool:
    return False


+# Directories (relative to the TUI root) that ``_iter_tui_build_inputs``
+# walks recursively when looking for build inputs.
+_TUI_BUILD_INPUT_DIRS = (
+    "src",
+    "packages/hermes-ink/src",
+)
+
+# Individual files (relative to the TUI root) that count as build inputs
+# regardless of their suffix; entries that do not exist are skipped.
+_TUI_BUILD_INPUT_FILES = (
+    "package.json",
+    "package-lock.json",
+    "tsconfig.json",
+    "tsconfig.build.json",
+    "babel.compiler.config.cjs",
+    "scripts/build.mjs",
+    "packages/hermes-ink/package.json",
+    "packages/hermes-ink/package-lock.json",
+    "packages/hermes-ink/index.js",
+    "packages/hermes-ink/text-input.js",
+)
+
+# Suffix filter applied to files found under ``_TUI_BUILD_INPUT_DIRS``.
+_TUI_BUILD_INPUT_SUFFIXES = frozenset(
+    {".cjs", ".js", ".jsx", ".json", ".mjs", ".ts", ".tsx"}
+)
+
+
+def _iter_tui_build_inputs(root: Path):
+    """Lazily yield the files whose changes should trigger a TUI rebuild.
+
+    First the entries of ``_TUI_BUILD_INPUT_FILES`` that exist under *root*,
+    then every file below each existing ``_TUI_BUILD_INPUT_DIRS`` directory
+    whose suffix is listed in ``_TUI_BUILD_INPUT_SUFFIXES``.
+    """
+    yield from (
+        candidate
+        for candidate in (root / name for name in _TUI_BUILD_INPUT_FILES)
+        if candidate.is_file()
+    )
+
+    for subdir in _TUI_BUILD_INPUT_DIRS:
+        tree = root / subdir
+        if tree.is_dir():
+            yield from (
+                found
+                for found in tree.rglob("*")
+                if found.is_file() and found.suffix in _TUI_BUILD_INPUT_SUFFIXES
+            )
+
+
+def _tui_need_rebuild(root: Path) -> bool:
+    """Report whether ``dist/entry.js`` must be rebuilt before launch.
+
+    The TUI bundle is self-contained, so rebuilding on every launch only adds
+    a cold-start tax on slow Termux CPUs. A plain mtime comparison is enough
+    to rebuild right after source updates, dependency updates or local edits.
+
+    Returns ``True`` when ``HERMES_TUI_FORCE_BUILD`` is one of ``1``,
+    ``true``, ``yes`` or ``on`` (case-insensitive), when the bundle cannot be
+    stat'ed, or when any build input is newer than the bundle or cannot be
+    stat'ed. Returns ``False`` otherwise.
+    """
+    forced = (os.environ.get("HERMES_TUI_FORCE_BUILD") or "").strip().lower()
+    if forced in {"1", "true", "yes", "on"}:
+        return True
+
+    try:
+        built_at = (root / "dist" / "entry.js").stat().st_mtime
+    except OSError:
+        return True
+
+    def _is_newer(source: Path) -> bool:
+        # An input that vanished mid-scan counts as "changed".
+        try:
+            return source.stat().st_mtime > built_at
+        except OSError:
+            return True
+
+    return any(_is_newer(source) for source in _iter_tui_build_inputs(root))
+
+
 def _ensure_tui_node() -> None:
    """Make sure `node` + `npm` are on PATH for the TUI.

@@ -1071,16 +1273,17 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
            p = Path(ext_dir)
            if (p / "dist" / "entry.js").is_file():
                node = _node_bin("node")
-                return [node, str(p / "dist" / "entry.js")], p
+                return [node, "--expose-gc", str(p / "dist" / "entry.js")], p

        # 1b. Bundled in wheel (pip install)
        bundled = _find_bundled_tui()
        if bundled is not None:
            node = _node_bin("node")
-            return [node, str(bundled)], bundled.parent
+            return [node, "--expose-gc", str(bundled)], bundled.parent

    # 2. Normal flow: npm install if needed, always esbuild, then node dist/entry.js.
    #    --dev flow: npm install if needed, then tsx src/entry.tsx.
+    did_install = False
    if _tui_need_npm_install(tui_dir):
        npm = _node_bin("npm")
        if not os.environ.get("HERMES_QUIET"):
@@ -1100,6 +1303,7 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
            if preview:
                print(preview)
            sys.exit(1)
+        did_install = True

    if tui_dev:
        # Keep the local @hermes/ink package exports in sync with source.
@@ -1128,24 +1332,31 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]:
            return [str(tsx), "src/entry.tsx"], tui_dir
        return [npm, "start"], tui_dir

-    # Always rebuild — esbuild is fast and this avoids staleness-edge-case bugs.
-    npm = _node_bin("npm")
-    result = subprocess.run(
-        [npm, "run", "build"],
-        cwd=str(tui_dir),
-        capture_output=True,
-        text=True,
-    )
-    if result.returncode != 0:
-        combined = f"{result.stdout or ''}{result.stderr or ''}".strip()
-        preview = "\n".join(combined.splitlines()[-30:])
-        print("TUI build failed.")
-        if preview:
-            print(preview)
-        sys.exit(1)
+    # Desktop/dev launches retain the historical "always rebuild" behaviour.
+    # Termux cold starts use the freshness check because esbuild startup is
+    # expensive on old mobile CPUs.
+    should_build = True
+    if _is_termux_startup_environment():
+        should_build = did_install or _tui_need_rebuild(tui_dir)
+
+    if should_build:
+        npm = _node_bin("npm")
+        result = subprocess.run(
+            [npm, "run", "build"],
+            cwd=str(tui_dir),
+            capture_output=True,
+            text=True,
+        )
+        if result.returncode != 0:
+            combined = f"{result.stdout or ''}{result.stderr or ''}".strip()
+            preview = "\n".join(combined.splitlines()[-30:])
+            print("TUI build failed.")
+            if preview:
+                print(preview)
+            sys.exit(1)

    node = _node_bin("node")
-    return [node, str(tui_dir / "dist" / "entry.js")], tui_dir
+    return [node, "--expose-gc", str(tui_dir / "dist" / "entry.js")], tui_dir


 def _normalize_tui_toolsets(toolsets: object) -> list[str]:
@@ -1267,16 +1478,16 @@ def _launch_tui(
        env["HERMES_TUI_TOOL_PROGRESS"] = "off"
    if accept_hooks:
        env["HERMES_ACCEPT_HOOKS"] = "1"
-    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
-    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
-    # large transcripts / reasoning blobs. Token-level merge: respect any
-    # user-supplied --max-old-space-size (they may have set it higher) and
-    # avoid duplicating --expose-gc.
+    # Guarantee an 8GB V8 heap for the TUI. Default node cap is ~1.5–4GB
+    # depending on version and can fatal-OOM on long sessions with large
+    # transcripts / reasoning blobs. Token-level merge: respect any
+    # user-supplied --max-old-space-size (they may have set it higher).
+    # --expose-gc is *not* added here: Node rejects it in NODE_OPTIONS
+    # ("--expose-gc is not allowed in NODE_OPTIONS") and refuses to start.
+    # It is passed as a direct argv flag in _make_tui_argv() instead.
    _tokens = env.get("NODE_OPTIONS", "").split()
    if not any(t.startswith("--max-old-space-size=") for t in _tokens):
        _tokens.append("--max-old-space-size=8192")
-    if "--expose-gc" not in _tokens:
-        _tokens.append("--expose-gc")
    env["NODE_OPTIONS"] = " ".join(_tokens)
    # HERMES_TUI_RESUME is an internal hand-off from the Python wrapper to the
    # Ink app.  Because we start from os.environ.copy(), an exported/stale value
@@ -1384,6 +1595,29 @@ def cmd_chat(args):
        # If resolution fails, keep the original value — _init_agent will
        # report "Session not found" with the original input

+    # xAI retirement warning — one-shot, non-blocking, never fails startup
+    try:
+        from hermes_cli.xai_retirement import (
+            MIGRATION_GUIDE_URL,
+            RETIREMENT_DATE,
+            find_retired_xai_refs,
+            format_issue,
+        )
+        from hermes_cli.config import load_config as _load_config_for_xai_check
+
+        _retired_xai_refs = find_retired_xai_refs(_load_config_for_xai_check())
+        if _retired_xai_refs:
+            sys.stderr.write(
+                f"\033[33m⚠ xAI retires {len(_retired_xai_refs)} model(s) "
+                f"in your config on {RETIREMENT_DATE}:\033[0m\n"
+            )
+            for _ref in _retired_xai_refs:
+                sys.stderr.write(f"  \033[33m⚠\033[0m {format_issue(_ref)}\n")
+            sys.stderr.write(f"  \033[2mMigration guide: {MIGRATION_GUIDE_URL}\033[0m\n")
+            sys.stderr.write("  \033[2mRun 'hermes doctor' for details.\033[0m\n\n")
+    except Exception:
+        pass
+
    # First-run guard: check if any provider is configured before launching
    if not _has_any_provider_configured():
        print()
@@ -1416,19 +1650,20 @@ def cmd_chat(args):
        print("You can run 'hermes setup' at any time to configure.")
        sys.exit(1)

-    # Start update check in background (runs while other init happens)
-    try:
-        from hermes_cli.banner import prefetch_update_check
+    # Start update check in background (runs while other init happens).
+    # On Termux this imports rich/prompt_toolkit in the foreground and then
+    # competes for CPU on single-core devices, so keep it opt-in there.
+    if _termux_should_prefetch_update_check():
+        try:
+            from hermes_cli.banner import prefetch_update_check

-        prefetch_update_check()
-    except Exception:
-        pass
+            prefetch_update_check()
+        except Exception:
+            pass

    # Sync bundled skills on every CLI launch (fast -- skips unchanged skills)
    try:
-        from tools.skills_sync import sync_skills
-
-        sync_skills(quiet=True)
+        _sync_bundled_skills_for_startup()
    except Exception:
        pass

@@ -2198,6 +2433,9 @@ _AUX_TASKS: list[tuple[str, str, str]] = [
    ("mcp", "MCP", "MCP tool reasoning"),
    ("title_generation", "Title generation", "session titles"),
    ("skills_hub", "Skills hub", "skills search/install"),
+    ("triage_specifier", "Triage specifier", "kanban spec fleshing"),
+    ("kanban_decomposer", "Kanban decomposer", "task decomposition"),
+    ("profile_describer", "Profile describer", "auto profile descriptions"),
    ("curator", "Curator", "skill-usage review pass"),
 ]

@@ -2566,6 +2804,7 @@ def _prompt_provider_choice(choices, *, default=0):

 def _model_flow_openrouter(config, current_model=""):
    """OpenRouter provider: ensure API key, then pick model."""
+    from hermes_constants import OPENROUTER_BASE_URL
    from hermes_cli.auth import (
        ProviderConfig,
        _prompt_model_selection,
@@ -2626,6 +2865,7 @@ def _model_flow_openrouter(config, current_model=""):

 def _model_flow_ai_gateway(config, current_model=""):
    """Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
+    from hermes_constants import AI_GATEWAY_BASE_URL
    from hermes_cli.auth import (
        PROVIDER_REGISTRY,
        _prompt_model_selection,
@@ -4219,8 +4459,11 @@ def _model_flow_named_custom(config, provider_info):
    print(f"   Provider: {name} ({base_url})")


-# Curated model lists for direct API-key providers — single source in models.py
-from hermes_cli.models import _PROVIDER_MODELS
+# Keep the historical eager model catalog import on desktop/CI. Termux defers
+# it to the model-selection handlers so plain `hermes --tui` does not pay for
+# requests/models.dev catalog imports before the Node TUI starts.
+if not _is_termux_startup_environment():
+    from hermes_cli.models import _PROVIDER_MODELS


 def _current_reasoning_effort(config) -> str:
@@ -4337,6 +4580,7 @@ def _model_flow_copilot(config, current_model=""):
    )
    from hermes_cli.config import save_env_value, load_config, save_config
    from hermes_cli.models import (
+        _PROVIDER_MODELS,
        fetch_api_models,
        fetch_github_model_catalog,
        github_model_reasoning_efforts,
@@ -4421,7 +4665,9 @@ def _model_flow_copilot(config, current_model=""):
        source = creds.get("source", "")
    else:
        if source in {"GITHUB_TOKEN", "GH_TOKEN"}:
-            print(f"  GitHub token: {api_key[:8]}... ✓ ({source})")
+            from hermes_cli.env_loader import format_secret_source_suffix
+            bw_suffix = format_secret_source_suffix(source)
+            print(f"  GitHub token: {api_key[:8]}... ✓ ({source}{bw_suffix})")
        elif source == "gh auth token":
            print("  GitHub token: ✓ (from `gh auth token`)")
        else:
@@ -4529,6 +4775,7 @@ def _model_flow_copilot_acp(config, current_model=""):
        resolve_external_process_provider_credentials,
    )
    from hermes_cli.models import (
+        _PROVIDER_MODELS,
        fetch_github_model_catalog,
        normalize_copilot_model_id,
    )
@@ -4677,7 +4924,10 @@ def _prompt_api_key(pconfig, existing_key: str, provider_id: str = "") -> tuple:
        return new_key, False

    # Already configured — offer K / R / C ────────────────────────────────
-    print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
+    from hermes_cli.env_loader import format_secret_source_suffix
+
+    source_suffix = format_secret_source_suffix(key_env) if key_env else ""
+    print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓{source_suffix}")
    if not key_env:
        # Nothing we can rewrite; just acknowledge and move on.
        print()
@@ -4732,6 +4982,7 @@ def _model_flow_kimi(config, current_model=""):
        load_config,
        save_config,
    )
+    from hermes_cli.models import _PROVIDER_MODELS

    provider_id = "kimi-coding"
    pconfig = PROVIDER_REGISTRY[provider_id]
@@ -4842,7 +5093,7 @@ def _model_flow_stepfun(config, current_model=""):
        load_config,
        save_config,
    )
-    from hermes_cli.models import fetch_api_models
+    from hermes_cli.models import _PROVIDER_MODELS, fetch_api_models

    provider_id = "stepfun"
    pconfig = PROVIDER_REGISTRY[provider_id]
@@ -4959,7 +5210,9 @@ def _model_flow_bedrock_api_key(config, region, current_model=""):
    # Prompt for API key
    existing_key = get_env_value("AWS_BEARER_TOKEN_BEDROCK") or ""
    if existing_key:
-        print(f"  Bedrock API Key: {existing_key[:12]}... ✓")
+        from hermes_cli.env_loader import format_secret_source_suffix
+        source_suffix = format_secret_source_suffix("AWS_BEARER_TOKEN_BEDROCK")
+        print(f"  Bedrock API Key: {existing_key[:12]}... ✓{source_suffix}")
    else:
        print(f"  Endpoint: {mantle_base_url}")
        print()
@@ -5222,6 +5475,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        save_config,
    )
    from hermes_cli.models import (
+        _PROVIDER_MODELS,
        fetch_api_models,
        opencode_model_api_mode,
        normalize_opencode_model_id,
@@ -5629,7 +5883,22 @@ def _model_flow_anthropic(config, current_model=""):
    if has_creds:
        # Show what we found
        if existing_key:
-            print(f"  Anthropic credentials: {existing_key[:12]}... ✓")
+            from hermes_cli.env_loader import format_secret_source_suffix
+            from hermes_cli.auth import PROVIDER_REGISTRY
+
+            # Surface which env var supplied the key so users with
+            # Bitwarden see "(from Bitwarden)" — without this, a detected
+            # BSM key looks identical to a key in .env and users assume
+            # nothing is wired up.
+            source_suffix = ""
+            for var in PROVIDER_REGISTRY["anthropic"].api_key_env_vars:
+                if os.getenv(var, "").strip() == existing_key:
+                    source_suffix = format_secret_source_suffix(var)
+                    if source_suffix:
+                        break
+            print(
+                f"  Anthropic credentials: {existing_key[:12]}... ✓{source_suffix}"
+            )
        elif cc_available:
            print("  Claude Code credentials: ✓ (auto-detected)")
        print()
@@ -5855,8 +6124,7 @@ def cmd_import(args):
    run_import(args)


-def cmd_version(args):
-    """Show version."""
+def _print_version_info(*, check_updates: bool = True) -> None:
    print(f"Hermes Agent v{__version__} ({__release_date__})")
    print(f"Project: {PROJECT_ROOT}")

@@ -5876,6 +6144,9 @@ def cmd_version(args):
    except ImportError:
        print("OpenAI SDK: Not installed")

+    if not check_updates:
+        return
+
    # Show update status (synchronous — acceptable since user asked for version info)
    try:
        from hermes_cli.banner import check_for_updates
@@ -5894,6 +6165,11 @@ def cmd_version(args):
        pass


+def cmd_version(args):
+    """Handle ``hermes version``: print version info including update status.
+
+    ``args`` is accepted for the command-handler signature and is not read.
+    """
+    _print_version_info(check_updates=True)
+
+
 def cmd_uninstall(args):
    """Uninstall Hermes Agent."""
    _require_tty("uninstall")
@@ -5970,24 +6246,36 @@ def _validate_critical_files_syntax(root) -> tuple[bool, str | None, str | None]
    them after a successful ``git pull`` so we can auto-roll-back instead of
    leaving the user with a bricked install.

+    The compiled ``.pyc`` is written to a temp directory rather than the
+    source tree's ``__pycache__/`` so we don't race with concurrent test
+    workers that walk the same dir, and so we don't leave a stale pyc
+    behind in production if the next interpreter run picks a different
+    Python version. The pyc is discarded on function return either way —
+    we only care about the compile-or-not signal.
+
    Returns ``(ok, failing_path, error_message)``. ``ok=True`` means every
    file parsed cleanly.
    """
    import py_compile
+    import tempfile

    root = Path(root)
-    for relpath in _UPDATE_CRITICAL_FILES:
-        path = root / relpath
-        if not path.exists():
-            # Missing file is suspicious but not necessarily fatal — a future
-            # refactor may legitimately remove one of these. Skip and move on.
-            continue
-        try:
-            py_compile.compile(str(path), doraise=True)
-        except py_compile.PyCompileError as exc:
-            return False, str(path), str(exc)
-        except OSError as exc:
-            return False, str(path), f"could not read: {exc}"
+    with tempfile.TemporaryDirectory(prefix="hermes-syntax-check-") as tmpdir:
+        for relpath in _UPDATE_CRITICAL_FILES:
+            path = root / relpath
+            if not path.exists():
+                # Missing file is suspicious but not necessarily fatal — a future
+                # refactor may legitimately remove one of these. Skip and move on.
+                continue
+            # Flatten the relative path into one file name under the tmpdir
+            # ("/" becomes "__") so two different files with the same
+            # basename don't collide on the cfile name.
+            cfile = Path(tmpdir) / (relpath.replace("/", "__") + "c")
+            try:
+                py_compile.compile(str(path), cfile=str(cfile), doraise=True)
+            except py_compile.PyCompileError as exc:
+                return False, str(path), str(exc)
+            except OSError as exc:
+                return False, str(path), f"could not read: {exc}"
    return True, None, None


@@ -7639,9 +7927,7 @@ def _install_python_dependencies_with_optional_fallback(


 def _is_termux_env(env: dict[str, str] | None = None) -> bool:
-    check = env or os.environ
-    prefix = str(check.get("PREFIX", ""))
-    return "com.termux" in prefix or prefix.startswith("/data/data/com.termux/")
+    return _is_termux_startup_environment(env)


 def _is_android_python() -> bool:
@@ -10295,11 +10581,11 @@ _BUILTIN_SUBCOMMANDS = frozenset(
        "computer-use",
        "config", "cron", "curator", "dashboard", "debug", "doctor",
        "dump", "fallback", "gateway", "hooks", "import", "insights",
-        "kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
+        "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
        "model", "pairing", "plugins", "postinstall", "profile", "proxy",
        "send", "sessions", "setup",
        "skills", "slack", "status", "tools", "uninstall", "update",
-        "version", "webhook", "whatsapp", "chat",
+        "version", "webhook", "whatsapp", "chat", "secrets",
        # Help-ish invocations — plugin commands not being listed in
        # top-level --help is an acceptable trade-off for skipping an
        # expensive eager import of every bundled plugin module.
@@ -10389,6 +10675,178 @@ def _plugin_cli_discovery_needed() -> bool:
    return True


+# Top-level commands that can run an agent turn. ``None`` means no
+# subcommand was given on the command line.
+_AGENT_COMMANDS = {None, "chat", "acp", "rl"}
+# Command groups that only sometimes run an agent: maps the group name to
+# (argparse dest holding its nested subcommand, nested values that do).
+_AGENT_SUBCOMMANDS = {
+    "cron": ("cron_command", {"run", "tick"}),
+    "gateway": ("gateway_command", {"run"}),
+    "mcp": ("mcp_action", {"serve"}),
+}
+
+
+def _prepare_agent_startup(args) -> None:
+    """Run plugin, MCP and shell-hook discovery for agent-running commands.
+
+    Returns immediately unless ``args.command`` is in ``_AGENT_COMMANDS`` or
+    the nested subcommand chosen for it is listed in ``_AGENT_SUBCOMMANDS``.
+    The three discovery steps are independent: a failure in one is logged
+    and the remaining steps still run.
+    """
+    nested_dest, agent_actions = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
+    runs_agent = args.command in _AGENT_COMMANDS or (
+        nested_dest and getattr(args, nested_dest, None) in agent_actions
+    )
+    if not runs_agent:
+        return
+
+    hooks_accepted = bool(getattr(args, "accept_hooks", False))
+
+    # Step 1: plugins.
+    try:
+        from hermes_cli.plugins import discover_plugins
+
+        discover_plugins()
+    except Exception:
+        logger.warning("plugin discovery failed at CLI startup", exc_info=True)
+
+    # Step 2: MCP tools. No event loop is running during CLI/TUI startup, so
+    # doing this inline is safe. It lives here rather than at model_tools.py
+    # module scope so the gateway's event loop is not frozen on its first
+    # message via the same lazy import path (#16856).
+    try:
+        from tools.mcp_tool import discover_mcp_tools
+
+        discover_mcp_tools()
+    except Exception:
+        logger.debug("MCP tool discovery failed at CLI startup", exc_info=True)
+
+    # Step 3: shell hooks declared in the config.
+    try:
+        from hermes_cli.config import load_config
+        from agent.shell_hooks import register_from_config
+
+        register_from_config(load_config(), accept_hooks=hooks_accepted)
+    except Exception:
+        logger.debug("shell-hook registration failed at CLI startup", exc_info=True)
+
+
+def _set_chat_arg_defaults(args) -> None:
+    """Fill in chat-related attributes that are absent from ``args``.
+
+    Attributes that already exist on ``args`` are left untouched, whatever
+    their value; only missing ones receive the default listed below.
+    """
+    fallbacks = {
+        "query": None,
+        "model": None,
+        "provider": None,
+        "toolsets": None,
+        "verbose": False,
+        "resume": None,
+        "continue_last": None,
+        "worktree": False,
+    }
+    for name, value in fallbacks.items():
+        if hasattr(args, name):
+            continue
+        setattr(args, name, value)
+
+
+def _is_termux_fast_version_argv(argv: list[str]) -> bool:
+    """Return ``True`` when *argv* is exactly one version flag or command."""
+    version_spellings = ("--version", "-V", "version")
+    return any(argv == [spelling] for spelling in version_spellings)
+
+
+def _try_termux_fast_cli_launch() -> bool:
+    """Run obvious Termux non-TUI chat/oneshot/version paths on a light parser.
+
+    Returns ``True`` when the invocation was fully handled here and ``False``
+    when the caller should fall through to the full parser. The oneshot path
+    does not return: it exits the process with ``run_oneshot``'s result.
+    Set ``HERMES_TERMUX_DISABLE_FAST_CLI=1`` to opt out of this fast path.
+    """
+    if not _is_termux_startup_environment():
+        return False
+    if os.environ.get("HERMES_TERMUX_DISABLE_FAST_CLI") == "1":
+        return False
+
+    argv = sys.argv[1:]
+    # Help output and TUI launches are not handled by this function.
+    if "-h" in argv or "--help" in argv:
+        return False
+    if os.environ.get("HERMES_TUI") == "1" or "--tui" in argv:
+        return False
+
+    # Bare version requests need no argument parser at all.
+    if _is_termux_fast_version_argv(argv):
+        _print_version_info(check_updates=False)
+        return True
+
+    first = _first_positional_argv()
+    has_oneshot = any(
+        arg == "-z" or arg == "--oneshot" or arg.startswith("--oneshot=")
+        for arg in argv
+    )
+    # Any other subcommand goes to the full parser, unless oneshot was asked.
+    if not has_oneshot and first not in {None, "chat"}:
+        return False
+
+    from hermes_cli._parser import build_top_level_parser
+
+    parser, _subparsers, chat_parser = build_top_level_parser()
+    chat_parser.set_defaults(func=cmd_chat)
+    args = parser.parse_args(_coalesce_session_name_args(argv))
+
+    # Version flag combined with other arguments: skip the update check here.
+    if getattr(args, "version", False):
+        _print_version_info(check_updates=False)
+        return True
+
+    if getattr(args, "oneshot", None):
+        _prepare_agent_startup(args)
+        from hermes_cli.oneshot import run_oneshot
+
+        # Terminates the process; nothing below runs for oneshot invocations.
+        sys.exit(
+            run_oneshot(
+                args.oneshot,
+                model=getattr(args, "model", None),
+                provider=getattr(args, "provider", None),
+                toolsets=getattr(args, "toolsets", None),
+            )
+        )
+
+    # A resume/continue request without a subcommand is treated as chat.
+    if (args.resume or args.continue_last) and args.command is None:
+        args.command = "chat"
+
+    if args.command in {None, "chat"}:
+        _set_chat_arg_defaults(args)
+        _prepare_agent_startup(args)
+        cmd_chat(args)
+        return True
+
+    return False
+
+
+def _try_termux_fast_tui_launch() -> bool:
+    """Short-circuit unambiguous Termux TUI launches past the full parser.
+
+    `hermes --tui` is the hot path on phones. Building the full parser
+    imports command modules for model, fallback, migrate, kanban, bundles,
+    plugins, etc. even though the TUI immediately execs Node. On Termux only,
+    this parses with the lightweight top-level/chat parser and hands off to
+    ``cmd_chat`` when the invocation is clearly the built-in TUI/chat path.
+
+    Returns ``True`` after ``cmd_chat`` has run, ``False`` when the caller
+    should continue with the regular startup.
+    """
+    if not _is_termux_startup_environment():
+        return False
+
+    cli_args = sys.argv[1:]
+    if "-h" in cli_args or "--help" in cli_args:
+        return False
+
+    if not (os.environ.get("HERMES_TUI") == "1" or "--tui" in cli_args):
+        return False
+
+    if _first_positional_argv() not in {None, "chat"}:
+        return False
+
+    from hermes_cli._parser import build_top_level_parser
+
+    parser, _subparsers, chat_parser = build_top_level_parser()
+    chat_parser.set_defaults(func=cmd_chat)
+    args = parser.parse_args(_coalesce_session_name_args(sys.argv[1:]))
+
+    # Decline anything whose meaning is not "launch chat/TUI" so the regular
+    # startup keeps handling those top-level behaviours.
+    declined = (
+        getattr(args, "version", False)
+        or getattr(args, "oneshot", None)
+        or getattr(args, "command", None) not in {None, "chat"}
+        or not (getattr(args, "tui", False) or os.environ.get("HERMES_TUI") == "1")
+    )
+    if declined:
+        return False
+
+    cmd_chat(args)
+    return True
+
+
 def main():
    """Main entry point for hermes CLI."""
    # Force UTF-8 stdio on Windows before anything prints.  No-op elsewhere.
@@ -10406,6 +10864,11 @@ def main():
    except Exception:
        pass

+    if _try_termux_fast_tui_launch():
+        return
+    if _try_termux_fast_cli_launch():
+        return
+
    from hermes_cli._parser import build_top_level_parser

    parser, subparsers, chat_parser = build_top_level_parser()
@@ -10502,6 +10965,80 @@ def main():
    )
    fallback_parser.set_defaults(func=cmd_fallback)

+    # =========================================================================
+    # secrets command — external secret managers (currently: Bitwarden)
+    # =========================================================================
+    secrets_parser = subparsers.add_parser(
+        "secrets",
+        help="Manage external secret sources (Bitwarden Secrets Manager)",
+        description=(
+            "Pull API keys from an external secret manager at process startup "
+            "instead of storing them in ~/.hermes/.env.  Currently supports "
+            "Bitwarden Secrets Manager.  See: "
+            "https://hermes-agent.nousresearch.com/docs/user-guide/secrets/bitwarden"
+        ),
+    )
+    secrets_subparsers = secrets_parser.add_subparsers(dest="secrets_command")
+
+    secrets_bw = secrets_subparsers.add_parser(
+        "bitwarden",
+        aliases=["bw"],
+        help="Bitwarden Secrets Manager integration",
+    )
+
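+    # Function-scope import — keeps secrets_cli out of module import time.
+    # Note it still runs whenever main() builds the full parser, not only
+    # when `hermes secrets` is invoked.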
+    from hermes_cli import secrets_cli as _secrets_cli
+
+    _secrets_cli.register_cli(secrets_bw)
+
+    def _dispatch_secrets(args):  # noqa: ANN001
+        sub = getattr(args, "secrets_command", None)
+        bw_sub = getattr(args, "secrets_bw_command", None)
+        if sub in ("bitwarden", "bw") and bw_sub is not None:
+            return args.func(args)
+        secrets_parser.print_help()
+        return 0
+
+    secrets_parser.set_defaults(func=_dispatch_secrets)
+
+    # =========================================================================
+    # migrate command
+    # =========================================================================
+    from hermes_cli.migrate import cmd_migrate, cmd_migrate_xai
+
+    migrate_parser = subparsers.add_parser(
+        "migrate",
+        help="Migrate configuration for retired models or deprecated settings",
+        description=(
+            "Diagnose and (optionally) rewrite the active config.yaml to "
+            "replace references to retired models or deprecated settings."
+        ),
+    )
+    migrate_subparsers = migrate_parser.add_subparsers(dest="migrate_type")
+
+    migrate_xai = migrate_subparsers.add_parser(
+        "xai",
+        help="Migrate xAI models scheduled for retirement on May 15, 2026",
+        description=(
+            "Scan config.yaml for references to xAI models retiring on "
+            "May 15, 2026 and, with --apply, rewrite them in-place to the "
+            "official replacements per the xAI migration guide. The original "
+            "config.yaml is backed up before any rewrite."
+        ),
+    )
+    migrate_xai.add_argument(
+        "--apply",
+        action="store_true",
+        help="Rewrite config.yaml in-place (default: dry-run, no writes)",
+    )
+    migrate_xai.add_argument(
+        "--no-backup",
+        action="store_true",
+        help="Skip the timestamped backup of config.yaml when applying",
+    )
+    migrate_xai.set_defaults(func=cmd_migrate_xai)
+    migrate_parser.set_defaults(func=cmd_migrate)
+
    # =========================================================================
    # gateway command
    # =========================================================================
@@ -13129,51 +13666,7 @@ Examples:
    # so introspection/management commands (hermes hooks list, cron
    # list, gateway status, mcp add, ...) don't pay discovery cost or
    # trigger consent prompts for hooks the user is still inspecting.
-    # Groups with mixed admin/CRUD vs. agent-running entries narrow via
-    # the nested subcommand (dest varies by parser).
-    _AGENT_COMMANDS = {None, "chat", "acp", "rl"}
-    _AGENT_SUBCOMMANDS = {
-        "cron": ("cron_command", {"run", "tick"}),
-        "gateway": ("gateway_command", {"run"}),
-        "mcp": ("mcp_action", {"serve"}),
-    }
-    _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
-    if args.command in _AGENT_COMMANDS or (
-        _sub_attr and getattr(args, _sub_attr, None) in _sub_set
-    ):
-        _accept_hooks = bool(getattr(args, "accept_hooks", False))
-        try:
-            from hermes_cli.plugins import discover_plugins
-
-            discover_plugins()
-        except Exception:
-            logger.warning(
-                "plugin discovery failed at CLI startup",
-                exc_info=True,
-            )
-        try:
-            # MCP tool discovery — no event loop running in CLI/TUI startup,
-            # so inline is safe.  Moved here from model_tools.py module scope
-            # to avoid freezing the gateway's event loop on its first message
-            # via the same lazy import path (#16856).
-            from tools.mcp_tool import discover_mcp_tools
-
-            discover_mcp_tools()
-        except Exception:
-            logger.debug(
-                "MCP tool discovery failed at CLI startup",
-                exc_info=True,
-            )
-        try:
-            from hermes_cli.config import load_config
-            from agent.shell_hooks import register_from_config
-
-            register_from_config(load_config(), accept_hooks=_accept_hooks)
-        except Exception:
-            logger.debug(
-                "shell-hook registration failed at CLI startup",
-                exc_info=True,
-            )
+    _prepare_agent_startup(args)

    # Handle top-level --oneshot / -z: single-shot mode, stdout = final
    # response only, nothing else. Bypasses cli.py entirely.
@@ -0,0 +1,115 @@
+"""CLI handlers for ``hermes migrate ...``.
+
+Currently exposes only ``hermes migrate xai`` — diagnoses and (with --apply)
+rewrites references to xAI models retired on May 15, 2026.
+"""
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from typing import Any
+
+from hermes_cli.colors import Colors, color
+from hermes_cli.config import load_config
+
+
+def cmd_migrate(args: Any) -> int:
+    """Dispatcher for ``hermes migrate <subtype>``."""
+    sub = getattr(args, "migrate_type", None)
+    if sub == "xai":
+        return cmd_migrate_xai(args)
+
+    print("usage: hermes migrate xai [--apply] [--no-backup]", file=sys.stderr)
+    return 2
+
+
+def cmd_migrate_xai(args: Any) -> int:
+    """Run xAI May-15 model migration in dry-run or apply mode.
+
+    Reads ``apply`` and ``no_backup`` off ``args`` (both treated as False
+    when the attribute is absent), prints every retired-model reference
+    that ``find_retired_xai_refs`` reports for the loaded config, and —
+    only when ``apply`` is set — hands the issues to ``apply_migration``.
+
+    Returns:
+        0 when nothing needs migrating, after a dry run, or after an apply
+        (whether or not the config actually changed); 1 when config.yaml
+        does not exist on disk or ``apply_migration`` raises.
+    """
+    from hermes_cli.xai_retirement import (
+        MIGRATION_GUIDE_URL,
+        RETIREMENT_DATE,
+        apply_migration,
+        find_retired_xai_refs,
+        format_issue,
+    )
+
+    apply = bool(getattr(args, "apply", False))
+    no_backup = bool(getattr(args, "no_backup", False))
+
+    config = load_config()
+    issues = find_retired_xai_refs(config)
+
+    print()
+    print(color(
+        f"◆ xAI Model Retirement Migration ({RETIREMENT_DATE})",
+        Colors.CYAN, Colors.BOLD,
+    ))
+    print()
+
+    if not issues:
+        print(f"  {color('✓', Colors.GREEN)} No retired xAI models in config — nothing to migrate.")
+        return 0
+
+    print(f"  Found {len(issues)} retired xAI model reference(s):")
+    print()
+    for issue in issues:
+        print(f"    {color('⚠', Colors.YELLOW)} {format_issue(issue)}")
+    print()
+    print(f"    {color('→', Colors.CYAN)} Migration guide: {MIGRATION_GUIDE_URL}")
+    print()
+
+    # Resolved before the dry-run branch because the hint below prints it.
+    config_path = _resolve_config_path()
+
+    if not apply:
+        # Dry run: report only, never touch the file.
+        print(color("Dry-run mode — no changes written.", Colors.DIM))
+        print(color(
+            "Re-run with `hermes migrate xai --apply` to rewrite "
+            f"{config_path} in-place (backup created automatically).",
+            Colors.DIM,
+        ))
+        return 0
+
+    # Only apply mode requires the file to exist on disk.
+    if not config_path or not config_path.exists():
+        print(
+            f"  {color('✗', Colors.RED)} Could not locate config.yaml "
+            f"(looked at: {config_path})",
+            file=sys.stderr,
+        )
+        return 1
+
+    try:
+        result = apply_migration(
+            config_path=config_path,
+            issues=issues,
+            backup=not no_backup,
+        )
+    except Exception as exc:
+        # Any failure from apply_migration is reported on stderr and mapped
+        # to exit code 1 instead of propagating a traceback.
+        print(
+            f"  {color('✗', Colors.RED)} Migration failed: {exc}",
+            file=sys.stderr,
+        )
+        return 1
+
+    if not result.config_changed:
+        print(f"  {color('⚠', Colors.YELLOW)} No changes written.")
+        return 0
+
+    # backup_path is only reported when the result carries one.
+    if result.backup_path is not None:
+        print(f"  {color('✓', Colors.GREEN)} Backup: {result.backup_path}")
+    print(
+        f"  {color('✓', Colors.GREEN)} Updated {len(result.issues_resolved)} "
+        f"slot(s) in {result.file_path}"
+    )
+    print()
+    print(color(
+        "Run `hermes doctor` to confirm no retired xAI models remain.",
+        Colors.DIM,
+    ))
+    return 0
+
+
+def _resolve_config_path() -> Path:
+    """Best-effort: locate the active config.yaml on disk."""
+    from hermes_cli.config import get_hermes_home
+
+    return get_hermes_home() / "config.yaml"
@@ -74,8 +74,12 @@ class NousSubscriptionFeatures:
    def modal(self) -> NousFeatureState:
        return self.features["modal"]

+    @property
+    def app_tools(self) -> NousFeatureState:
+        """Return the feature state stored under the ``"app_tools"`` key."""
+        return self.features["app_tools"]
+
    def items(self) -> Iterable[NousFeatureState]:
+        """Yield each feature state in the fixed order listed in ``ordered``."""
-        ordered = ("web", "image_gen", "tts", "browser", "modal")
+        ordered = ("web", "image_gen", "tts", "browser", "modal", "app_tools")
        for key in ordered:
            yield self.features[key]

@@ -225,6 +229,22 @@ def _resolve_browser_feature_state(
    return "local", available, active, False


+def _read_portal_app_tools_enabled(config: Optional[Dict[str, object]] = None) -> bool:
+    """Return True when the portal.app_tools config flag is on."""
+    if config is not None:
+        # Fast path: use the pre-loaded config snapshot from the caller
+        import os
+        env_val = os.getenv("PORTAL_APP_TOOLS")
+        if env_val is not None:
+            return is_truthy_value(env_val)
+        portal = config.get("portal")
+        if isinstance(portal, dict):
+            return bool(portal.get("app_tools", True))
+        return True
+    from tools.tool_backend_helpers import portal_app_tools_enabled
+    return portal_app_tools_enabled()
+
+
 def get_nous_subscription_features(
    config: Optional[Dict[str, object]] = None,
 ) -> NousSubscriptionFeatures:
@@ -313,6 +333,8 @@ def get_nous_subscription_features(
    managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
    managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
+    app_gw_ready = bool(managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("tools"))
+    app_config_on = _read_portal_app_tools_enabled(config)
    modal_state = resolve_modal_backend_state(
        modal_mode,
        has_direct=direct_modal,
@@ -476,6 +498,17 @@ def get_nous_subscription_features(
            current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
            explicit_configured=terminal_backend == "modal",
        ),
+        "app_tools": NousFeatureState(
+            key="app_tools",
+            label="App tools (500+ apps)",
+            included_by_default=True,
+            available=app_gw_ready,
+            active=app_gw_ready and app_config_on,
+            managed_by_nous=app_gw_ready and app_config_on,
+            direct_override=False,
+            toolset_enabled=app_config_on,
+            current_provider="Nous Tool Gateway",
+        ),
    }

    return NousSubscriptionFeatures(
@@ -1051,7 +1051,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
            curses.init_pair(1, curses.COLOR_GREEN, -1)
            curses.init_pair(2, curses.COLOR_YELLOW, -1)
            curses.init_pair(3, curses.COLOR_CYAN, -1)
-            curses.init_pair(4, 8, -1)  # dim gray
+            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)  # dim gray
        cursor = 0
        scroll_offset = 0

@@ -1196,7 +1196,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(1, curses.COLOR_GREEN, -1)
                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
-                            curses.init_pair(4, 8, -1)
+                            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
                        curses.curs_set(0)
            elif key in {curses.KEY_ENTER, 10, 13}:
                if cursor < n_plugins:
@@ -1228,7 +1228,7 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
                            curses.init_pair(1, curses.COLOR_GREEN, -1)
                            curses.init_pair(2, curses.COLOR_YELLOW, -1)
                            curses.init_pair(3, curses.COLOR_CYAN, -1)
-                            curses.init_pair(4, 8, -1)
+                            curses.init_pair(4, 8 if curses.COLORS > 8 else curses.COLOR_WHITE, -1)
                        curses.curs_set(0)
            elif key in {27, ord("q")}:
                # Save plugin changes on exit
@@ -35,6 +35,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_cli import profiles as profiles_mod
+from agent.skill_utils import is_excluded_skill_path

 logger = logging.getLogger(__name__)

@@ -109,8 +110,7 @@ def _collect_skills(profile_dir: Path) -> list[str]:
        return []
    names: list[str] = []
    for md in skills_dir.rglob("SKILL.md"):
-        path_str = str(md)
-        if "/.hub/" in path_str or "/.git/" in path_str:
+        if is_excluded_skill_path(md):
            continue
        try:
            rel = md.relative_to(skills_dir)
@@ -201,7 +201,7 @@ def describe_profile(
    skill_list = "\n".join(f"  - {n}" for n in skill_names) or "  (no skills installed)"
    skill_count = sum(
        1 for _ in (profile_dir / "skills").rglob("SKILL.md")
-        if "/.hub/" not in str(_) and "/.git/" not in str(_)
+        if not is_excluded_skill_path(_)
    ) if (profile_dir / "skills").is_dir() else 0

    # Read model + provider from the profile's config.
@@ -70,6 +70,8 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple

+from agent.skill_utils import is_excluded_skill_path
+

 # ---------------------------------------------------------------------------
 # Constants
@@ -463,7 +465,9 @@ def _count_skills(staged: Path) -> int:
    skills_dir = staged / "skills"
    if not skills_dir.is_dir():
        return 0
-    return sum(1 for _ in skills_dir.rglob("SKILL.md"))
+    return sum(
+        1 for p in skills_dir.rglob("SKILL.md") if not is_excluded_skill_path(p)
+    )


 def plan_install(
@@ -30,6 +30,8 @@ from dataclasses import dataclass
 from pathlib import Path, PurePosixPath, PureWindowsPath
 from typing import List, Optional

+from agent.skill_utils import is_excluded_skill_path
+
 _PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")

 # Directories bootstrapped inside every new profile
@@ -485,8 +487,9 @@ def _count_skills(profile_dir: Path) -> int:
        return 0
    count = 0
    for md in skills_dir.rglob("SKILL.md"):
-        if "/.hub/" not in str(md) and "/.git/" not in str(md):
-            count += 1
+        if is_excluded_skill_path(md):
+            continue
+        count += 1
    return count


@@ -902,7 +905,49 @@ def delete_profile(name: str, yes: bool = False) -> Path:

    # 4. Remove profile directory
    try:
-        shutil.rmtree(profile_dir)
+        def _make_writable(func, path, exc):
+            """onexc/onerror handler: add +w on PermissionError so rmtree can proceed.
+
+            Handles two cases on NixOS (and other systems with read-only
+            copies from immutable stores):
+            1. The path itself isn't writable (e.g. a file with mode 0444)
+            2. The *parent* directory isn't writable (e.g. mode 0555)
+
+            Compatible with both the ``onexc`` API (3.12+, receives an
+            exception instance) and the ``onerror`` API (3.11-, receives
+            ``sys.exc_info()`` tuple).
+            """
+            import stat as _stat
+            import sys as _sys
+
+            # Normalise the two callback signatures:
+            #   onexc(func, path, exc_instance)   — 3.12+
+            #   onerror(func, path, exc_info_tuple) — 3.11
+            if isinstance(exc, tuple):
+                exc = exc[1]  # exc_info → actual exception object
+
+            if isinstance(exc, PermissionError):
+                # Make the path writable
+                try:
+                    os.chmod(path, os.stat(path).st_mode | _stat.S_IWUSR)
+                except OSError:
+                    pass
+                # Also make the parent writable (needed for unlink/rmdir)
+                parent = os.path.dirname(path)
+                if parent:
+                    try:
+                        os.chmod(parent, os.stat(parent).st_mode | _stat.S_IWUSR)
+                    except OSError:
+                        pass
+                func(path)
+            else:
+                raise
+
+        # ``onexc`` was added in 3.12; fall back to ``onerror`` on 3.11.
+        try:
+            shutil.rmtree(profile_dir, onexc=_make_writable)
+        except TypeError:
+            shutil.rmtree(profile_dir, onerror=_make_writable)
        print(f"✓ Removed {profile_dir}")
    except Exception as e:
        print(f"⚠ Could not remove {profile_dir}: {e}")
@@ -100,6 +100,63 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
    return None


+def _host_derived_api_key(base_url: str) -> str:
+    """Look up `<VENDOR>_API_KEY` in the env, derived from the base URL host.
+
+    Examples:
+        https://api.deepseek.com/v1   → DEEPSEEK_API_KEY
+        https://api.groq.com/openai/v1 → GROQ_API_KEY
+        https://api.mistral.ai/v1     → MISTRAL_API_KEY
+        https://generativelanguage.googleapis.com/v1beta/openai/ → GOOGLEAPIS_API_KEY
+
+    Returns the env value (stripped) or "". Never returns env vars whose names
+    are already explicitly checked elsewhere — those are handled by their own
+    host-gated paths (OPENAI/OPENROUTER/OLLAMA).
+
+    The vendor label is the *registrable* portion of the hostname: strip
+    ``api.`` / ``www.`` prefixes, then take the second-to-last label
+    (``api.deepseek.com`` → ``deepseek``). Falls back to "" for hostnames
+    that don't yield a usable vendor label (IPs, loopback, single-label
+    hosts).
+    """
+    hostname = base_url_hostname(base_url)
+    if not hostname:
+        return ""
+    # Reject IPv4 / IPv6 / loopback — no meaningful vendor label.
+    if any(ch.isdigit() for ch in hostname.split(".")[-1]):
+        # Last label contains a digit → likely an IP literal.  Note this
+        # also rejects hosts whose TLD contains a digit (e.g. punycode TLDs
+        # such as "xn--p1ai").
+        return ""
+    if hostname in ("localhost",) or ":" in hostname:
+        return ""
+    labels = [lbl for lbl in hostname.split(".") if lbl]
+    # Strip common API/CDN prefixes.
+    while labels and labels[0] in ("api", "www"):
+        labels.pop(0)
+    if len(labels) < 2:
+        return ""
+    # Take the *registrable* label (second-to-last). For typical provider
+    # hosts this is what users intuitively call "the vendor":
+    #   deepseek.com               → labels[-2] = "deepseek"  ✓
+    #   api.groq.com → groq.com    → labels[-2] = "groq"      ✓
+    #   api.mistral.ai             → labels[-2] = "mistral"   ✓
+    # Crucially, lookalike hosts pick the ATTACKER's label, not the spoofed
+    # vendor:
+    #   api.deepseek.com.attacker.test → labels[-2] = "attacker"
+    # so DEEPSEEK_API_KEY stays put and the chain falls through to
+    # no-key-required. This mirrors how `base_url_host_matches` resists the
+    # same lookalike attack for explicit hosts.
+    # NOTE(review): the final label (TLD) is never inspected, so
+    # ``deepseek.<any-tld>`` also maps to DEEPSEEK_API_KEY, and hosts under
+    # multi-label suffixes (``vendor.co.uk``) map to CO_API_KEY — confirm
+    # this is acceptable for a credential lookup.
+    vendor = labels[-2]
+    # Sanitize to env var charset: A-Z, 0-9, underscore.
+    sanitized = "".join(ch if ch.isalnum() else "_" for ch in vendor).upper()
+    if not sanitized or not sanitized[0].isalpha():
+        return ""
+    # Don't re-derive env vars already handled by explicit host-gated paths.
+    if sanitized in ("OPENAI", "OPENROUTER", "OLLAMA"):
+        return ""
+    env_name = f"{sanitized}_API_KEY"
+    return (os.getenv(env_name, "") or "").strip()
+
+
 def _auto_detect_local_model(base_url: str) -> str:
    """Query a local server for its model name when only one model is loaded."""
    if not base_url:
@@ -471,6 +528,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
                        "api_key": resolved_api_key,
                        "model": entry.get("default_model", ""),
                    }
+                    extra_body = entry.get("extra_body")
+                    if isinstance(extra_body, dict):
+                        result["extra_body"] = dict(extra_body)
                    # The v11→v12 migration writes the API mode under the new
                    # ``transport`` field, but hand-edited configs may still
                    # use the legacy ``api_mode`` spelling.  Accept both —
@@ -496,6 +556,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
                            "api_key": resolved_api_key,
                            "model": entry.get("default_model", ""),
                        }
+                        extra_body = entry.get("extra_body")
+                        if isinstance(extra_body, dict):
+                            result["extra_body"] = dict(extra_body)
                        api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
                        if api_mode:
                            result["api_mode"] = api_mode
@@ -539,6 +602,9 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
            result["key_env"] = key_env
        if provider_key:
            result["provider_key"] = provider_key
+        extra_body = entry.get("extra_body")
+        if isinstance(extra_body, dict):
+            result["extra_body"] = dict(extra_body)
        api_mode = _parse_api_mode(entry.get("api_mode"))
        if api_mode:
            result["api_mode"] = api_mode
@@ -550,6 +616,13 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
    return None


+def _custom_provider_request_overrides(custom_provider: Dict[str, Any]) -> Dict[str, Any]:
+    extra_body = custom_provider.get("extra_body")
+    if not isinstance(extra_body, dict) or not extra_body:
+        return {}
+    return {"extra_body": dict(extra_body)}
+
+
 def _resolve_named_custom_runtime(
    *,
    requested_provider: str,
@@ -582,10 +655,17 @@ def _resolve_named_custom_runtime(
        if pool_result:
            pool_result["source"] = "direct-alias"
            return pool_result
+        _da_is_openai_url   = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
+        _da_is_openrouter   = base_url_host_matches(base_url, "openrouter.ai")
        api_key_candidates = [
            (explicit_api_key or "").strip(),
-            os.getenv("OPENAI_API_KEY", "").strip(),
-            os.getenv("OPENROUTER_API_KEY", "").strip(),
+            # Gate env key fallbacks on authoritative hosts (#28660)
+            (os.getenv("OPENAI_API_KEY", "").strip()     if _da_is_openai_url else ""),
+            (os.getenv("OPENROUTER_API_KEY", "").strip() if _da_is_openrouter  else ""),
+            # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
+            # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
+            # intuitive match without configuring `custom_providers` first.
+            _host_derived_api_key(base_url),
        ]
        api_key = next(
            (c for c in api_key_candidates if has_usable_secret(c)),
@@ -619,14 +699,27 @@ def _resolve_named_custom_runtime(
        model_name = custom_provider.get("model")
        if model_name:
            pool_result["model"] = model_name
+        request_overrides = _custom_provider_request_overrides(custom_provider)
+        if request_overrides:
+            pool_result["request_overrides"] = {
+                **dict(pool_result.get("request_overrides") or {}),
+                **request_overrides,
+            }
        return pool_result

+    _cp_is_openai_url   = base_url_host_matches(base_url, "openai.com") or base_url_host_matches(base_url, "openai.azure.com")
+    _cp_is_openrouter   = base_url_host_matches(base_url, "openrouter.ai")
    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
        os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
-        os.getenv("OPENAI_API_KEY", "").strip(),
-        os.getenv("OPENROUTER_API_KEY", "").strip(),
+        # Gate provider env keys on their authoritative hosts — sending
+        # OPENAI_API_KEY to a local-llm endpoint leaks credentials (#28660).
+        (os.getenv("OPENAI_API_KEY", "").strip()     if _cp_is_openai_url  else ""),
+        (os.getenv("OPENROUTER_API_KEY", "").strip() if _cp_is_openrouter  else ""),
+        # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host as a final
+        # fallback when key_env wasn't set explicitly.
+        _host_derived_api_key(base_url),
    ]
    api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")

@@ -643,6 +736,9 @@ def _resolve_named_custom_runtime(
    # provider name differs from the actual model string the API expects.
    if custom_provider.get("model"):
        result["model"] = custom_provider["model"]
+    request_overrides = _custom_provider_request_overrides(custom_provider)
+    if request_overrides:
+        result["request_overrides"] = request_overrides
    return result


@@ -707,7 +803,15 @@ def _resolve_openrouter_runtime(
    # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
    # provider (issues #420, #560).
    _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
-    if _is_openrouter_url:
+    # Also treat explicitly-configured OpenRouter mirrors/proxies as OpenRouter
+    # for key selection — if the user set OPENROUTER_BASE_URL or requested
+    # provider=openrouter explicitly, OPENROUTER_API_KEY should still be used.
+    _is_openrouter_context = _is_openrouter_url or (
+        requested_norm == "openrouter"
+        and (env_openrouter_base_url or base_url == env_openrouter_base_url)
+        and base_url == (env_openrouter_base_url or "").rstrip("/")
+    )
+    if _is_openrouter_context:
        api_key_candidates = [
            explicit_api_key,
            os.getenv("OPENROUTER_API_KEY"),
@@ -721,13 +825,24 @@ def _resolve_openrouter_runtime(
        # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
        # hostname is a look-alike (ollama.com.attacker.test) must not
        # receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
-        _is_ollama_url = base_url_host_matches(base_url, "ollama.com")
+        _is_ollama_url    = base_url_host_matches(base_url, "ollama.com")
+        _is_openai_url    = base_url_host_matches(base_url, "openai.com")
+        _is_openai_azure  = base_url_host_matches(base_url, "openai.azure.com")
+        # Gate each provider key on its own host — sending OPENAI_API_KEY or
+        # OPENROUTER_API_KEY to an unrelated custom endpoint (DeepSeek, Groq,
+        # Mistral, …) leaks credentials and causes 401s (issue #28660).
+        # Mirrors the OLLAMA_API_KEY host-gate added in GHSA-76xc-57q6-vm5m.
        api_key_candidates = [
            explicit_api_key,
            (cfg_api_key if use_config_base_url else ""),
-            (os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
-            os.getenv("OPENAI_API_KEY"),
-            os.getenv("OPENROUTER_API_KEY"),
+            (os.getenv("OLLAMA_API_KEY")     if _is_ollama_url                       else ""),
+            (os.getenv("OPENAI_API_KEY")     if (_is_openai_url or _is_openai_azure) else ""),
+            (os.getenv("OPENROUTER_API_KEY") if _is_openrouter_url                   else ""),
+            # Bonus (#28660): derive `<VENDOR>_API_KEY` from the host so users
+            # who set DEEPSEEK_API_KEY / GROQ_API_KEY / MISTRAL_API_KEY get the
+            # intuitive match. Helper returns "" for IPs/loopback and for env
+            # vars already handled by the explicit host-gated paths above.
+            _host_derived_api_key(base_url),
        ]
    api_key = next(
        (str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),
@@ -0,0 +1,445 @@
+"""CLI handlers for ``hermes secrets bitwarden ...``.
+
+Subcommands:
+    setup    — interactive wizard: install bws, prompt for token + project, test fetch
+    status   — show current config + binary version + last fetch outcome
+    sync     — run a fetch right now and show what would be applied (dry-run friendly)
+    disable  — flip ``secrets.bitwarden.enabled`` to False
+    install  — just download the bws binary (no token / project required)
+"""
+
+from __future__ import annotations
+
+import argparse
+import getpass
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.table import Table
+
+from agent.secret_sources import bitwarden as bw
+from hermes_cli.config import (
+    get_env_path,
+    load_config,
+    save_config,
+    save_env_value,
+)
+
+
+# ---------------------------------------------------------------------------
+# Argparse wiring — called from hermes_cli.main
+# ---------------------------------------------------------------------------
+
+
+def register_cli(parent_parser: argparse.ArgumentParser) -> None:
+    """Attach the ``bitwarden`` subcommand tree to a parent parser.
+
+    Called from ``hermes_cli.main`` as part of building the top-level
+    ``hermes secrets`` parser.
+
+    Registers ``setup``, ``status``, ``sync``, ``disable`` and ``install``;
+    the chosen name is stored on the namespace as ``secrets_bw_command`` and
+    each subparser sets ``func`` to its handler via ``set_defaults``.
+    """
+    sub = parent_parser.add_subparsers(dest="secrets_bw_command")
+
+    # setup: both options let the wizard skip the matching prompt.
+    setup = sub.add_parser(
+        "setup",
+        help="Interactive wizard: install bws, store access token, pick project",
+    )
+    setup.add_argument(
+        "--project-id",
+        help="Pre-select a project UUID instead of prompting",
+    )
+    setup.add_argument(
+        "--access-token",
+        help="Provide the access token non-interactively (will be stored in .env)",
+    )
+    setup.set_defaults(func=cmd_setup)
+
+    status = sub.add_parser("status", help="Show config + binary + last fetch")
+    status.set_defaults(func=cmd_status)
+
+    # sync: dry-run unless --apply is passed.
+    sync = sub.add_parser("sync", help="Fetch secrets now and report what changed")
+    sync.add_argument(
+        "--apply",
+        action="store_true",
+        help="Actually export the secrets into the current shell's env (default: dry-run)",
+    )
+    sync.set_defaults(func=cmd_sync)
+
+    disable = sub.add_parser("disable", help="Turn off the Bitwarden integration")
+    disable.set_defaults(func=cmd_disable)
+
+    # install: the help text embeds the pinned version constant from the
+    # bitwarden secret-source module.
+    install = sub.add_parser(
+        "install",
+        help=f"Download and verify the pinned bws binary (v{bw._BWS_VERSION})",
+    )
+    install.add_argument(
+        "--force",
+        action="store_true",
+        help="Re-download even if a managed copy already exists",
+    )
+    install.set_defaults(func=cmd_install)
+
+
+# ---------------------------------------------------------------------------
+# Handlers
+# ---------------------------------------------------------------------------
+
+
+def cmd_setup(args: argparse.Namespace) -> int:
+    """Interactive wizard behind ``hermes secrets bitwarden setup``.
+
+    Steps: locate or install the bws binary, obtain the access token
+    (``--access-token`` or a hidden prompt), choose a project
+    (``--project-id`` or an interactive pick from the listed projects), run
+    an uncached test fetch, then persist the ``secrets.bitwarden`` settings
+    with ``save_config``.
+
+    Returns:
+        0 on success; 1 when the binary cannot be installed, the token is
+        empty, project listing returns None or no projects, or the test
+        fetch raises.
+    """
+    console = Console()
+    console.print(
+        Panel.fit(
+            "[bold]Bitwarden Secrets Manager setup[/bold]\n\n"
+            "Need an access token? In the Bitwarden web app:\n"
+            "  Secrets Manager → Machine accounts → [your account] →\n"
+            "  Access tokens → Create access token\n\n"
+            "Copy the token (starts with [cyan]0.[/cyan]…) — it cannot be retrieved later.",
+            border_style="cyan",
+        )
+    )
+
+    # ------------------------------------------------------------------ binary
+    console.print()
+    console.print("[bold]Step 1[/bold]  Install the bws CLI")
+    try:
+        binary = bw.find_bws(install_if_missing=False)
+        if binary is None:
+            console.print("  No bws on PATH — downloading…")
+            binary = bw.install_bws()
+        version = _bws_version(binary)
+        console.print(f"  [green]✓[/green] {binary}  ({version})")
+    except Exception as exc:  # noqa: BLE001
+        console.print(f"  [red]✗ Could not install bws: {exc}[/red]")
+        console.print(
+            "  Manual install: "
+            "https://github.com/bitwarden/sdk-sm/releases"
+        )
+        return 1
+
+    # ------------------------------------------------------------------- token
+    console.print()
+    console.print("[bold]Step 2[/bold]  Provide your access token")
+    cfg = load_config()
+    # setdefault creates the nested sections in ``cfg`` so the values written
+    # at the end of the wizard land in the dict passed to save_config.
+    secrets_cfg = (cfg.setdefault("secrets", {})
+                     .setdefault("bitwarden", {}))
+    token_env = secrets_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
+
+    token = (args.access_token or "").strip()
+    if not token:
+        token = getpass.getpass(f"  Paste access token ({token_env}): ").strip()
+    if not token:
+        console.print("  [red]Empty token, aborting.[/red]")
+        return 1
+    if not token.startswith("0."):
+        console.print(
+            "  [yellow]Warning: token doesn't start with '0.' — usually that means "
+            "you pasted something other than a BSM access token.  Continuing anyway.[/yellow]"
+        )
+
+    # NOTE(review): the token is written to .env before the test fetch runs,
+    # so it stays stored even when a later step returns 1 — confirm intended.
+    save_env_value(token_env, token)
+    os.environ[token_env] = token  # so the test fetch below sees it
+    console.print(f"  [green]✓[/green] stored in {get_env_path()} as {token_env}")
+
+    # ------------------------------------------------------------------- project
+    if args.project_id and args.project_id.strip():
+        project_id = args.project_id.strip()
+    else:
+        console.print()
+        console.print("[bold]Step 3[/bold]  Pick a project")
+        project_id = ""
+        projects = _list_projects(binary, token, console)
+        if projects is None:
+            return 1
+        if not projects:
+            console.print("  [yellow]No projects visible to this machine account.[/yellow]")
+            console.print(
+                "  In the Bitwarden web app, open the machine account → Projects tab "
+                "and grant it access to at least one project."
+            )
+            return 1
+
+        table = Table(show_header=True, header_style="bold")
+        table.add_column("#", style="cyan", width=4)
+        table.add_column("Name")
+        table.add_column("ID", style="dim")
+        for i, p in enumerate(projects, 1):
+            table.add_row(str(i), p.get("name", "?"), p.get("id", "?"))
+        console.print(table)
+
+        # Re-prompt until a 1-based index inside the table is entered.
+        while True:
+            choice = console.input(f"  Select project [1-{len(projects)}]: ").strip()
+            if not choice:
+                continue
+            try:
+                idx = int(choice)
+            except ValueError:
+                console.print("  [red]Enter a number.[/red]")
+                continue
+            if 1 <= idx <= len(projects):
+                project_id = projects[idx - 1]["id"]
+                break
+            console.print(f"  [red]Out of range — pick 1-{len(projects)}.[/red]")
+
+    # ------------------------------------------------------------------- test
+    console.print()
+    # The project prompt is "Step 3" only when it was shown; keep numbering
+    # contiguous when --project-id skipped it.
+    step_num = 4 if not (args.project_id and args.project_id.strip()) else 3
+    console.print(f"[bold]Step {step_num}[/bold]  Test fetch")
+    try:
+        secrets, warnings = bw.fetch_bitwarden_secrets(
+            access_token=token,
+            project_id=project_id,
+            binary=binary,
+            use_cache=False,
+        )
+    except Exception as exc:  # noqa: BLE001
+        console.print(f"  [red]✗ Fetch failed: {exc}[/red]")
+        return 1
+
+    if not secrets:
+        console.print("  [yellow]Fetch succeeded but the project has no secrets.[/yellow]")
+    else:
+        table = Table(show_header=True, header_style="bold")
+        table.add_column("Name", style="cyan")
+        table.add_column("Status")
+        for key in sorted(secrets):
+            if key == token_env:
+                status = "[dim]bootstrap token — never overrides itself[/dim]"
+            elif os.environ.get(key):
+                # NOTE(review): this label is shown regardless of the
+                # configured ``override_existing`` value — confirm the
+                # wording when an existing config has it set to False.
+                status = "[yellow]already set in env (will be overwritten)[/yellow]"
+            else:
+                status = "[green]new[/green]"
+            table.add_row(key, status)
+        console.print(table)
+    for w in warnings:
+        console.print(f"  [yellow]warning:[/yellow] {w}")
+
+    # ------------------------------------------------------------------- save
+    # enabled/project_id are always overwritten; the remaining keys keep any
+    # value already present in the config (setdefault).
+    secrets_cfg["enabled"] = True
+    secrets_cfg["project_id"] = project_id
+    secrets_cfg.setdefault("access_token_env", token_env)
+    secrets_cfg.setdefault("cache_ttl_seconds", 300)
+    secrets_cfg.setdefault("override_existing", True)
+    secrets_cfg.setdefault("auto_install", True)
+    save_config(cfg)
+
+    console.print()
+    console.print(
+        "[green]✓ Bitwarden Secrets Manager is enabled.[/green]  "
+        "Secrets will be pulled at the start of every Hermes process."
+    )
+    console.print(
+        "  Status:  [cyan]hermes secrets bitwarden status[/cyan]\n"
+        "  Refresh: [cyan]hermes secrets bitwarden sync[/cyan]\n"
+        "  Disable: [cyan]hermes secrets bitwarden disable[/cyan]"
+    )
+    return 0
+
+
+def cmd_status(args: argparse.Namespace) -> int:
+    """Print a panel summarising the Bitwarden settings; always returns 0."""
+    console = Console()
+    bw_cfg = (load_config().get("secrets") or {}).get("bitwarden") or {}
+
+    enabled = bool(bw_cfg.get("enabled"))
+    token_env = bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
+    project_id = bw_cfg.get("project_id", "")
+    token_set = bool(os.environ.get(token_env))
+
+    table = Table(show_header=False, box=None, padding=(0, 2))
+    table.add_column("", style="bold")
+    table.add_column("")
+    rows = [
+        ("Enabled", _yn(enabled)),
+        ("Token env var", token_env),
+        ("Token in env", _yn(token_set)),
+        ("Project ID", project_id or "[dim](unset)[/dim]"),
+        ("Override existing", _yn(bool(bw_cfg.get("override_existing", False)))),
+        ("Cache TTL (s)", str(bw_cfg.get("cache_ttl_seconds", 300))),
+        ("Auto-install", _yn(bool(bw_cfg.get("auto_install", True)))),
+    ]
+    for label, value in rows:
+        table.add_row(label, value)
+
+    # Only look the binary up; install_if_missing=False is passed explicitly.
+    binary = bw.find_bws(install_if_missing=False)
+    found = f"{binary} ({_bws_version(binary)})" if binary else "[yellow]not installed[/yellow]"
+    table.add_row("bws binary", found)
+
+    console.print(Panel(table, title="Bitwarden Secrets Manager", border_style="cyan"))
+    if not enabled:
+        console.print("\n  Run [cyan]hermes secrets bitwarden setup[/cyan] to enable.")
+        return 0
+    if not token_set:
+        console.print(
+            f"\n  [yellow]Enabled but {token_env} is not set — Hermes will skip BSM "
+            "and warn on next startup.[/yellow]"
+        )
+    if not project_id:
+        console.print("\n  [yellow]Enabled but no project_id — nothing to fetch.[/yellow]")
+    return 0
+
+
+def cmd_sync(args: argparse.Namespace) -> int:
+    """Fetch secrets with ``use_cache=False``; preview them, or export with ``--apply``.
+
+    Returns 1 when the integration is disabled, misconfigured, or the fetch fails.
+    """
+    console = Console()
+    bw_cfg = (load_config().get("secrets") or {}).get("bitwarden") or {}
+    if not bw_cfg.get("enabled"):
+        console.print(
+            "[yellow]Bitwarden integration is disabled.  Run "
+            "`hermes secrets bitwarden setup` first.[/yellow]"
+        )
+        return 1
+
+    token_env = bw_cfg.get("access_token_env", "BWS_ACCESS_TOKEN")
+    token = os.environ.get(token_env, "").strip()
+    if not token:
+        console.print(f"[red]{token_env} is not set.[/red]")
+        return 1
+
+    project_id = bw_cfg.get("project_id", "")
+    if not project_id:
+        console.print("[red]No project_id configured.[/red]")
+        return 1
+
+    try:
+        secrets, warnings = bw.fetch_bitwarden_secrets(
+            access_token=token, project_id=project_id, use_cache=False
+        )
+    except Exception as exc:  # noqa: BLE001
+        console.print(f"[red]Fetch failed: {exc}[/red]")
+        return 1
+    if not secrets:
+        console.print("[yellow]No secrets in project.[/yellow]")
+        return 0
+
+    override = bool(bw_cfg.get("override_existing", False)) or args.apply  # --apply implies override
+    table = Table(show_header=True, header_style="bold")
+    table.add_column("Name", style="cyan")
+    table.add_column("Action")
+    applied = 0
+    for name in sorted(secrets):
+        if name == token_env:
+            table.add_row(name, "[dim]skip (bootstrap token)[/dim]")
+            continue
+        was_set = bool(os.environ.get(name))
+        if was_set and not override:
+            action = "[dim]skip (already set)[/dim]"
+        elif args.apply:
+            os.environ[name] = secrets[name]
+            applied += 1
+            action = "[green]exported[/green]" + (" (overrode)" if was_set else "")
+        else:
+            action = "[green]would export[/green]" + (" (overrides)" if was_set else "")
+        table.add_row(name, action)
+
+    console.print(table)
+    for note in warnings:
+        console.print(f"[yellow]warning:[/yellow] {note}")
+
+    if args.apply:
+        console.print(f"\n  [green]Exported {applied} secret(s) into current process.[/green]")
+    else:
+        console.print(
+            "\n  This was a dry-run — secrets are picked up automatically on the "
+            "next [cyan]hermes[/cyan] invocation.  Re-run with [cyan]--apply[/cyan] "
+            "to export into the current shell instead."
+        )
+    return 0
+
+
+def cmd_disable(args: argparse.Namespace) -> int:
+    """Save ``secrets.bitwarden.enabled = False`` to the config; returns 0."""
+    console = Console()
+    cfg = load_config()
+    # setdefault creates the nested sections when the config lacks them.
+    cfg.setdefault("secrets", {}).setdefault("bitwarden", {})["enabled"] = False
+    save_config(cfg)
+    console.print(
+        "[green]Disabled.[/green]  Bitwarden secrets will NOT be pulled on the next "
+        "Hermes invocation.\n"
+        "  Your access token is left in .env — remove it manually if you also want "
+        "to revoke the credential."
+    )
+    return 0
+
+
+def cmd_install(args: argparse.Namespace) -> int:
+    console = Console()
+    try:
+        path = bw.install_bws(force=bool(args.force))  # args.force is coerced to a plain bool
+        console.print(f"[green]✓[/green] {path}  ({_bws_version(path)})")  # installed path + version
+        return 0  # success exit code
+    except Exception as exc:  # noqa: BLE001
+        console.print(f"[red]Install failed: {exc}[/red]")  # any failure in the try lands here
+        return 1  # failure exit code
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _yn(b: bool) -> str:  # Rich-markup "yes"/"no" cell for a truthy/falsy flag
+    return "[dim]no[/dim]" if not b else "[green]yes[/green]"
+
+
+def _bws_version(binary: Path) -> str:
+    """Return the first line ``<binary> --version`` prints, else "version unknown"."""
+    try:
+        res = subprocess.run(
+            [str(binary), "--version"], capture_output=True, text=True, timeout=5,
+        )
+        # Blank output must not be indexed: the IndexError would escape the except below.
+        lines = (res.stdout or res.stderr or "").strip().splitlines()
+        if res.returncode == 0 and lines:
+            return lines[0]
+    except (OSError, subprocess.TimeoutExpired):
+        pass
+    return "version unknown"
+
+
+def _list_projects(
+    binary: Path, token: str, console: Console
+) -> Optional[List[dict]]:
+    """Run ``bws project list --output json`` and return the project dicts.
+
+    Failures are printed to *console* and yield ``None``; a non-list payload
+    yields ``[]``, and entries that are not dicts or lack a truthy ``id`` are dropped.
+    """
+    # The token is handed to the child through its environment, not its argv.
+    env = {**os.environ, "BWS_ACCESS_TOKEN": token}
+    env.setdefault("NO_COLOR", "1")
+    command = [str(binary), "project", "list", "--output", "json"]
+    try:
+        proc = subprocess.run(command, env=env, capture_output=True, text=True, timeout=15)
+    except (OSError, subprocess.TimeoutExpired) as exc:
+        console.print(f"  [red]Couldn't list projects: {exc}[/red]")
+        return None
+
+    if proc.returncode != 0:
+        err = (proc.stderr or proc.stdout).strip()[:300]
+        console.print(f"  [red]bws project list failed: {err}[/red]")
+        lowered = err.lower()
+        if "authorization" in lowered or "invalid" in lowered:
+            console.print(
+                "  [yellow]This usually means the access token is wrong or revoked. "
+                "Double-check it in the Bitwarden web app.[/yellow]"
+            )
+        return None
+
+    try:
+        data = json.loads(proc.stdout or "[]")
+    except json.JSONDecodeError as exc:
+        console.print(f"  [red]bws returned non-JSON: {exc}[/red]")
+        return None
+    if isinstance(data, list):
+        return [p for p in data if isinstance(p, dict) and p.get("id")]
+    return []
@@ -23,6 +23,7 @@ from rich.table import Table
 # Lazy imports to avoid circular dependencies and slow startup.
 # tools.skills_hub and tools.skills_guard are imported inside functions.
 from hermes_constants import display_hermes_home
+from agent.skill_utils import is_excluded_skill_path

 _console = Console()

@@ -178,9 +179,12 @@ def _existing_categories() -> List[str]:
            # top level (no category); otherwise treat as a category bucket.
            if (entry / "SKILL.md").exists():
                continue
-            # Has at least one nested SKILL.md?
+            # Has at least one nested SKILL.md (excluding dependency/cache dirs)?
            try:
-                if any(entry.rglob("SKILL.md")):
+                if any(
+                    not is_excluded_skill_path(p)
+                    for p in entry.rglob("SKILL.md")
+                ):
                    out.append(entry.name)
            except OSError:
                continue
@@ -319,12 +323,14 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
        c.print("[dim]No skills found in the Skills Hub.[/]\n")
        return

-    # Deduplicate by name, preferring higher trust
+    # Deduplicate by identifier, preferring higher trust.
+    # identifier is always unique per skill; name is not (browse-sh skills from different
+    # sites can share the same task name, e.g. "search-listings" on Airbnb and Booking.com).
    seen: dict = {}
    for r in all_results:
        rank = _TRUST_RANK.get(r.trust_level, 0)
-        if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0):
-            seen[r.name] = r
+        if r.identifier not in seen or rank > _TRUST_RANK.get(seen[r.identifier].trust_level, 0):
+            seen[r.identifier] = r
    deduped = list(seen.values())

    # Sort: official first, then by trust level (desc), then alphabetically
@@ -702,8 +708,8 @@ def browse_skills(page: int = 1, page_size: int = 20, source: str = "all") -> di
    seen: dict = {}
    for r in all_results:
        rank = _TRUST_RANK.get(r.trust_level, 0)
-        if r.name not in seen or rank > _TRUST_RANK.get(seen[r.name].trust_level, 0):
-            seen[r.name] = r
+        if r.identifier not in seen or rank > _TRUST_RANK.get(seen[r.identifier].trust_level, 0):
+            seen[r.identifier] = r
    deduped = list(seen.values())
    deduped.sort(key=lambda r: (-_TRUST_RANK.get(r.trust_level, 0), r.source != "official", r.name.lower()))
    total = len(deduped)
@@ -78,6 +78,7 @@ CONFIGURABLE_TOOLSETS = [
    ("discord_admin",   "🛡️  Discord Server Admin",    "list channels/roles, pin, assign roles"),
    ("yuanbao",          "🤖 Yuanbao",                  "group info, member queries, DM"),
    ("computer_use",     "🖱️  Computer Use (macOS)",     "background desktop control via cua-driver"),
+    ("app_tools",        "🔌 App Integrations (500+)",   "Gmail, Slack, GitHub, Jira, Notion, etc. via Nous tool gateway"),
 ]

 # Toolsets that are OFF by default for new installs.
@@ -311,6 +312,16 @@ TOOL_CATEGORIES = {
    "image_gen": {
        "name": "Image Generation",
        "icon": "🎨",
+        # Per-provider rows for FAL.ai (`plugins/image_gen/fal`), OpenAI,
+        # OpenAI Codex, and xAI are injected at runtime from each
+        # ``plugins.image_gen.<vendor>`` package via
+        # ``_plugin_image_gen_providers()`` in ``_visible_providers``.
+        # Only non-provider UX setup-flow rows remain here:
+        #   - "Nous Subscription" — managed FAL billed via the Nous
+        #     subscription (requires_nous_auth + override_env_vars).
+        #     Uses the fal plugin as the underlying backend but has a
+        #     distinct setup UX.
+        # Mirrors the shape browser/video_gen ship today.
        "providers": [
            {
                "name": "Nous Subscription",
@@ -322,15 +333,6 @@ TOOL_CATEGORIES = {
                "override_env_vars": ["FAL_KEY"],
                "imagegen_backend": "fal",
            },
-            {
-                "name": "FAL.ai",
-                "badge": "paid",
-                "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
-                "env_vars": [
-                    {"key": "FAL_KEY", "prompt": "FAL API key", "url": "https://fal.ai/dashboard/keys"},
-                ],
-                "imagegen_backend": "fal",
-            },
        ],
    },
    "video_gen": {
@@ -482,6 +484,11 @@ TOOLSET_ENV_REQUIREMENTS = {
 # ─── Post-Setup Hooks ─────────────────────────────────────────────────────────


+def _cua_driver_cmd() -> str:
+    """Command for cua-driver: ``HERMES_CUA_DRIVER_CMD`` if non-blank, else ``cua-driver``."""
+    return (os.getenv("HERMES_CUA_DRIVER_CMD") or "").strip() or "cua-driver"
+
+
 def _pip_install(
    args: List[str],
    *,
@@ -550,6 +557,55 @@ def _pip_install(
    )


+
+def _check_cua_driver_asset_for_arch() -> bool:
+    """Decide whether a cua-driver install is worth attempting on this machine.
+
+    ``arm64`` is accepted without a lookup.  For any other machine type the
+    latest ``trycua/cua`` GitHub release is fetched and its asset names are
+    searched for ``x86_64`` or ``amd64``.
+
+    Returns False, after printing a warning, only when the lookup succeeds
+    and no such asset is listed, so callers can skip the install attempt and
+    avoid a raw 404.  Returns True in every other case, including when the
+    lookup itself fails.
+    """
+    import platform as _plat
+    import urllib.request
+
+    if _plat.machine() == "arm64":
+        # arm64 (Apple Silicon) assets are always published.
+        return True
+
+    # Anything else: ask GitHub what the latest release actually ships
+    # before falling through to the upstream installer.
+    api_url = "https://api.github.com/repos/trycua/cua/releases/latest"
+    try:
+        req = urllib.request.Request(api_url, headers={"Accept": "application/vnd.github+json"})
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            release = _json.loads(resp.read().decode())
+        tag = release.get("tag_name", "")
+        for a_info in release.get("assets", []):
+            asset_name = a_info.get("name", "").lower()
+            if "x86_64" in asset_name or "amd64" in asset_name:
+                return True
+        # No matching asset: warn and tell the caller to skip the install.
+        _print_warning(
+            f"    Latest CUA release ({tag}) has no Intel (x86_64) asset."
+        )
+        _print_info(
+            "    CUA Driver currently only ships Apple Silicon builds."
+        )
+        _print_info(
+            "    See: https://github.com/trycua/cua/issues/1493"
+        )
+        return False
+    except Exception:
+        # Network / API failure — proceed and let the installer handle it.
+        pass
+    return True
+
+
 def install_cua_driver(upgrade: bool = False) -> bool:
    """Install or refresh the cua-driver binary used by Computer Use.

@@ -579,7 +635,8 @@ def install_cua_driver(upgrade: bool = False) -> bool:
        _print_warning("    Computer Use (cua-driver) is macOS-only; skipping.")
        return False

-    binary = shutil.which("cua-driver")
+    driver_cmd = _cua_driver_cmd()
+    binary = shutil.which(driver_cmd)

    # Not installed → fresh install path (only when caller asked for it).
    if not binary and not upgrade:
@@ -587,18 +644,20 @@ def install_cua_driver(upgrade: bool = False) -> bool:
            _print_warning("    curl not found — install manually:")
            _print_info("      https://github.com/trycua/cua/blob/main/libs/cua-driver/README.md")
            return False
+        if not _check_cua_driver_asset_for_arch():
+            return False
        return _run_cua_driver_installer(label="Installing")

    # Already installed and caller didn't ask to upgrade → just confirm.
    if binary and not upgrade:
        try:
            version = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
-            _print_success(f"    cua-driver already installed: {version or 'unknown version'}")
+            _print_success(f"    {driver_cmd} already installed: {version or 'unknown version'}")
        except Exception:
-            _print_success("    cua-driver already installed.")
+            _print_success(f"    {driver_cmd} already installed.")
        _print_info("    Grant macOS permissions if not done yet:")
        _print_info("      System Settings > Privacy & Security > Accessibility")
        _print_info("      System Settings > Privacy & Security > Screen Recording")
@@ -609,11 +668,14 @@ def install_cua_driver(upgrade: bool = False) -> bool:
        _print_warning("    curl not found — cannot refresh cua-driver.")
        return bool(binary)

+    if not _check_cua_driver_asset_for_arch():
+        return bool(binary)
+
    if binary:
        # Show before/after version when we have a baseline. Best-effort.
        try:
            before = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
        except Exception:
@@ -625,13 +687,13 @@ def install_cua_driver(upgrade: bool = False) -> bool:
    if ok and before:
        try:
            after = subprocess.run(
-                ["cua-driver", "--version"],
+                [driver_cmd, "--version"],
                capture_output=True, text=True, timeout=5,
            ).stdout.strip()
            if after and after != before:
-                _print_success(f"    cua-driver upgraded: {before} → {after}")
+                _print_success(f"    {driver_cmd} upgraded: {before} → {after}")
            elif after:
-                _print_info(f"    cua-driver up to date: {after}")
+                _print_info(f"    {driver_cmd} up to date: {after}")
        except Exception:
            pass
    return ok
@@ -655,11 +717,12 @@ def _run_cua_driver_installer(label: str = "Installing", verbose: bool = True) -
        _print_info(f"    {label} cua-driver (macOS background computer-use)...")
    else:
        _print_info(f"    {label} cua-driver...")
+    driver_cmd = _cua_driver_cmd()
    try:
        result = subprocess.run(install_cmd, shell=True, timeout=300)
-        if result.returncode == 0 and shutil.which("cua-driver"):
+        if result.returncode == 0 and shutil.which(driver_cmd):
            if verbose:
-                _print_success("    cua-driver installed.")
+                _print_success(f"    {driver_cmd} installed.")
                _print_info("    IMPORTANT — grant macOS permissions now:")
                _print_info("      System Settings > Privacy & Security > Accessibility")
                _print_info("      System Settings > Privacy & Security > Screen Recording")
@@ -1506,12 +1569,9 @@ def _plugin_image_gen_providers() -> list[dict]:
    Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
    row but carries an ``image_gen_plugin_name`` marker so downstream
    code (config writing, model picker) knows to route through the
-    plugin registry instead of the in-tree FAL backend.
-
-    FAL is skipped — it's already exposed by the hardcoded
-    ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
-    a plugin in a follow-up PR, the hardcoded entries go away and this
-    function surfaces it alongside OpenAI automatically.
+    plugin registry. Every image-gen backend is a plugin now — there
+    are no hardcoded rows left in ``TOOL_CATEGORIES["image_gen"]`` for
+    this function to dedupe against (see issue #26241).
    """
    try:
        from agent.image_gen_registry import list_providers
@@ -1524,9 +1584,6 @@ def _plugin_image_gen_providers() -> list[dict]:

    rows: list[dict] = []
    for provider in providers:
-        if getattr(provider, "name", None) == "fal":
-            # FAL has its own hardcoded rows today.
-            continue
        try:
            schema = provider.get_setup_schema()
        except Exception:
@@ -1751,7 +1808,7 @@ _POST_SETUP_INSTALLED: dict = {
    # entry when (a) the post_setup is the ONLY install side-effect for
    # a no-key provider, and (b) an installed-state check is cheap and
    # doesn't trigger a heavy import.
-    "cua_driver": lambda: bool(shutil.which("cua-driver")),
+    "cua_driver": lambda: bool(shutil.which(_cua_driver_cmd())),
 }


@@ -975,11 +975,13 @@ _AUX_TASK_SLOTS: Tuple[str, ...] = (
    "vision",
    "web_extract",
    "compression",
-    "session_search",
    "skills_hub",
    "approval",
    "mcp",
    "title_generation",
+    "triage_specifier",
+    "kanban_decomposer",
+    "profile_describer",
    "curator",
 )

@@ -0,0 +1,253 @@
+"""Detect xAI models retired on May 15, 2026.
+
+Source: https://docs.x.ai/developers/migration/may-15-retirement
+
+Pure logic: walks a Hermes config dict, returns issues for any reference
+to a retired xAI model. No I/O, no CLI dependencies — testable in isolation
+and reusable from both `hermes doctor` and a future `hermes migrate xai`.
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+
+MIGRATION_GUIDE_URL = "https://docs.x.ai/developers/migration/may-15-retirement"
+RETIREMENT_DATE = "May 15, 2026"
+
+
+# Official mapping per xAI migration guide.
+# Some entries set ``reasoning_effort`` because non-reasoning variants don't
+# have a one-to-one replacement: ``grok-4.3`` reasons by default, so emulating
+# ``*-non-reasoning`` behavior on it requires ``reasoning_effort="none"``.
+_RETIRED_MODELS: Dict[str, Dict[str, Optional[str]]] = {
+    "grok-4-0709":                  {"replacement": "grok-4.3", "reasoning_effort": None,  "note": None},
+    "grok-4-fast-reasoning":        {"replacement": "grok-4.3", "reasoning_effort": None,  "note": None},
+    "grok-4-fast-non-reasoning":    {"replacement": "grok-4.3", "reasoning_effort": "none", "note": None},
+    "grok-4-1-fast-reasoning":      {"replacement": "grok-4.3", "reasoning_effort": None,  "note": None},
+    "grok-4-1-fast-non-reasoning":  {"replacement": "grok-4.3", "reasoning_effort": "none", "note": None},
+    "grok-code-fast-1":             {"replacement": "grok-4.3", "reasoning_effort": None,  "note": None},
+    "grok-3":                       {"replacement": "grok-4.3", "reasoning_effort": None,  "note": None},
+    "grok-imagine-image-pro":       {"replacement": "grok-imagine-image-quality", "reasoning_effort": None, "note": None},
+}
+
+
+@dataclass(frozen=True)
+class RetirementIssue:
+    """A reference to a retired xAI model found in a Hermes config."""
+
+    config_path: str            # dotted slot path, e.g. "principal.model" or "auxiliary.vision.model"
+    current_model: str          # exact value found in config (preserves casing/prefix)
+    replacement: str            # recommended xAI replacement
+    reasoning_effort: Optional[str] = None  # e.g. "none" when migrating off a *-non-reasoning variant
+    note: Optional[str] = None  # disambiguation note when applicable
+
+
+def _normalize(model_id: str) -> str:
+    """Trim and lowercase *model_id*, then strip at most one provider prefix.
+
+    Recognised prefixes are ``x-ai/`` and ``xai/`` (``x-ai/grok-4`` → ``grok-4``).
+    """
+    lowered = model_id.strip().lower()
+    matched = next((p for p in ("x-ai/", "xai/") if lowered.startswith(p)), "")
+    return lowered[len(matched):]
+
+
+def _looks_like_xai(model_id: Optional[str]) -> bool:
+    """True for a non-blank string whose normalized form starts with ``grok-``."""
+    is_text = isinstance(model_id, str) and bool(model_id.strip())
+    return is_text and _normalize(model_id).startswith("grok-")
+
+
+def find_retired_xai_refs(config: Dict[str, Any]) -> List[RetirementIssue]:
+    """Collect a ``RetirementIssue`` for each retired xAI model found in *config*.
+
+    Slots scanned, in the order issues are reported:
+      - ``principal.model``
+      - ``auxiliary.<any>.model`` (every mapping-valued slot, so future aux
+        slots are covered without changes here)
+      - ``delegation.model``
+      - ``tts.xai.model``
+      - ``plugins.image_gen.xai.model``
+
+    A non-dict *config*, missing sections, and nodes that are not dicts are
+    skipped rather than raising.  Model names are matched after
+    ``_normalize``; the issue keeps the value exactly as written.
+    """
+    issues: List[RetirementIssue] = []
+    if not isinstance(config, dict):
+        return issues
+
+    def _section(*keys: str) -> Optional[dict]:
+        """Follow *keys* down from the root; None unless every node is a dict."""
+        node: Any = config
+        for key in keys:
+            # A missing key yields None, which fails the dict check below.
+            node = node.get(key)
+            if not isinstance(node, dict):
+                return None
+        return node
+
+    def _check(path: str, holder: Optional[dict]) -> None:
+        """Append an issue when ``holder["model"]`` names a retired model."""
+        model = holder.get("model") if holder is not None else None
+        if not _looks_like_xai(model):
+            return
+        # ``model`` is a str from here on: _looks_like_xai rejects anything else.
+        entry = _RETIRED_MODELS.get(_normalize(model))
+        if entry is None:
+            return
+        issues.append(RetirementIssue(
+            config_path=path,
+            current_model=model,
+            replacement=entry["replacement"],
+            reasoning_effort=entry.get("reasoning_effort"),
+            note=entry.get("note"),
+        ))
+
+    # Report order follows the docstring: principal, auxiliary, delegation, tts, plugins.
+    _check("principal.model", _section("principal"))
+
+    # Auxiliary slot names are discovered from the config, not hard-coded.
+    for slot_name, slot_cfg in (_section("auxiliary") or {}).items():
+        if isinstance(slot_cfg, dict):
+            _check(f"auxiliary.{slot_name}.model", slot_cfg)
+
+    _check("delegation.model", _section("delegation"))
+    _check("tts.xai.model", _section("tts", "xai"))
+    _check("plugins.image_gen.xai.model", _section("plugins", "image_gen", "xai"))
+
+    return issues
+
+
+def format_issue(issue: RetirementIssue) -> str:
+    """Render *issue* as one line: slot path, old → new model, then any hints."""
+    rendered = f"{issue.config_path}: {issue.current_model!r} → use {issue.replacement!r}"
+    # Each hint is appended only when the corresponding field is truthy.
+    if issue.reasoning_effort:
+        rendered += f' (set reasoning_effort: "{issue.reasoning_effort}")'
+    if issue.note:
+        rendered += f" [note: {issue.note}]"
+
+    return rendered
+
+
+# ---------------------------------------------------------------------------
+# Apply migration to config.yaml (round-trip preserves comments/order/types)
+# ---------------------------------------------------------------------------
+
+import datetime as _dt
+from pathlib import Path
+import shutil
+
+
+@dataclass(frozen=True)
+class ApplyResult:
+    """Outcome of an apply_migration call."""
+
+    file_path: Path  # the config file that was targeted
+    backup_path: Optional[Path]  # backup copy made before the rewrite; None if none was made
+    issues_resolved: List[RetirementIssue]  # issues actually applied to the document
+    config_changed: bool  # True only when the file was rewritten
+
+
+def _walk_to_parent(yaml_doc: Any, dotted_path: str) -> "tuple[Any, str]":
+    """Resolve a dotted slot path to (parent_mapping, leaf_key).
+
+    Example: "auxiliary.vision.model" -> (yaml_doc["auxiliary"]["vision"], "model").
+    Raises KeyError unless every node down to the parent exists and is a mapping.
+    """
+    *parents, leaf = dotted_path.split(".")
+    if not parents:
+        raise ValueError(f"Path must have at least one parent: {dotted_path!r}")
+    node = yaml_doc
+    for segment in parents:
+        node = node.get(segment) if isinstance(node, dict) else None
+        if not isinstance(node, dict):
+            raise KeyError(f"Path segment {segment!r} missing or not a mapping in {dotted_path!r}")
+    return node, leaf
+
+
+def apply_migration(
+    config_path: Path,
+    issues: List[RetirementIssue],
+    backup: bool = True,
+) -> ApplyResult:
+    """Rewrite ``config_path`` in-place so each issue is resolved.
+
+    For every issue, the model name is replaced by ``issue.replacement``. If the
+    issue has ``reasoning_effort`` set (i.e. the migration is from a
+    ``*-non-reasoning`` variant), a sibling ``reasoning_effort`` key is added
+    or updated alongside the model.
+
+    Uses ``ruamel.yaml`` round-trip mode so comments, key order, indentation,
+    and type literals (booleans, ints) are preserved.
+
+    A backup copy is written to
+    ``<config_path>.bak-pre-migrate-xai-YYYYMMDD-HHMMSS`` before rewriting,
+    unless ``backup=False``.
+
+    Issues whose slot can no longer be reached through mappings, or whose
+    path is malformed, are skipped.
+
+    Returns an ``ApplyResult``; ``config_changed`` is False (and nothing is
+    written or backed up) when ``issues`` is empty, the document is empty,
+    or no issue could be applied.
+
+    Raises ``FileNotFoundError`` if ``config_path`` does not exist.
+    """
+    from ruamel.yaml import YAML  # local import — avoid hard dep at module load
+
+    config_path = Path(config_path)
+    if not config_path.exists():
+        raise FileNotFoundError(config_path)
+
+    unchanged = ApplyResult(
+        file_path=config_path,
+        backup_path=None,
+        issues_resolved=[],
+        config_changed=False,
+    )
+    if not issues:
+        return unchanged
+
+    yaml = YAML(typ="rt")
+    yaml.preserve_quotes = True
+    with config_path.open("r", encoding="utf-8") as fh:
+        doc = yaml.load(fh)
+    if doc is None:
+        return unchanged
+
+    resolved: List[RetirementIssue] = []
+    for issue in issues:
+        try:
+            parent, leaf = _walk_to_parent(doc, issue.config_path)
+        except (KeyError, ValueError):
+            # Slot vanished between scan and apply, or the path is malformed.
+            continue
+        if not isinstance(parent, dict):
+            # A scalar or list where the slot's mapping should be: nothing to rewrite.
+            continue
+        parent[leaf] = issue.replacement
+        if issue.reasoning_effort:
+            parent["reasoning_effort"] = issue.reasoning_effort
+        resolved.append(issue)
+    if not resolved:
+        return unchanged
+
+    backup_path: Optional[Path] = None
+    if backup:
+        # The file on disk is still the original here; only ``doc`` has changed.
+        ts = _dt.datetime.now().strftime("%Y%m%d-%H%M%S")
+        backup_path = config_path.with_name(f"{config_path.name}.bak-pre-migrate-xai-{ts}")
+        shutil.copy2(config_path, backup_path)
+
+    with config_path.open("w", encoding="utf-8") as fh:
+        yaml.dump(doc, fh)
+
+    return ApplyResult(
+        file_path=config_path,
+        backup_path=backup_path,
+        issues_resolved=resolved,
+        config_changed=True,
+    )
@@ -235,6 +235,26 @@ def display_hermes_home() -> str:
        return str(home)


+def secure_parent_dir(path: Path) -> None:
+    """Best-effort ``chmod 0o700`` on the resolved parent directory of *path*.
+
+    Nothing happens when that parent has fewer than 3 path parts — ``/``
+    itself or a direct child such as ``/usr`` — so that a ``HERMES_HOME`` or
+    similar path variable pointing somewhere unexpected cannot lock down a
+    top-level system directory.  ``OSError`` from ``chmod`` is ignored.
+
+    See https://github.com/NousResearch/hermes-agent/issues/25821.
+    """
+    parent = path.parent.resolve()
+    # Depth alone covers the root case: Path("/").parts has a single entry.
+    is_top_level = len(parent.parts) < 3
+    if not is_top_level:
+        try:
+            os.chmod(parent, 0o700)
+        except OSError:
+            pass
+
+
 def get_subprocess_home() -> str | None:
    """Return a per-profile HOME directory for subprocesses, or None.

@@ -33,7 +33,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 11
+SCHEMA_VERSION = 12

 # ---------------------------------------------------------------------------
 # WAL-compatibility fallback
@@ -236,7 +236,8 @@ CREATE TABLE IF NOT EXISTS messages (
    reasoning_content TEXT,
    reasoning_details TEXT,
    codex_reasoning_items TEXT,
-    codex_message_items TEXT
+    codex_message_items TEXT,
+    platform_message_id TEXT
 );

 CREATE TABLE IF NOT EXISTS state_meta (
@@ -571,6 +572,19 @@ class SessionDB:
        # column gets created here.
        self._reconcile_columns(cursor)

+        # Indexes that reference reconciler-added columns must be created
+        # AFTER _reconcile_columns runs — declaring them in SCHEMA_SQL
+        # makes the initial executescript fail on legacy DBs (the index's
+        # WHERE clause references a column that doesn't exist yet).
+        try:
+            cursor.execute(
+                "CREATE INDEX IF NOT EXISTS idx_messages_platform_msg_id "
+                "ON messages(session_id, platform_message_id) "
+                "WHERE platform_message_id IS NOT NULL"
+            )
+        except sqlite3.OperationalError as exc:
+            logger.debug("idx_messages_platform_msg_id create skipped: %s", exc)
+
        # ── Schema version bookkeeping ─────────────────────────────────
        # Bump to current so future data migrations (if any) can gate on
        # version.  No version-gated column additions remain.
@@ -1445,12 +1459,19 @@ class SessionDB:
        reasoning_details: Any = None,
        codex_reasoning_items: Any = None,
        codex_message_items: Any = None,
+        platform_message_id: str = None,
    ) -> int:
        """
        Append a message to a session. Returns the message row ID.

        Also increments the session's message_count (and tool_call_count
        if role is 'tool' or tool_calls is present).
+
+        ``platform_message_id`` is the external messaging platform's own
+        message ID (e.g. Telegram update_id, Yuanbao msg_id).  It is
+        independent of the SQLite autoincrement primary key and is used by
+        platform-specific flows like yuanbao's recall guard to redact a
+        message by its platform-side identifier.
        """
        # Serialize structured fields to JSON before entering the write txn
        reasoning_details_json = (
@@ -1480,8 +1501,8 @@ class SessionDB:
                """INSERT INTO messages (session_id, role, content, tool_call_id,
                   tool_calls, tool_name, timestamp, token_count, finish_reason,
                   reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
-                   codex_message_items)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   codex_message_items, platform_message_id)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    role,
@@ -1497,6 +1518,7 @@ class SessionDB:
                    reasoning_details_json,
                    codex_items_json,
                    codex_message_items_json,
+                    platform_message_id,
                ),
            )
            msg_id = cursor.lastrowid
@@ -1558,13 +1580,18 @@ class SessionDB:
                    json.dumps(codex_message_items) if codex_message_items else None
                )
                tool_calls_json = json.dumps(tool_calls) if tool_calls else None
+                # Accept either `platform_message_id` (new explicit name) or
+                # `message_id` (yuanbao's existing convention on message dicts).
+                platform_msg_id = (
+                    msg.get("platform_message_id") or msg.get("message_id")
+                )

                conn.execute(
                    """INSERT INTO messages (session_id, role, content, tool_call_id,
                       tool_calls, tool_name, timestamp, token_count, finish_reason,
                       reasoning, reasoning_content, reasoning_details, codex_reasoning_items,
-                       codex_message_items)
-                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                       codex_message_items, platform_message_id)
+                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                    (
                        session_id,
                        role,
@@ -1580,6 +1607,7 @@ class SessionDB:
                        reasoning_details_json,
                        codex_items_json,
                        codex_message_items_json,
+                        platform_msg_id,
                    ),
                )
                total_messages += 1
@@ -1897,7 +1925,7 @@ class SessionDB:
            rows = self._conn.execute(
                "SELECT role, content, tool_call_id, tool_calls, tool_name, "
                "finish_reason, reasoning, reasoning_content, reasoning_details, "
-                "codex_reasoning_items, codex_message_items "
+                "codex_reasoning_items, codex_message_items, platform_message_id "
                f"FROM messages WHERE session_id IN ({placeholders}) ORDER BY id",
                tuple(session_ids),
            ).fetchall()
@@ -1918,6 +1946,13 @@ class SessionDB:
                except (json.JSONDecodeError, TypeError):
                    logger.warning("Failed to deserialize tool_calls in conversation replay, falling back to []")
                    msg["tool_calls"] = []
+            # Surface the platform-side message id (e.g. yuanbao msg_id,
+            # telegram update_id) so platform-specific flows like recall
+            # can match by external identifier instead of having to fall
+            # back to content-match heuristics.  Exposed as ``message_id``
+            # for backward compatibility with the JSONL transcript shape.
+            if row["platform_message_id"]:
+                msg["message_id"] = row["platform_message_id"]
            # Restore reasoning fields on assistant messages so providers
            # that replay reasoning (OpenRouter, OpenAI, Nous) receive
            # coherent multi-turn reasoning context.
@@ -0,0 +1,121 @@
+Create a professional infographic following these specifications:
+
+## Image Specifications
+
+- **Type**: Infographic
+- **Layout**: bento-grid
+- **Style**: retro-pop-grid
+- **Aspect Ratio**: 1:1 (square)
+- **Language**: en
+
+## Core Principles
+
+- Follow the layout structure precisely for information architecture
+- Apply style aesthetics consistently throughout
+- Keep information concise, highlight keywords and core concepts
+- Use ample whitespace for visual clarity
+- Maintain clear visual hierarchy
+
+## Text Requirements
+
+- All text must match the specified style treatment
+- Main titles should be prominent and readable
+- Key concepts should be visually emphasized
+- Labels should be clear and appropriately sized
+- Use English for all text content
+
+## Layout Guidelines (bento-grid)
+
+- Grid of rectangular cells with varied sizes (1x1, 2x1, 1x2, 2x2)
+- Hero cell ("ONE TOKEN, EVERY KEY") takes the largest position (top-center or upper-left, 2x2)
+- Supporting cells around the hero, mixed cell sizes for rhythm
+- Each cell self-contained with its own title + icon + brief content
+- Title strip at the top: "BITWARDEN SECRETS MANAGER → HERMES-AGENT" with "PR #30035" on a second line
+- Footer strip at the bottom with commit SHA + repo
+
+## Style Guidelines (retro-pop-grid)
+
+- 1970s retro pop art with strict Swiss international grid
+- Background: warm vintage cream/beige (#F5F0E6)
+- Accents: salmon pink, sky blue, mustard yellow, mint green — all muted retro tones
+- Pure solid black (#000000) and solid white (#FFFFFF) for extreme-contrast cells
+- Uniform thick black outlines on ALL illustrations, text boxes, grid dividers
+- Pure 2D flat vector aesthetic with subtle screen-print texture
+- One cell inverted to black-background-with-white-text for the "NEVER BLOCKS STARTUP" warning section
+- Geometric fill patterns in empty cells: checkerboards, diagonal lines, dot grids
+- Flat abstract symbols: shields (security), wrenches (install), arrows (rotation), keyholes (auth), checkmarks (tests)
+- Vintage comic-style smiley face for "26/26 PASSING" cell
+- Bold brutalist or thick retro display fonts for headers; clean sans-serif body
+- Decorative stylistic labels acceptable: "WARNING", "NEW DEFAULT", "PINNED", "VERIFIED", "ROTATE"
+
+## Avoid
+
+- 3D rendering, gradients, soft shadows, sketch-like lines
+- Free-floating elements — everything anchored in grid cells
+- Pure white background — must use warm cream/beige
+
+---
+
+Generate the infographic based on the content below:
+
+### Title (top strip)
+BITWARDEN SECRETS MANAGER → HERMES-AGENT
+PR #30035
+
+### HERO CELL (largest, top-center, salmon pink background with thick black border)
+ONE TOKEN, EVERY KEY
+Rotate once in the Bitwarden web app.
+Every Hermes process picks it up on next start.
+NEW DEFAULT: override_existing = true
+
+### Cell — LAZY INSTALL (sky blue background)
+~/.hermes/bin/bws
+bws v2.0.0 PINNED
+SHA-256 VERIFIED
+No apt · no brew · no sudo
+Icon: wrench + downward arrow
+
+### Cell — CLI SURFACE (mustard yellow background, checkerboard accents)
+$ hermes secrets bitwarden
+  setup    wizard
+  status   diagnose
+  sync     fetch
+  install  binary
+  disable  off
+Icon: terminal prompt symbol
+
+### Cell — SOURCE OF TRUTH (mint green background)
+BITWARDEN WINS
+Overwrites stale .env on every start
+Bootstrap token never overwritten (exception)
+Icon: keyhole + arrow
+
+### Cell — INVERTED BLACK CELL with WHITE TEXT — NEVER BLOCKS STARTUP (extreme contrast)
+NEVER BLOCKS STARTUP
+Missing binary → warn + continue
+Bad token → warn + continue
+Network down → warn + continue
+Checksum mismatch → refuse + warn
+30s timeout ceiling
+Icon: white triangle warning sign
+
+### Cell — TESTS (cream with thick black outline, vintage comic smiley face)
+26 / 26
+HERMETIC
+subprocess + urllib mocked
+linux · macos · windows
+x86_64 · arm64
+Icon: comic-style smiley face with checkmark
+
+### Cell — CONFIG YAML (white background with black grid)
+secrets:
+  bitwarden:
+    enabled: true
+    project_id: ...
+    override_existing: true
+    cache_ttl_seconds: 300
+    auto_install: true
+
+### Footer strip (bottom, black-on-cream)
+PR #30035 · commit 7f9b05668 · NousResearch/hermes-agent
+10 files · +1743 / -1 · agent/secret_sources/ · hermes_cli/secrets_cli.py
@@ -0,0 +1,57 @@
+# Hermes-Agent PR #30035 — Bitwarden Secrets Manager Integration
+
+## Hero
+**ONE TOKEN, EVERY KEY**
+Rotate once. Every Hermes process picks it up on next start.
+`secrets.bitwarden.override_existing: true` (default)
+
+## Cells
+
+### Lazy Install
+- `bws v2.0.0` pinned
+- Downloaded into `~/.hermes/bin/bws`
+- SHA-256 verified vs GitHub Releases checksum file
+- No apt, no brew, no sudo
+- Cross-platform: linux gnu+musl, macos universal, windows x86_64+arm64
+
+### CLI Surface
+- `hermes secrets bitwarden setup`     wizard
+- `hermes secrets bitwarden status`    diagnose
+- `hermes secrets bitwarden sync`      dry-run / --apply
+- `hermes secrets bitwarden install`   binary only
+- `hermes secrets bitwarden disable`   off switch
+
+### Source of Truth
+- Bitwarden WINS on every Hermes start
+- BSM values overwrite stale `.env` lines
+- Rotate a key once → all your machines reload it
+- Bootstrap token `BWS_ACCESS_TOKEN` is the lone exception (never overwritten)
+
+### Never Blocks Startup
+- Missing binary → warn + continue
+- Bad token → warn + continue
+- Checksum mismatch → refuse install + warn
+- No network → warn + continue
+- Timeout → 30s ceiling, warn + continue
+
+### Tests
+- 26/26 passing, hermetic
+- subprocess + urllib mocked
+- Platform matrix tested (linux, macos, windows × x86_64, arm64)
+- Cache hit/miss, auth fail, non-JSON, timeout, override behavior
+
+### Config
+```yaml
+secrets:
+  bitwarden:
+    enabled: true
+    project_id: <uuid>
+    override_existing: true   # NEW DEFAULT
+    cache_ttl_seconds: 300
+    auto_install: true
+```
+
+## Footer
+PR #30035 · commit 7f9b05668 · NousResearch/hermes-agent
+
+10 files changed · +1743 / -1 · agent/secret_sources/ · hermes_cli/secrets_cli.py · tests · docs
@@ -0,0 +1,85 @@
+Create a professional infographic following these specifications:
+
+## Image Specifications
+
+- **Type**: Infographic
+- **Layout**: bento-grid
+- **Style**: technical-schematic (engineering blueprint variant)
+- **Aspect Ratio**: 1:1 (square)
+- **Language**: English
+
+## Core Principles
+
+- Follow the bento-grid layout precisely with varied cell sizes
+- Apply technical-schematic aesthetics consistently throughout
+- Keep information concise, highlight keywords and core concepts
+- Use ample whitespace for visual clarity
+- Maintain clear visual hierarchy with a hero cell for the headline metric
+
+## Style Guidelines (technical-schematic blueprint)
+
+- Color palette: deep blue background (#1E3A5F), white lines and text, amber accent (#F59E0B) ONLY on the hero metric and critical deltas, cyan callouts for measurement annotations
+- Grid pattern overlay across the entire canvas — fine white grid lines on the deep blue background
+- All-caps technical stencil typography for headers; clean sans-serif for body
+- Dimension lines with arrowheads connecting metrics to their cells
+- Technical symbols where appropriate (gear icons, flow arrows, modular block diagrams)
+- Consistent stroke weights — bold for cell borders, thin for grid, medium for connector lines
+- Engineering spec-sheet aesthetic: feels like a printed architectural blueprint, austere and precise
+
+## Layout Guidelines (bento-grid)
+
+- Hero cell (TOP-CENTER or LEFT, occupying ~40% of canvas): "−61 COMPLEXITY · 79 → 18" headline metric in massive amber-on-blue, with subtitle "convert_messages_to_anthropic refactored"
+- 7 helper cells in a 2x4 or 3x3 grid showing each extracted helper as its own modular block — each cell has the helper name in all-caps, its complexity number, and one-line role
+- Metrics strip cell: BEFORE/AFTER table with deltas (185 statements → ~70, 79 C → 18 C, +5 violations intentional)
+- Test validation cell: "152/152 + 213/213 PASS" with checkmark stencil
+- Footer strip across bottom: "PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor · NousResearch/hermes-agent"
+
+## Content to render
+
+**Main title (top of canvas, all caps):** "ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION"
+**Subtitle:** "PR #27784 — convert_messages_to_anthropic refactor"
+
+**Hero cell (largest, amber accent):**
+- "−61"
+- "CYCLOMATIC COMPLEXITY"
+- "79 → 18 MAX (−77%)"
+- Subtext: "convert_messages_to_anthropic · pure code motion · zero behavior change"
+
+**7 helper cells (one per helper, each its own modular block):**
+
+1. _convert_assistant_message · C<10 · "Assistant msg → content blocks"
+2. _convert_tool_message_to_result · C=12 · "Tool msg → tool_result + merge"
+3. _convert_user_message · C<10 · "User msg validation"
+4. _strip_orphaned_tool_blocks · C=15 · "Orphan tool_use removal"
+5. _merge_consecutive_roles · C=13 · "Anthropic role-alternation"
+6. _manage_thinking_signatures · C=18 · "Strip/preserve by endpoint"
+7. _evict_old_screenshots · C<10 · "Keep most recent 3 images"
+
+**Metrics cell (table format with arrows):**
+- MAX FUNCTION COMPLEXITY: 79 → 18 (−77%)
+- MAX STATEMENTS/FUNCTION: 185 → ~70 (−62%)
+- LOC FILE-WIDE: −4
+- MAIN FUNCTION LOC: 395 → 63
+
+**Test validation cell (checkmark stencil):**
+- test_anthropic_adapter.py: 152/152 PASS
+- test_auxiliary_client.py: 172/172 PASS
+- test_azure_identity_adapter.py: 39/39 PASS
+- test_bedrock_1m_context.py: 2/2 PASS
+
+**Behavior preservation cell:**
+"ZERO LOGIC CHANGES · ANTHROPIC + KIMI + DEEPSEEK + MINIMAX + AZURE FOUNDRY + BEDROCK SEMANTICS PRESERVED"
+
+**Footer strip:**
+"PR #27784 · agent/anthropic_adapter.py · cherry-picked from #23968 · @kshitijk4poor · NousResearch/hermes-agent"
+
+## Text Requirements
+
+- All text in English, all-caps for headers
+- Hero metric "−61" in amber (#F59E0B), oversized, with thick blueprint stencil treatment
+- Helper names in white technical stencil
+- Complexity numbers (C=12, C=18, etc.) in cyan callouts
+- "BEFORE" labels in white-on-blue, "AFTER" labels in amber-on-blue
+- Footer in small white stencil
+
+Generate the infographic now as a square engineering blueprint.
@@ -0,0 +1,66 @@
+# Infographic: PR #27784 — convert_messages_to_anthropic refactor
+
+## Hero metric
+**−61 cyclomatic complexity** in `agent/anthropic_adapter.py` (79 → 18 max).
+**−4 LOC** net file-wide. **77% drop** in single-function complexity ceiling.
+
+## Title
+ANTHROPIC ADAPTER · 1-INTO-7 EXTRACTION
+PR #27784 · agent/anthropic_adapter.py · @kshitijk4poor
+
+## Section 1: BEFORE (left side)
+**convert_messages_to_anthropic**
+- 185 statements
+- 90 branches
+- Cyclomatic: 79
+- Did 7 jobs in one function
+
+Inline responsibilities mixed together:
+1. Walk + dispatch by role
+2. Tool-result conversion
+3. Orphan tool-use stripping
+4. Same-role merging
+5. Thinking-signature management
+6. Screenshot eviction
+7. Final assembly
+
+## Section 2: AFTER (right side)
+**convert_messages_to_anthropic** — now 63 lines, C<10
+Plus 7 single-responsibility helpers:
+
+| Helper | C | Role |
+|---|---|---|
+| _convert_assistant_message | <10 | Assistant msg → content blocks |
+| _convert_tool_message_to_result | 12 | Tool msg → tool_result + merge |
+| _convert_user_message | <10 | User msg validation + conversion |
+| _strip_orphaned_tool_blocks | 15 | Strip orphan tool_use + tool_result |
+| _merge_consecutive_roles | 13 | Anthropic role-alternation enforce |
+| _manage_thinking_signatures | 18 | Strip/preserve/downgrade by endpoint |
+| _evict_old_screenshots | <10 | Keep most recent 3 images |
+
+## Section 3: METRICS
+| Metric | Before | After | Δ |
+|---|---:|---:|---:|
+| Max function complexity | 79 | 18 | −77% |
+| Max statements/function | 185 | ~70 | −62% |
+| LOC (file-wide) | — | — | **−4** |
+| C901 violations | 3 | 8 | +5 (intentional split) |
+
+## Section 4: ZERO BEHAVIOR CHANGE
+- Pure code motion — no logic edits
+- Mutating helpers update `result` in place (same as inline)
+- `_merge_consecutive_roles` returns new list — caller rebinds
+- Anthropic / Kimi / DeepSeek / MiniMax / Azure Foundry / Bedrock semantics preserved
+- Thinking-signature handling identical to pre-refactor
+
+## Section 5: TEST VALIDATION
+- tests/agent/test_anthropic_adapter.py — **152 / 152 pass**
+- tests/agent/test_auxiliary_client.py — **172 / 172 pass**
+- tests/agent/test_azure_identity_adapter.py — **39 / 39 pass**
+- tests/agent/test_bedrock_1m_context.py — **2 / 2 pass**
+
+## Footer
+File: agent/anthropic_adapter.py
+Original PR: #27784 (cherry-pick of #23968)
+Salvage commit: 9c102b937 (kshitijk4poor authorship preserved)
+Repo: NousResearch/hermes-agent
@@ -16,6 +16,11 @@
  openssh,
  ffmpeg,
  tirith,
+
+  # linux-only deps
+  wl-clipboard,
+  xclip,
+
  # Flake inputs — passed explicitly by packages.nix and overlays.nix
  uv2nix,
  pyproject-nix,
@@ -68,6 +73,10 @@ let
    openssh
    ffmpeg
    tirith
+  ]
+  ++ lib.optionals stdenv.isLinux [
+    wl-clipboard
+    xclip
  ];

  runtimePath = lib.makeBinPath runtimeDeps;
@@ -4,7 +4,7 @@ let
  src = ../ui-tui;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-dNL/J4tyQQ7Ji3xfIE5b5Jdi6rQyCFjqYpzLYftJVdc=";
+    hash = "sha256-F6/MzZOWc0zhW9mIfnaY+PrllPvJcsA/OdFdEM+NpLY=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
@@ -4,7 +4,7 @@ let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-FL2E8Vv8gyeClEa5b/pHn/ekWoHWTd4YwzV6zhLEos4=";
+    hash = "sha256-xSsyluzU2lNhwGqB6XMCGMv3QFHZizE6hgUyc1jvyOw=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
@@ -148,7 +148,7 @@ class BrowserUseBrowserProvider(BrowserProvider):

        return {
            "api_key": managed.nous_user_token,
-            "base_url": managed.gateway_origin.rstrip("/"),
+            "base_url": managed.resolved_origin.rstrip("/"),
            "managed_mode": True,
        }

@@ -0,0 +1,182 @@
+"""FAL.ai image generation backend.
+
+Wraps the 18-model FAL catalog (FLUX 2, Z-Image, Nano Banana, GPT
+Image 1.5, Recraft, Imagen 4, Qwen, Ideogram, …) as an
+:class:`ImageGenProvider` implementation.
+
+The heavy lifting — model catalog, payload construction, request
+submission, managed-Nous-gateway selection, Clarity Upscaler chaining
+— lives in :mod:`tools.image_generation_tool`. This plugin reaches into
+that module via call-time indirection (``import tools.image_generation_tool as _it``)
+so:
+
+* the existing test suite (``tests/tools/test_image_generation.py``,
+  ``tests/tools/test_managed_media_gateways.py``) keeps patching
+  ``image_tool._submit_fal_request`` / ``image_tool.fal_client`` /
+  ``image_tool._managed_fal_client`` without modification, and
+* there's exactly one canonical FAL code path on disk — the plugin is a
+  registration adapter, not a parallel implementation.
+
+See issue #26241 for the migration plan and the
+``plugin-extraction-test-patch-compatibility.md`` rules this follows.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+from agent.image_gen_provider import (
+    DEFAULT_ASPECT_RATIO,
+    ImageGenProvider,
+    resolve_aspect_ratio,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class FalImageGenProvider(ImageGenProvider):
+    """FAL.ai image generation backend.
+
+    Delegates to ``tools.image_generation_tool.image_generate_tool`` so
+    the in-tree FAL implementation (model catalog, payload builder,
+    managed-gateway selection, Clarity Upscaler chaining) is the single
+    source of truth. Everything is resolved at call time via the
+    ``_it`` indirection so tests can monkey-patch the legacy module.
+    """
+
+    @property
+    def name(self) -> str:
+        """Short identifier for this provider (``"fal"``)."""
+        return "fal"
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable provider name (``"FAL.ai"``)."""
+        return "FAL.ai"
+
+    def is_available(self) -> bool:
+        """Return True when the legacy module reports usable FAL credentials.
+
+        Any exception raised by the legacy check is treated as
+        "not available" so this method never raises.
+        """
+        # Available when direct FAL_KEY is set OR the managed Nous
+        # gateway resolves a fal-queue origin. Both checks come from the
+        # legacy module so this provider tracks whatever logic ships
+        # there.
+        import tools.image_generation_tool as _it
+        try:
+            return bool(_it.check_fal_api_key())
+        except Exception:  # noqa: BLE001 — defensive; never break the picker
+            return False
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        """Return one summary dict per entry in the legacy ``FAL_MODELS`` catalog.
+
+        Each dict carries ``id``, ``display``, ``speed``, ``strengths`` and
+        ``price``; missing metadata falls back to the model id (for
+        ``display``) or an empty string.
+        """
+        import tools.image_generation_tool as _it
+        return [
+            {
+                "id": model_id,
+                "display": meta.get("display", model_id),
+                "speed": meta.get("speed", ""),
+                "strengths": meta.get("strengths", ""),
+                "price": meta.get("price", ""),
+            }
+            for model_id, meta in _it.FAL_MODELS.items()
+        ]
+
+    def default_model(self) -> Optional[str]:
+        """Return the legacy module's ``DEFAULT_MODEL``."""
+        import tools.image_generation_tool as _it
+        return _it.DEFAULT_MODEL
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return the static setup descriptor (name, badge, tag, env vars)."""
+        return {
+            "name": "FAL.ai",
+            "badge": "paid",
+            "tag": "Pick from flux-2-klein, flux-2-pro, gpt-image, nano-banana, etc.",
+            "env_vars": [
+                {
+                    "key": "FAL_KEY",
+                    "prompt": "FAL API key",
+                    "url": "https://fal.ai/dashboard/keys",
+                },
+            ],
+        }
+
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Generate an image via the legacy FAL pipeline.
+
+        Forwards prompt + aspect_ratio (and any forward-compat extras
+        the schema supports) into :func:`tools.image_generation_tool.image_generate_tool`,
+        then reshapes its JSON-string response into the provider-ABC
+        dict format consumed by ``_dispatch_to_plugin_provider``.
+
+        Args:
+            prompt: Text prompt forwarded unchanged to the legacy tool.
+            aspect_ratio: Requested ratio; normalised through
+                ``resolve_aspect_ratio`` before use.
+            **kwargs: Only ``num_inference_steps``, ``guidance_scale``,
+                ``num_images``, ``output_format`` and ``seed`` are
+                forwarded, and only when not ``None``; other keys are
+                ignored.
+
+        Returns:
+            A response dict. This method never raises: every failure path
+            yields ``success=False`` together with ``error`` and
+            ``error_type`` keys.
+        """
+        import tools.image_generation_tool as _it
+
+        aspect = resolve_aspect_ratio(aspect_ratio)
+        passthrough = {
+            key: kwargs[key]
+            for key in (
+                "num_inference_steps",
+                "guidance_scale",
+                "num_images",
+                "output_format",
+                "seed",
+            )
+            if key in kwargs and kwargs[key] is not None
+        }
+
+        try:
+            raw = _it.image_generate_tool(
+                prompt=prompt,
+                aspect_ratio=aspect,
+                **passthrough,
+            )
+        except Exception as exc:  # noqa: BLE001 — never raise out of generate
+            logger.warning("FAL image_generate_tool raised: %s", exc, exc_info=True)
+            return {
+                "success": False,
+                "image": None,
+                "error": f"FAL image generation failed: {exc}",
+                "error_type": type(exc).__name__,
+                "provider": "fal",
+                "prompt": prompt,
+                "aspect_ratio": aspect,
+            }
+
+        try:
+            response = json.loads(raw) if isinstance(raw, str) else raw
+        except Exception as exc:  # noqa: BLE001 — never raise out of generate
+            # Record the parse failure instead of swallowing it, and tag the
+            # payload with an error_type like the other failure paths do.
+            logger.warning("FAL pipeline returned unparseable JSON: %s", exc)
+            response = {
+                "success": False,
+                "image": None,
+                "error": "Invalid JSON from FAL pipeline",
+                "error_type": "invalid_json",
+            }
+
+        if not isinstance(response, dict):
+            response = {
+                "success": False,
+                "image": None,
+                "error": "FAL pipeline returned a non-dict response",
+                "error_type": "provider_contract",
+            }
+
+        # Stamp provider/prompt/aspect_ratio so downstream consumers see
+        # the uniform shape declared in ``agent.image_gen_provider``.
+        response.setdefault("provider", "fal")
+        response.setdefault("prompt", prompt)
+        response.setdefault("aspect_ratio", aspect)
+        # Annotate model best-effort — the legacy pipeline resolves it
+        # internally, so query it after the fact for the response shape.
+        if "model" not in response:
+            try:
+                model_id, _meta = _it._resolve_fal_model()
+                response["model"] = model_id
+            except Exception as exc:  # noqa: BLE001 — annotation is optional
+                logger.debug("Could not resolve FAL model for response: %s", exc)
+        return response
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+
+def register(ctx) -> None:
+    """Plugin entry point: hand a fresh ``FalImageGenProvider`` to ``ctx``."""
+    provider = FalImageGenProvider()
+    ctx.register_image_gen_provider(provider)
@@ -0,0 +1,7 @@
+# Plugin manifest for the FAL.ai image generation backend.
+name: fal
+# Quoted so the version is always read as a string.
+version: '1.0.0'
+description: 'FAL.ai image generation backend (flux-2-klein, flux-2-pro, nano-banana, gpt-image-1.5, recraft-v3, etc.).'
+author: NousResearch
+kind: backend
+# Environment variables this plugin declares as required.
+requires_env:
+  - FAL_KEY
@@ -24,6 +24,23 @@
  const { useState, useEffect, useCallback, useMemo, useRef } = SDK.hooks;
  const { cn, timeAgo } = SDK.utils;

+  // Checkbox: use the design-system component when the host plugin SDK
+  // provides one. Older hosts that predate the design-system rollout get
+  // a thin wrapper around a native <input type="checkbox"> instead. The
+  // wrapper accepts the Radix-style onCheckedChange(checked) prop and
+  // drives it from the native onChange(event).
+  const Checkbox = SDK.components.Checkbox || function (props) {
+    const { checked, onCheckedChange, className, onClick, ...passthrough } = props;
+    const handleChange = function (event) {
+      if (onCheckedChange) onCheckedChange(event.target.checked);
+    };
+    return h("input", {
+      type: "checkbox",
+      checked: !!checked,
+      className,
+      onClick,
+      onChange: handleChange,
+      // Remaining props (title, aria-label, …) go last, as before.
+      ...passthrough,
+    });
+  };
+
  // useI18n is a hook each component calls locally. Older host dashboards
  // may not expose it yet; fall back to a shim so the bundle still renders
  // English against an older host SDK. English fallback strings live
@@ -1648,11 +1665,10 @@
            h(Label, { className: "text-xs text-muted-foreground" },
              "Orchestration mode"),
            h("label", { className: "flex items-center gap-2 text-xs h-8" },
-              h("input", {
-                type: "checkbox",
+              h(Checkbox, {
                checked: !!settings.auto_decompose,
-                onChange: function (e) {
-                  saveSettings({ auto_decompose: !!e.target.checked });
+                onCheckedChange: function (checked) {
+                  saveSettings({ auto_decompose: checked === true });
                },
              }),
              "Auto-decompose triage tasks",
@@ -1908,10 +1924,9 @@
            }),
          ),
          h("label", { className: "flex items-center gap-2 text-xs" },
-            h("input", {
-              type: "checkbox",
+            h(Checkbox, {
              checked: switchTo,
-              onChange: function (e) { setSwitchTo(e.target.checked); },
+              onCheckedChange: function (checked) { setSwitchTo(checked === true); },
            }),
            tx(t, "switchAfterCreate", "Switch to this board after creating it"),
          ),
@@ -1981,19 +1996,17 @@
      ),
      h("label", { className: "flex items-center gap-2 text-xs",
                   title: "Include archived tasks in the board view. Archived tasks are hidden by default." },
-        h("input", {
-          type: "checkbox",
+        h(Checkbox, {
          checked: props.includeArchived,
-          onChange: function (e) { props.setIncludeArchived(e.target.checked); },
+          onCheckedChange: function (checked) { props.setIncludeArchived(checked === true); },
        }),
        tx(t, "showArchived", "Show archived"),
      ),
      h("label", { className: "flex items-center gap-2 text-xs",
                   title: "Group the Running column by assigned profile" },
-        h("input", {
-          type: "checkbox",
+        h(Checkbox, {
          checked: props.laneByProfile,
-          onChange: function (e) { props.setLaneByProfile(e.target.checked); },
+          onCheckedChange: function (checked) { props.setLaneByProfile(checked === true); },
        }),
        tx(t, "lanesByProfile", "Lanes by profile"),
      ),
@@ -2122,10 +2135,9 @@
        }, tx(t, "apply", "Apply")),
      ),
      h("label", { className: "hermes-kanban-bulk-reclaim-first", title: "Reclaim any active claims before reassigning" },
-        h("input", {
-          type: "checkbox",
+        h(Checkbox, {
          checked: reclaimFirst,
-          onChange: function (e) { setReclaimFirst(e.target.checked); },
+          onCheckedChange: function (checked) { setReclaimFirst(checked === true); },
        }),
        "Reclaim first",
      ),
@@ -2313,14 +2325,12 @@
    },
      h("div", { className: "hermes-kanban-column-header",
                 title: colHelp || "" },
-        h("input", {
-          type: "checkbox",
+        h(Checkbox, {
          className: "hermes-kanban-col-check",
          title: "Select all tasks in this column",
          "aria-label": `Select all tasks in ${colLabel || props.column.name}`,
          checked: props.column.tasks.length > 0 && props.column.tasks.every(function (t) { return props.selectedIds.has(t.id); }),
-          onChange: function (e) {
-            e.stopPropagation();
+          onCheckedChange: function () {
            if (props.selectAllInColumn) props.selectAllInColumn(props.column.name);
          },
          onClick: function (e) { e.stopPropagation(); },
@@ -2461,8 +2471,7 @@
        if (props.toggleSelected) props.toggleSelected(t.id, false);
      }
    };
-    const handleCheckbox = function (e) {
-      e.stopPropagation();
+    const handleCheckedChange = function () {
      props.toggleSelected(t.id, true);
    };

@@ -2495,11 +2504,10 @@
              title: tx(i18n, "selectForBulk", "Select for bulk actions"),
              onClick: function (e) { e.stopPropagation(); },
            },
-              h("input", {
-                type: "checkbox",
+              h(Checkbox, {
                className: "hermes-kanban-card-check",
                checked: props.selected,
-                onChange: handleCheckbox,
+                onCheckedChange: handleCheckedChange,
                onClick: function (e) { e.stopPropagation(); },
                "aria-label": `Select task ${t.id}`,
              }),
@@ -47,6 +47,25 @@ _DEFAULT_ENDPOINT = "http://127.0.0.1:1933"
 _TIMEOUT = 30.0
 _REMOTE_RESOURCE_PREFIXES = ("http://", "https://", "git@", "ssh://", "git://")

+# viking_remember `category` enum value → viking:// memory subdirectory.
+# Must stay aligned with REMEMBER_SCHEMA.parameters.properties.category.enum.
+_CATEGORY_SUBDIR_MAP = dict(
+    preference="preferences",
+    entity="entities",
+    event="events",
+    case="cases",
+    pattern="patterns",
+)
+# Subdirectory used when a lookup key is not present in a map.
+_DEFAULT_MEMORY_SUBDIR = "preferences"
+
+# Built-in memory tool `target` ("user" vs "memory") → subdirectory used when
+# on_memory_write mirrors a write. User profile facts land in preferences;
+# agent notes / observations land in patterns. Unknown targets fall back to
+# the default.
+_MEMORY_WRITE_TARGET_SUBDIR_MAP = dict(
+    user="preferences",
+    memory="patterns",
+)
+

 # ---------------------------------------------------------------------------
 # Process-level atexit safety net — ensures pending sessions are committed
@@ -607,24 +626,35 @@ class OpenVikingMemoryProvider(MemoryProvider):
        except Exception as e:
            logger.warning("OpenViking session commit failed: %s", e)

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
-        """Mirror built-in memory writes to OpenViking as explicit memories."""
+    def _build_memory_uri(self, subdir: str) -> str:
+        """Return a fresh viking:// URI for a new memory file in ``subdir``."""
+        filename = f"mem_{uuid.uuid4().hex[:12]}.md"
+        return f"viking://user/{self._user}/memories/{subdir}/{filename}"
+
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        """Mirror built-in memory writes to OpenViking via content/write."""
        if not self._client or action != "add" or not content:
            return

+        subdir = _MEMORY_WRITE_TARGET_SUBDIR_MAP.get(target, _DEFAULT_MEMORY_SUBDIR)
+        uri = self._build_memory_uri(subdir)
+
        def _write():
            try:
                client = _VikingClient(
                    self._endpoint, self._api_key,
                    account=self._account, user=self._user, agent=self._agent,
                )
-                # Add as a user message with memory context so the commit
-                # picks it up as an explicit memory during extraction
-                client.post(f"/api/v1/sessions/{self._session_id}/messages", {
-                    "role": "user",
-                    "parts": [
-                        {"type": "text", "text": f"[Memory note — {target}] {content}"},
-                    ],
+                client.post("/api/v1/content/write", {
+                    "uri": uri,
+                    "content": content,
+                    "mode": "create",
                })
            except Exception as e:
                logger.debug("OpenViking memory mirror failed: %s", e)
@@ -858,24 +888,27 @@ class OpenVikingMemoryProvider(MemoryProvider):
        if not content:
            return tool_error("content is required")

-        # Store as a session message that will be extracted during commit.
-        # The category hint helps OpenViking's extraction classify correctly.
        category = args.get("category", "")
-        text = f"[Remember] {content}"
-        if category:
-            text = f"[Remember — {category}] {content}"
+        subdir = _CATEGORY_SUBDIR_MAP.get(category, _DEFAULT_MEMORY_SUBDIR)
+        uri = self._build_memory_uri(subdir)

-        self._client.post(f"/api/v1/sessions/{self._session_id}/messages", {
-            "role": "user",
-            "parts": [
-                {"type": "text", "text": text},
-            ],
-        })
-
-        return json.dumps({
-            "status": "stored",
-            "message": "Memory recorded. Will be extracted and indexed on session commit.",
-        })
+        # Write directly via content/write API.
+        # This creates the file, stores the content, and queues vector indexing
+        # in a single call — no dependency on session commit / VLM extraction.
+        try:
+            result = self._client.post("/api/v1/content/write", {
+                "uri": uri,
+                "content": content,
+                "mode": "create",
+            })
+            written = result.get("result", {}).get("written_bytes", 0)
+            return json.dumps({
+                "status": "stored",
+                "message": f"Memory stored ({written}b) and queued for vector indexing.",
+            })
+        except Exception as e:
+            logger.error("OpenViking content/write failed: %s", e)
+            return tool_error(f"Failed to store memory: {e}")

    def _tool_add_resource(self, args: dict) -> str:
        url = args.get("url", "")
@@ -282,20 +282,24 @@ def _build_payload(


 # ---------------------------------------------------------------------------
-# fal_client lazy import (same pattern as image_generation_tool)
+# fal_client lazy import (shared with image_generation_tool via fal_common)
 # ---------------------------------------------------------------------------

 _fal_client: Any = None


 def _load_fal_client() -> Any:
+    """Lazy-load the ``fal_client`` SDK and cache it on this module.
+
+    Delegates the actual import to :func:`tools.fal_common.import_fal_client`
+    so the ``lazy_deps`` ensure-install handling stays in one place.
+    """
    global _fal_client
    if _fal_client is not None:
        return _fal_client
-    import fal_client  # type: ignore
-
-    _fal_client = fal_client
-    return fal_client
+    from tools.fal_common import import_fal_client
+    _fal_client = import_fal_client()
+    return _fal_client


 # ---------------------------------------------------------------------------
@@ -238,7 +238,7 @@ def _get_firecrawl_client() -> Any:

        kwargs = {
            "api_key": managed_gateway.nous_user_token,
-            "api_url": managed_gateway.gateway_origin,
+            "api_url": managed_gateway.resolved_origin,
        }
        client_config = (
            "tool-gateway",
@@ -41,7 +41,11 @@ dependencies = [
  "ruamel.yaml==0.18.17",
  "requests==2.33.0",  # CVE-2026-25645
  "jinja2==3.1.6",
-  "pydantic==2.12.5",
+  # Bumped from 2.12.5 to 2.13.4 to pull in pydantic-core 2.46.4.
+  # pydantic-core 2.41.5 (pulled by 2.12.5) segfaults when the OpenAI SDK's
+  # Responses API resource is exercised from a non-main thread, which is the
+  # codex_responses dispatch in agent/chat_completion_helpers.py:_call.
+  "pydantic==2.13.4",
  # Interactive CLI (prompt_toolkit is used directly by cli.py)
  "prompt_toolkit==3.0.52",
  # Cron scheduler (built-in feature — scheduled cron/interval jobs use croniter).
@@ -80,7 +84,7 @@ modal = ["modal==1.3.4"]
 daytona = ["daytona==0.155.0"]
 vercel = ["vercel==0.5.7"]
 hindsight = ["hindsight-client==0.6.1"]
-dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
+dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"]
 messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
 cron = []  # croniter is now a core dependency; this extra kept for back-compat
 slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
@@ -228,16 +232,12 @@ markers = [
    "integration: marks tests requiring external services (API keys, Modal, etc.)",
    "real_concurrent_gate: opt out of the autouse stub that disables _detect_concurrent_hermes_instances",
 ]
-# pytest-timeout: per-test 60s hard cap with thread method.
-# Discovered May 2026: the suite reliably hangs at ~96% on full runs even
-# though every individual test completes in <30s. Root cause is leaked
-# threads / atexit handlers accumulating across thousands of tests until
-# something deadlocks at session teardown. Adding pytest-timeout (with
-# thread method, which forces an interrupt into the test thread) breaks
-# the deadlock — the suite then completes cleanly. The 60s cap is large
-# enough that no legitimate test trips it; if a test exceeds it that's a
-# real bug worth surfacing as a Timeout failure.
-addopts = "-m 'not integration' -n auto --timeout=30 --timeout-method=signal"
+# pytest-timeout: per-test 30s hard cap with signal method.
+# This is the fallback inside each per-file pytest subprocess (see
+# scripts/run_tests_parallel.py). Per-file isolation gives every test
+# file a fresh Python interpreter; pytest-timeout catches Python-level
+# hangs within a file.
+addopts = "-m 'not integration' --timeout=30 --timeout-method=signal"

 [tool.ty.environment]
 python-version = "3.13"
@@ -168,7 +168,7 @@ from agent.tool_result_classification import (
    file_mutation_result_landed,
 )
 from agent.trajectory import (
-    convert_scratchpad_to_think, has_incomplete_scratchpad,
+    convert_scratchpad_to_think,
    save_trajectory as _save_trajectory_to_file,
 )
 from agent.message_sanitization import (
@@ -1517,23 +1517,35 @@ class AIAgent:
        return content.strip()

    def _save_session_log(self, messages: List[Dict[str, Any]] = None):
-        """
-        Save the full raw session to a JSON file.
+        """Optional per-session JSON snapshot writer.

-        Stores every message exactly as the agent sees it: user messages,
-        assistant messages (with reasoning, finish_reason, tool_calls),
-        tool responses (with tool_call_id, tool_name), and injected system
-        messages (compression summaries, todo snapshots, etc.).
+        Gated by ``sessions.write_json_snapshots`` (default False).  state.db
+        is the canonical message store; this writer exists only for users
+        whose external tooling consumes ``~/.hermes/sessions/session_{sid}.json``
+        directly.  When the flag is off this is a fast no-op.

-        REASONING_SCRATCHPAD tags are converted to <think> blocks for consistency.
-        Overwritten after each turn so it always reflects the latest state.
+        When enabled, rewrites the snapshot after every persistence point with
+        the full message list (assistant content normalized via
+        ``_clean_session_content`` to convert REASONING_SCRATCHPAD to think
+        tags).  The truncation guard ("don't overwrite a larger log with
+        fewer messages") is preserved so resume + branch don't clobber a
+        fuller existing snapshot.
        """
+        if not getattr(self, "_session_json_enabled", False):
+            return
        messages = messages or self._session_messages
        if not messages:
            return

+        # Re-derive the target path each call so /branch and /compress
+        # session-id changes land in the right file without any re-point
+        # bookkeeping at the call sites.
+        try:
+            log_file = self.logs_dir / f"session_{self.session_id}.json"
+        except Exception:
+            return
+
        try:
-            # Clean assistant content for session logs
            cleaned = []
            for msg in messages:
                if msg.get("role") == "assistant" and msg.get("content"):
@@ -1542,12 +1554,11 @@ class AIAgent:
                cleaned.append(msg)

            # Guard: never overwrite a larger session log with fewer messages.
-            # This protects against data loss when --resume loads a session whose
-            # messages weren't fully written to SQLite — the resumed agent starts
-            # with partial history and would otherwise clobber the full JSON log.
-            if self.session_log_file.exists():
+            # Protects against data loss when a resumed agent starts with
+            # partial history and would otherwise clobber the full JSON log.
+            if log_file.exists():
                try:
-                    existing = json.loads(self.session_log_file.read_text(encoding="utf-8"))
+                    existing = json.loads(log_file.read_text(encoding="utf-8"))
                    existing_count = existing.get("message_count", len(existing.get("messages", [])))
                    if existing_count > len(cleaned):
                        logging.debug(
@@ -1572,7 +1583,7 @@ class AIAgent:
            }

            atomic_json_write(
-                self.session_log_file,
+                log_file,
                entry,
                indent=2,
                default=str,
@@ -1582,6 +1593,7 @@ class AIAgent:
            if self.verbose_logging:
                logging.warning(f"Failed to save session log: {e}")

+
    def interrupt(self, message: str = None) -> None:
        """
        Request the agent to interrupt its current tool-calling loop.
@@ -3188,17 +3200,21 @@ class AIAgent:
        Used to decide whether to strip image content parts from API-bound
        messages (for non-vision models) or let the provider adapter handle
        them natively (for vision-capable models).
+
+        Resolution order (see ``agent.image_routing._supports_vision_override``):
+          1. ``model.supports_vision`` (top-level, single-model shortcut)
+          2. ``providers.<provider>.models.<model>.supports_vision``
+          3. models.dev capability lookup
+        Custom/local models absent from models.dev would otherwise be
+        misclassified as non-vision and have their images stripped.
        """
        try:
-            from agent.models_dev import get_model_capabilities
+            from hermes_cli.config import load_config
+            from agent.image_routing import _lookup_supports_vision
+            cfg = load_config()
            provider = (getattr(self, "provider", "") or "").strip()
            model = (getattr(self, "model", "") or "").strip()
-            if not provider or not model:
-                return False
-            caps = get_model_capabilities(provider, model)
-            if caps is None:
-                return False
-            return bool(caps.supports_vision)
+            return _lookup_supports_vision(provider, model, cfg) is True
        except Exception:
            return False

@@ -3341,6 +3357,25 @@ class AIAgent:
            return content

        if self._model_supports_vision():
+            # Vision-capable on paper — but if we've already learned in this
+            # session that the active (provider, model) rejects list-type
+            # tool content (e.g. Xiaomi MiMo's 400 "text is not set"),
+            # short-circuit to a text summary so we don't burn another
+            # round-trip relearning the same lesson.  Cache populated by
+            # the 400 recovery path in agent.conversation_loop.  Transient
+            # per-session; next session retries.
+            key = (
+                (getattr(self, "provider", "") or "").strip().lower(),
+                (getattr(self, "model", "") or "").strip(),
+            )
+            no_list = getattr(self, "_no_list_tool_content_models", None)
+            if no_list and key in no_list:
+                logger.debug(
+                    "Tool %s: model %s/%s known to reject list-type tool "
+                    "content this session — sending text summary",
+                    tool_name, key[0], key[1],
+                )
+                return _multimodal_text_summary(result)
            return content

        summary = _multimodal_text_summary(result)
@@ -3369,6 +3404,80 @@ class AIAgent:
        from agent.conversation_compression import try_shrink_image_parts_in_messages
        return try_shrink_image_parts_in_messages(api_messages)

+    def _try_strip_image_parts_from_tool_messages(self, api_messages: list) -> bool:
+        """Downgrade list-type tool messages to text summaries in-place.
+
+        Recovery path for providers that reject list-type tool message content
+        (e.g. Xiaomi MiMo's 400 "text is not set"; see issue #27344).  Walks
+        ``api_messages`` for any ``role: "tool"`` message whose ``content`` is
+        a list containing image parts and replaces the content with the
+        surviving text part(s), or a minimal placeholder if none survive.
+
+        The active (provider, model) is recorded in
+        ``self._no_list_tool_content_models`` whenever a model id is set —
+        even if nothing is downgraded — so later tool results in this session
+        are sent as text summaries without another failed round-trip.
+
+        Returns True when at least one tool message was downgraded — the
+        caller (the 400 recovery branch in ``agent.conversation_loop``) uses
+        this to decide whether to retry the API call with the modified
+        history or surface the original error.
+        """
+        if not isinstance(api_messages, list):
+            return False
+
+        # Record (provider, model) so we don't relearn this lesson.
+        key = (
+            (getattr(self, "provider", "") or "").strip().lower(),
+            (getattr(self, "model", "") or "").strip(),
+        )
+        # getattr, not hasattr: the cache may be missing *or* preset to None.
+        if getattr(self, "_no_list_tool_content_models", None) is None:
+            self._no_list_tool_content_models = set()
+        if key[1]:  # only record when we actually have a model id
+            self._no_list_tool_content_models.add(key)
+
+        changed = False
+        for msg in api_messages:
+            if not isinstance(msg, dict) or msg.get("role") != "tool":
+                continue
+            content = msg.get("content")
+            if not isinstance(content, list):
+                continue
+
+            # Salvage any text parts so the model still sees some signal.
+            text_parts: List[str] = []
+            had_image = False
+            for part in content:
+                if not isinstance(part, dict):
+                    if isinstance(part, str) and part.strip():
+                        text_parts.append(part.strip())
+                    continue
+                ptype = part.get("type")
+                if ptype in {"image_url", "input_image"}:
+                    had_image = True
+                    continue
+                if ptype in {"text", "input_text"}:
+                    text = str(part.get("text") or "").strip()
+                    if text:
+                        text_parts.append(text)
+
+            if not had_image:
+                # Text-only list: leave it alone and let the caller surface
+                # the original error if the provider rejects ANY list content.
+                continue
+
+            if text_parts:
+                msg["content"] = "\n\n".join(text_parts)
+            else:
+                msg["content"] = (
+                    "[image content removed — provider does not accept "
+                    "list-type tool message content]"
+                )
+            changed = True
+
+        return changed
+
    def _anthropic_preserve_dots(self) -> bool:
        """True when using an anthropic-compatible endpoint that preserves dots in model names.
        Alibaba/DashScope keeps dots (e.g. qwen3.5-plus).
@@ -47,6 +47,10 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
 AUTHOR_MAP = {
    # teknium (multiple emails)
    "teknium1@gmail.com": "teknium1",
+    "cipherframe@users.noreply.github.com": "CipherFrame",
+    "me@promplate.dev": "CNSeniorious000",
+    "yichengqiao21@gmail.com": "YarrowQiao",
+    "erhanyasarx@gmail.com": "erhnysr",
    "30366221+WorldWriter@users.noreply.github.com": "WorldWriter",
    "dafeng@DafengdeMacBook-Pro.local": "WorldWriter",
    "anadi.jaggia@gmail.com": "Jaggia",
@@ -56,12 +60,18 @@ AUTHOR_MAP = {
    "mgongzai@gmail.com": "vKongv",
    "0x.badfriend@gmail.com": "discodirector",
    "altriatree@gmail.com": "TruaShamu",
+    "contact-me@stark-x.cn": "Stark-X",
+    "nat@nthrow.io": "nthrow",
    "m@mobrienv.dev": "mikeyobrien",
    "saeed919@pm.me": "falasi",
+    "chrisdlc119@outlook.com": "chdlc",
+    "omar@techdeveloper.site": "nycomar",
    "qiyin.zuo@pcitc.com": "qiyin-code",
    "mr.aashiz@gmail.com": "aashizpoudel",
    "70629228+shaun0927@users.noreply.github.com": "shaun0927",
    "98262967+Bihruze@users.noreply.github.com": "Bihruze",
+    "189280367+Lempkey@users.noreply.github.com": "Lempkey",
+    "leovillalbajr@gmail.com": "Lempkey",
    "nidhi2894@gmail.com": "nidhi-singh02",
    "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel",
    "oleksii.lisikh@gmail.com": "olisikh",
@@ -74,6 +84,7 @@ AUTHOR_MAP = {
    "108427749+buntingszn@users.noreply.github.com": "buntingszn",
    "yanglongwei06@gmail.com": "Alex-yang00",
    "teknium@nousresearch.com": "teknium1",
+    "markuscontasul@gmail.com": "Glucksberg",
    "piyushvp1@gmail.com": "thelumiereguy",
    "dskwelmcy@163.com": "dskwe",
    "421774554@qq.com": "wuli666",
@@ -372,6 +383,7 @@ AUTHOR_MAP = {
    "bloodcarter@gmail.com": "bloodcarter",
    "scott@scotttrinh.com": "scotttrinh",
    "quocanh261997@gmail.com": "quocanh261997",
+    "savanne.kham@protonmail.com": "savanne-kham",  # PR #28958 salvage (strip tool_name for strict providers)
    # contributors (from noreply pattern)
    "david.vv@icloud.com": "davidvv",
    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
@@ -680,7 +692,7 @@ AUTHOR_MAP = {
    "hmbown@gmail.com": "Hmbown",
    "iacobs@m0n5t3r.info": "m0n5t3r",
    "jiayuw794@gmail.com": "JiayuuWang",
-    "jonny@nousresearch.com": "jquesnelle",
+    "jonny@nousresearch.com": "yoniebans",
    "jake@nousresearch.com": "simpolism",
    "juan.ovalle@mistral.ai": "jjovalle99",
    "julien.talbot@ergonomia.re": "Julientalbot",
@@ -713,6 +725,7 @@ AUTHOR_MAP = {
    "9219265+cresslank@users.noreply.github.com": "cresslank",
    "trevmanthony@gmail.com": "trevthefoolish",
    "ziliangpeng@users.noreply.github.com": "ziliangpeng",
+    "ziliangdotme@gmail.com": "ziliangpeng",
    "centripetal-star@users.noreply.github.com": "centripetal-star",
    "LeonSGP43@users.noreply.github.com": "LeonSGP43",
    "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
@@ -922,6 +935,8 @@ AUTHOR_MAP = {
    "holynn@placeholder.local": "holynn-q",
    "agent@hermes.local": "jacdevos",
    "sunsky.lau@gmail.com": "liuhao1024",
+    "fabianoeq@gmail.com": "rodrigoeqnit",
+    "178342791+sgtworkman@users.noreply.github.com": "sgtworkman",
    "qiuqfang98@qq.com": "keepcalmqqf",
    "261867348+ai-ag2026@users.noreply.github.com": "ai-ag2026",
    "yanzh.su@gmail.com": "YanzhongSu",
@@ -3,29 +3,36 @@
 # `pytest` directly to guarantee your local run matches CI behavior.
 #
 # What this script enforces:
-#   * -n 4 xdist workers (CI has 4 cores; -n auto diverges locally)
+#   * Per-file isolation via scripts/run_tests_parallel.py — each test
+#     file runs in its own freshly-spawned `python -m pytest <file>`
+#     subprocess. No xdist, no shared workers, no module-level leakage
+#     between files.
 #   * TZ=UTC, LANG=C.UTF-8, PYTHONHASHSEED=0 (deterministic)
-#   * Credential env vars blanked (conftest.py also does this, but this
-#     is belt-and-suspenders for anyone running `pytest` outside of
-#     our conftest path — e.g. calling pytest on a single file)
-#   * Proper venv activation
+#   * Environment wiped via `env -i` (conftest.py also blanks credentials,
+#     but this is belt-and-suspenders for anyone running pytest outside
+#     our conftest path — e.g. on a single file)
+#   * Proper venv activation (probes .venv, venv, then ~/.hermes/...)
 #
 # Usage:
-#   scripts/run_tests.sh                     # full suite
-#   scripts/run_tests.sh tests/agent/        # one directory
-#   scripts/run_tests.sh tests/agent/test_foo.py::TestClass::test_method
-#   scripts/run_tests.sh --tb=long -v        # pass-through pytest args
+#   scripts/run_tests.sh                            # full suite
+#   scripts/run_tests.sh -j 4                       # cap parallelism
+#   scripts/run_tests.sh tests/agent/               # discover only here
+#   scripts/run_tests.sh tests/agent/ tests/acp/    # multiple roots
+#   scripts/run_tests.sh tests/foo.py               # single file
+#   scripts/run_tests.sh tests/foo.py -- --tb=long  # path + pytest args
+#   scripts/run_tests.sh -- -v --tb=long            # pytest args only
+#
+# Everything after a literal '--' is passed through to each per-file
+# pytest invocation. Positional path arguments before '--' override
+# the default discovery root (tests/).

 set -euo pipefail

 # ── Locate repo root ────────────────────────────────────────────────────────
-# Works whether this is the main checkout or a worktree.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

 # ── Activate venv ───────────────────────────────────────────────────────────
-# Prefer a .venv in the current tree, fall back to the main checkout's venv
-# (useful for worktrees where we don't always duplicate the venv).
 VENV=""
 for candidate in "$REPO_ROOT/.venv" "$REPO_ROOT/venv" "$HOME/.hermes/hermes-agent/venv"; do
  if [ -f "$candidate/bin/activate" ]; then
@@ -41,94 +48,31 @@ fi

 PYTHON="$VENV/bin/python"

-# ── Ensure pytest-split is installed (required for shard-equivalent runs) ──
-if ! "$PYTHON" -c "import pytest_split" 2>/dev/null; then
-  echo "→ installing pytest-split into $VENV"
-  if command -v uv >/dev/null 2>&1; then
-    uv pip install --python "$PYTHON" --quiet "pytest-split>=0.9,<1"
-  elif "$PYTHON" -m pip --version >/dev/null 2>&1; then
-    "$PYTHON" -m pip install --quiet "pytest-split>=0.9,<1"
-  else
-    echo "error: neither uv nor pip is available in $VENV — pytest-split is missing" >&2
-    echo "  fix: run  uv pip install -e \".[dev]\"  from $REPO_ROOT" >&2
-    exit 1
-  fi
-fi

-# ── Hermetic environment ────────────────────────────────────────────────────
-# Mirror what CI does in .github/workflows/tests.yml + what conftest.py does.
-# Unset every credential-shaped var currently in the environment.
-while IFS='=' read -r name _; do
-  case "$name" in
-    *_API_KEY|*_TOKEN|*_SECRET|*_PASSWORD|*_CREDENTIALS|*_ACCESS_KEY| \
-    *_SECRET_ACCESS_KEY|*_PRIVATE_KEY|*_OAUTH_TOKEN|*_WEBHOOK_SECRET| \
-    *_ENCRYPT_KEY|*_APP_SECRET|*_CLIENT_SECRET|*_CORP_SECRET|*_AES_KEY| \
-    AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|FAL_KEY| \
-    GH_TOKEN|GITHUB_TOKEN)
-      unset "$name"
-      ;;
-  esac
-done < <(env)
-
-# Unset HERMES_* behavioral vars too.
-unset HERMES_YOLO_MODE HERMES_INTERACTIVE HERMES_QUIET HERMES_TOOL_PROGRESS \
-      HERMES_TOOL_PROGRESS_MODE HERMES_MAX_ITERATIONS HERMES_SESSION_PLATFORM \
-      HERMES_SESSION_CHAT_ID HERMES_SESSION_CHAT_NAME HERMES_SESSION_THREAD_ID \
-      HERMES_SESSION_SOURCE HERMES_SESSION_KEY HERMES_GATEWAY_SESSION \
-      HERMES_CRON_SESSION \
-      HERMES_PLATFORM HERMES_INFERENCE_PROVIDER HERMES_MANAGED HERMES_DEV \
-      HERMES_CONTAINER HERMES_EPHEMERAL_SYSTEM_PROMPT HERMES_TIMEZONE \
-      HERMES_REDACT_SECRETS HERMES_BACKGROUND_NOTIFICATIONS HERMES_EXEC_ASK \
-      HERMES_HOME_MODE 2>/dev/null || true
-
-# Pin deterministic runtime.
-export TZ=UTC
-export LANG=C.UTF-8
-export LC_ALL=C.UTF-8
-export PYTHONHASHSEED=0
-
-# ── Live-gateway test guard (developer machines) ────────────────────────────
-# If a system-wide hermes pytest_live_guard plugin is installed at
-# $HOME/.hermes/pytest_live_guard.py, force-load it here so every test run
-# from this script gets the protection regardless of which worktree is
-# checked out (in-tree tests/conftest.py guard may be missing on stale
-# branches). Harmless on CI / fresh machines that don't have the file.
+# ── Live-gateway plugin (computed before we drop env) ───────────────────────
+EXTRA_PYTHONPATH=""
+EXTRA_PYTEST_PLUGINS=""
 if [ -f "$HOME/.hermes/pytest_live_guard.py" ]; then
-  case ":${PYTHONPATH:-}:" in
-    *":$HOME/.hermes:"*) ;;
-    *) export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$HOME/.hermes" ;;
-  esac
-  if [[ ",${PYTEST_PLUGINS:-}," != *,pytest_live_guard,* ]]; then
-    export PYTEST_PLUGINS="${PYTEST_PLUGINS:+$PYTEST_PLUGINS,}pytest_live_guard"
-  fi
+  EXTRA_PYTHONPATH="$HOME/.hermes"
+  EXTRA_PYTEST_PLUGINS="pytest_live_guard"
 fi

-# ── Worker count ────────────────────────────────────────────────────────────
-# CI uses `-n auto` on ubuntu-latest which gives 4 workers. A 20-core
-# workstation with `-n auto` gets 20 workers and exposes test-ordering
-# flakes that CI will never see. Pin to 4 so local matches CI.
-WORKERS="${HERMES_TEST_WORKERS:-4}"

-# ── Run pytest ──────────────────────────────────────────────────────────────
+# ── Run in hermetic env ──────────────────────────────────────────────────────
+# env -i: start with an empty environment and opt in only what we need, so no
+# credential var can leak unless added here. HERMES_TEST_* vars are dropped too.
+echo "▶ running per-file parallel test suite via run_tests_parallel.py"
+echo "  (TZ=UTC LANG=C.UTF-8 PYTHONHASHSEED=0; clean env)"
+
 cd "$REPO_ROOT"

-# If the first argument starts with `-` treat all args as pytest flags;
-# otherwise treat them as test paths.
-ARGS=("$@")
-
-echo "▶ running pytest with $WORKERS workers, hermetic env, in $REPO_ROOT"
-echo "  (TZ=UTC LANG=C.UTF-8 PYTHONHASHSEED=0; all credential env vars unset)"
-
-# -o "addopts=" clears pyproject.toml's `-n auto` so our -n wins.
-# We re-add --timeout/--timeout-method here because pyproject.toml's
-# addopts is wiped above. The 60s cap is essential: see pyproject.toml
-# for why (suite deadlocks at session teardown without it).
-exec "$PYTHON" -m pytest \
-  -o "addopts=" \
-  -n "$WORKERS" \
-  --timeout=30 \
-  --timeout-method=signal \
-  --ignore=tests/integration \
-  --ignore=tests/e2e \
-  -m "not integration" \
-  "${ARGS[@]}"
+exec env -i \
+  PATH="$PATH" \
+  HOME="$HOME" \
+  TZ=UTC \
+  LANG=C.UTF-8 \
+  LC_ALL=C.UTF-8 \
+  PYTHONHASHSEED=0 \
+  ${EXTRA_PYTHONPATH:+PYTHONPATH="$EXTRA_PYTHONPATH"} \
+  ${EXTRA_PYTEST_PLUGINS:+PYTEST_PLUGINS="$EXTRA_PYTEST_PLUGINS"} \
+  "$PYTHON" "$SCRIPT_DIR/run_tests_parallel.py" "$@"
@@ -0,0 +1,650 @@
+#!/usr/bin/env python3
+"""Per-file parallel test runner.
+
+The minimum-viable replacement for pytest-xdist + a subprocess-isolation
+plugin. Discovers test files under ``tests/`` (excluding integration/e2e
+unless explicitly requested), then runs one ``python -m pytest <file>``
+subprocess per file, with bounded parallelism (default: ``os.cpu_count() * 2``).
+
+Why per-file rather than per-test?
+    Per-test spawn overhead (~250ms × 17k tests = 70min CPU minimum)
+    swamped the actual work. Per-file spawn (~250ms × ~850 files = ~3.5min)
+    fits in the budget while still giving every file a fresh Python
+    interpreter — the only isolation boundary that actually matters
+    (cross-file module-level state leakage was the original flake source;
+    intra-file state is the test author's responsibility).
+
+Why drop xdist entirely?
+    xdist's persistent workers accumulate state across files, which is
+    exactly the leakage we wanted to fix. xdist also adds complexity
+    (loadfile vs loadscope, --max-worker-restart, internal control plane)
+    that we don't need when the unit of work is "run pytest on one file".
+    A set of subprocess.Popen calls bounded by a ThreadPoolExecutor is
+    ~60 lines and does the job.
+
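+Usage:
+    python scripts/run_tests_parallel.py [options] [PATH ...] [-- pytest_args...]
+
+    Everything after a literal ``--`` is passed through to each per-file
+    pytest invocation (e.g. ``-- -v``, ``-- -x``, ``-- --tb=long``,
+    ``-- -k 'pattern'``, ``-- --lf``).
+
+Environment:
+    HERMES_TEST_WORKERS       Override worker count (default: os.cpu_count() * 2)
+    HERMES_TEST_PATHS         Override discovery roots (colon-sep, default: 'tests')
+    HERMES_TEST_FILE_TIMEOUT  Override per-file timeout in seconds (default: 600)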
+
+Exit code: 0 if every file's pytest exited 0; 1 otherwise.
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import subprocess
+import sys
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor, Future
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+
+# Default test discovery roots.
+_DEFAULT_ROOTS = ["tests"]
+
+# Directories to skip during discovery — the e2e + integration suites
+# require real services and are run separately. Match exactly the
+# ``--ignore=`` flags the previous CI command used.
+_SKIP_PARTS = {"integration", "e2e"}
+
+# Per-file wall-clock cap. Generous default — pytest-timeout still
+# enforces per-test caps inside each subprocess; this is just an outer
+# safety net so a single hung file can't stall the whole suite. Override
+# via --file-timeout or HERMES_TEST_FILE_TIMEOUT.
+_DEFAULT_FILE_TIMEOUT_SECONDS = 600.0  # 10 minutes
+
+
+def _count_tests(
+    files: List[Path], repo_root: Path, pytest_passthrough: List[str]
+) -> dict[Path, int]:
+    """Tally collected tests per file with a single ``pytest --co -q`` call.
+
+    The result maps ``repo_root / <path printed by pytest>`` to the number
+    of collected test ids for that file. A file that contributes no test
+    ids simply has no entry (that is not an error — it may only hold
+    fixtures or helpers).
+
+    The counts feed the discovery announcement (total) and the progress
+    lines (per file). If the collection subprocess times out (120s) or
+    cannot be started, an empty dict is returned and the caller runs
+    without counts.
+
+    Directories named in ``_SKIP_PARTS`` that exist under the default
+    roots are passed as ``--ignore`` so the collection pass does not pull
+    in tests the per-file runs are going to skip.
+    """
+    # Candidate skip directories: every default root crossed with every
+    # skipped directory name. Only the ones that exist become --ignore flags.
+    default_roots = [repo_root / name for name in _DEFAULT_ROOTS]
+    skip_candidates = [
+        base / part for base in default_roots for part in _SKIP_PARTS
+    ]
+    ignore_args: List[str] = []
+    for skipped in skip_candidates:
+        if skipped.is_dir():
+            ignore_args += ["--ignore", str(skipped)]
+
+    # Collection-only, quiet: one "path::test_id" line per collected test.
+    cmd = [sys.executable, "-m", "pytest", "--co", "-q"]
+    cmd += ignore_args
+    cmd += [str(f) for f in files]
+    cmd += pytest_passthrough
+    try:
+        result = subprocess.run(
+            cmd, cwd=repo_root, capture_output=True, text=True, timeout=120
+        )
+    except (subprocess.TimeoutExpired, OSError):
+        return {}
+
+    counts: dict[Path, int] = {}
+    for line in result.stdout.splitlines():
+        # e.g. tests/acp/test_auth.py::TestClass::test_name — anything
+        # without a "::" separator is not a test id line.
+        file_part, sep, _test_id = line.partition("::")
+        if not sep:
+            continue
+        key = repo_root / file_part
+        counts[key] = counts.get(key, 0) + 1
+
+    return counts
+
+
+def _discover_files(roots: List[Path]) -> List[Path]:
+    """Collect the test files to run from ``roots``, sorted.
+
+    * A root that is a directory is searched recursively for
+      ``test_*.py``; matches with any path component listed in
+      ``_SKIP_PARTS`` are dropped.
+    * A root that is a file is taken verbatim — no ``test_*`` name check
+      and no ``_SKIP_PARTS`` filtering, because the caller named it
+      explicitly.
+    * A root that does not exist is silently ignored.
+
+    Files are de-duplicated by their resolved path, but the returned
+    entries are the paths as they were found (not the resolved ones).
+    """
+    resolved_seen: set[Path] = set()
+    found: List[Path] = []
+
+    def _keep(candidate: Path) -> None:
+        # Dedupe on the canonical path; store the path as given.
+        canonical = candidate.resolve()
+        if canonical not in resolved_seen:
+            resolved_seen.add(canonical)
+            found.append(candidate)
+
+    for root in roots:
+        if not root.exists():
+            continue
+        if root.is_file():
+            # Explicitly named file: bypasses the skip filter.
+            _keep(root)
+            continue
+        for candidate in root.rglob("test_*.py"):
+            if _SKIP_PARTS.isdisjoint(candidate.parts):
+                _keep(candidate)
+
+    found.sort()
+    return found
+
+
+def _kill_tree(proc: "subprocess.Popen", pgid: int | None = None) -> None:
+    """Kill the pytest subprocess and every descendant it spawned.
+
+    A test run can spin up uvicorn servers, async runtimes, or other
+    long-running grandchildren that survive the pytest subprocess exit
+    if we don't kill the whole tree. ``subprocess.Popen.kill()`` only
+    targets the immediate child; grandchildren reparent to PID 1
+    (Linux) / get adopted by services.exe (Windows) and leak.
+
+    POSIX: the caller must pass ``pgid`` — the process group id captured
+    immediately after Popen (via ``os.getpgid(proc.pid)``). We can't
+    look it up here in the happy path because by the time we get
+    called the leader process has already been reaped and its pid is
+    gone from the kernel's process table, even though descendants in
+    the group are still alive. SIGKILL'ing the captured pgid takes out
+    everything in that group atomically.
+
+    Windows: ``taskkill /F /T /PID`` walks the recorded ppid chain and
+    terminates the whole tree, even when the root has already exited.
+
+    Why not psutil: psutil walks the parent-child tree, but in the
+    happy path the root has already been reaped so ``psutil.Process(pid)``
+    can't find it; grandchildren reparented to PID 1 are also
+    unreachable by tree walk at that point. The platform-native
+    primitives (process groups / taskkill) handle both cases correctly
+    without an extra abstraction layer.
+
+    Args:
+        proc: The ``Popen`` handle of the pytest subprocess.
+        pgid: POSIX process group id captured right after spawn. When it
+            is ``None`` on POSIX, no group kill is attempted and only
+            ``proc.kill()`` runs. Unused on Windows.
+
+    Every failure along the way (process already gone, permission denied,
+    taskkill missing or timing out) is swallowed: this is best-effort
+    cleanup and never raises for those cases.
+    """
+    # Nothing was ever spawned — nothing to clean up.
+    if proc.pid is None:
+        return
+
+    if sys.platform == "win32":
+        try:
+            # /T = include child processes, /F = force termination.
+            subprocess.run(
+                ["taskkill", "/F", "/T", "/PID", str(proc.pid)],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+                timeout=10,
+            )  # windows-footgun: ok
+        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+            pass
+    else:
+        # POSIX: kill the captured pgid. Local-import signal so the
+        # SIGKILL attribute is never referenced on Windows.
+        if pgid is not None:
+            try:
+                import signal as _signal
+                os.killpg(pgid, _signal.SIGKILL)  # windows-footgun: ok
+            except (ProcessLookupError, PermissionError, OSError):
+                # Group already gone (or not ours to signal) — fine.
+                pass
+
+    # Belt-and-suspenders: ensure subprocess.communicate() sees the exit.
+    try:
+        proc.kill()
+    except (ProcessLookupError, OSError):
+        pass
+
+
+def _run_one_file(
+    file: Path,
+    pytest_args: List[str],
+    repo_root: Path,
+    file_timeout: float,
+) -> Tuple[Path, int, str, dict[str, int]]:
+    """Run ``python -m pytest <file> <pytest_args>`` in a fresh subprocess.
+
+    Returns (file, returncode, captured_combined_output, summary_counts).
+
+    ``summary_counts`` is the result of ``_parse_pytest_summary(output)`` —
+    the test-level counts scraped from pytest's summary line (an empty
+    dict when no counts line is found in the output).
+
+    pytest exit codes (https://docs.pytest.org/en/stable/reference/exit-codes.html):
+        0 = all tests passed
+        1 = some tests failed
+        2 = test execution interrupted
+        3 = internal error
+        4 = pytest CLI usage error
+        5 = no tests collected
+
+    We treat exit 5 as a pass: it just means every test in the file was
+    skipped or filtered by a marker (e.g. ``-m 'not integration'`` skips
+    files where every test is marked integration). That's intentional and
+    not a failure mode.
+
+    On per-file timeout (``file_timeout`` seconds) or any other exception
+    during ``communicate()``, we kill the whole process group / process
+    tree so grandchildren (uvicorn servers, async runtimes, etc.) do not
+    orphan onto PID 1. The pytest-timeout plugin enforces per-test
+    timeouts inside the subprocess; this outer timeout exists only to
+    bound a pathologically slow or hung file as a whole.
+
+    A timed-out file is reported with return code 124 and its output is
+    prefixed with a one-line timeout notice.
+    """
+    cmd = [sys.executable, "-m", "pytest", str(file), *pytest_args]
+    proc = subprocess.Popen(
+        cmd,
+        cwd=repo_root,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        text=True,
+        # POSIX: place the child at the head of its own process group so
+        # _kill_tree can SIGKILL the group atomically.
+        # Windows: NOTE(review) the subprocess docs describe
+        # start_new_session as POSIX-only (setsid()), so it is not relied
+        # on there; _kill_tree handles the Windows path via taskkill /F /T.
+        start_new_session=True,
+    )
+
+    # Capture the pgid NOW, before the leader can exit and be reaped.
+    # Once the leader is reaped, os.getpgid(proc.pid) raises
+    # ProcessLookupError even though grandchildren in that group are
+    # still alive — defeating the whole cleanup. None on Windows where
+    # the pgid concept doesn't apply (taskkill walks ppid chain instead).
+    pgid: int | None = None
+    if sys.platform != "win32":
+        try:
+            pgid = os.getpgid(proc.pid)
+        except (ProcessLookupError, PermissionError):
+            # Astonishingly fast child? Already dead. With pgid left as
+            # None, _kill_tree skips the group kill and only calls
+            # proc.kill().
+            pgid = None
+
+    try:
+        output, _ = proc.communicate(timeout=file_timeout)
+        rc = proc.returncode
+    except subprocess.TimeoutExpired:
+        _kill_tree(proc, pgid=pgid)
+        # Drain whatever the child wrote before we killed it so we have
+        # something to surface in the failure dump.
+        try:
+            output, _ = proc.communicate(timeout=10)
+        except subprocess.TimeoutExpired:
+            output = "(file timeout exceeded; output unavailable)"
+        rc = 124  # de facto convention for "killed by timeout".
+        output = (
+            f"(per-file timeout: {file_timeout:.0f}s exceeded; "
+            f"process tree SIGKILL'd)\n{output}"
+        )
+    except BaseException:
+        # KeyboardInterrupt / runner crash — make sure no zombie
+        # grandchildren outlive us.
+        _kill_tree(proc, pgid=pgid)
+        raise
+    else:
+        # Happy path: pytest exited on its own. The child process already
+        # cleaned up its grandchildren if it's well-behaved, but
+        # well-behaved is not universal — kill the group anyway. Already-
+        # dead processes are a no-op.
+        _kill_tree(proc, pgid=pgid)
+
+    if rc == 5:
+        # No tests collected — every test in the file was filtered out.
+        # Treat as a pass: rc is rewritten to 0, so from here on the file
+        # is indistinguishable from an ordinary passing file.
+        rc = 0
+    summary = _parse_pytest_summary(output)
+    return file, rc, output, summary
+
+
+def _parse_pytest_summary(output: str) -> dict[str, int]:
+    """Extract per-file test pass/fail/skip counts from pytest output.
+
+    pytest ends a run with a counts line such as
+    ``12 passed, 3 skipped, 1 failed in 2.1s`` (optionally wrapped in
+    ``=====``). The output is scanned from the last line upwards and the
+    first line that yields any counts wins.
+
+    The scan gives up — returning whatever was found, i.e. nothing — as
+    soon as it reaches a ``FAILED ...`` entry or the
+    ``short test summary info`` header: the counts line is printed after
+    that section, so anything above it is failure detail whose text may
+    contain count-like phrases ("expected 3 passed") that must not be
+    mistaken for the real totals.
+
+    Args:
+        output: Combined stdout/stderr captured from one pytest run.
+
+    Returns:
+        A dict holding only the keys that were found, out of ``passed``,
+        ``failed``, ``skipped``, ``errors``, ``xfailed``, ``xpassed``.
+        pytest's singular ``1 error`` is reported under ``errors``.
+        Empty when no counts line exists (e.g. the run was killed).
+    """
+    import re
+
+    result: dict[str, int] = {}
+    # Walk backwards from the end — the summary line is always near the tail.
+    for line in reversed(output.splitlines()):
+        line = line.strip()
+        if not line:
+            continue
+        # Check the stop condition BEFORE scraping so a failure-detail
+        # line is never misread as the counts line. The header is matched
+        # case-insensitively because pytest prints it in lower case,
+        # wrapped in '=' characters.
+        if line.startswith("FAILED") or "short test summary" in line.lower():
+            break
+        # Match "N passed", "N failed", "N skipped", "N errors", "N xfailed", "N xpassed"
+        for m in re.finditer(
+            r"(\d+)\s+(passed|failed|skipped|errors|xfailed|xpassed)\b", line
+        ):
+            result[m.group(2)] = int(m.group(1))
+        # Also match "N error" (singular — pytest uses this for a count of 1).
+        for m in re.finditer(r"(\d+)\s+error\b", line):
+            result["errors"] = result.get("errors", 0) + int(m.group(1))
+        if result:
+            # Found the counts line — done.
+            break
+    return result
+
+
+def _format_file(file: Path, repo_root: Path) -> str:
+    """Return ``file`` as a display string, relative to the repo when possible.
+
+    Both paths are resolved first, so a file inside the repository prints
+    as ``tests/acp/test_auth.py`` and not as its full absolute path.
+    When the file is not located under ``repo_root`` the path is returned
+    exactly as it was passed in.
+    """
+    try:
+        shown = file.resolve().relative_to(repo_root.resolve())
+    except ValueError:
+        # Not under the repo root — show the caller's path untouched.
+        shown = file
+    return str(shown)
+
+
+def _print_progress(
+    tests_done: int,
+    total_tests: int,
+    file: Path,
+    rc: int,
+    dur: float,
+    repo_root: Path,
+    tests_passed: int,
+    tests_failed: int,
+    test_counts: dict[Path, int],
+    file_summary: dict[str, int] | None = None,
+) -> None:
+    """Print one progress line for a finished test file.
+
+    The line has the shape
+    ``[ pct% | done/total | ✓passed | ✗failed] <status> <file> (<counts>, <secs>s)``.
+
+    Args:
+        tests_done: Tests accounted for so far (from the collection pass).
+        total_tests: Total collected tests; 0 when collection gave nothing,
+            in which case the percentage is shown as 0.
+        file: The file that just finished.
+        rc: Its return code; 0 renders as ✓, anything else as ✗.
+        dur: Seconds elapsed for this file.
+        repo_root: Used to shorten ``file`` for display.
+        tests_passed: Running total of passed tests.
+        tests_failed: Running total of failed tests.
+        test_counts: Collected test count per file; used for the
+            parenthetical only when ``file_summary`` is empty or missing.
+        file_summary: Counts parsed from this file's pytest output. When
+            given and non-empty, the parenthetical lists the non-zero
+            passed/failed/skipped/errors/xfailed/xpassed counts instead of
+            the plain collected total.
+
+    The line is truncated to the terminal width when one is available.
+    """
+    status = "✓" if rc == 0 else "✗"
+    pct = (tests_done / total_tests * 100) if total_tests else 0
+    # Digit width for the ✓/✗ counters. Derive it from the overall total so
+    # the columns stay aligned for the whole run instead of widening every
+    # time the running count gains a digit. total_tests is 0 when the
+    # collection pass produced nothing, so never go below the running total.
+    fw = len(str(max(total_tests, tests_passed + tests_failed)))
+    # Build per-file test count string.
+    if file_summary:
+        # (summary key, display suffix) in display order; xfailed/xpassed
+        # are rare but shown when present. Zero counts are left out.
+        labelled = (
+            ("passed", "✓"),
+            ("failed", "✗"),
+            ("skipped", "s"),
+            ("errors", "e"),
+            ("xfailed", "xf"),
+            ("xpassed", "xp"),
+        )
+        parts = [
+            f"{file_summary[key]}{suffix}"
+            for key, suffix in labelled
+            if file_summary.get(key, 0)
+        ]
+        test_str = " ".join(parts) + ", " if parts else ""
+    else:
+        n_tests = test_counts.get(file, 0)
+        test_str = f"{n_tests} tests, " if n_tests else ""
+    msg = (
+        f"[{pct:5.1f}% | {tests_done:>5}/{total_tests}"
+        f" | ✓{tests_passed:>{fw}} | ✗{tests_failed:>{fw}}] "
+        f"{status} {_format_file(file, repo_root)} ({test_str}{dur:.1f}s)"
+    )
+    # Truncate to terminal width if available (no clobbering ANSI lines).
+    try:
+        cols = os.get_terminal_size().columns
+        if len(msg) > cols:
+            msg = msg[: cols - 1] + "…"
+    except OSError:
+        # Not attached to a terminal (CI logs, pipes) — print in full.
+        pass
+    print(msg, flush=True)
+
+
+def _print_inline_failure(
+    file: Path, output: str, repo_root: Path, pytest_passthrough: List[str]
+) -> None:
+    """Emit a boxed failure excerpt as soon as a file fails.
+
+    The box contains the last 30 lines of the file's pytest output
+    followed by a copy-pasteable command that re-runs just that file with
+    the same passthrough arguments, so the developer can start looking
+    at the breakage while the rest of the suite is still running.
+    """
+    rel = _format_file(file, repo_root)
+
+    # Repro command: the bare per-file invocation, plus the passthrough
+    # args when joining them yields anything.
+    extra = " ".join(pytest_passthrough) if pytest_passthrough else ""
+    repro = f"python -m pytest {rel} {extra}" if extra else f"python -m pytest {rel}"
+
+    # Tail of the output — normally the FAILED summary plus short test info.
+    tail_lines = output.rstrip().splitlines()[-30:]
+
+    print(flush=True)
+    print(f"  ╔╍ Failed: {rel} ╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍", flush=True)
+    for excerpt in tail_lines:
+        print(f"  ║ {excerpt}", flush=True)
+    print("  ║", flush=True)
+    print(f"  ║  Repro: {repro}", flush=True)
+    print("  ╚╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍", flush=True)
+    print(flush=True)
+
+
+def main() -> int:
+    """Discover test files, run each in its own pytest subprocess, report.
+
+    Arguments before a literal ``--`` are parsed by this script;
+    everything after it is forwarded unchanged to every per-file pytest
+    invocation (and to the collection-only counting pass).
+
+    Returns:
+        0 when every file's run ended with return code 0 (pytest's exit 5,
+        "no tests collected", is already mapped to 0 by ``_run_one_file``);
+        1 when no test files were discovered or at least one file failed.
+    """
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "-j",
+        "--jobs",
+        type=int,
+        # Env override wins; otherwise twice the CPU count (4 CPUs assumed
+        # when os.cpu_count() cannot tell).
+        default=int(os.environ.get("HERMES_TEST_WORKERS") or (os.cpu_count() or 4) * 2),
+        help="Parallel worker count (default: $HERMES_TEST_WORKERS or cpu_count*2)",
+    )
+    parser.add_argument(
+        "--paths",
+        default=os.environ.get("HERMES_TEST_PATHS", ":".join(_DEFAULT_ROOTS)),
+        help="Colon-separated discovery roots (default: 'tests')",
+    )
+    parser.add_argument(
+        "--include-integration",
+        action="store_true",
+        help="Don't skip integration/ e2e/ during discovery",
+    )
+    parser.add_argument(
+        "--file-timeout",
+        type=float,
+        default=float(
+            os.environ.get("HERMES_TEST_FILE_TIMEOUT", _DEFAULT_FILE_TIMEOUT_SECONDS)
+        ),
+        help=(
+            "Per-file wall-clock cap in seconds. On timeout, the pytest "
+            "subprocess and its full process tree are SIGKILL'd. "
+            "Default: 600 (10 min), env: HERMES_TEST_FILE_TIMEOUT."
+        ),
+    )
+    parser.add_argument(
+        "paths_positional",
+        nargs="*",
+        metavar="PATH",
+        help=(
+            "Restrict discovery to these paths (directories or .py files). "
+            "Mutually exclusive with --paths. Anything after a literal '--' "
+            "separator is passed through to each per-file pytest invocation."
+        ),
+    )
+    # Manually split argv on '--' so positional paths and pytest passthrough
+    # args don't fight over each other. argparse's nargs="*" positional is
+    # greedy and will swallow everything after '--' including the pytest
+    # flags, defeating the convention.
+    argv = sys.argv[1:]
+    if "--" in argv:
+        sep = argv.index("--")
+        our_args, pytest_passthrough = argv[:sep], argv[sep + 1 :]
+    else:
+        our_args, pytest_passthrough = argv, []
+    args = parser.parse_args(our_args)
+
+    # This script's parent directory's parent is taken as the repo root.
+    repo_root = Path(__file__).resolve().parent.parent
+
+    # Resolve discovery roots: positional path args override --paths if any
+    # were supplied, otherwise --paths (which itself defaults to 'tests').
+    # NOTE(review): the help text calls the two "mutually exclusive", but
+    # nothing here rejects passing both — positionals simply win.
+    if args.paths_positional:
+        # Positionals can be directories OR explicit .py files. Either is
+        # fine — _discover_files handles both via rglob('test_*.py') for
+        # dirs and direct inclusion for files.
+        roots = [repo_root / p for p in args.paths_positional]
+    else:
+        roots = [repo_root / p for p in args.paths.split(":") if p]
+
+    if args.include_integration:
+        # Caller takes responsibility — typically used via explicit -k filter.
+        # Emptying the module-level set disables the skip filter for both
+        # discovery and the counting pass.
+        global _SKIP_PARTS  # noqa: PLW0603 — config knob
+        _SKIP_PARTS = set()
+
+    files = _discover_files(roots)
+    if not files:
+        print(f"No test files discovered under {[str(r) for r in roots]}", file=sys.stderr)
+        return 1
+
+    # Count individual tests per file via a single pytest --co pass.
+    # An empty mapping (collection failed) leaves total_tests at 0.
+    test_counts = _count_tests(files, repo_root, pytest_passthrough)
+    total_tests = sum(test_counts.values())
+
+    print(
+        f"Discovered {len(files)} test files ({total_tests} tests) under "
+        f"{[str(r.relative_to(repo_root)) if r.is_relative_to(repo_root) else str(r) for r in roots]}; "
+        f"running with -j {args.jobs}",
+        flush=True,
+    )
+
+    # Capture and print on completion (out-of-order is fine — keeps the
+    # terminal clean rather than interleaving N parallel pytest outputs).
+    # failures holds (file, output, parsed summary) per failed file.
+    failures: List[Tuple[Path, str, Dict[str, int]]] = []
+    started = time.monotonic()
+    files_done = 0
+    tests_done = 0
+    pass_count = 0
+    fail_count = 0
+    tests_passed = 0
+    tests_failed = 0
+    # Guards the counters, the failures list and the printing, all of
+    # which are touched from the done-callbacks.
+    lock = threading.Lock()
+
+    def _on_done(file: Path, started_at: float, fut: "Future[Tuple[Path, int, str, dict[str, int]]]") -> None:
+        # Done-callback for one file: update the shared counters, print its
+        # progress line and, on failure, an inline excerpt.
+        nonlocal files_done, tests_done, pass_count, fail_count, tests_passed, tests_failed
+        n_tests = test_counts.get(file, 0)
+        try:
+            fpath, rc, output, summary = fut.result()
+        except Exception as exc:  # noqa: BLE001 — must always advance counter
+            # The worker itself blew up (not pytest): record the file as
+            # failed with the exception repr standing in for its output.
+            with lock:
+                files_done += 1
+                tests_done += n_tests
+                fail_count += 1
+                failures.append((file, f"runner crashed: {exc!r}", {}))
+                _print_progress(
+                    tests_done, total_tests, file, 1,
+                    time.monotonic() - started_at,
+                    repo_root, tests_passed, tests_failed,
+                    test_counts,
+                )
+            return
+        with lock:
+            files_done += 1
+            tests_done += n_tests
+            # Accumulate test-level counts from parsed summary.
+            tests_passed += summary.get("passed", 0)
+            tests_failed += summary.get("failed", 0)
+            if rc == 0:
+                pass_count += 1
+            else:
+                fail_count += 1
+                failures.append((fpath, output, summary))
+            _print_progress(
+                tests_done, total_tests, fpath, rc,
+                time.monotonic() - started_at,
+                repo_root, tests_passed, tests_failed,
+                test_counts,
+                file_summary=summary,
+            )
+            if rc != 0:
+                _print_inline_failure(fpath, output, repo_root, pytest_passthrough)
+
+    with ThreadPoolExecutor(max_workers=args.jobs) as pool:
+        futures: List[Future] = []
+        for file in files:
+            # t0 is the submission time, so the duration shown for a file
+            # includes any time it spent queued behind other files.
+            t0 = time.monotonic()
+            fut = pool.submit(
+                _run_one_file, file, pytest_passthrough, repo_root, args.file_timeout
+            )
+            # Bind file/t0 as lambda defaults so each callback keeps its
+            # own values rather than the loop's last ones.
+            fut.add_done_callback(lambda f, file=file, t0=t0: _on_done(file, t0, f))
+            futures.append(fut)
+        # Block until everything's done. ThreadPoolExecutor.__exit__ waits
+        # for all submitted work, but doing it explicitly here makes the
+        # control flow obvious. A future that ended in an Exception is not
+        # re-raised here; _on_done has already recorded it as a failure.
+        for fut in futures:
+            fut.result() if fut.exception() is None else None
+
+    elapsed = time.monotonic() - started
+    print()
+    pct = (tests_done / total_tests * 100) if total_tests else 0
+    print(f"=== Summary: {len(files)} files, {tests_passed} tests passed, {tests_failed} failed ({pct:.0f}% complete) in {elapsed:.1f}s ({args.jobs} workers) ===")
+
+    if failures:
+        print()
+        print("=== Failure output ===")
+        for file, output, _summary in failures:
+            print()
+            print(f"--- {_format_file(file, repo_root)} ---")
+            print(output.rstrip())
+        print()
+        # Split: files with actual test failures vs non-zero exit for other reasons
+        # The three buckets are disjoint and together cover every failure.
+        test_fail_files = [(f, s) for f, _o, s in failures if s.get("failed", 0) > 0]
+        all_passed_but_nonzero = [(f, s) for f, _o, s in failures
+                                  if s.get("failed", 0) == 0 and s.get("passed", 0) > 0]
+        no_tests_ran = [(f, s) for f, _o, s in failures
+                        if s.get("failed", 0) == 0 and s.get("passed", 0) == 0]
+        if test_fail_files:
+            total_tf = sum(s.get("failed", 0) for _, s in test_fail_files)
+            print(f"=== {len(test_fail_files)} file{'s' if len(test_fail_files) != 1 else ''} with test failures ({total_tf} test{'s' if total_tf != 1 else ''} failed) ===")
+            for file, s in test_fail_files:
+                nf = s.get("failed", 0)
+                print(f"  {_format_file(file, repo_root)}  ({nf} test{'s' if nf != 1 else ''} failed)")
+        if all_passed_but_nonzero:
+            print(f"=== {len(all_passed_but_nonzero)} file{'s' if len(all_passed_but_nonzero) != 1 else ''} where all tests passed but pytest exited non-zero (warnings-as-errors, hook failures, etc.) ===")
+            for file, s in all_passed_but_nonzero:
+                print(f"  {_format_file(file, repo_root)}  ({s.get('passed', 0)} passed)")
+        if no_tests_ran:
+            print(f"=== {len(no_tests_ran)} file{'s' if len(no_tests_ran) != 1 else ''} where no tests ran (collection/import error, timeout before collection, etc.) ===")
+            for file, s in no_tests_ran:
+                print(f"  {_format_file(file, repo_root)}")
+        return 1
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -629,13 +629,12 @@
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/fetch": {
-      "version": "1.1.0",
-      "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
-      "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.1.tgz",
+      "integrity": "sha512-GpptLrs57adMSuHi3VNj0mAF8dwh36LMaYF6XyJ6JMWlVsc+t42tm1HSEDmOs3A8fC9yyeisgLhsTVQokOZ0zw==",
      "license": "BSD-3-Clause",
      "dependencies": {
-        "@protobufjs/aspromise": "^1.1.1",
-        "@protobufjs/inquire": "^1.1.0"
+        "@protobufjs/aspromise": "^1.1.1"
      }
    },
    "node_modules/@protobufjs/float": {
@@ -645,9 +644,9 @@
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/inquire": {
-      "version": "1.1.1",
-      "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
-      "integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.2.tgz",
+      "integrity": "sha512-pa0vFRuws4wkvaXKK1uXZMAwAX4/t8ANaJo45iw/oQHNQ9q5xUzwgFmVJGXiga2BeN+zpX7Vf9vmsiIa2J+MUw==",
      "license": "BSD-3-Clause"
    },
    "node_modules/@protobufjs/path": {
@@ -1620,9 +1619,9 @@
      "license": "MIT"
    },
    "node_modules/protobufjs": {
-      "version": "7.5.6",
-      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.6.tgz",
-      "integrity": "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==",
+      "version": "7.6.0",
+      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.6.0.tgz",
+      "integrity": "sha512-LtESOsMPTZgyYtwxhvdgdjGL0HmXEaRA/hVD6sol4zA60hVXXXP/SGmxnqDbgGE8gy7pYex7cym+5vYPcmaXBQ==",
      "hasInstallScript": true,
      "license": "BSD-3-Clause",
      "dependencies": {
@@ -1630,14 +1629,14 @@
        "@protobufjs/base64": "^1.1.2",
        "@protobufjs/codegen": "^2.0.5",
        "@protobufjs/eventemitter": "^1.1.0",
-        "@protobufjs/fetch": "^1.1.0",
+        "@protobufjs/fetch": "^1.1.1",
        "@protobufjs/float": "^1.0.2",
-        "@protobufjs/inquire": "^1.1.1",
+        "@protobufjs/inquire": "^1.1.2",
        "@protobufjs/path": "^1.1.2",
        "@protobufjs/pool": "^1.1.0",
        "@protobufjs/utf8": "^1.1.1",
        "@types/node": ">=13.7.0",
-        "long": "^5.0.0"
+        "long": "^5.3.2"
      },
      "engines": {
        "node": ">=12.0.0"
@@ -2117,9 +2116,9 @@
      "license": "MIT"
    },
    "node_modules/ws": {
-      "version": "8.20.0",
-      "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
-      "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
+      "version": "8.20.1",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.1.tgz",
+      "integrity": "sha512-It4dO0K5v//JtTXuPkfEOaI3uUN87iYPnqo/ZzqCoG3g8uhA66QUMs/SrM0YK7/NAu+r4LMh/9dq2A7k+rHs+w==",
      "license": "MIT",
      "engines": {
        "node": ">=10.0.0"
@@ -336,7 +336,8 @@ The registry of record is `hermes_cli/commands.py` — every consumer
 ~/.hermes/config.yaml       Main configuration
 ~/.hermes/.env              API keys and secrets
 $HERMES_HOME/skills/        Installed skills
-~/.hermes/sessions/         Session transcripts
+~/.hermes/sessions/         Gateway routing index, request dumps, *.jsonl transcripts (and optional per-session JSON snapshots when sessions.write_json_snapshots: true)
+~/.hermes/state.db          Canonical session store (SQLite + FTS5)
 ~/.hermes/logs/             Gateway and error logs
 ~/.hermes/auth.json         OAuth tokens and credential pools
 ~/.hermes/hermes-agent/     Source code (if git-installed)
@@ -867,7 +868,7 @@ hermes config set auxiliary.vision.model <model_name>
 | Env variables | `hermes config env-path` or [Env vars reference](https://hermes-agent.nousresearch.com/docs/reference/environment-variables) |
 | CLI commands | `hermes --help` or [CLI reference](https://hermes-agent.nousresearch.com/docs/reference/cli-commands) |
 | Gateway logs | `~/.hermes/logs/gateway.log` |
-| Session files | `~/.hermes/sessions/` or `hermes sessions browse` |
+| Session files | `hermes sessions browse` (reads state.db) |
 | Source code | `~/.hermes/hermes-agent/` |

 ---
@@ -40,6 +40,16 @@ def _clean_env(monkeypatch):
        "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
    ):
        monkeypatch.delenv(key, raising=False)
+    # Module-level unhealthy cache (10-min TTL) leaks between tests;
+    # earlier tests that call _mark_provider_unhealthy() poison the
+    # cache for later ones, causing _resolve_auto to skip providers
+    # that the test patched to return valid clients.
+    import agent.auxiliary_client as _aux_mod
+    _aux_mod._aux_unhealthy_until.clear()
+    _aux_mod._aux_unhealthy_logged_at.clear()
+    yield
+    _aux_mod._aux_unhealthy_until.clear()
+    _aux_mod._aux_unhealthy_logged_at.clear()


@pytest.fixture
@@ -461,6 +471,17 @@ class TestExpiredCodexFallback:
        import base64
        import time as _time

+        # Belt-and-suspenders: _try_openrouter marks openrouter unhealthy
+        # when OPENROUTER_API_KEY is absent (which the preceding test in
+        # this class exercises).  The file-level _clean_env autouse fixture
+        # clears the cache, but fixture ordering with the conftest
+        # _hermetic_environment autouse can leave a narrow window where
+        # the mark reappears.  Explicitly clear here so this test is
+        # independent of run order.
+        import agent.auxiliary_client as _aux_mod
+        _aux_mod._aux_unhealthy_until.clear()
+        _aux_mod._aux_unhealthy_logged_at.clear()
+
        header = base64.urlsafe_b64encode(b'{"alg":"RS256","typ":"JWT"}').rstrip(b"=").decode()
        payload_data = json.dumps({"exp": int(_time.time()) - 3600}).encode()
        payload = base64.urlsafe_b64encode(payload_data).rstrip(b"=").decode()
@@ -1047,6 +1068,20 @@ class TestGetProviderChain:
 class TestTryPaymentFallback:
    """_try_payment_fallback skips the failed provider and tries alternatives."""

+    @pytest.fixture(autouse=True)
+    def _clear_unhealthy_cache(self):
+        """Earlier tests in this file call _mark_provider_unhealthy() which
+        pollutes the module-level ``_aux_unhealthy_until`` dict (10-min TTL).
+        Without this cleanup the fallback chain skips providers we've patched
+        to return valid clients — the patched function is never called.
+        """
+        from agent.auxiliary_client import _aux_unhealthy_until, _aux_unhealthy_logged_at
+        _aux_unhealthy_until.clear()
+        _aux_unhealthy_logged_at.clear()
+        yield
+        _aux_unhealthy_until.clear()
+        _aux_unhealthy_logged_at.clear()
+
    def test_skips_failed_provider(self):
        mock_client = MagicMock()
        with patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)), \
@@ -0,0 +1,93 @@
+from types import SimpleNamespace
+
+from agent.agent_init import _merge_custom_provider_extra_body
+
+
+def test_custom_provider_extra_body_merges_into_request_overrides():
+    agent = SimpleNamespace(
+        provider="custom",
+        model="google/gemma-4-31b-it",
+        base_url="https://example.test/v1",
+        request_overrides={"service_tier": "priority"},
+    )
+
+    _merge_custom_provider_extra_body(
+        agent,
+        [
+            {
+                "name": "gemma",
+                "base_url": "https://example.test/v1/",
+                "model": "google/gemma-4-31b-it",
+                "extra_body": {
+                    "enable_thinking": True,
+                    "reasoning_effort": "high",
+                },
+            }
+        ],
+    )
+
+    assert agent.request_overrides == {
+        "service_tier": "priority",
+        "extra_body": {
+            "enable_thinking": True,
+            "reasoning_effort": "high",
+        },
+    }
+
+
+def test_custom_provider_extra_body_preserves_caller_override():
+    agent = SimpleNamespace(
+        provider="custom",
+        model="google/gemma-4-31b-it",
+        base_url="https://example.test/v1",
+        request_overrides={
+            "extra_body": {
+                "reasoning_effort": "low",
+                "caller_only": True,
+            }
+        },
+    )
+
+    _merge_custom_provider_extra_body(
+        agent,
+        [
+            {
+                "name": "gemma",
+                "base_url": "https://example.test/v1",
+                "model": "google/gemma-4-31b-it",
+                "extra_body": {
+                    "enable_thinking": True,
+                    "reasoning_effort": "high",
+                },
+            }
+        ],
+    )
+
+    assert agent.request_overrides["extra_body"] == {
+        "enable_thinking": True,
+        "reasoning_effort": "low",
+        "caller_only": True,
+    }
+
+
+def test_custom_provider_extra_body_ignores_other_custom_models():
+    agent = SimpleNamespace(
+        provider="custom",
+        model="other-model",
+        base_url="https://example.test/v1",
+        request_overrides={},
+    )
+
+    _merge_custom_provider_extra_body(
+        agent,
+        [
+            {
+                "name": "gemma",
+                "base_url": "https://example.test/v1",
+                "model": "google/gemma-4-31b-it",
+                "extra_body": {"enable_thinking": True},
+            }
+        ],
+    )
+
+    assert agent.request_overrides == {}
@@ -56,6 +56,7 @@ class TestFailoverReason:
            "overloaded", "server_error", "timeout",
            "context_overflow", "payload_too_large", "image_too_large",
            "model_not_found", "format_error",
+            "multimodal_tool_content_unsupported",
            "provider_policy_blocked",
            "thinking_signature", "long_context_tier",
            "oauth_long_context_beta_forbidden",
@@ -1256,3 +1257,66 @@ class TestRateLimitErrorWithoutStatusCode:
        e.status_code = None
        result = classify_api_error(e, provider="copilot", model="gpt-4o")
        assert result.reason != FailoverReason.rate_limit
+
+
+
+# ── Test: multimodal_tool_content_unsupported pattern ───────────────────
+
+class TestMultimodalToolContentUnsupported:
+    """Issue #27344 — providers that reject list-type tool message content
+    should be classified as ``multimodal_tool_content_unsupported`` so the
+    retry loop can downgrade screenshots to text and try again.
+    """
+
+    def test_xiaomi_mimo_text_is_not_set_pattern(self):
+        """The actual Xiaomi MiMo 400 wording from the bug report."""
+        e = MockAPIError(
+            "Error code: 400 - {'error': {'code': '400', 'message': 'Param Incorrect', 'param': 'text is not set', 'type': ''}}",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="xiaomi", model="mimo-v2.5")
+        assert result.reason == FailoverReason.multimodal_tool_content_unsupported
+        assert result.retryable is True
+
+    def test_generic_tool_message_must_be_string(self):
+        e = MockAPIError(
+            "tool message content must be a string",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="custom", model="some-model")
+        assert result.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_expected_string_got_list(self):
+        e = MockAPIError(
+            "Schema validation failed: expected string, got list",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="custom", model="some-model")
+        assert result.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_multimodal_tool_content_takes_priority_over_context_overflow(self):
+        """Some providers return a 400 whose message contains BOTH
+        'text is not set' and a length-shaped phrase; the tool-content
+        recovery is cheaper than compression so it must win the priority.
+        """
+        e = MockAPIError(
+            "text is not set; context length exceeded",
+            status_code=400,
+        )
+        result = classify_api_error(e, provider="xiaomi", model="mimo-v2.5")
+        assert result.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_no_status_code_path_also_classifies(self):
+        """When the error reaches us without a status code (transport
+        layer ate it) the message-only classifier branch must also
+        recognise the pattern.
+        """
+        e = MockTransportError("tool_call.content must be string")
+        result = classify_api_error(e, provider="alibaba", model="qwen3.5-plus")
+        assert result.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_unrelated_400_is_not_misclassified(self):
+        """Make sure the patterns don't false-positive on normal 400s."""
+        e = MockAPIError("bad request: missing field 'model'", status_code=400)
+        result = classify_api_error(e, provider="openrouter", model="anthropic/claude-sonnet-4")
+        assert result.reason != FailoverReason.multimodal_tool_content_unsupported
@@ -9,8 +9,11 @@ from unittest.mock import patch
 import pytest

 from agent.image_routing import (
+    _coerce_capability_bool,
    _coerce_mode,
    _explicit_aux_vision_override,
+    _lookup_supports_vision,
+    _supports_vision_override,
    build_native_content_parts,
    decide_image_input_mode,
 )
@@ -125,6 +128,168 @@ class TestDecideImageInputMode:
            assert decide_image_input_mode("xiaomi", "mimo-v2.5-pro", {}) == "text"


+# ─── _coerce_capability_bool ─────────────────────────────────────────────────
+
+
+class TestCoerceCapabilityBool:
+    def test_real_bool_passes_through(self):
+        assert _coerce_capability_bool(True) is True
+        assert _coerce_capability_bool(False) is False
+
+    def test_int_0_and_1(self):
+        assert _coerce_capability_bool(1) is True
+        assert _coerce_capability_bool(0) is False
+
+    def test_other_ints_return_none(self):
+        assert _coerce_capability_bool(2) is None
+        assert _coerce_capability_bool(-1) is None
+
+    def test_yaml_true_tokens(self):
+        for s in ("true", "TRUE", "True", "yes", "on", "1", "  true  "):
+            assert _coerce_capability_bool(s) is True
+
+    def test_yaml_false_tokens(self):
+        for s in ("false", "FALSE", "False", "no", "off", "0", "  false  "):
+            assert _coerce_capability_bool(s) is False
+
+    def test_quoted_false_does_not_silently_become_true(self):
+        # Regression: bool("false") is True in Python. A user writing
+        # supports_vision: "false" must NOT enable native vision routing.
+        assert _coerce_capability_bool("false") is False
+
+    def test_unrecognised_strings_return_none(self):
+        # None signals "no override, fall through to models.dev" rather than a silent truthy value.
+        assert _coerce_capability_bool("maybe") is None
+        assert _coerce_capability_bool("") is None
+        assert _coerce_capability_bool("definitely") is None
+
+    def test_other_types_return_none(self):
+        assert _coerce_capability_bool(None) is None
+        assert _coerce_capability_bool([]) is None
+        assert _coerce_capability_bool({}) is None
+        assert _coerce_capability_bool(1.5) is None
+
+
+# ─── _supports_vision_override ───────────────────────────────────────────────
+
+
+class TestSupportsVisionOverride:
+    def test_no_cfg_returns_none(self):
+        assert _supports_vision_override(None, "custom", "my-llava") is None
+        assert _supports_vision_override({}, "custom", "my-llava") is None
+
+    def test_top_level_shortcut_wins(self):
+        cfg = {"model": {"supports_vision": True}}
+        assert _supports_vision_override(cfg, "custom", "my-llava") is True
+
+    def test_top_level_false_propagates(self):
+        cfg = {"model": {"supports_vision": False}}
+        assert _supports_vision_override(cfg, "custom", "my-llava") is False
+
+    def test_per_provider_per_model_via_runtime_name(self):
+        cfg = {
+            "providers": {
+                "custom": {"models": {"my-llava": {"supports_vision": True}}},
+            },
+        }
+        assert _supports_vision_override(cfg, "custom", "my-llava") is True
+
+    def test_per_provider_per_model_via_config_name(self):
+        # Named custom provider — runtime self.provider == "custom", config
+        # holds the original name under model.provider.
+        cfg = {
+            "model": {"provider": "my-vllm"},
+            "providers": {
+                "my-vllm": {"models": {"my-llava": {"supports_vision": True}}},
+            },
+        }
+        assert _supports_vision_override(cfg, "custom", "my-llava") is True
+
+    def test_quoted_false_string_in_yaml_does_not_enable(self):
+        # Real-world: user writes supports_vision: "false" (quoted).
+        cfg = {"model": {"supports_vision": "false"}}
+        assert _supports_vision_override(cfg, "custom", "my-llava") is False
+
+    def test_unrecognised_value_falls_through(self):
+        cfg = {"model": {"supports_vision": "maybe"}}
+        assert _supports_vision_override(cfg, "custom", "my-llava") is None
+
+    def test_no_override_returns_none(self):
+        cfg = {"model": {"default": "my-llava"}}
+        assert _supports_vision_override(cfg, "custom", "my-llava") is None
+
+    def test_malformed_sections_are_ignored(self):
+        # User accidentally wrote a string where a section was expected —
+        # don't blow up, just fall through.
+        cfg = {"model": "some-string", "providers": ["not-a-dict"]}
+        assert _supports_vision_override(cfg, "custom", "my-llava") is None
+
+
+# ─── _lookup_supports_vision (override-aware) ────────────────────────────────
+
+
+class TestLookupSupportsVisionOverride:
+    def test_config_override_short_circuits_models_dev(self):
+        # Config says True, models.dev says None — config wins.
+        cfg = {"model": {"supports_vision": True}}
+        with patch("agent.models_dev.get_model_capabilities", return_value=None):
+            assert _lookup_supports_vision("custom", "my-llava", cfg) is True
+
+    def test_config_override_false_beats_vision_capable_models_dev(self):
+        # User explicitly disables vision on a models.dev-vision-capable model.
+        fake_caps = type("Caps", (), {"supports_vision": True})()
+        cfg = {"model": {"supports_vision": False}}
+        with patch("agent.models_dev.get_model_capabilities", return_value=fake_caps):
+            assert _lookup_supports_vision("anthropic", "claude-sonnet-4", cfg) is False
+
+    def test_no_override_falls_back_to_models_dev(self):
+        fake_caps = type("Caps", (), {"supports_vision": True})()
+        with patch("agent.models_dev.get_model_capabilities", return_value=fake_caps):
+            assert _lookup_supports_vision("anthropic", "claude-sonnet-4", {}) is True
+
+    def test_no_override_no_models_dev_entry_returns_none(self):
+        with patch("agent.models_dev.get_model_capabilities", return_value=None):
+            assert _lookup_supports_vision("custom", "my-llava", {}) is None
+
+    def test_cfg_none_falls_back_to_models_dev(self):
+        # Caller didn't pass cfg at all — old call sites must still work.
+        with patch("agent.models_dev.get_model_capabilities", return_value=None):
+            assert _lookup_supports_vision("openrouter", "x", None) is None
+
+
+# ─── decide_image_input_mode with auto + override ────────────────────────────
+
+
+class TestAutoModeRespectsOverride:
+    def test_auto_native_for_custom_with_supports_vision_true(self):
+        # The motivating bug: Qwen3.6 on local llama.cpp via provider=custom.
+        # Without the override, auto falls back to text. With it, auto picks
+        # native — no need to also set agent.image_input_mode: native.
+        cfg = {"model": {"supports_vision": True}}
+        with patch("agent.models_dev.get_model_capabilities", return_value=None):
+            assert decide_image_input_mode("custom", "qwen3.6-35b", cfg) == "native"
+
+    def test_auto_text_for_custom_with_supports_vision_false(self):
+        cfg = {"model": {"supports_vision": False}}
+        with patch("agent.models_dev.get_model_capabilities", return_value=None):
+            assert decide_image_input_mode("custom", "some-text-only", cfg) == "text"
+
+    def test_auto_text_for_custom_with_no_override(self):
+        # Unchanged baseline: unknown custom model → text.
+        with patch("agent.models_dev.get_model_capabilities", return_value=None):
+            assert decide_image_input_mode("custom", "unknown", {}) == "text"
+
+    def test_explicit_aux_vision_override_still_wins(self):
+        # If the user has configured a dedicated vision aux backend, respect
+        # it even when supports_vision: true is also set.
+        cfg = {
+            "model": {"supports_vision": True},
+            "auxiliary": {"vision": {"provider": "openrouter", "model": "gemini-2.5-pro"}},
+        }
+        with patch("agent.models_dev.get_model_capabilities", return_value=None):
+            assert decide_image_input_mode("custom", "qwen3.6-35b", cfg) == "text"
+
+
 # ─── build_native_content_parts ──────────────────────────────────────────────


@@ -1060,3 +1060,191 @@ class TestHonchoCadenceTracking:
        p.on_turn_start(2, "second message")
        should_skip = p._injection_frequency == "first-turn" and p._turn_count > 1
        assert should_skip, "Second turn (turn 2) SHOULD be skipped"
+
+
+class TestMemoryToolToolsetGate:
+    """Issue #5544: memory provider tools must respect platform_toolsets.
+
+    Before the fix, MemoryManager.get_all_tool_schemas() output was appended
+    to AIAgent.tools unconditionally in agent_init.py — bypassing the
+    enabled_toolsets filter. Result: `platform_toolsets: telegram: []`
+    still leaked fact_store and other memory tools into the tool surface,
+    causing 10x latency on local models (Qwen3-30B: 1.7s → 42s) and
+    tool-call loops on small models.
+
+    These tests mirror the gate logic in agent/agent_init.py around the
+    memory provider tool injection block. The gate condition is:
+
+        enabled_toolsets is None        → no filter, inject (backward compat)
+        "memory" in enabled_toolsets    → user opted in, inject
+        otherwise (incl. [])            → skip injection
+    """
+
+    @staticmethod
+    def _run_memory_injection(enabled_toolsets, memory_manager):
+        """Simulate the gated memory-tool injection block from agent_init.py."""
+        tools = []
+        valid_tool_names = set()
+
+        if memory_manager and tools is not None and (
+            enabled_toolsets is None or "memory" in enabled_toolsets
+        ):
+            _existing = {
+                t.get("function", {}).get("name")
+                for t in tools
+                if isinstance(t, dict)
+            }
+            for _schema in memory_manager.get_all_tool_schemas():
+                _tname = _schema.get("name", "")
+                if _tname and _tname in _existing:
+                    continue
+                tools.append({"type": "function", "function": _schema})
+                if _tname:
+                    valid_tool_names.add(_tname)
+                    _existing.add(_tname)
+
+        return tools, valid_tool_names
+
+    def _mgr_with_tools(self, *tool_names):
+        """Build a MemoryManager whose providers expose the named tool schemas."""
+        mgr = MemoryManager()
+        p = FakeMemoryProvider(
+            "ext",
+            tools=[{"name": n, "description": n, "parameters": {}} for n in tool_names],
+        )
+        mgr.add_provider(p)
+        return mgr
+
+    def test_none_toolsets_injects(self):
+        """enabled_toolsets=None (no filter) injects memory tools — backward compat."""
+        mgr = self._mgr_with_tools("fact_store")
+        tools, names = self._run_memory_injection(None, mgr)
+        assert "fact_store" in names
+        assert any(t["function"]["name"] == "fact_store" for t in tools)
+
+    def test_memory_in_toolsets_injects(self):
+        """enabled_toolsets including 'memory' injects memory tools."""
+        mgr = self._mgr_with_tools("fact_store")
+        tools, names = self._run_memory_injection(["terminal", "memory", "web"], mgr)
+        assert "fact_store" in names
+
+    def test_empty_toolsets_blocks_injection(self):
+        """`platform_toolsets: telegram: []` must suppress memory tools. (#5544)"""
+        mgr = self._mgr_with_tools("fact_store")
+        tools, names = self._run_memory_injection([], mgr)
+        assert tools == []
+        assert names == set()
+
+    def test_toolsets_without_memory_blocks_injection(self):
+        """Toolset list that doesn't name 'memory' must suppress injection."""
+        mgr = self._mgr_with_tools("fact_store")
+        tools, names = self._run_memory_injection(["terminal", "web"], mgr)
+        assert tools == []
+        assert names == set()
+
+    def test_no_memory_manager_no_injection(self):
+        """No memory manager: neither tool list nor valid-name set is populated."""
+        tools, names = self._run_memory_injection(None, None)
+        assert tools == [] and names == set()
+
+    def test_multiple_schemas_all_blocked_together(self):
+        """When the gate is closed, no memory tools leak — not even partially."""
+        mgr = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
+        tools, names = self._run_memory_injection(["terminal"], mgr)
+        assert tools == []
+        assert names == set()
+
+    def test_multiple_schemas_all_injected_when_enabled(self):
+        """When the gate is open, every memory tool schema is injected."""
+        mgr = self._mgr_with_tools("fact_store", "memory_search", "memory_add")
+        tools, names = self._run_memory_injection(None, mgr)
+        assert names == {"fact_store", "memory_search", "memory_add"}
+
+
+class TestContextEngineToolsetGate:
+    """Issue #5544 (sibling): context engine tools follow the same gate.
+
+    `agent.context_compressor.get_tool_schemas()` (e.g. lcm_grep, lcm_describe,
+    lcm_expand) was appended to AIAgent.tools unconditionally. Same blind
+    injection class as the memory bug; same local-model penalty. Gate name:
+    "context_engine" (matches the existing plugin-system convention).
+    """
+
+    @staticmethod
+    def _run_context_engine_injection(enabled_toolsets, compressor):
+        """Simulate the gated context-engine injection block from agent_init.py."""
+        tools = []
+        valid_tool_names = set()
+        engine_tool_names = set()
+
+        if (
+            compressor is not None
+            and tools is not None
+            and (
+                enabled_toolsets is None
+                or "context_engine" in enabled_toolsets
+            )
+        ):
+            _existing = {
+                t.get("function", {}).get("name")
+                for t in tools
+                if isinstance(t, dict)
+            }
+            for _schema in compressor.get_tool_schemas():
+                _tname = _schema.get("name", "")
+                if _tname and _tname in _existing:
+                    continue
+                tools.append({"type": "function", "function": _schema})
+                if _tname:
+                    valid_tool_names.add(_tname)
+                    engine_tool_names.add(_tname)
+                    _existing.add(_tname)
+
+        return tools, valid_tool_names, engine_tool_names
+
+    class _FakeCompressor:
+        def __init__(self, schemas):
+            self._schemas = schemas
+
+        def get_tool_schemas(self):
+            return list(self._schemas)
+
+    def _compressor_with(self, *tool_names):
+        """Build a fake compressor exposing one bare schema per given tool name."""
+        schemas = [dict(name=t, description=t, parameters={}) for t in tool_names]
+        return self._FakeCompressor(schemas)
+
+    def test_none_toolsets_injects(self):
+        """enabled_toolsets=None injects context-engine tools — backward compat."""
+        c = self._compressor_with("lcm_grep", "lcm_describe", "lcm_expand")
+        tools, names, engine_names = self._run_context_engine_injection(None, c)
+        assert engine_names == {"lcm_grep", "lcm_describe", "lcm_expand"}
+
+    def test_context_engine_in_toolsets_injects(self):
+        """enabled_toolsets including 'context_engine' injects the tools."""
+        c = self._compressor_with("lcm_grep")
+        tools, names, engine_names = self._run_context_engine_injection(
+            ["terminal", "context_engine"], c
+        )
+        assert "lcm_grep" in engine_names
+
+    def test_empty_toolsets_blocks_injection(self):
+        """`platform_toolsets: telegram: []` must suppress context-engine tools."""
+        c = self._compressor_with("lcm_grep")
+        tools, names, engine_names = self._run_context_engine_injection([], c)
+        assert tools == []
+        assert engine_names == set()
+
+    def test_toolsets_without_context_engine_blocks_injection(self):
+        """A toolset list that doesn't name 'context_engine' suppresses injection."""
+        c = self._compressor_with("lcm_grep", "lcm_describe")
+        tools, names, engine_names = self._run_context_engine_injection(
+            ["terminal", "memory"], c
+        )
+        assert tools == []
+        assert engine_names == set()
+
+    def test_no_compressor_no_injection(self):
+        """No context_compressor: tools and both name sets must all stay empty."""
+        tools, names, engine_names = self._run_context_engine_injection(None, None)
+        assert tools == [] and names == set() and engine_names == set()
@@ -444,6 +444,7 @@ class TestBuildNousSubscriptionPrompt:
                    "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
                    "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
                    "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
+                    "app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, True, True, True, False, True, "Nous Subscription"),
                },
            ),
        )
@@ -468,6 +469,7 @@ class TestBuildNousSubscriptionPrompt:
                    "tts": NousFeatureState("tts", "OpenAI TTS", True, False, False, False, False, True, ""),
                    "browser": NousFeatureState("browser", "Browser automation", True, False, False, False, False, True, ""),
                    "modal": NousFeatureState("modal", "Modal execution", False, False, False, False, False, True, ""),
+                    "app_tools": NousFeatureState("app_tools", "App tools (500+ apps)", True, False, False, False, False, True, ""),
                },
            ),
        )
@@ -556,10 +556,11 @@ Generate some audio.
            raising=False,
        )

-        with patch.dict(
-            os.environ, {"HERMES_SESSION_PLATFORM": "telegram"}, clear=False
-        ):
-            with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
+            from gateway.session_context import clear_session_vars, set_session_vars
+
+            tokens = set_session_vars(platform="telegram")
+            try:
                _make_skill(
                    tmp_path,
                    "test-skill",
@@ -571,6 +572,8 @@ Generate some audio.
                )
                scan_skill_commands()
                msg = build_skill_invocation_message("/test-skill", "do stuff")
+            finally:
+                clear_session_vars(tokens)

        assert msg is not None
        assert "local cli" in msg.lower()
@@ -1,6 +1,12 @@
-"""Tests for agent/skill_utils.py — extract_skill_conditions metadata handling."""
+"""Tests for agent/skill_utils.py."""

-from agent.skill_utils import extract_skill_conditions
+from unittest.mock import patch
+
+from agent.skill_utils import (
+    extract_skill_conditions,
+    iter_skill_index_files,
+    skill_matches_platform,
+)


 def test_metadata_as_dict_with_hermes():
@@ -56,3 +62,138 @@ def test_metadata_missing_entirely():
        "fallback_for_tools": [],
        "requires_tools": [],
    }
+
+
+def test_iter_skill_index_files_prunes_dependency_dirs(tmp_path):
+    real = tmp_path / "real-skill"
+    real.mkdir()
+    (real / "SKILL.md").write_text("---\nname: real-skill\n---\n", encoding="utf-8")
+
+    nested = (
+        tmp_path
+        / "bring"
+        / "scripts"
+        / ".venv"
+        / "lib"
+        / "python3.13"
+        / "site-packages"
+        / "typer"
+        / ".agents"
+        / "skills"
+        / "typer"
+    )
+    nested.mkdir(parents=True)
+    (nested / "SKILL.md").write_text("---\nname: typer\n---\n", encoding="utf-8")
+
+    node_module = (
+        tmp_path
+        / "web-skill"
+        / "node_modules"
+        / "dep"
+        / ".agents"
+        / "skills"
+        / "dep"
+    )
+    node_module.mkdir(parents=True)
+    (node_module / "SKILL.md").write_text("---\nname: dep\n---\n", encoding="utf-8")
+
+    found = list(iter_skill_index_files(tmp_path, "SKILL.md"))
+
+    assert found == [real / "SKILL.md"]
+
+
+# ── skill_matches_platform on Termux ──────────────────────────────────────
+
+
+class TestSkillMatchesPlatformTermux:
+    """Termux is Linux userland on Android. Skills tagged platforms:[linux]
+    must load there regardless of whether Python reports sys.platform as
+    "linux" (pre-3.13) or "android" (3.13+). Reported by user @LikiusInik
+    in May 2026 — only 3 built-in skills appeared on Termux because every
+    github/productivity/mlops skill is tagged platforms:[linux,macos,windows]
+    and sys.platform=="android" did not start with "linux".
+    """
+
+    def test_no_platforms_field_matches_everywhere(self):
+        # Backward-compat default — skills without a platforms tag load
+        # on any OS, Termux included.
+        with patch("agent.skill_utils.sys.platform", "android"), patch(
+            "agent.skill_utils.is_termux", return_value=True
+        ):
+            assert skill_matches_platform({}) is True
+            assert skill_matches_platform({"name": "foo"}) is True
+
+    def test_linux_skill_loads_on_termux_android_platform(self):
+        # Python 3.13+ on Termux reports sys.platform == "android".
+        fm = {"platforms": ["linux"]}
+        with patch("agent.skill_utils.sys.platform", "android"), patch(
+            "agent.skill_utils.is_termux", return_value=True
+        ):
+            assert skill_matches_platform(fm) is True
+
+    def test_linux_macos_windows_skill_loads_on_termux(self):
+        # The common "[linux, macos, windows]" tag used by github-*,
+        # productivity, mlops, etc.
+        fm = {"platforms": ["linux", "macos", "windows"]}
+        with patch("agent.skill_utils.sys.platform", "android"), patch(
+            "agent.skill_utils.is_termux", return_value=True
+        ):
+            assert skill_matches_platform(fm) is True
+
+    def test_linux_skill_loads_on_termux_linux_platform(self):
+        # Pre-3.13 Termux reports sys.platform == "linux" already — this
+        # works without the Termux escape hatch but must still pass.
+        fm = {"platforms": ["linux"]}
+        with patch("agent.skill_utils.sys.platform", "linux"), patch(
+            "agent.skill_utils.is_termux", return_value=True
+        ):
+            assert skill_matches_platform(fm) is True
+
+    def test_macos_only_skill_still_excluded_on_termux(self):
+        # macOS-only skills (apple-notes, imessage, ...) should NOT load
+        # on Termux. The Termux fallback only widens platforms:[linux,...].
+        fm = {"platforms": ["macos"]}
+        with patch("agent.skill_utils.sys.platform", "android"), patch(
+            "agent.skill_utils.is_termux", return_value=True
+        ):
+            assert skill_matches_platform(fm) is False
+
+    def test_windows_only_skill_still_excluded_on_termux(self):
+        fm = {"platforms": ["windows"]}
+        with patch("agent.skill_utils.sys.platform", "android"), patch(
+            "agent.skill_utils.is_termux", return_value=True
+        ):
+            assert skill_matches_platform(fm) is False
+
+    def test_explicit_termux_or_android_tag_matches(self):
+        # Skills can also opt in explicitly via platforms:[termux] or
+        # platforms:[android] — both should match a Termux session.
+        with patch("agent.skill_utils.sys.platform", "android"), patch(
+            "agent.skill_utils.is_termux", return_value=True
+        ):
+            assert skill_matches_platform({"platforms": ["termux"]}) is True
+            assert skill_matches_platform({"platforms": ["android"]}) is True
+
+    def test_non_termux_android_does_not_widen(self):
+        # If we're somehow on a plain Android Python (not Termux), don't
+        # silently load Linux skills — Termux is the supported environment.
+        fm = {"platforms": ["linux"]}
+        with patch("agent.skill_utils.sys.platform", "android"), patch(
+            "agent.skill_utils.is_termux", return_value=False
+        ):
+            assert skill_matches_platform(fm) is False
+
+    def test_linux_skill_on_real_linux_unaffected(self):
+        # The non-Termux Linux path must not change.
+        fm = {"platforms": ["linux"]}
+        with patch("agent.skill_utils.sys.platform", "linux"), patch(
+            "agent.skill_utils.is_termux", return_value=False
+        ):
+            assert skill_matches_platform(fm) is True
+
+    def test_macos_skill_on_real_macos_unaffected(self):
+        fm = {"platforms": ["macos"]}
+        with patch("agent.skill_utils.sys.platform", "darwin"), patch(
+            "agent.skill_utils.is_termux", return_value=False
+        ):
+            assert skill_matches_platform(fm) is True
@@ -46,6 +46,26 @@ class TestChatCompletionsBasic:
        assert "codex_reasoning_items" in msgs[0]
        assert "codex_message_items" in msgs[0]

+    def test_convert_messages_strips_tool_name(self, transport):
+        """Internal `tool_name` (used for FTS indexing in the SQLite store) is
+        not part of the OpenAI Chat Completions schema. Strict providers like
+        Moonshot/Kimi reject it with HTTP 400 'Extra inputs are not permitted'.
+        """
+        msgs = [
+            {"role": "user", "content": "hi"},
+            {"role": "assistant", "content": None,
+             "tool_calls": [{"id": "call_1", "type": "function",
+                             "function": {"name": "execute_code", "arguments": "{}"}}]},
+            {"role": "tool", "tool_call_id": "call_1", "tool_name": "execute_code",
+             "content": "result"},
+        ]
+        result = transport.convert_messages(msgs)
+        assert "tool_name" not in result[2]
+        assert result[2]["content"] == "result"
+        assert result[2]["tool_call_id"] == "call_1"
+        # Original list untouched (deepcopy-on-demand)
+        assert msgs[2]["tool_name"] == "execute_code"
+

 class TestChatCompletionsBuildKwargs:

@@ -196,14 +196,13 @@ class TestCodexBuildKwargs:
        )
        # xAI Responses receives reasoning.effort on the allowlisted models.
        assert kw.get("reasoning") == {"effort": "high"}
-        # As of May 2026 we deliberately do NOT request
-        # reasoning.encrypted_content back from xAI — the OAuth/SuperGrok
-        # surface rejects replayed encrypted reasoning items on turn 2+
-        # (the multi-turn "Expected to have received response.created
-        # before error" failure).  Grok still reasons natively each turn;
-        # we just don't try to thread the prior turn's encrypted blob back
-        # in.  See tests/run_agent/test_codex_xai_oauth_recovery.py.
-        assert "reasoning.encrypted_content" not in kw.get("include", [])
+        # As of May 2026 (post-revert of PR #26644) we DO request
+        # reasoning.encrypted_content back from xAI so we can replay it
+        # across turns for cross-turn coherence — xAI explicitly relies
+        # on this for their partnership integration.  See
+        # tests/run_agent/test_codex_xai_oauth_recovery.py for the
+        # full history.
+        assert "reasoning.encrypted_content" in kw.get("include", [])

    def test_xai_reasoning_disabled_no_reasoning_key(self, transport):
        messages = [{"role": "user", "content": "Hi"}]
@@ -229,9 +228,9 @@ class TestCodexBuildKwargs:
    # api.x.ai 400s with "Model X does not support parameter reasoningEffort"
    # on grok-4 / grok-4-fast / grok-3 / grok-code-fast / grok-4.20-0309-*.
    # Those models reason natively but don't expose the dial. The transport
-    # must omit the `reasoning` key for them.  As of May 2026 we also no
-    # longer request ``reasoning.encrypted_content`` back from xAI on ANY
-    # model — see test_xai_reasoning_effort_passed for the rationale.
+    # must omit the `reasoning` key for them.  As of May 2026 we DO request
+    # ``reasoning.encrypted_content`` back from xAI on every model —
+    # see test_xai_reasoning_effort_passed for the rationale.

    def test_xai_grok_4_omits_reasoning_effort(self, transport):
        """grok-4 / grok-4-0709 reject reasoning.effort with HTTP 400."""
@@ -245,9 +244,9 @@ class TestCodexBuildKwargs:
            assert "reasoning" not in kw, (
                f"{model} must not receive a reasoning key (xAI rejects it)"
            )
-            # We no longer ask xAI for encrypted_content back (see comment
-            # above) — verify the include list is empty.
-            assert "reasoning.encrypted_content" not in kw.get("include", [])
+            # Even without the effort dial we still ask xAI to echo back
+            # encrypted reasoning content so it can be replayed next turn.
+            assert "reasoning.encrypted_content" in kw.get("include", [])

    def test_xai_grok_4_fast_omits_reasoning_effort(self, transport):
        """grok-4-fast and grok-4-1-fast variants reject reasoning.effort."""
@@ -160,30 +160,6 @@ class TestBranchCommandCLI:
        assert agent.reset_session_state.called
        assert agent._last_flushed_db_idx == 4  # len(conversation_history)

-    def test_branch_updates_agent_session_log_file(self, cli_instance, session_db, tmp_path):
-        """Branching must redirect the agent's session_log_file to the new session's path."""
-        from cli import HermesCLI
-        from pathlib import Path
-
-        logs_dir = tmp_path / "sessions"
-        logs_dir.mkdir()
-
-        agent = MagicMock()
-        agent._last_flushed_db_idx = 0
-        agent.logs_dir = logs_dir
-        agent.session_log_file = logs_dir / f"session_{cli_instance.session_id}.json"
-        cli_instance.agent = agent
-
-        old_log_file = agent.session_log_file
-        HermesCLI._handle_branch_command(cli_instance, "/branch")
-
-        new_session_id = cli_instance.session_id
-        expected_log = logs_dir / f"session_{new_session_id}.json"
-        assert agent.session_log_file == expected_log, (
-            "session_log_file must point to the branch session, not the original"
-        )
-        assert agent.session_log_file != old_log_file
-
    def test_branch_sets_resumed_flag(self, cli_instance, session_db):
        """Branch should set _resumed=True to prevent auto-title generation."""
        from cli import HermesCLI
@@ -20,12 +20,9 @@ test runner at ``scripts/run_tests.sh``.
 """

 import asyncio
-import logging
 import os
 import re
-import signal
 import sys
-import tempfile
 from pathlib import Path
 from unittest.mock import patch

@@ -37,6 +34,22 @@ if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))


+# ── Per-file process isolation ──────────────────────────────────────────────
+# Tests run via ``scripts/run_tests_parallel.py``, which spawns a fresh
+# ``python -m pytest <file>`` subprocess per test file. Cross-file state
+# leakage (module-level dicts, ContextVars, caches) is impossible: each
+# file gets a clean Python interpreter. Intra-file ordering is the test
+# author's responsibility — if test A in foo.py mutates state that test B
+# in foo.py reads, that's a real bug to fix in the file (it would also
+# bite anyone running ``pytest tests/foo.py`` directly).
+#
+# This replaces the historic _reset_module_state autouse fixture (manual
+# state clearing) and the brief experiment with subprocess-per-test
+# isolation (too slow at ~17k tests).
+#
+# See ``scripts/run_tests_parallel.py`` for the runner.
+
+
 # ── Credential env-var filter ──────────────────────────────────────────────
 #
 # Any env var in the current process matching ONE of these patterns is
@@ -279,7 +292,7 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
    "WECOM_HOME_CHANNEL_NAME",
    # Platform gating — set by load_gateway_config() as a side effect when
    # a config.yaml is present, so individual test bodies that call the
-    # loader leak these values into later tests on the same xdist worker.
+    # loader leak these values into later tests in the same process.
    # Force-clear on every test setup so the leak can't happen.
    "SLACK_REQUIRE_MENTION",
    "SLACK_STRICT_MENTION",
@@ -368,144 +381,21 @@ def _isolate_hermes_home(_hermetic_environment):
    return None


-# ── Module-level state reset ───────────────────────────────────────────────
+# ── Module-level state reset — replaced by per-file process isolation ──────
 #
-# Python modules are singletons per process, and pytest-xdist workers are
-# long-lived. Module-level dicts/sets (tool registries, approval state,
-# interrupt flags) and ContextVars persist across tests in the same worker,
-# causing tests that pass alone to fail when run with siblings.
+# Each test FILE runs in a freshly-spawned ``python -m pytest <file>``
+# subprocess via ``scripts/run_tests_parallel.py``, so module-level dicts /
+# sets / ContextVars from tests in one file cannot leak into tests in
+# another file. No manual per-module clearing needed.
 #
-# Each entry in this fixture clears state that belongs to a specific module.
-# New state buckets go here too — this is the single gate that prevents
-# "works alone, flakes in CI" bugs from state leakage.
+# Within a single file, ordering is the author's responsibility. If your
+# tests in the same file share mutable state, either reset it explicitly
+# in a fixture or split them across files.
 #
-# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
-# this closes; the running example was `test_command_guards` failing 12/15
-# CI runs because ``tools.approval._session_approved`` carried approvals
-# from one test's session into another's.
-
-@pytest.fixture(autouse=True)
-def _reset_module_state():
-    """Clear module-level mutable state and ContextVars between tests.
-
-    Keeps state from leaking across tests on the same xdist worker. Modules
-    that don't exist yet (test collection before production import) are
-    skipped silently — production import later creates fresh empty state.
-    """
-    # --- logging — quiet/one-shot paths mutate process-global logger state ---
-    logging.disable(logging.NOTSET)
-    for _logger_name in ("tools", "run_agent", "trajectory_compressor", "cron", "hermes_cli"):
-        _logger = logging.getLogger(_logger_name)
-        _logger.disabled = False
-        _logger.setLevel(logging.NOTSET)
-        _logger.propagate = True
-
-    # --- tools.approval — the single biggest source of cross-test pollution ---
-    try:
-        from tools import approval as _approval_mod
-        _approval_mod._session_approved.clear()
-        _approval_mod._session_yolo.clear()
-        _approval_mod._permanent_approved.clear()
-        _approval_mod._pending.clear()
-        _approval_mod._gateway_queues.clear()
-        _approval_mod._gateway_notify_cbs.clear()
-        # ContextVar: reset to empty string so get_current_session_key()
-        # falls through to the env var / default path, matching a fresh
-        # process.
-        _approval_mod._approval_session_key.set("")
-    except Exception:
-        pass
-
-    # --- tools.interrupt — per-thread interrupt flag set ---
-    try:
-        from tools import interrupt as _interrupt_mod
-        with _interrupt_mod._lock:
-            _interrupt_mod._interrupted_threads.clear()
-    except Exception:
-        pass
-
-    # --- gateway.session_context — 9 ContextVars that represent
-    #     the active gateway session. If set in one test and not reset,
-    #     the next test's get_session_env() reads stale values.
-    try:
-        from gateway import session_context as _sc_mod
-        for _cv in (
-            _sc_mod._SESSION_PLATFORM,
-            _sc_mod._SESSION_CHAT_ID,
-            _sc_mod._SESSION_CHAT_NAME,
-            _sc_mod._SESSION_THREAD_ID,
-            _sc_mod._SESSION_USER_ID,
-            _sc_mod._SESSION_USER_NAME,
-            _sc_mod._SESSION_KEY,
-            _sc_mod._CRON_AUTO_DELIVER_PLATFORM,
-            _sc_mod._CRON_AUTO_DELIVER_CHAT_ID,
-            _sc_mod._CRON_AUTO_DELIVER_THREAD_ID,
-        ):
-            _cv.set(_sc_mod._UNSET)
-    except Exception:
-        pass
-
-    # --- tools.env_passthrough — ContextVar<set[str]> with no default ---
-    # LookupError is normal if the test never set it. Setting it to an
-    # empty set unconditionally normalizes the starting state.
-    try:
-        from tools import env_passthrough as _envp_mod
-        _envp_mod._allowed_env_vars_var.set(set())
-    except Exception:
-        pass
-
-    # --- tools.terminal_tool — active environment/cwd cache ---
-    # File tools prefer a live terminal cwd when one is cached for the task.
-    # Clear terminal environments between tests so a prior terminal call can't
-    # override TERMINAL_CWD in path-resolution tests.
-    try:
-        from tools import terminal_tool as _term_mod
-        _envs_to_cleanup = []
-        with _term_mod._env_lock:
-            _envs_to_cleanup = list(_term_mod._active_environments.values())
-            _term_mod._active_environments.clear()
-            _term_mod._last_activity.clear()
-            _term_mod._creation_locks.clear()
-        for _env in _envs_to_cleanup:
-            try:
-                _env.cleanup()
-            except Exception:
-                pass
-    except Exception:
-        pass
-
-    # --- tools.credential_files — ContextVar<dict> ---
-    try:
-        from tools import credential_files as _credf_mod
-        _credf_mod._registered_files_var.set({})
-    except Exception:
-        pass
-
-    # --- agent.auxiliary_client — runtime main provider/model override and
-    #     payment-error health cache. Both are process-global in production;
-    #     reset them per test so one worker's fallback/402 test does not make
-    #     later auxiliary-client tests skip otherwise-available providers.
-    try:
-        from agent import auxiliary_client as _aux_mod
-        _aux_mod.clear_runtime_main()
-        _aux_mod._reset_aux_unhealthy_cache()
-    except Exception:
-        pass
-
-    # --- tools.file_tools — per-task read history + file-ops cache ---
-    # _read_tracker accumulates per-task_id read history for loop detection,
-    # capped by _READ_HISTORY_CAP. If entries from a prior test persist, the
-    # cap is hit faster than expected and capacity-related tests flake.
-    try:
-        from tools import file_tools as _ft_mod
-        with _ft_mod._read_tracker_lock:
-            _ft_mod._read_tracker.clear()
-        with _ft_mod._file_ops_lock:
-            _ft_mod._file_ops_cache.clear()
-    except Exception:
-        pass
-
-    yield
+# The skill ``test-suite-cascade-diagnosis`` documents the cascade patterns
+# the old fixture closed; the running example was ``test_command_guards``
+# failing 12/15 CI runs because ``tools.approval._session_approved`` carried
+# approvals from one test's session into another's.


@pytest.fixture()
@@ -532,13 +422,12 @@ def mock_config():
    }


-# ── Global test timeout ─────────────────────────────────────────────────────
-# Kill any individual test that takes longer than 30 seconds.
-# Prevents hanging tests (subprocess spawns, blocking I/O) from stalling the
-# entire test suite.
+# ── Per-test timeout — no longer enforced by a conftest fixture ────────────
+#
+# The old SIGALRM-based fixture (POSIX-only, didn't work on Windows) is gone.
+# NOTE(review): the ``isolate_timeout`` ini key belonged to the abandoned
+# subprocess-per-test plugin — confirm what enforces timeouts now.

-def _timeout_handler(signum, frame):
-    raise TimeoutError("Test exceeded 30 second timeout")

@pytest.fixture(autouse=True)
 def _ensure_current_event_loop(request):
@@ -584,45 +473,6 @@ def _ensure_current_event_loop(request):
                asyncio.set_event_loop(None)


-@pytest.fixture(autouse=True)
-def _enforce_test_timeout():
-    """Kill any individual test that takes longer than 30 seconds.
-    SIGALRM is Unix-only; skip on Windows."""
-    if sys.platform == "win32":
-        yield
-        return
-    old = signal.signal(signal.SIGALRM, _timeout_handler)
-    signal.alarm(30)
-    yield
-    signal.alarm(0)
-    signal.signal(signal.SIGALRM, old)
-
-
-@pytest.fixture(autouse=True)
-def _reset_tool_registry_caches():
-    """Clear tool-registry-level caches between tests.
-
-    The production registry caches ``check_fn()`` results for 30 s
-    (see tools/registry.py) and :func:`get_tool_definitions` memoizes
-    its result (see model_tools.py). Both are keyed on state that tests
-    routinely mutate (env vars, registry._generation, config.yaml mtime)
-    — but a stale result from test A can still be served to test B
-    because 30 s covers the entire suite, and xdist worker reuse means
-    one test's cache lands in another's process. Clearing before every
-    test keeps hermetic behavior.
-    """
-    try:
-        from tools.registry import invalidate_check_fn_cache
-        invalidate_check_fn_cache()
-    except ImportError:
-        pass
-    try:
-        from model_tools import _clear_tool_defs_cache
-        _clear_tool_defs_cache()
-    except ImportError:
-        pass
-
-
 # ── Live-system guard ──────────────────────────────────────────────────────
 #
 # Several test files exercise the gateway-restart / kill code paths
@@ -74,7 +74,6 @@ class _Codex401ThenSuccessAgent(run_agent.AIAgent):
        self._cleanup_task_resources = lambda task_id: None
        self._persist_session = lambda messages, history=None: None
        self._save_trajectory = lambda messages, user_message, completed: None
-        self._save_session_log = lambda messages: None

    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
        type(self).refresh_attempts += 1
@@ -313,19 +313,30 @@ def _scan_for_plugin_adapter_antipattern(source: str) -> list[str]:
    return offenses


-def pytest_configure(config):
-    """Reject plugin-adapter tests that use the sys.path anti-pattern.
+def _fingerprint_gateway_tests() -> str:
+    """Return a short fingerprint that changes when any gateway test file changes.

-    Runs once per pytest session on the controller, BEFORE any xdist
-    worker is spawned. If any file under ``tests/gateway/`` matches the
-    anti-pattern, we fail the whole session with a clear message —
-    before a polluted ``sys.path`` can cascade across workers.
+    Uses (mtime, size) pairs instead of content hashing — fast to compute
+    (stat-only, no reads) and sufficient for cache invalidation across
+    per-file subprocess runs.
    """
-    # Only run on the xdist controller (or in non-xdist runs). Skip on
-    # worker subprocesses so we don't scan the filesystem N times.
-    if hasattr(config, "workerinput"):
-        return
+    import hashlib

+    h = hashlib.sha256()
+    for path in sorted(_GATEWAY_DIR.rglob("test_*.py")):
+        try:
+            st = path.stat()
+            h.update(f"{path.name}:{st.st_mtime_ns}:{st.st_size}".encode())
+        except OSError:
+            h.update(f"{path.name}:missing".encode())
+    return h.hexdigest()[:16]
+
+
+def _run_adapter_antipattern_scan() -> list[str]:
+    """Scan gateway test files for the plugin-adapter anti-pattern.
+
+    Returns a list of violation strings (empty if clean).
+    """
    violations: list[str] = []
    for path in _GATEWAY_DIR.rglob("test_*.py"):
        if path.name in {"_plugin_adapter_loader.py", "conftest.py"}:
@@ -334,20 +345,108 @@ def pytest_configure(config):
            source = path.read_text(encoding="utf-8")
        except OSError:
            continue
+        # Fast string pre-filter: skip files that can't possibly violate.
+        # A violating file MUST contain both (a) an adapter/plugins/platforms
+        # reference AND (b) either sys.path manipulation or a bare adapter import.
        if "adapter" not in source and "plugins/platforms" not in source:
            continue
+        if not (
+            "sys.path" in source
+            or "import adapter" in source
+            or "from adapter import" in source
+        ):
+            continue
        offenses = _scan_for_plugin_adapter_antipattern(source)
        if offenses:
            violations.append(
                f"  {path.relative_to(_GATEWAY_DIR.parent.parent)}:\n    "
                + "\n    ".join(offenses)
            )
+    return violations

-    if violations:
-        raise pytest.UsageError(
-            "Plugin-adapter-import anti-pattern detected in gateway tests:\n"
-            + "\n".join(violations)
-            + "\n\n"
-            + _GUARD_HINT
-        )
+
+def pytest_configure(config):
+    """Reject plugin-adapter tests that use the sys.path anti-pattern.
+
+    Runs once per pytest session on the controller, BEFORE any xdist
+    worker is spawned. If any file under ``tests/gateway/`` matches the
+    anti-pattern, we fail the whole session with a clear message —
+    before a polluted ``sys.path`` can cascade across workers.
+
+    **Performance**: in the per-file subprocess isolation model (no xdist),
+    every subprocess is a "controller" — so the naive scan would run 257
+    times, each costing ~1s of AST walking.  We avoid this with two
+    strategies:
+
+    1. **Tight string pre-filter**: a file can only violate if it contains
+       *both* an adapter/plugins/platforms reference *and* a sys.path
+       manipulation or bare ``import adapter``.  This drops ~95% of files
+       from needing AST parsing.
+    2. **File-locked cache**: the scan result is cached in
+       ``.pytest-cache/gw-adapter-guard-<fingerprint>`` keyed on a
+       fingerprint of the gateway test file mtimes/sizes.  Concurrent
+       subprocesses acquire a lock; only the first performs the scan;
+       the rest wait and read the cached result.
+
+    Raises:
+        pytest.UsageError: if the scan (or a cached scan) found violations.
+    """
+    # Only run on the xdist controller (or in non-xdist runs). Skip on
+    # worker subprocesses so we don't scan the filesystem N times.
+    if hasattr(config, "workerinput"):
+        return
+
+    fp = _fingerprint_gateway_tests()
+    cache_dir = Path.cwd() / ".pytest-cache"
+    cache_file = cache_dir / f"gw-adapter-guard-{fp}"
+    lock_file = cache_dir / f".gw-adapter-guard-{fp}.lock"
+
+    cache_dir.mkdir(parents=True, exist_ok=True)
+
+    # Evict stale cache entries from previous fingerprints (best-effort).
+    try:
+        for old in cache_dir.glob("gw-adapter-guard-*"):
+            if old.name != f"gw-adapter-guard-{fp}":
+                old.unlink(missing_ok=True)
+        for old in cache_dir.glob(".gw-adapter-guard-*.lock"):
+            if old.name != f".gw-adapter-guard-{fp}.lock":
+                old.unlink(missing_ok=True)
+    except OSError:
+        pass  # Non-critical; old files are harmless.
+
+    # Use filelock to ensure only one process scans at a time.
+    # Concurrent subprocesses all hit pytest_configure simultaneously;
+    # without a lock they'd all find no cache and all run the scan.
+    try:
+        from filelock import FileLock
+        lock = FileLock(str(lock_file), timeout=120)
+    except ImportError:
+        # Fallback: no locking (still correct, just slower under contention).
+        class _NoLock:
+            def __enter__(self):
+                return self
+            def __exit__(self, *a):
+                pass
+        lock = _NoLock()
+
+    # Failure messages start with this header; it also marks a cache as intact.
+    header = "Plugin-adapter-import anti-pattern detected in gateway tests:\n"
+    with lock:
+        if cache_file.exists():
+            cached = cache_file.read_text(encoding="utf-8")
+            if cached == "clean":
+                return
+            # A file that is empty or lacks the header (interrupted write) is
+            # not a verdict: ignore it and redo the scan below.
+            if cached.startswith(header):
+                raise pytest.UsageError(cached)
+
+        # Slow path: this process is the first to acquire the lock.
+        violations = _run_adapter_antipattern_scan()
+
+        if violations:
+            msg = header + "\n".join(violations) + "\n\n" + _GUARD_HINT
+            cache_file.write_text(msg, encoding="utf-8")
+            raise pytest.UsageError(msg)
+        cache_file.write_text("clean", encoding="utf-8")

@@ -1,31 +1,88 @@
-"""Yuanbao recall: branch A (content-match) works against DB-only transcripts."""
+"""Yuanbao recall: branch A1 (exact id) and A2 (content-match) against DB-only transcripts.
+
+state.db persists the platform-side ``message_id`` via the
+``platform_message_id`` column (added in the salvage of PR #29211) and
+``load_transcript`` surfaces it back on each message dict as ``message_id``
+— so the recall guard's exact-id match path stays canonical even with the
+JSONL file gone.  When a row has no platform id (e.g. agent-processed
+@bot messages whose adapter didn't carry a msg_id, or pre-column legacy
+rows), recall falls through to content-match.
+"""
 from gateway.session import SessionStore
 from gateway.config import GatewayConfig


-def test_recall_content_match_finds_target_in_db_transcript(tmp_path, monkeypatch):
-    """state.db doesn't preserve message_id, so recall uses content-match.
-
-    Pin DEFAULT_DB_PATH to tmp_path so SessionDB() can't write to the real
-    ~/.hermes/state.db. (Module-level constant snapshot, see test_load_transcript_db_only.)
-    """
+def _pin_db(monkeypatch, tmp_path):
+    """Force SessionDB() to write into tmp_path instead of the real ~/.hermes."""
    import hermes_state
    monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", tmp_path / "state.db")

+
+def test_recall_branch_a1_exact_id_match_round_trips_through_db(tmp_path, monkeypatch):
+    """A user message persisted with ``message_id`` must round-trip through
+    state.db so recall can find and redact it by exact id (branch A1)."""
+    _pin_db(monkeypatch, tmp_path)
+
    config = GatewayConfig()
    store = SessionStore(sessions_dir=tmp_path, config=config)

-    sid = "test-yuanbao-recall"
+    sid = "test-yuanbao-recall-a1"
    store._db.create_session(session_id=sid, source="yuanbao:group:G")
-    store.append_to_transcript(sid, {"role": "user", "content": "sensitive content", "timestamp": 1.0})
-    store.append_to_transcript(sid, {"role": "assistant", "content": "ack", "timestamp": 2.0})
+    store.append_to_transcript(sid, {
+        "role": "user",
+        "content": "sensitive content",
+        "timestamp": 1.0,
+        "message_id": "platform-msg-abc",
+    })
+    store.append_to_transcript(sid, {
+        "role": "assistant",
+        "content": "ack",
+        "timestamp": 2.0,
+    })

-    # DB-only history carries no platform message_id (PR #29211 dropped that path).
    history = store.load_transcript(sid)
-    assert all("message_id" not in msg for msg in history)
+    # The user row must carry its platform id back so the recall guard can
+    # match by exact id; the assistant row had no platform id so it should
+    # not gain one spuriously.
+    user_msg = next(m for m in history if m["role"] == "user")
+    assistant_msg = next(m for m in history if m["role"] == "assistant")
+    assert user_msg.get("message_id") == "platform-msg-abc"
+    assert "message_id" not in assistant_msg

-    # Branch A: content match finds the target row that recall would redact.
-    target = next((m for m in history
-                   if m.get("role") == "user" and m.get("content") == "sensitive content"), None)
+    # Branch A1: locate the row by exact platform id — no content heuristics.
+    target = next(
+        (m for m in history if m.get("message_id") == "platform-msg-abc"),
+        None,
+    )
+    assert target is not None
+    assert target["content"] == "sensitive content"
+
+
+def test_recall_branch_a2_content_match_when_no_platform_id(tmp_path, monkeypatch):
+    """Rows that lack a platform_message_id (e.g. agent-processed @bot
+    messages) still match by content as a fallback."""
+    _pin_db(monkeypatch, tmp_path)
+
+    config = GatewayConfig()
+    store = SessionStore(sessions_dir=tmp_path, config=config)
+
+    sid = "test-yuanbao-recall-a2"
+    store._db.create_session(session_id=sid, source="yuanbao:group:G")
+    # No message_id on the dict — simulates an agent-processed message
+    # that did not carry the platform msg_id through.
+    store.append_to_transcript(sid, {
+        "role": "user",
+        "content": "sensitive content",
+        "timestamp": 1.0,
+    })
+
+    history = store.load_transcript(sid)
+    assert all("message_id" not in m for m in history)
+
+    # Branch A2: content match recovers the target.
+    target = next(
+        (m for m in history
+         if m.get("role") == "user" and m.get("content") == "sensitive content"),
+        None,
+    )
    assert target is not None
-    # Caller would then redact: target["content"] = REDACTED; store.rewrite_transcript(sid, history)
@@ -22,19 +22,26 @@ from gateway.config import PlatformConfig


 def _ensure_telegram_mock():
-    if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
-        return
-
    telegram_mod = MagicMock()
    telegram_mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
-    telegram_mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
-    telegram_mod.constants.ChatType.GROUP = "group"
-    telegram_mod.constants.ChatType.SUPERGROUP = "supergroup"
-    telegram_mod.constants.ChatType.CHANNEL = "channel"
-    telegram_mod.constants.ChatType.PRIVATE = "private"

-    for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"):
-        sys.modules.setdefault(name, telegram_mod)
+    # Register telegram.constants as a separate module mock so that
+    # ``from telegram.constants import ChatType`` resolves to our mock
+    # with string-valued members (not auto-generated MagicMocks).
+    constants_mod = MagicMock()
+    constants_mod.ParseMode.MARKDOWN_V2 = "MarkdownV2"
+    constants_mod.ChatType.GROUP = "group"
+    constants_mod.ChatType.SUPERGROUP = "supergroup"
+    constants_mod.ChatType.CHANNEL = "channel"
+    constants_mod.ChatType.PRIVATE = "private"
+
+    sys.modules["telegram"] = telegram_mod
+    sys.modules["telegram.ext"] = telegram_mod.ext
+    sys.modules["telegram.constants"] = constants_mod
+    sys.modules["telegram.request"] = telegram_mod.request
+
+    # Force reimport so the adapter picks up the mock ChatType.
+    sys.modules.pop("gateway.platforms.telegram", None)


 _ensure_telegram_mock()
--- a/Show More
+++ b/Show More