refactor: unify transport dispatch + collapse normalize shims

Consolidate 4 per-transport lazy singleton helpers (_get_anthropic_transport, _get_codex_transport, _get_chat_completions_transport, _get_bedrock_transport) into one generic _get_transport(api_mode) with a shared dict cache. Collapse the 65-line main normalize block (3 api_mode branches, each with its own SimpleNamespace shim) into 7 lines: one _get_transport() call + one _nr_to_assistant_message() shared shim. The shim extracts provider_data fields (codex_reasoning_items, reasoning_details, call_id, response_item_id) into the SimpleNamespace shape downstream code expects. Wire chat_completions and bedrock_converse normalize through their transports for the first time — these were previously falling into the raw response.choices[0].message else branch. Remove 8 dead codex adapter imports that have zero callers after PRs 1-6. Transport lifecycle improvements: - Eagerly warm transport cache at __init__ (surfaces import errors early) - Invalidate transport cache on api_mode change (switch_model, fallback activation, fallback restore, transport recovery) — prevents stale transport after mid-session provider switch run_agent.py: -32 net lines (11,988 -> 11,956). PR 7 of the provider transport refactor.
feat(plugins): pluggable image_gen backends + OpenAI provider (#13799 )
2026-04-22 14:30:09 +05:30 · 2026-04-21 21:30:10 -07:00 · 2026-04-21 21:27:40 -07:00 · 2026-04-21 21:19:14 -07:00 · 2026-04-21 21:10:20 -07:00 · 2026-04-21 20:58:37 -07:00
386 changed files with 35090 additions and 3660 deletions
@@ -0,0 +1,8 @@
+name: 'Setup Nix'
+description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
+
+runs:
+  using: composite
+  steps:
+    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
+    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
@@ -0,0 +1,68 @@
+name: Nix Lockfile Check
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-check-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Resolve head SHA
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
+      - name: Check lockfile hashes
+        id: check
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      - name: Post sticky PR comment (stale)
+        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
+
+      - name: Clear sticky PR comment (resolved)
+        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Fail if stale
+        if: steps.check.outputs.stale == 'true'
+        run: exit 1
@@ -0,0 +1,149 @@
+name: Nix Lockfile Fix
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to fix (leave empty to run on the selected branch)'
+        required: false
+        type: string
+  issue_comment:
+    types: [edited]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  fix:
+    # Run on manual dispatch OR when a task-list checkbox in the sticky
+    # lockfile-check comment flips from `[ ]` to `[x]`.
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'issue_comment'
+       && github.event.issue.pull_request != null
+       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
+       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - name: Authorize & resolve PR
+        id: resolve
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
+        with:
+          script: |
+            // 1. Verify the actor has write access — applies to both checkbox
+            //    clicks and manual dispatch.
+            const { data: perm } =
+              await github.rest.repos.getCollaboratorPermissionLevel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                username: context.actor,
+              });
+            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
+              core.setFailed(
+                `${context.actor} lacks write access (has: ${perm.permission})`
+              );
+              return;
+            }
+
+            // 2. Resolve which ref to check out.
+            let prNumber = '';
+            if (context.eventName === 'issue_comment') {
+              prNumber = String(context.payload.issue.number);
+            } else if (context.eventName === 'workflow_dispatch') {
+              prNumber = context.payload.inputs.pr_number || '';
+            }
+
+            if (!prNumber) {
+              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
+              core.setOutput('repo', context.repo.repo);
+              core.setOutput('owner', context.repo.owner);
+              core.setOutput('pr', '');
+              return;
+            }
+
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: Number(prNumber),
+            });
+            core.setOutput('ref', pr.head.ref);
+            core.setOutput('repo', pr.head.repo.name);
+            core.setOutput('owner', pr.head.repo.owner.login);
+            core.setOutput('pr', String(pr.number));
+
+      # Wipe the sticky lockfile-check comment to a "running" state as soon
+      # as the job is authorized, so the user sees their click was picked up
+      # before the ~minute of nix build work.
+      - name: Mark sticky as running
+        if: steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### 🔄 Applying lockfile fix…
+
+            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
+          ref: ${{ steps.resolve.outputs.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          fetch-depth: 0
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): refresh npm lockfile hashes"
+          git push
+
+      - name: Update sticky (applied)
+        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile fix applied
+
+            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (already current)
+        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile hashes already current
+
+            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (failed)
+        if: failure() && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ❌ Lockfile fix failed
+
+            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
@@ -4,15 +4,6 @@ on:
  push:
    branches: [main]
  pull_request:
-    paths:
-      - 'flake.nix'
-      - 'flake.lock'
-      - 'nix/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'hermes_cli/**'
-      - 'run_agent.py'
-      - 'acp_adapter/**'

 permissions:
  contents: read
@@ -29,9 +20,8 @@ jobs:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25  # v22
-      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39  # v13
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: ./.github/actions/nix-setup
      - name: Check flake
        if: runner.os == 'Linux'
        run: nix flake check --print-build-logs
@@ -566,3 +566,52 @@ python -m pytest tests/ -q -n 4
 Worker count above 4 will surface test-ordering flakes that CI never sees.

 Always run the full suite before pushing changes.
+
+### Don't write change-detector tests
+
+A test is a **change-detector** if it fails whenever data that is **expected
+to change** gets updated — model catalogs, config version numbers,
+enumeration counts, hardcoded lists of provider models. These tests add no
+behavioral coverage; they just guarantee that routine source updates break
+CI and cost engineering time to "fix."
+
+**Do not write:**
+
+```python
+# catalog snapshot — breaks every model release
+assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
+assert "MiniMax-M2.7" in models
+
+# config version literal — breaks every schema bump
+assert DEFAULT_CONFIG["_config_version"] == 21
+
+# enumeration count — breaks every time a skill/provider is added
+assert len(_PROVIDER_MODELS["huggingface"]) == 8
+```
+
+**Do write:**
+
+```python
+# behavior: does the catalog plumbing work at all?
+assert "gemini" in _PROVIDER_MODELS
+assert len(_PROVIDER_MODELS["gemini"]) >= 1
+
+# behavior: does migration bump the user's version to current latest?
+assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
+
+# invariant: no plan-only model leaks into the legacy list
+assert not (set(moonshot_models) & coding_plan_only_models)
+
+# invariant: every model in the catalog has a context-length entry
+for m in _PROVIDER_MODELS["huggingface"]:
+    assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
+```
+
+The rule: if the test reads like a snapshot of current data, delete it. If
+it reads like a contract about how two pieces of data must relate, keep it.
+When a PR adds a new provider/model and you want a test, make the test
+assert the relationship (e.g. "catalog entries all have context lengths"),
+not the specific names.
+
+Reviewers should reject new change-detector tests; authors should convert
+them into invariants before re-requesting review.
@@ -27,12 +27,10 @@ WORKDIR /opt/hermes
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
-COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
 COPY web/package.json web/package-lock.json web/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
-    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
    (cd web && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

@@ -63,6 +63,9 @@ def make_approval_callback(
            logger.warning("Permission request timed out or failed: %s", exc)
            return "deny"

+        if response is None:
+            return "deny"
+
        outcome = response.outcome
        if isinstance(outcome, AllowedOutcome):
            option_id = outcome.option_id
@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import logging
+import os
 from collections import defaultdict, deque
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Deque, Optional
@@ -51,7 +52,7 @@ try:
 except ImportError:
    from acp.schema import AuthMethod as AuthMethodAgent  # type: ignore[attr-defined]

-from acp_adapter.auth import detect_provider, has_provider
+from acp_adapter.auth import detect_provider
 from acp_adapter.events import (
    make_message_cb,
    make_step_cb,
@@ -71,6 +72,11 @@ except Exception:
 # Thread pool for running AIAgent (synchronous) in parallel.
 _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")

+# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
+# does not expose a client-side limit, so this is a fixed cap that clients
+# paginate against using `cursor` / `next_cursor`.
+_LIST_SESSIONS_PAGE_SIZE = 50
+

 def _extract_text(
    prompt: list[
@@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent):
        )

    async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
-        if has_provider():
-            return AuthenticateResponse()
-        return None
+        # Only accept authenticate() calls whose method_id matches the
+        # provider we advertised in initialize(). Without this check,
+        # authenticate() would acknowledge any method_id as long as the
+        # server has provider credentials configured — harmless under
+        # Hermes' threat model (ACP is stdio-only, local-trust), but poor
+        # API hygiene and confusing if ACP ever grows multi-method auth.
+        provider = detect_provider()
+        if not provider:
+            return None
+        if not isinstance(method_id, str) or method_id.strip().lower() != provider:
+            return None
+        return AuthenticateResponse()

    # ---- Session management -------------------------------------------------

@@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent):
        cwd: str | None = None,
        **kwargs: Any,
    ) -> ListSessionsResponse:
+        """List ACP sessions with optional ``cwd`` filtering and cursor pagination.
+
+        ``cwd`` is passed through to ``SessionManager.list_sessions`` which already
+        normalizes and filters by working directory. ``cursor`` is a ``session_id``
+        previously returned as ``next_cursor``; results resume after that entry.
+        Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
+        results remain, ``next_cursor`` is set to the last returned ``session_id``.
+        """
        infos = self.session_manager.list_sessions(cwd=cwd)
+
+        if cursor:
+            for idx, s in enumerate(infos):
+                if s["session_id"] == cursor:
+                    infos = infos[idx + 1:]
+                    break
+            else:
+                # Unknown cursor -> empty page (do not fall back to full list).
+                infos = []
+
+        has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
+        infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
+
        sessions = []
        for s in infos:
            updated_at = s.get("updated_at")
@@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent):
                    updated_at=updated_at,
                )
            )
-        return ListSessionsResponse(sessions=sessions)
+
+        next_cursor = sessions[-1].session_id if has_more and sessions else None
+        return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)

    # ---- Prompt (core) ------------------------------------------------------

@@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent):
        agent.step_callback = step_cb
        agent.message_callback = message_cb

-        if approval_cb:
-            try:
-                from tools import terminal_tool as _terminal_tool
-                previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
-                _terminal_tool.set_approval_callback(approval_cb)
-            except Exception:
-                logger.debug("Could not set ACP approval callback", exc_info=True)
+        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
+        # Set it INSIDE _run_agent so the TLS write happens in the executor
+        # thread — setting it here would write to the event-loop thread's TLS,
+        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
+        # takes the CLI-interactive path (which calls the registered
+        # callback via prompt_dangerous_approval) instead of the
+        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
+        # ACP's conn.request_permission maps cleanly to the interactive
+        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
+        # which requires a notify_cb registered in _gateway_notify_cbs.
+        previous_approval_cb = None
+        previous_interactive = None

        def _run_agent() -> dict:
+            nonlocal previous_approval_cb, previous_interactive
+            if approval_cb:
+                try:
+                    from tools import terminal_tool as _terminal_tool
+                    previous_approval_cb = _terminal_tool._get_approval_callback()
+                    _terminal_tool.set_approval_callback(approval_cb)
+                except Exception:
+                    logger.debug("Could not set ACP approval callback", exc_info=True)
+            # Signal to tools.approval that we have an interactive callback
+            # and the non-interactive auto-approve path must not fire.
+            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
+            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
                    user_message=user_text,
@@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent):
                logger.exception("Agent error in session %s", session_id)
                return {"final_response": f"Error: {e}", "messages": state.history}
            finally:
+                # Restore HERMES_INTERACTIVE.
+                if previous_interactive is None:
+                    os.environ.pop("HERMES_INTERACTIVE", None)
+                else:
+                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
                if approval_cb:
                    try:
                        from tools import terminal_tool as _terminal_tool
@@ -613,8 +673,8 @@ class HermesACPAgent(acp.Agent):
            await self._conn.session_update(
                session_id=session_id,
                update=AvailableCommandsUpdate(
-                    sessionUpdate="available_commands_update",
-                    availableCommands=self._available_commands(),
+                    session_update="available_commands_update",
+                    available_commands=self._available_commands(),
                ),
            )
        except Exception:
@@ -0,0 +1,326 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+import httpx
+
+from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
+from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
+from hermes_cli.runtime_provider import resolve_runtime_provider
+
+
+def _utc_now() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+@dataclass(frozen=True)
+class AccountUsageWindow:
+    label: str
+    used_percent: Optional[float] = None
+    reset_at: Optional[datetime] = None
+    detail: Optional[str] = None
+
+
+@dataclass(frozen=True)
+class AccountUsageSnapshot:
+    provider: str
+    source: str
+    fetched_at: datetime
+    title: str = "Account limits"
+    plan: Optional[str] = None
+    windows: tuple[AccountUsageWindow, ...] = ()
+    details: tuple[str, ...] = ()
+    unavailable_reason: Optional[str] = None
+
+    @property
+    def available(self) -> bool:
+        return bool(self.windows or self.details) and not self.unavailable_reason
+
+
+def _title_case_slug(value: Optional[str]) -> Optional[str]:
+    cleaned = str(value or "").strip()
+    if not cleaned:
+        return None
+    return cleaned.replace("_", " ").replace("-", " ").title()
+
+
+def _parse_dt(value: Any) -> Optional[datetime]:
+    if value in (None, ""):
+        return None
+    if isinstance(value, (int, float)):
+        return datetime.fromtimestamp(float(value), tz=timezone.utc)
+    if isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return None
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        try:
+            dt = datetime.fromisoformat(text)
+            return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
+        except ValueError:
+            return None
+    return None
+
+
+def _format_reset(dt: Optional[datetime]) -> str:
+    if not dt:
+        return "unknown"
+    local_dt = dt.astimezone()
+    delta = dt - _utc_now()
+    total_seconds = int(delta.total_seconds())
+    if total_seconds <= 0:
+        return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+    hours, rem = divmod(total_seconds, 3600)
+    minutes = rem // 60
+    if hours >= 24:
+        days, hours = divmod(hours, 24)
+        rel = f"in {days}d {hours}h"
+    elif hours > 0:
+        rel = f"in {hours}h {minutes}m"
+    else:
+        rel = f"in {minutes}m"
+    return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+
+
+def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
+    if not snapshot:
+        return []
+    header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
+    lines = [header]
+    if snapshot.plan:
+        lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
+    else:
+        lines.append(f"Provider: {snapshot.provider}")
+    for window in snapshot.windows:
+        if window.used_percent is None:
+            base = f"{window.label}: unavailable"
+        else:
+            remaining = max(0, round(100 - float(window.used_percent)))
+            used = max(0, round(float(window.used_percent)))
+            base = f"{window.label}: {remaining}% remaining ({used}% used)"
+        if window.reset_at:
+            base += f" • resets {_format_reset(window.reset_at)}"
+        elif window.detail:
+            base += f" • {window.detail}"
+        lines.append(base)
+    for detail in snapshot.details:
+        lines.append(detail)
+    if snapshot.unavailable_reason:
+        lines.append(f"Unavailable: {snapshot.unavailable_reason}")
+    return lines
+
+
+def _resolve_codex_usage_url(base_url: str) -> str:
+    normalized = (base_url or "").strip().rstrip("/")
+    if not normalized:
+        normalized = "https://chatgpt.com/backend-api/codex"
+    if normalized.endswith("/codex"):
+        normalized = normalized[: -len("/codex")]
+    if "/backend-api" in normalized:
+        return normalized + "/wham/usage"
+    return normalized + "/api/codex/usage"
+
+
+def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
+    creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
+    token_data = _read_codex_tokens()
+    tokens = token_data.get("tokens") or {}
+    account_id = str(tokens.get("account_id", "") or "").strip() or None
+    headers = {
+        "Authorization": f"Bearer {creds['api_key']}",
+        "Accept": "application/json",
+        "User-Agent": "codex-cli",
+    }
+    if account_id:
+        headers["ChatGPT-Account-Id"] = account_id
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
+        response.raise_for_status()
+    payload = response.json() or {}
+    rate_limit = payload.get("rate_limit") or {}
+    windows: list[AccountUsageWindow] = []
+    for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
+        window = rate_limit.get(key) or {}
+        used = window.get("used_percent")
+        if used is None:
+            continue
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=float(used),
+                reset_at=_parse_dt(window.get("reset_at")),
+            )
+        )
+    details: list[str] = []
+    credits = payload.get("credits") or {}
+    if credits.get("has_credits"):
+        balance = credits.get("balance")
+        if isinstance(balance, (int, float)):
+            details.append(f"Credits balance: ${float(balance):.2f}")
+        elif credits.get("unlimited"):
+            details.append("Credits balance: unlimited")
+    return AccountUsageSnapshot(
+        provider="openai-codex",
+        source="usage_api",
+        fetched_at=_utc_now(),
+        plan=_title_case_slug(payload.get("plan_type")),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
+    token = (resolve_anthropic_token() or "").strip()
+    if not token:
+        return None
+    if not _is_oauth_token(token):
+        return AccountUsageSnapshot(
+            provider="anthropic",
+            source="oauth_usage_api",
+            fetched_at=_utc_now(),
+            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
+        )
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+        "anthropic-beta": "oauth-2025-04-20",
+        "User-Agent": "claude-code/2.1.0",
+    }
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
+        response.raise_for_status()
+    payload = response.json() or {}
+    windows: list[AccountUsageWindow] = []
+    mapping = (
+        ("five_hour", "Current session"),
+        ("seven_day", "Current week"),
+        ("seven_day_opus", "Opus week"),
+        ("seven_day_sonnet", "Sonnet week"),
+    )
+    for key, label in mapping:
+        window = payload.get(key) or {}
+        util = window.get("utilization")
+        if util is None:
+            continue
+        used = float(util) * 100 if float(util) <= 1 else float(util)
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=used,
+                reset_at=_parse_dt(window.get("resets_at")),
+            )
+        )
+    details: list[str] = []
+    extra = payload.get("extra_usage") or {}
+    if extra.get("is_enabled"):
+        used_credits = extra.get("used_credits")
+        monthly_limit = extra.get("monthly_limit")
+        currency = extra.get("currency") or "USD"
+        if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
+            details.append(
+                f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
+            )
+    return AccountUsageSnapshot(
+        provider="anthropic",
+        source="oauth_usage_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
+    runtime = resolve_runtime_provider(
+        requested="openrouter",
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )
+    token = str(runtime.get("api_key", "") or "").strip()
+    if not token:
+        return None
+    normalized = str(runtime.get("base_url", "") or "").rstrip("/")
+    credits_url = f"{normalized}/credits"
+    key_url = f"{normalized}/key"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+    }
+    with httpx.Client(timeout=10.0) as client:
+        credits_resp = client.get(credits_url, headers=headers)
+        credits_resp.raise_for_status()
+        credits = (credits_resp.json() or {}).get("data") or {}
+        try:
+            key_resp = client.get(key_url, headers=headers)
+            key_resp.raise_for_status()
+            key_data = (key_resp.json() or {}).get("data") or {}
+        except Exception:
+            key_data = {}
+    total_credits = float(credits.get("total_credits") or 0.0)
+    total_usage = float(credits.get("total_usage") or 0.0)
+    details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
+    windows: list[AccountUsageWindow] = []
+    limit = key_data.get("limit")
+    limit_remaining = key_data.get("limit_remaining")
+    limit_reset = str(key_data.get("limit_reset") or "").strip()
+    usage = key_data.get("usage")
+    if (
+        isinstance(limit, (int, float))
+        and float(limit) > 0
+        and isinstance(limit_remaining, (int, float))
+        and 0 <= float(limit_remaining) <= float(limit)
+    ):
+        limit_value = float(limit)
+        remaining_value = float(limit_remaining)
+        used_percent = ((limit_value - remaining_value) / limit_value) * 100
+        detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
+        if limit_reset:
+            detail_parts.append(f"resets {limit_reset}")
+        windows.append(
+            AccountUsageWindow(
+                label="API key quota",
+                used_percent=used_percent,
+                detail=" • ".join(detail_parts),
+            )
+        )
+    if isinstance(usage, (int, float)):
+        usage_parts = [f"API key usage: ${float(usage):.2f} total"]
+        for value, label in (
+            (key_data.get("usage_daily"), "today"),
+            (key_data.get("usage_weekly"), "this week"),
+            (key_data.get("usage_monthly"), "this month"),
+        ):
+            if isinstance(value, (int, float)) and float(value) > 0:
+                usage_parts.append(f"${float(value):.2f} {label}")
+        details.append(" • ".join(usage_parts))
+    return AccountUsageSnapshot(
+        provider="openrouter",
+        source="credits_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def fetch_account_usage(
+    provider: Optional[str],
+    *,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> Optional[AccountUsageSnapshot]:
+    normalized = str(provider or "").strip().lower()
+    if normalized in {"", "auto", "custom"}:
+        return None
+    try:
+        if normalized == "openai-codex":
+            return _fetch_codex_account_usage()
+        if normalized == "anthropic":
+            return _fetch_anthropic_account_usage()
+        if normalized == "openrouter":
+            return _fetch_openrouter_account_usage(base_url, api_key)
+    except Exception:
+        return None
+    return None
@@ -19,6 +19,7 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
+from utils import normalize_proxy_env_vars

 try:
    import anthropic as _anthropic_sdk
@@ -265,6 +266,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    return True  # Any other endpoint is a third-party proxy


+def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
+    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -308,6 +317,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
+
+    normalize_proxy_env_vars()
+
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
@@ -319,9 +331,18 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

-    if _requires_bearer_auth(normalized_base_url):
+    if _is_kimi_coding_endpoint(base_url):
+        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
+        # to be recognized as a valid Coding Agent. Without it, returns 403.
+        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
+        kwargs["api_key"] = api_key
+        kwargs["default_headers"] = {
+            "User-Agent": "claude-code/0.1.0",
+            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
+        }
+    elif _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer even for regular API keys. Route those endpoints
+        # Authorization: Bearer *** for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1405,11 +1426,25 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
+    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
+    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
+    # validates the message history and requires every prior assistant
+    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
+    # Anthropic path never populates that field, and
+    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
+    # on third-party endpoints — so the request fails with HTTP 400
+    # "thinking is enabled but reasoning_content is missing in assistant
+    # tool call message at index N".  Kimi's reasoning is driven server-side
+    # on the /coding route, so skip Anthropic's thinking parameter entirely
+    # for that host.  (Kimi on chat_completions enables thinking via
+    # extra_body in the ChatCompletionsTransport — see #13503.)
+    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    if reasoning_config and isinstance(reasoning_config, dict):
+    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
@@ -48,6 +48,7 @@ from openai import OpenAI
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars

 logger = logging.getLogger(__name__)

@@ -95,84 +96,37 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
    return _PROVIDER_ALIASES.get(normalized, normalized)


-_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
-    "kimi-for-coding": 0.6,
-}
+# Sentinel: when returned by _fixed_temperature_for_model(), callers must
+# strip the ``temperature`` key from API kwargs entirely so the provider's
+# server-side default applies.  Kimi/Moonshot models manage temperature
+# internally — sending *any* value (even the "correct" one) can conflict
+# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
+OMIT_TEMPERATURE: object = object()

-# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
-# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
-# value 0.6.  Any other value will result in an error."  The same lock applies
-# to the other k2.* models served on that endpoint.  Enumerated explicitly so
-# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
-# the standard chat API and third parties) are NOT clamped.
-# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
-_KIMI_INSTANT_MODELS: frozenset = frozenset({
-    "kimi-k2.5",
-    "kimi-k2-turbo-preview",
-    "kimi-k2-0905-preview",
-})
-_KIMI_THINKING_MODELS: frozenset = frozenset({
-    "kimi-k2-thinking",
-    "kimi-k2-thinking-turbo",
-})

-# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different
-# temperature contract than the Coding Plan endpoint above.  Empirically,
-# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400
-# "invalid temperature: only 1 is allowed for this model" — the Coding Plan
-# lock (0.6 for non-thinking) does not apply.  `kimi-k2-turbo-preview` and the
-# thinking variants already match the Coding Plan contract on the public
-# endpoint, so we only override the models that diverge.
-# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the
-# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see
-# hermes_cli/auth.py:_kimi_base_url_for_key).
-_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = {
-    "kimi-k2.5": 1.0,
-}
+def _is_kimi_model(model: Optional[str]) -> bool:
+    """True for any Kimi / Moonshot model that manages temperature server-side."""
+    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
+    return bare.startswith("kimi-") or bare == "kimi"


 def _fixed_temperature_for_model(
    model: Optional[str],
    base_url: Optional[str] = None,
-) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts.
+) -> "Optional[float] | object":
+    """Return a temperature directive for models with strict contracts.

-    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
-    the k2.5 family.  Non-thinking variants require exactly 0.6; thinking
-    variants require 1.0.  An optional ``vendor/`` prefix (e.g.
-    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
-
-    When ``base_url`` points to Moonshot's public chat endpoint
-    (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public
-    API only accepts ``temperature=1``, not 0.6.  That override takes precedence
-    over the Coding Plan defaults above.
-
-    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
-    which is the separate non-coding K2 family with variable temperature.
+    Returns:
+        ``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
+            provider chooses its own default.  Used for all Kimi / Moonshot
+            models whose gateway selects temperature server-side.
+        ``float`` — a specific value the caller must use (reserved for future
+            models with fixed-temperature contracts).
+        ``None`` — no override; caller should use its own default.
    """
-    normalized = (model or "").strip().lower()
-    bare = normalized.rsplit("/", 1)[-1]
-
-    # Public Moonshot API has a stricter contract for some models than the
-    # Coding Plan endpoint — check it first so it wins on conflict.
-    if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()):
-        public = _KIMI_PUBLIC_API_OVERRIDES.get(bare)
-        if public is not None:
-            logger.debug(
-                "Forcing temperature=%s for %r on public Moonshot API", public, model
-            )
-            return public
-
-    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
-    if fixed is not None:
-        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
-        return fixed
-    if bare in _KIMI_THINKING_MODELS:
-        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
-        return 1.0
-    if bare in _KIMI_INSTANT_MODELS:
-        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
-        return 0.6
+    if _is_kimi_model(model):
+        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
+        return OMIT_TEMPERATURE
    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
@@ -207,6 +161,16 @@ _OR_HEADERS = {
    "X-OpenRouter-Categories": "productivity,cli-agent",
 }

+# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
+# referrerUrl and X-Title maps to appName in the gateway's analytics.
+from hermes_cli import __version__ as _HERMES_VERSION
+
+_AI_GATEWAY_HEADERS = {
+    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
+    "X-Title": "Hermes Agent",
+    "User-Agent": f"HermesAgent/{_HERMES_VERSION}",
+}
+
 # Nous Portal extra_body for product attribution.
 # Callers should pass this as extra_body in chat.completions.create()
 # when the auxiliary client is backed by Nous Portal.
@@ -218,8 +182,6 @@ auxiliary_is_nous: bool = False
 # Default auxiliary models per provider
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
-_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
-_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -764,6 +726,33 @@ def _nous_base_url() -> str:
    return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)


+def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
+    """Return fresh Nous runtime credentials when available.
+
+    This mirrors the main agent's 401 recovery path and keeps auxiliary
+    clients aligned with the singleton auth store + mint flow instead of
+    relying only on whatever raw tokens happen to be sitting in auth.json
+    or the credential pool.
+    """
+    try:
+        from hermes_cli.auth import resolve_nous_runtime_credentials
+
+        creds = resolve_nous_runtime_credentials(
+            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+            timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+            force_mint=force_refresh,
+        )
+    except Exception as exc:
+        logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
+        return None
+
+    api_key = str(creds.get("api_key") or "").strip()
+    base_url = str(creds.get("base_url") or "").strip().rstrip("/")
+    if not api_key or not base_url:
+        return None
+    return api_key, base_url
+
+
 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store.

@@ -853,9 +842,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                if is_native_gemini_base_url(base_url):
                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
            extra = {}
-            if "api.kimi.com" in base_url.lower():
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-            elif "api.githubcopilot.com" in base_url.lower():
+            if base_url_host_matches(base_url, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
@@ -879,9 +868,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            if is_native_gemini_base_url(base_url):
                return GeminiNativeClient(api_key=api_key, base_url=base_url), model
        extra = {}
-        if "api.kimi.com" in base_url.lower():
-            extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-        elif "api.githubcopilot.com" in base_url.lower():
+        if base_url_host_matches(base_url, "api.kimi.com"):
+            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
@@ -930,29 +919,50 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
        pass

    nous = _read_nous_auth()
-    if not nous:
+    runtime = _resolve_nous_runtime_api(force_refresh=False)
+    if runtime is None and not nous:
        return None, None
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-    if nous.get("source") == "pool":
-        model = "gemini-3-flash"
-    else:
-        model = _NOUS_MODEL
-    # Free-tier users can't use paid auxiliary models — use the free
-    # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
+
+    # Ask the Portal which model it currently recommends for this task type.
+    # The /api/nous/recommended-models endpoint is the authoritative source:
+    # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
+    # auto-detects the caller's tier via check_nous_free_tier().  Fall back to
+    # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
+    # or returns a null recommendation for this task type.
+    model = _NOUS_MODEL
    try:
-        from hermes_cli.models import check_nous_free_tier
-        if check_nous_free_tier():
-            model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
-            logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
-                         model, "vision" if vision else "text")
-    except Exception:
-        pass
+        from hermes_cli.models import get_nous_recommended_aux_model
+        recommended = get_nous_recommended_aux_model(vision=vision)
+        if recommended:
+            model = recommended
+            logger.debug(
+                "Auxiliary/%s: using Portal-recommended model %s",
+                "vision" if vision else "text", model,
+            )
+        else:
+            logger.debug(
+                "Auxiliary/%s: no Portal recommendation, falling back to %s",
+                "vision" if vision else "text", model,
+            )
+    except Exception as exc:
+        logger.debug(
+            "Auxiliary/%s: recommended-models lookup failed (%s); "
+            "falling back to %s",
+            "vision" if vision else "text", exc, model,
+        )
+
+    if runtime is not None:
+        api_key, base_url = runtime
+    else:
+        api_key = _nous_api_key(nous or {})
+        base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
    return (
        OpenAI(
-            api_key=_nous_api_key(nous),
-            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
+            api_key=api_key,
+            base_url=base_url,
        ),
        model,
    )
@@ -1030,7 +1040,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
        return None, None, None

    custom_base = custom_base.strip().rstrip("/")
-    if "openrouter.ai" in custom_base.lower():
+    if base_url_host_matches(custom_base, "openrouter.ai"):
        # requested='custom' falls back to OpenRouter when no custom endpoint is
        # configured. Treat that as "no custom endpoint" for auxiliary routing.
        return None, None, None
@@ -1064,6 +1074,8 @@ def _validate_proxy_env_urls() -> None:
    """
    from urllib.parse import urlparse

+    normalize_proxy_env_vars()
+
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = str(os.environ.get(key) or "").strip()
@@ -1294,6 +1306,15 @@ def _is_connection_error(exc: Exception) -> bool:
    return False


+def _is_auth_error(exc: Exception) -> bool:
+    """Detect auth failures that should trigger provider-specific refresh."""
+    status = getattr(exc, "status_code", None)
+    if status == 401:
+        return True
+    err_lower = str(exc).lower()
+    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
+
+
 def _try_payment_fallback(
    failed_provider: str,
    task: str = None,
@@ -1469,15 +1490,15 @@ def _to_async_client(sync_client, model: str):
        "api_key": sync_client.api_key,
        "base_url": str(sync_client.base_url),
    }
-    base_lower = str(sync_client.base_url).lower()
-    if "openrouter" in base_lower:
+    sync_base_url = str(sync_client.base_url)
+    if base_url_host_matches(sync_base_url, "openrouter.ai"):
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
-    elif "api.githubcopilot.com" in base_lower:
+    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
        from hermes_cli.models import copilot_default_headers

        async_kwargs["default_headers"] = copilot_default_headers()
-    elif "api.kimi.com" in base_lower:
-        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
+        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
    return AsyncOpenAI(**async_kwargs), model


@@ -1553,8 +1574,7 @@ def resolve_provider_client(
        # Auto-detect: api.openai.com + codex model name pattern
        if api_mode and api_mode != "codex_responses":
            return False  # explicit non-codex mode
-        normalized_base = (base_url_str or "").strip().lower()
-        if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
+        if base_url_hostname(base_url_str) == "api.openai.com":
            model_lower = (model_str or "").lower()
            if "codex" in model_lower:
                return True
@@ -1602,7 +1622,13 @@ def resolve_provider_client(

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
    if provider == "nous":
-        client, default = _try_nous()
+        # Detect vision tasks: either explicit model override from
+        # _PROVIDER_VISION_MODELS, or caller passed a known vision model.
+        _is_vision = (
+            model in _PROVIDER_VISION_MODELS.values()
+            or (model or "").strip().lower() == "mimo-v2-omni"
+        )
+        client, default = _try_nous(vision=_is_vision)
        if client is None:
            logger.warning("resolve_provider_client: nous requested "
                           "but Nous Portal not configured (run: hermes auth)")
@@ -1658,9 +1684,9 @@ def resolve_provider_client(
                provider,
            )
            extra = {}
-            if "api.kimi.com" in custom_base.lower():
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-            elif "api.githubcopilot.com" in custom_base.lower():
+            if base_url_host_matches(custom_base, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers
                extra["default_headers"] = copilot_default_headers()
            client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
@@ -1765,9 +1791,9 @@ def resolve_provider_client(

        # Provider-specific headers
        headers = {}
-        if "api.kimi.com" in base_url.lower():
-            headers["User-Agent"] = "KimiCLI/1.30.0"
-        elif "api.githubcopilot.com" in base_url.lower():
+        if base_url_host_matches(base_url, "api.kimi.com"):
+            headers["User-Agent"] = "claude-code/0.1.0"
+        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

            headers.update(copilot_default_headers())
@@ -1998,24 +2024,35 @@ def resolve_vision_provider_client(
        #      _PROVIDER_VISION_MODELS provides per-provider vision model
        #      overrides when the provider has a dedicated multimodal model
        #      that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
-        #      zai → glm-5v-turbo).
+        #      zai → glm-5v-turbo). Nous is the exception: it has a dedicated
+        #      strict vision backend with tier-aware defaults, so it must not
+        #      fall through to the user's text chat model here.
        #   2. OpenRouter  (vision-capable aggregator fallback)
        #   3. Nous Portal (vision-capable aggregator fallback)
        #   4. Stop
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
-            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
-            rpc_client, rpc_model = resolve_provider_client(
-                main_provider, vision_model,
-                api_mode=resolved_api_mode)
-            if rpc_client is not None:
-                logger.info(
-                    "Vision auto-detect: using main provider %s (%s)",
-                    main_provider, rpc_model or vision_model,
-                )
-                return _finalize(
-                    main_provider, rpc_client, rpc_model or vision_model)
+            if main_provider == "nous":
+                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
+                if sync_client is not None:
+                    logger.info(
+                        "Vision auto-detect: using main provider %s (%s)",
+                        main_provider, default_model or resolved_model or main_model,
+                    )
+                    return _finalize(main_provider, sync_client, default_model)
+            else:
+                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
+                rpc_client, rpc_model = resolve_provider_client(
+                    main_provider, vision_model,
+                    api_mode=resolved_api_mode)
+                if rpc_client is not None:
+                    logger.info(
+                        "Vision auto-detect: using main provider %s (%s)",
+                        main_provider, rpc_model or vision_model,
+                    )
+                    return _finalize(
+                        main_provider, rpc_client, rpc_model or vision_model)

        # Fall back through aggregators (uses their dedicated vision model,
        # not the user's main model) when main provider has no client.
@@ -2062,7 +2099,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
    # Only use max_completion_tokens for direct OpenAI custom endpoints
    if (not or_key
            and _read_nous_auth() is None
-            and "api.openai.com" in custom_base.lower()):
+            and base_url_hostname(custom_base) == "api.openai.com"):
        return {"max_completion_tokens": value}
    return {"max_tokens": value}

@@ -2090,6 +2127,76 @@ _client_cache_lock = threading.Lock()
 _CLIENT_CACHE_MAX_SIZE = 64  # safety belt — evict oldest when exceeded


+def _client_cache_key(
+    provider: str,
+    *,
+    async_mode: bool,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+    api_mode: Optional[str] = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> tuple:
+    runtime = _normalize_main_runtime(main_runtime)
+    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+
+
+def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
+    with _client_cache_lock:
+        old_entry = _client_cache.get(cache_key)
+        if old_entry is not None and old_entry[0] is not client:
+            _force_close_async_httpx(old_entry[0])
+            try:
+                close_fn = getattr(old_entry[0], "close", None)
+                if callable(close_fn):
+                    close_fn()
+            except Exception:
+                pass
+        _client_cache[cache_key] = (client, default_model, bound_loop)
+
+
+def _refresh_nous_auxiliary_client(
+    *,
+    cache_provider: str,
+    model: Optional[str],
+    async_mode: bool,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+    api_mode: Optional[str] = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> Tuple[Optional[Any], Optional[str]]:
+    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
+    runtime = _resolve_nous_runtime_api(force_refresh=True)
+    if runtime is None:
+        return None, model
+
+    fresh_key, fresh_base_url = runtime
+    sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
+    final_model = model
+
+    current_loop = None
+    if async_mode:
+        try:
+            import asyncio as _aio
+            current_loop = _aio.get_event_loop()
+        except RuntimeError:
+            pass
+        client, final_model = _to_async_client(sync_client, final_model or "")
+    else:
+        client = sync_client
+
+    cache_key = _client_cache_key(
+        cache_provider,
+        async_mode=async_mode,
+        base_url=base_url,
+        api_key=api_key,
+        api_mode=api_mode,
+        main_runtime=main_runtime,
+    )
+    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
+    return client, final_model
+
+
 def neuter_async_httpx_del() -> None:
    """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.

@@ -2191,7 +2298,7 @@ def cleanup_stale_async_clients() -> None:

 def _is_openrouter_client(client: Any) -> bool:
    for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
-        if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
+        if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
            return True
    return False

@@ -2243,8 +2350,14 @@ def _get_cached_client(
        except RuntimeError:
            pass
    runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+    cache_key = _client_cache_key(
+        provider,
+        async_mode=async_mode,
+        base_url=base_url,
+        api_key=api_key,
+        api_mode=api_mode,
+        main_runtime=main_runtime,
+    )
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2475,7 +2588,9 @@ def _build_call_kwargs(
    }

    fixed_temperature = _fixed_temperature_for_model(model, base_url)
-    if fixed_temperature is not None:
+    if fixed_temperature is OMIT_TEMPERATURE:
+        temperature = None  # strip — let server choose
+    elif fixed_temperature is not None:
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
@@ -2495,7 +2610,7 @@ def _build_call_kwargs(
        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
        if provider == "custom":
            custom_base = base_url or _current_custom_base_url()
-            if "api.openai.com" in custom_base.lower():
+            if base_url_hostname(custom_base) == "api.openai.com":
                kwargs["max_completion_tokens"] = max_tokens
            else:
                kwargs["max_tokens"] = max_tokens
@@ -2690,6 +2805,29 @@ def call_llm(
                    raise
                first_err = retry_err

+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=False,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                main_runtime=main_runtime,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    refreshed_client.chat.completions.create(**kwargs), task)
+
        # ── Payment / credit exhaustion fallback ──────────────────────
        # When the resolved provider returns 402 or a credit-related error,
        # try alternative providers instead of giving up.  This handles the
@@ -2888,6 +3026,28 @@ async def async_call_llm(
                    raise
                first_err = retry_err

+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=True,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    await refreshed_client.chat.completions.create(**kwargs), task)
+
        # ── Payment / connection fallback (mirrors sync call_llm) ─────
        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
        is_auto = resolved_provider in ("auto", "", None)
@@ -22,6 +22,98 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY

 logger = logging.getLogger(__name__)

+
+# ---------------------------------------------------------------------------
+# Multimodal content helpers
+# ---------------------------------------------------------------------------
+
+def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
+    """Convert chat-style multimodal content to Responses API input parts.
+
+    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
+    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
+
+    Returns an empty list when ``content`` is not a list or contains no
+    recognized parts — callers fall back to the string path.
+    """
+    if not isinstance(content, list):
+        return []
+    converted: List[Dict[str, Any]] = []
+    for part in content:
+        if isinstance(part, str):
+            if part:
+                converted.append({"type": "input_text", "text": part})
+            continue
+        if not isinstance(part, dict):
+            continue
+        ptype = str(part.get("type") or "").strip().lower()
+        if ptype in {"text", "input_text", "output_text"}:
+            text = part.get("text")
+            if isinstance(text, str) and text:
+                converted.append({"type": "input_text", "text": text})
+            continue
+        if ptype in {"image_url", "input_image"}:
+            image_ref = part.get("image_url")
+            detail = part.get("detail")
+            if isinstance(image_ref, dict):
+                url = image_ref.get("url")
+                detail = image_ref.get("detail", detail)
+            else:
+                url = image_ref
+            if not isinstance(url, str) or not url:
+                continue
+            image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
+            if isinstance(detail, str) and detail.strip():
+                image_part["detail"] = detail.strip()
+            converted.append(image_part)
+    return converted
+
+
+def _summarize_user_message_for_log(content: Any) -> str:
+    """Return a short text summary of a user message for logging/trajectory.
+
+    Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
+    parts from the API server.  Logging, spinner previews, and trajectory
+    files all want a plain string — this helper extracts the first chunk of
+    text and notes any attached images.  Returns an empty string for empty
+    lists and ``str(content)`` for unexpected scalar types.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        text_bits: List[str] = []
+        image_count = 0
+        for part in content:
+            if isinstance(part, str):
+                if part:
+                    text_bits.append(part)
+                continue
+            if not isinstance(part, dict):
+                continue
+            ptype = str(part.get("type") or "").strip().lower()
+            if ptype in {"text", "input_text", "output_text"}:
+                text = part.get("text")
+                if isinstance(text, str) and text:
+                    text_bits.append(text)
+            elif ptype in {"image_url", "input_image"}:
+                image_count += 1
+        summary = " ".join(text_bits).strip()
+        if image_count:
+            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
+            summary = f"{note} {summary}" if summary else note
+        return summary
+    try:
+        return str(content)
+    except Exception:
+        return ""
+
+
+# ---------------------------------------------------------------------------
+# ID helpers
+# ---------------------------------------------------------------------------
+
 def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
    """Generate a deterministic call_id from tool call content.

@@ -80,14 +172,17 @@ def _derive_responses_function_call_id(
    return f"fc_{digest}"


+# ---------------------------------------------------------------------------
+# Schema conversion
+# ---------------------------------------------------------------------------
+
 def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
    """Convert chat-completions tool schemas to Responses function-tool schemas."""
-    source_tools = tools
-    if not source_tools:
+    if not tools:
        return None

    converted: List[Dict[str, Any]] = []
-    for item in source_tools:
+    for item in tools:
        fn = item.get("function", {}) if isinstance(item, dict) else {}
        name = fn.get("name")
        if not isinstance(name, str) or not name.strip():
@@ -102,6 +197,10 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
    return converted or None


+# ---------------------------------------------------------------------------
+# Message format conversion
+# ---------------------------------------------------------------------------
+
 def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert internal chat-style messages to Responses input items."""
    items: List[Dict[str, Any]] = []
@@ -116,7 +215,14 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di

        if role in {"user", "assistant"}:
            content = msg.get("content", "")
-            content_text = str(content) if content is not None else ""
+            if isinstance(content, list):
+                content_parts = _chat_content_to_responses_parts(content)
+                content_text = "".join(
+                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
+                )
+            else:
+                content_parts = []
+                content_text = str(content) if content is not None else ""

            if role == "assistant":
                # Replay encrypted reasoning items from previous turns
@@ -139,7 +245,9 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                                seen_item_ids.add(item_id)
                            has_codex_reasoning = True

-                if content_text.strip():
+                if content_parts:
+                    items.append({"role": "assistant", "content": content_parts})
+                elif content_text.strip():
                    items.append({"role": "assistant", "content": content_text})
                elif has_codex_reasoning:
                    # The Responses API requires a following item after each
@@ -192,7 +300,12 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                        })
                continue

-            items.append({"role": role, "content": content_text})
+            # Non-assistant (user) role: emit multimodal parts when present,
+            # otherwise fall back to the text payload.
+            if content_parts:
+                items.append({"role": role, "content": content_parts})
+            else:
+                items.append({"role": role, "content": content_text})
            continue

        if role == "tool":
@@ -212,6 +325,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
    return items


+# ---------------------------------------------------------------------------
+# Input preflight / validation
+# ---------------------------------------------------------------------------
+
 def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
    if not isinstance(raw_items, list):
        raise ValueError("Codex Responses input must be a list of input items.")
@@ -293,6 +410,46 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
            content = item.get("content", "")
            if content is None:
                content = ""
+            if isinstance(content, list):
+                # Multimodal content from ``_chat_messages_to_responses_input``
+                # is already in Responses format (``input_text`` / ``input_image``).
+                # Validate each part and pass through.
+                validated: List[Dict[str, Any]] = []
+                for part_idx, part in enumerate(content):
+                    if isinstance(part, str):
+                        if part:
+                            validated.append({"type": "input_text", "text": part})
+                        continue
+                    if not isinstance(part, dict):
+                        raise ValueError(
+                            f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
+                        )
+                    ptype = str(part.get("type") or "").strip().lower()
+                    if ptype in {"input_text", "text", "output_text"}:
+                        text = part.get("text", "")
+                        if not isinstance(text, str):
+                            text = str(text or "")
+                        validated.append({"type": "input_text", "text": text})
+                    elif ptype in {"input_image", "image_url"}:
+                        image_ref = part.get("image_url", "")
+                        detail = part.get("detail")
+                        if isinstance(image_ref, dict):
+                            url = image_ref.get("url", "")
+                            detail = image_ref.get("detail", detail)
+                        else:
+                            url = image_ref
+                        if not isinstance(url, str):
+                            url = str(url or "")
+                        image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
+                        if isinstance(detail, str) and detail.strip():
+                            image_part["detail"] = detail.strip()
+                        validated.append(image_part)
+                    else:
+                        raise ValueError(
+                            f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
+                        )
+                normalized.append({"role": role, "content": validated})
+                continue
            if not isinstance(content, str):
                content = str(content)

@@ -449,6 +606,10 @@ def _preflight_codex_api_kwargs(
    return normalized


+# ---------------------------------------------------------------------------
+# Response extraction helpers
+# ---------------------------------------------------------------------------
+
 def _extract_responses_message_text(item: Any) -> str:
    """Extract assistant text from a Responses message output item."""
    content = getattr(item, "content", None)
@@ -483,6 +644,10 @@ def _extract_responses_reasoning_text(item: Any) -> str:
    return ""


+# ---------------------------------------------------------------------------
+# Full response normalization
+# ---------------------------------------------------------------------------
+
 def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    """Normalize a Responses API object to an assistant_message-like object."""
    output = getattr(response, "output", None)
@@ -646,5 +811,3 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    else:
        finish_reason = "stop"
    return assistant_message, finish_reason
-
-
@@ -31,6 +31,7 @@ from agent.model_metadata import (
    get_model_context_length,
    estimate_messages_tokens_rough,
 )
+from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

@@ -550,11 +551,15 @@ class ContextCompressor(ContextEngine):
        Includes tool call arguments and result content (up to
        ``_CONTENT_MAX`` chars per message) so the summarizer can preserve
        specific details like file paths, commands, and outputs.
+
+        All content is redacted before serialization to prevent secrets
+        (API keys, tokens, passwords) from leaking into the summary that
+        gets sent to the auxiliary model and persisted across compactions.
        """
        parts = []
        for msg in turns:
            role = msg.get("role", "unknown")
-            content = msg.get("content") or ""
+            content = redact_sensitive_text(msg.get("content") or "")

            # Tool results: keep enough content for the summarizer
            if role == "tool":
@@ -575,7 +580,7 @@ class ContextCompressor(ContextEngine):
                        if isinstance(tc, dict):
                            fn = tc.get("function", {})
                            name = fn.get("name", "?")
-                            args = fn.get("arguments", "")
+                            args = redact_sensitive_text(fn.get("arguments", ""))
                            # Truncate long arguments but keep enough for context
                            if len(args) > self._TOOL_ARGS_MAX:
                                args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -635,7 +640,11 @@ class ContextCompressor(ContextEngine):
            "only output the structured summary. "
            "Do NOT include any preamble, greeting, or prefix. "
            "Write the summary in the same language the user was using in the "
-            "conversation — do not translate or switch to English."
+            "conversation — do not translate or switch to English. "
+            "NEVER include API keys, tokens, passwords, secrets, credentials, "
+            "or connection strings in the summary — replace any that appear "
+            "with [REDACTED]. Note that the user had credentials present, but "
+            "do not preserve their values."
        )

        # Shared structured template (used by both paths).
@@ -692,7 +701,7 @@ Be specific with file paths, commands, line numbers, and results.]
 [What remains to be done — framed as context, not instructions]

 ## Critical Context
-[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
+[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]

 Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

@@ -732,7 +741,7 @@ Use this exact structure:
            prompt += f"""

 FOCUS TOPIC: "{focus_topic}"
-The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""

        try:
            call_kwargs = {
@@ -755,7 +764,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Handle cases where content is not a string (e.g., dict from llama.cpp)
            if not isinstance(content, str):
                content = str(content) if content else ""
-            summary = content.strip()
+            # Redact the summary output as well — the summarizer LLM may
+            # ignore prompt instructions and echo back secrets verbatim.
+            summary = redact_sensitive_text(content.strip())
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
@@ -796,7 +807,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(messages, summary_budget)  # retry immediately
+                return self._generate_summary(turns_to_summarize)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
@@ -21,6 +21,9 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Any

+from agent.file_safety import get_read_block_error, is_write_denied
+from agent.redact import redact_sensitive_text
+
 ACP_MARKER_BASE_URL = "acp://copilot"
 _DEFAULT_TIMEOUT_SECONDS = 900.0

@@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    }


+def _permission_denied(message_id: Any) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "result": {
+            "outcome": {
+                "outcome": "cancelled",
+            }
+        },
+    }
+
+
 def _format_messages_as_prompt(
    messages: list[dict[str, Any]],
    model: str | None = None,
@@ -386,6 +401,8 @@ class CopilotACPClient:
        stderr_tail: deque[str] = deque(maxlen=40)

        def _stdout_reader() -> None:
+            if proc.stdout is None:
+                return
            for line in proc.stdout:
                try:
                    inbox.put(json.loads(line))
@@ -533,18 +550,13 @@ class CopilotACPClient:
        params = msg.get("params") or {}

        if method == "session/request_permission":
-            response = {
-                "jsonrpc": "2.0",
-                "id": message_id,
-                "result": {
-                    "outcome": {
-                        "outcome": "allow_once",
-                    }
-                },
-            }
+            response = _permission_denied(message_id)
        elif method == "fs/read_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                block_error = get_read_block_error(str(path))
+                if block_error:
+                    raise PermissionError(block_error)
                content = path.read_text() if path.exists() else ""
                line = params.get("line")
                limit = params.get("limit")
@@ -553,6 +565,8 @@ class CopilotACPClient:
                    start = line - 1
                    end = start + limit if isinstance(limit, int) and limit > 0 else None
                    content = "".join(lines[start:end])
+                if content:
+                    content = redact_sensitive_text(content)
                response = {
                    "jsonrpc": "2.0",
                    "id": message_id,
@@ -565,6 +579,10 @@ class CopilotACPClient:
        elif method == "fs/write_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                if is_write_denied(str(path)):
+                    raise PermissionError(
+                        f"Write denied: '{path}' is a protected system/credential file."
+                    )
                path.parent.mkdir(parents=True, exist_ok=True)
                path.write_text(str(params.get("content") or ""))
                response = {
@@ -983,6 +983,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
    active_sources: Set[str] = set()
    auth_store = _load_auth_store()

+    # Shared suppression gate — used at every upsert site so
+    # `hermes auth remove <provider> <N>` is stable across all source types.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_suppressed
+    except ImportError:
+        def _is_suppressed(_p, _s):  # type: ignore[misc]
+            return False
+
    if provider == "anthropic":
        # Only auto-discover external credentials (Claude Code, Hermes PKCE)
        # when the user has explicitly configured anthropic as their provider.
@@ -1002,13 +1010,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            ("claude_code", read_claude_code_credentials()),
        ):
            if creds and creds.get("accessToken"):
-                # Check if user explicitly removed this source
-                try:
-                    from hermes_cli.auth import is_source_suppressed
-                    if is_source_suppressed(provider, source_name):
-                        continue
-                except ImportError:
-                    pass
+                if _is_suppressed(provider, source_name):
+                    continue
                active_sources.add(source_name)
                changed |= _upsert_entry(
                    entries,
@@ -1026,7 +1029,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup

    elif provider == "nous":
        state = _load_provider_state(auth_store, "nous")
-        if state:
+        if state and not _is_suppressed(provider, "device_code"):
            active_sources.add("device_code")
            # Prefer a user-supplied label embedded in the singleton state
            # (set by persist_nous_credentials(label=...) when the user ran
@@ -1067,20 +1070,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token, source = resolve_copilot_token()
            if token:
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
-                active_sources.add(source_name)
-                pconfig = PROVIDER_REGISTRY.get(provider)
-                changed |= _upsert_entry(
-                    entries,
-                    provider,
-                    source_name,
-                    {
-                        "source": source_name,
-                        "auth_type": AUTH_TYPE_API_KEY,
-                        "access_token": token,
-                        "base_url": pconfig.inference_base_url if pconfig else "",
-                        "label": source,
-                    },
-                )
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    pconfig = PROVIDER_REGISTRY.get(provider)
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_API_KEY,
+                            "access_token": token,
+                            "base_url": pconfig.inference_base_url if pconfig else "",
+                            "label": source,
+                        },
+                    )
        except Exception as exc:
            logger.debug("Copilot token seed failed: %s", exc)

@@ -1096,20 +1100,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token = creds.get("api_key", "")
            if token:
                source_name = creds.get("source", "qwen-cli")
-                active_sources.add(source_name)
-                changed |= _upsert_entry(
-                    entries,
-                    provider,
-                    source_name,
-                    {
-                        "source": source_name,
-                        "auth_type": AUTH_TYPE_OAUTH,
-                        "access_token": token,
-                        "expires_at_ms": creds.get("expires_at_ms"),
-                        "base_url": creds.get("base_url", ""),
-                        "label": creds.get("auth_file", source_name),
-                    },
-                )
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_OAUTH,
+                            "access_token": token,
+                            "expires_at_ms": creds.get("expires_at_ms"),
+                            "base_url": creds.get("base_url", ""),
+                            "label": creds.get("auth_file", source_name),
+                        },
+                    )
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

@@ -1118,13 +1123,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # the device_code source as suppressed so it won't be re-seeded from
        # the Hermes auth store.  Without this gate the removal is instantly
        # undone on the next load_pool() call.
-        codex_suppressed = False
-        try:
-            from hermes_cli.auth import is_source_suppressed
-            codex_suppressed = is_source_suppressed(provider, "device_code")
-        except ImportError:
-            pass
-        if codex_suppressed:
+        if _is_suppressed(provider, "device_code"):
            return changed, active_sources

        state = _load_provider_state(auth_store, "openai-codex")
@@ -1158,10 +1157,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
    changed = False
    active_sources: Set[str] = set()
+    # Honour user suppression — `hermes auth remove <provider> <N>` for an
+    # env-seeded credential marks the env:<VAR> source as suppressed so it
+    # won't be re-seeded from the user's shell environment or ~/.hermes/.env.
+    # Without this gate the removal is silently undone on the next
+    # load_pool() call whenever the var is still exported by the shell.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
+    except ImportError:
+        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
+            return False
    if provider == "openrouter":
        token = os.getenv("OPENROUTER_API_KEY", "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
+            if _is_source_suppressed(provider, source):
+                return changed, active_sources
            active_sources.add(source)
            changed |= _upsert_entry(
                entries,
@@ -1198,6 +1209,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        if not token:
            continue
        source = f"env:{env_var}"
+        if _is_source_suppressed(provider, source):
+            continue
        active_sources.add(source)
        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
@@ -1242,6 +1255,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
    changed = False
    active_sources: Set[str] = set()

+    # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_suppressed
+    except ImportError:
+        def _is_suppressed(_p, _s):  # type: ignore[misc]
+            return False
+
    # Seed from the custom_providers config entry's api_key field
    cp_config = _get_custom_provider_config(pool_key)
    if cp_config:
@@ -1250,19 +1270,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
        name = str(cp_config.get("name") or "").strip()
        if api_key:
            source = f"config:{name}"
-            active_sources.add(source)
-            changed |= _upsert_entry(
-                entries,
-                pool_key,
-                source,
-                {
-                    "source": source,
-                    "auth_type": AUTH_TYPE_API_KEY,
-                    "access_token": api_key,
-                    "base_url": base_url,
-                    "label": name or source,
-                },
-            )
+            if not _is_suppressed(pool_key, source):
+                active_sources.add(source)
+                changed |= _upsert_entry(
+                    entries,
+                    pool_key,
+                    source,
+                    {
+                        "source": source,
+                        "auth_type": AUTH_TYPE_API_KEY,
+                        "access_token": api_key,
+                        "base_url": base_url,
+                        "label": name or source,
+                    },
+                )

    # Seed from model.api_key if model.provider=='custom' and model.base_url matches
    try:
@@ -1282,19 +1303,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
                matched_key = get_custom_provider_pool_key(model_base_url)
                if matched_key == pool_key:
                    source = "model_config"
-                    active_sources.add(source)
-                    changed |= _upsert_entry(
-                        entries,
-                        pool_key,
-                        source,
-                        {
-                            "source": source,
-                            "auth_type": AUTH_TYPE_API_KEY,
-                            "access_token": model_api_key,
-                            "base_url": model_base_url,
-                            "label": "model_config",
-                        },
-                    )
+                    if not _is_suppressed(pool_key, source):
+                        active_sources.add(source)
+                        changed |= _upsert_entry(
+                            entries,
+                            pool_key,
+                            source,
+                            {
+                                "source": source,
+                                "auth_type": AUTH_TYPE_API_KEY,
+                                "access_token": model_api_key,
+                                "base_url": model_base_url,
+                                "label": "model_config",
+                            },
+                        )
    except Exception:
        pass

@@ -0,0 +1,401 @@
+"""Unified removal contract for every credential source Hermes reads from.
+
+Hermes seeds its credential pool from many places:
+
+    env:<VAR>     — os.environ / ~/.hermes/.env
+    claude_code   — ~/.claude/.credentials.json
+    hermes_pkce   — ~/.hermes/.anthropic_oauth.json
+    device_code   — auth.json providers.<provider> (nous, openai-codex, ...)
+    qwen-cli      — ~/.qwen/oauth_creds.json
+    gh_cli        — gh auth token
+    config:<name> — custom_providers config entry
+    model_config  — model.api_key when model.provider == "custom"
+    manual        — user ran `hermes auth add`
+
+Each source has its own reader inside ``agent.credential_pool._seed_from_*``
+(which keep their existing shape — we haven't restructured them).  What we
+unify here is **removal**:
+
+    ``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
+
+Before this module, every source had an ad-hoc removal branch in
+``auth_remove_command``, and several sources had no branch at all — so
+``auth remove`` silently reverted on the next ``load_pool()`` call for
+qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
+custom-config sources.
+
+Now every source registers a ``RemovalStep`` that does exactly three things
+in the same shape:
+
+    1. Clean up whatever externally-readable state the source reads from
+       (.env line, auth.json block, OAuth file, etc.)
+    2. Suppress the ``(provider, source_id)`` in auth.json so the
+       corresponding ``_seed_from_*`` branch skips the upsert on re-load
+    3. Return ``RemovalResult`` describing what was cleaned and any
+       diagnostic hints the user should see (shell-exported env vars,
+       external credential files we deliberately don't delete, etc.)
+
+Adding a new credential source is:
+    - wire up a reader branch in ``_seed_from_*`` (existing pattern)
+    - gate that reader behind ``is_source_suppressed(provider, source_id)``
+    - register a ``RemovalStep`` here
+
+No more per-source if/elif chain in ``auth_remove_command``.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Callable, List, Optional
+
+
+@dataclass
+class RemovalResult:
+    """Outcome of removing a credential source.
+
+    Attributes:
+        cleaned: Short strings describing external state that was actually
+            mutated (``"Cleared XAI_API_KEY from .env"``,
+            ``"Cleared openai-codex OAuth tokens from auth store"``).
+            Printed as plain lines to the user.
+        hints: Diagnostic lines ABOUT state the user may need to clean up
+            themselves or is deliberately left intact (shell-exported env
+            var, Claude Code credential file we don't delete, etc.).
+            Printed as plain lines to the user.  Always non-destructive.
+        suppress: Whether to call ``suppress_credential_source`` after
+            cleanup so future ``load_pool`` calls skip this source.
+            Default True — almost every source needs this to stay sticky.
+            The only legitimate False is ``manual`` entries, which aren't
+            seeded from anywhere external.
+    """
+
+    cleaned: List[str] = field(default_factory=list)
+    hints: List[str] = field(default_factory=list)
+    suppress: bool = True
+
+
+@dataclass
+class RemovalStep:
+    """How to remove one specific credential source cleanly.
+
+    Attributes:
+        provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
+            Special value ``"*"`` means "matches any provider" — used for
+            sources like ``manual`` that aren't provider-specific.
+        source_id: Source identifier as it appears in
+            ``PooledCredential.source``.  May be a literal (``"claude_code"``)
+            or a prefix pattern matched via ``match_fn``.
+        match_fn: Optional predicate overriding literal ``source_id``
+            matching.  Gets the removed entry's source string.  Used for
+            ``env:*`` (any env-seeded key), ``config:*`` (any custom
+            pool), and ``manual:*`` (any manual-source variant).
+        remove_fn: ``(provider, removed_entry) -> RemovalResult``.  Does the
+            actual cleanup and returns what happened for the user.
+        description: One-line human-readable description for docs / tests.
+    """
+
+    provider: str
+    source_id: str
+    remove_fn: Callable[..., RemovalResult]
+    match_fn: Optional[Callable[[str], bool]] = None
+    description: str = ""
+
+    def matches(self, provider: str, source: str) -> bool:
+        if self.provider != "*" and self.provider != provider:
+            return False
+        if self.match_fn is not None:
+            return self.match_fn(source)
+        return source == self.source_id
+
+
+_REGISTRY: List[RemovalStep] = []
+
+
+def register(step: RemovalStep) -> RemovalStep:
+    _REGISTRY.append(step)
+    return step
+
+
+def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
+    """Return the first matching RemovalStep, or None if unregistered.
+
+    Unregistered sources fall through to the default remove path in
+    ``auth_remove_command``: the pool entry is already gone (that happens
+    before dispatch), no external cleanup, no suppression.  This is the
+    correct behaviour for ``manual`` entries — they were only ever stored
+    in the pool, nothing external to clean up.
+    """
+    for step in _REGISTRY:
+        if step.matches(provider, source):
+            return step
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Individual RemovalStep implementations — one per source.
+# ---------------------------------------------------------------------------
+# Each remove_fn is intentionally small and single-purpose.  Adding a new
+# credential source means adding ONE entry here — no other changes to
+# auth_remove_command.
+
+
+def _remove_env_source(provider: str, removed) -> RemovalResult:
+    """env:<VAR> — the most common case.
+
+    Handles three user situations:
+      1. Var lives only in ~/.hermes/.env  → clear it
+      2. Var lives only in the user's shell (shell profile, systemd
+         EnvironmentFile, launchd plist) → hint them where to unset it
+      3. Var lives in both → clear from .env, hint about shell
+    """
+    from hermes_cli.config import get_env_path, remove_env_value
+
+    result = RemovalResult()
+    env_var = removed.source[len("env:"):]
+    if not env_var:
+        return result
+
+    # Detect shell vs .env BEFORE remove_env_value pops os.environ.
+    env_in_process = bool(os.getenv(env_var))
+    env_in_dotenv = False
+    try:
+        env_path = get_env_path()
+        if env_path.exists():
+            env_in_dotenv = any(
+                line.strip().startswith(f"{env_var}=")
+                for line in env_path.read_text(errors="replace").splitlines()
+            )
+    except OSError:
+        pass
+    shell_exported = env_in_process and not env_in_dotenv
+
+    cleared = remove_env_value(env_var)
+    if cleared:
+        result.cleaned.append(f"Cleared {env_var} from .env")
+
+    if shell_exported:
+        result.hints.extend([
+            f"Note: {env_var} is still set in your shell environment "
+            f"(not in ~/.hermes/.env).",
+            "  Unset it there (shell profile, systemd EnvironmentFile, "
+            "launchd plist, etc.) or it will keep being visible to Hermes.",
+            f"  The pool entry is now suppressed — Hermes will ignore "
+            f"{env_var} until you run `hermes auth add {provider}`.",
+        ])
+    else:
+        result.hints.append(
+            f"Suppressed env:{env_var} — it will not be re-seeded even "
+            f"if the variable is re-exported later."
+        )
+    return result
+
+
+def _remove_claude_code(provider: str, removed) -> RemovalResult:
+    """~/.claude/.credentials.json is owned by Claude Code itself.
+
+    We don't delete it — the user's Claude Code install still needs to
+    work.  We just suppress it so Hermes stops reading it.
+    """
+    return RemovalResult(hints=[
+        "Suppressed claude_code credential — it will not be re-seeded.",
+        "Note: Claude Code credentials still live in ~/.claude/.credentials.json",
+        "Run `hermes auth add anthropic` to re-enable if needed.",
+    ])
+
+
+def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
+    """~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
+    from hermes_constants import get_hermes_home
+
+    result = RemovalResult()
+    oauth_file = get_hermes_home() / ".anthropic_oauth.json"
+    if oauth_file.exists():
+        try:
+            oauth_file.unlink()
+            result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
+        except OSError as exc:
+            result.hints.append(f"Could not delete {oauth_file}: {exc}")
+    return result
+
+
+def _clear_auth_store_provider(provider: str) -> bool:
+    """Delete auth_store.providers[provider].  Returns True if deleted."""
+    from hermes_cli.auth import (
+        _auth_store_lock,
+        _load_auth_store,
+        _save_auth_store,
+    )
+
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        providers_dict = auth_store.get("providers")
+        if isinstance(providers_dict, dict) and provider in providers_dict:
+            del providers_dict[provider]
+            _save_auth_store(auth_store)
+            return True
+    return False
+
+
+def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
+    """Nous OAuth lives in auth.json providers.nous — clear it and suppress.
+
+    We suppress in addition to clearing because nothing else stops the
+    user's next `hermes login` run from writing providers.nous again
+    before they decide to.  Suppression forces them to go through
+    `hermes auth add nous` to re-engage, which is the documented re-add
+    path and clears the suppression atomically.
+    """
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    return result
+
+
+def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
+    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
+
+    refresh_codex_oauth_pure() writes both every time, so clearing only
+    the Hermes auth store is not enough — _seed_from_singletons() would
+    re-import from ~/.codex/auth.json on the next load_pool() call and
+    the removal would be instantly undone.  We suppress instead of
+    deleting Codex CLI's file, so the Codex CLI itself keeps working.
+
+    The canonical source name in ``_seed_from_singletons`` is
+    ``"device_code"`` (no prefix).  Entries may show up in the pool as
+    either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
+    via ``hermes auth add openai-codex``), but in both cases the re-seed
+    gate lives at the ``"device_code"`` suppression key.  We suppress
+    that canonical key here; the central dispatcher also suppresses
+    ``removed.source`` which is fine — belt-and-suspenders, idempotent.
+    """
+    from hermes_cli.auth import suppress_credential_source
+
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    # Suppress the canonical re-seed source, not just whatever source the
+    # removed entry had.  Otherwise `manual:device_code` removals wouldn't
+    # block the `device_code` re-seed path.
+    suppress_credential_source(provider, "device_code")
+    result.hints.extend([
+        "Suppressed openai-codex device_code source — it will not be re-seeded.",
+        "Note: Codex CLI credentials still live in ~/.codex/auth.json",
+        "Run `hermes auth add openai-codex` to re-enable if needed.",
+    ])
+    return result
+
+
+def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
+    """~/.qwen/oauth_creds.json is owned by the Qwen CLI.
+
+    Same pattern as claude_code — suppress, don't delete.  The user's
+    Qwen CLI install still reads from that file.
+    """
+    return RemovalResult(hints=[
+        "Suppressed qwen-cli credential — it will not be re-seeded.",
+        "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
+        "Run `hermes auth add qwen-oauth` to re-enable if needed.",
+    ])
+
+
+def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
+    """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
+
+    Copilot is special: the same token can be seeded as multiple source
+    entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
+    ``_seed_from_env``), so removing one entry without suppressing the
+    others lets the duplicates resurrect.  We suppress ALL known copilot
+    sources here so removal is stable regardless of which entry the
+    user clicked.
+
+    We don't touch the user's gh CLI or shell state — just suppress so
+    Hermes stops picking the token up.
+    """
+    # Suppress ALL copilot source variants up-front so no path resurrects
+    # the pool entry.  The central dispatcher in auth_remove_command will
+    # ALSO suppress removed.source, but it's idempotent so double-calling
+    # is harmless.
+    from hermes_cli.auth import suppress_credential_source
+    suppress_credential_source(provider, "gh_cli")
+    for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
+        suppress_credential_source(provider, f"env:{env_var}")
+
+    return RemovalResult(hints=[
+        "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
+        "Note: Your gh CLI / shell environment is unchanged.",
+        "Run `hermes auth add copilot` to re-enable if needed.",
+    ])
+
+
+def _remove_custom_config(provider: str, removed) -> RemovalResult:
+    """Custom provider pools are seeded from custom_providers config or
+    model.api_key.  Both are in config.yaml — modifying that from here
+    is more invasive than suppression.  We suppress; the user can edit
+    config.yaml if they want to remove the key from disk entirely.
+    """
+    source_label = removed.source
+    return RemovalResult(hints=[
+        f"Suppressed {source_label} — it will not be re-seeded.",
+        "Note: The underlying value in config.yaml is unchanged.  Edit it "
+        "directly if you want to remove the credential from disk.",
+    ])
+
+
+def _register_all_sources() -> None:
+    """Called once on module import.
+
+    ORDER MATTERS — ``find_removal_step`` returns the first match.  Put
+    provider-specific steps before the generic ``env:*`` step so that e.g.
+    copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
+    doesn't touch the user's shell), not the generic env-var removal
+    (which would try to clear .env).
+    """
+    register(RemovalStep(
+        provider="copilot", source_id="gh_cli",
+        match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
+        remove_fn=_remove_copilot_gh,
+        description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
+    ))
+    register(RemovalStep(
+        provider="*", source_id="env:",
+        match_fn=lambda src: src.startswith("env:"),
+        remove_fn=_remove_env_source,
+        description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
+    ))
+    register(RemovalStep(
+        provider="anthropic", source_id="claude_code",
+        remove_fn=_remove_claude_code,
+        description="~/.claude/.credentials.json",
+    ))
+    register(RemovalStep(
+        provider="anthropic", source_id="hermes_pkce",
+        remove_fn=_remove_hermes_pkce,
+        description="~/.hermes/.anthropic_oauth.json",
+    ))
+    register(RemovalStep(
+        provider="nous", source_id="device_code",
+        remove_fn=_remove_nous_device_code,
+        description="auth.json providers.nous",
+    ))
+    register(RemovalStep(
+        provider="openai-codex", source_id="device_code",
+        match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
+        remove_fn=_remove_codex_device_code,
+        description="auth.json providers.openai-codex + ~/.codex/auth.json",
+    ))
+    register(RemovalStep(
+        provider="qwen-oauth", source_id="qwen-cli",
+        remove_fn=_remove_qwen_cli,
+        description="~/.qwen/oauth_creds.json",
+    ))
+    register(RemovalStep(
+        provider="*", source_id="config:",
+        match_fn=lambda src: src.startswith("config:") or src == "model_config",
+        remove_fn=_remove_custom_config,
+        description="Custom provider config.yaml api_key field",
+    ))
+
+
+_register_all_sources()
@@ -0,0 +1,111 @@
+"""Shared file safety rules used by both tools and ACP shims."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Optional
+
+
+def _hermes_home_path() -> Path:
+    """Resolve the active HERMES_HOME (profile-aware) without circular imports."""
+    try:
+        from hermes_constants import get_hermes_home  # local import to avoid cycles
+        return get_hermes_home()
+    except Exception:
+        return Path(os.path.expanduser("~/.hermes"))
+
+
+def build_write_denied_paths(home: str) -> set[str]:
+    """Return exact sensitive paths that must never be written."""
+    hermes_home = _hermes_home_path()
+    return {
+        os.path.realpath(p)
+        for p in [
+            os.path.join(home, ".ssh", "authorized_keys"),
+            os.path.join(home, ".ssh", "id_rsa"),
+            os.path.join(home, ".ssh", "id_ed25519"),
+            os.path.join(home, ".ssh", "config"),
+            str(hermes_home / ".env"),
+            os.path.join(home, ".bashrc"),
+            os.path.join(home, ".zshrc"),
+            os.path.join(home, ".profile"),
+            os.path.join(home, ".bash_profile"),
+            os.path.join(home, ".zprofile"),
+            os.path.join(home, ".netrc"),
+            os.path.join(home, ".pgpass"),
+            os.path.join(home, ".npmrc"),
+            os.path.join(home, ".pypirc"),
+            "/etc/sudoers",
+            "/etc/passwd",
+            "/etc/shadow",
+        ]
+    }
+
+
+def build_write_denied_prefixes(home: str) -> list[str]:
+    """Return sensitive directory prefixes that must never be written."""
+    return [
+        os.path.realpath(p) + os.sep
+        for p in [
+            os.path.join(home, ".ssh"),
+            os.path.join(home, ".aws"),
+            os.path.join(home, ".gnupg"),
+            os.path.join(home, ".kube"),
+            "/etc/sudoers.d",
+            "/etc/systemd",
+            os.path.join(home, ".docker"),
+            os.path.join(home, ".azure"),
+            os.path.join(home, ".config", "gh"),
+        ]
+    ]
+
+
+def get_safe_write_root() -> Optional[str]:
+    """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
+    root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
+    if not root:
+        return None
+    try:
+        return os.path.realpath(os.path.expanduser(root))
+    except Exception:
+        return None
+
+
+def is_write_denied(path: str) -> bool:
+    """Return True if path is blocked by the write denylist or safe root."""
+    home = os.path.realpath(os.path.expanduser("~"))
+    resolved = os.path.realpath(os.path.expanduser(str(path)))
+
+    if resolved in build_write_denied_paths(home):
+        return True
+    for prefix in build_write_denied_prefixes(home):
+        if resolved.startswith(prefix):
+            return True
+
+    safe_root = get_safe_write_root()
+    if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
+        return True
+
+    return False
+
+
+def get_read_block_error(path: str) -> Optional[str]:
+    """Return an error message when a read targets internal Hermes cache files."""
+    resolved = Path(path).expanduser().resolve()
+    hermes_home = _hermes_home_path().resolve()
+    blocked_dirs = [
+        hermes_home / "skills" / ".hub" / "index-cache",
+        hermes_home / "skills" / ".hub",
+    ]
+    for blocked in blocked_dirs:
+        try:
+            resolved.relative_to(blocked)
+        except ValueError:
+            continue
+        return (
+            f"Access denied: {path} is an internal Hermes cache file "
+            "and cannot be read directly to prevent prompt injection. "
+            "Use the skills_list or skill_view tools instead."
+        )
+    return None
@@ -799,7 +799,8 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
+    _raw_details = err_obj.get("details")
+    err_details_list = _raw_details if isinstance(_raw_details, list) else []

    # Extract google.rpc.ErrorInfo reason + metadata.  There may be more
    # than one ErrorInfo (rare), so we pick the first one with a reason.
@@ -613,7 +613,8 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
+    _raw_details = err_obj.get("details")
+    details_list = _raw_details if isinstance(_raw_details, list) else []

    reason = ""
    retry_after: Optional[float] = None
@@ -0,0 +1,242 @@
+"""
+Image Generation Provider ABC
+=============================
+
+Defines the pluggable-backend interface for image generation. Providers register
+instances via ``PluginContext.register_image_gen_provider()``; the active one
+(selected via ``image_gen.provider`` in ``config.yaml``) services every
+``image_generate`` tool call.
+
+Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
+as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
+via ``plugins.enabled``).
+
+Response shape
+--------------
+All providers return a dict that :func:`success_response` / :func:`error_response`
+produce. The tool wrapper JSON-serializes it. Keys:
+
+    success        bool
+    image          str | None       URL or absolute file path
+    model          str              provider-specific model identifier
+    prompt         str              echoed prompt
+    aspect_ratio   str              "landscape" | "square" | "portrait"
+    provider       str              provider name (for diagnostics)
+    error          str              only when success=False
+    error_type     str              only when success=False
+"""
+
+from __future__ import annotations
+
+import abc
+import base64
+import datetime
+import logging
+import uuid
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
+DEFAULT_ASPECT_RATIO = "landscape"
+
+
+# ---------------------------------------------------------------------------
+# ABC
+# ---------------------------------------------------------------------------
+
+
+class ImageGenProvider(abc.ABC):
+    """Abstract base class for an image generation backend.
+
+    Subclasses must implement :meth:`generate`. Everything else has sane
+    defaults — override only what your provider needs.
+    """
+
+    @property
+    @abc.abstractmethod
+    def name(self) -> str:
+        """Stable short identifier used in ``image_gen.provider`` config.
+
+        Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
+        """
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
+        return self.name.title()
+
+    def is_available(self) -> bool:
+        """Return True when this provider can service calls.
+
+        Typically checks for a required API key. Default: True
+        (providers with no external dependencies are always available).
+        """
+        return True
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        """Return catalog entries for ``hermes tools`` model picker.
+
+        Each entry::
+
+            {
+                "id": "gpt-image-1.5",               # required
+                "display": "GPT Image 1.5",          # optional; defaults to id
+                "speed": "~10s",                     # optional
+                "strengths": "...",                  # optional
+                "price": "$...",                     # optional
+            }
+
+        Default: empty list (provider has no user-selectable models).
+        """
+        return []
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return provider metadata for the ``hermes tools`` picker.
+
+        Used by ``tools_config.py`` to inject this provider as a row in
+        the Image Generation provider list. Shape::
+
+            {
+                "name": "OpenAI",                     # picker label
+                "badge": "paid",                      # optional short tag
+                "tag": "One-line description...",     # optional subtitle
+                "env_vars": [                         # keys to prompt for
+                    {"key": "OPENAI_API_KEY",
+                     "prompt": "OpenAI API key",
+                     "url": "https://platform.openai.com/api-keys"},
+                ],
+            }
+
+        Default: minimal entry derived from ``display_name``. Override to
+        expose API key prompts and custom badges.
+        """
+        return {
+            "name": self.display_name,
+            "badge": "",
+            "tag": "",
+            "env_vars": [],
+        }
+
+    def default_model(self) -> Optional[str]:
+        """Return the default model id, or None if not applicable."""
+        models = self.list_models()
+        if models:
+            return models[0].get("id")
+        return None
+
+    @abc.abstractmethod
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Generate an image.
+
+        Implementations should return the dict from :func:`success_response`
+        or :func:`error_response`. ``kwargs`` may contain forward-compat
+        parameters future versions of the schema will expose — implementations
+        should ignore unknown keys.
+        """
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def resolve_aspect_ratio(value: Optional[str]) -> str:
+    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
+
+    Invalid values are coerced rather than rejected so the tool surface is
+    forgiving of agent mistakes.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_ASPECT_RATIO
+    v = value.strip().lower()
+    if v in VALID_ASPECT_RATIOS:
+        return v
+    return DEFAULT_ASPECT_RATIO
+
+
+def _images_cache_dir() -> Path:
+    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
+    from hermes_constants import get_hermes_home
+
+    path = get_hermes_home() / "cache" / "images"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def save_b64_image(
+    b64_data: str,
+    *,
+    prefix: str = "image",
+    extension: str = "png",
+) -> Path:
+    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
+
+    Returns the absolute :class:`Path` to the saved file.
+
+    Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
+    """
+    raw = base64.b64decode(b64_data)
+    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    short = uuid.uuid4().hex[:8]
+    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
+    path.write_bytes(raw)
+    return path
+
+
+def success_response(
+    *,
+    image: str,
+    model: str,
+    prompt: str,
+    aspect_ratio: str,
+    provider: str,
+    extra: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Build a uniform success response dict.
+
+    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
+    providers like OpenAI). Callers that need to pass through additional
+    backend-specific fields can supply ``extra``.
+    """
+    payload: Dict[str, Any] = {
+        "success": True,
+        "image": image,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
+    if extra:
+        for k, v in extra.items():
+            payload.setdefault(k, v)
+    return payload
+
+
+def error_response(
+    *,
+    error: str,
+    error_type: str = "provider_error",
+    provider: str = "",
+    model: str = "",
+    prompt: str = "",
+    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+) -> Dict[str, Any]:
+    """Build a uniform error response dict."""
+    return {
+        "success": False,
+        "image": None,
+        "error": error,
+        "error_type": error_type,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
@@ -0,0 +1,120 @@
+"""
+Image Generation Provider Registry
+==================================
+
+Central map of registered providers. Populated by plugins at import-time via
+``PluginContext.register_image_gen_provider()``; consumed by the
+``image_generate`` tool to dispatch each call to the active backend.
+
+Active selection
+----------------
+The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
+If unset, :func:`get_active_provider` applies fallback logic:
+
+1. If exactly one provider is registered, use it.
+2. Otherwise if a provider named ``fal`` is registered, use it (legacy
+   default — matches pre-plugin behavior).
+3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
+   the user at ``hermes tools``).
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from typing import Dict, List, Optional
+
+from agent.image_gen_provider import ImageGenProvider
+
+logger = logging.getLogger(__name__)
+
+
+_providers: Dict[str, ImageGenProvider] = {}
+_lock = threading.Lock()
+
+
+def register_provider(provider: ImageGenProvider) -> None:
+    """Register an image generation provider.
+
+    Re-registration (same ``name``) overwrites the previous entry and logs
+    a debug message — this makes hot-reload scenarios (tests, dev loops)
+    behave predictably.
+    """
+    if not isinstance(provider, ImageGenProvider):
+        raise TypeError(
+            f"register_provider() expects an ImageGenProvider instance, "
+            f"got {type(provider).__name__}"
+        )
+    name = provider.name
+    if not isinstance(name, str) or not name.strip():
+        raise ValueError("Image gen provider .name must be a non-empty string")
+    with _lock:
+        existing = _providers.get(name)
+        _providers[name] = provider
+    if existing is not None:
+        logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
+    else:
+        logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
+
+
+def list_providers() -> List[ImageGenProvider]:
+    """Return all registered providers, sorted by name."""
+    with _lock:
+        items = list(_providers.values())
+    return sorted(items, key=lambda p: p.name)
+
+
+def get_provider(name: str) -> Optional[ImageGenProvider]:
+    """Return the provider registered under *name*, or None."""
+    if not isinstance(name, str):
+        return None
+    with _lock:
+        return _providers.get(name.strip())
+
+
+def get_active_provider() -> Optional[ImageGenProvider]:
+    """Resolve the currently-active provider.
+
+    Reads ``image_gen.provider`` from config.yaml; falls back per the
+    module docstring.
+    """
+    configured: Optional[str] = None
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        if isinstance(section, dict):
+            raw = section.get("provider")
+            if isinstance(raw, str) and raw.strip():
+                configured = raw.strip()
+    except Exception as exc:
+        logger.debug("Could not read image_gen.provider from config: %s", exc)
+
+    with _lock:
+        snapshot = dict(_providers)
+
+    if configured:
+        provider = snapshot.get(configured)
+        if provider is not None:
+            return provider
+        logger.debug(
+            "image_gen.provider='%s' configured but not registered; falling back",
+            configured,
+        )
+
+    # Fallback: single-provider case
+    if len(snapshot) == 1:
+        return next(iter(snapshot.values()))
+
+    # Fallback: prefer legacy FAL for backward compat
+    if "fal" in snapshot:
+        return snapshot["fal"]
+
+    return None
+
+
+def _reset_for_tests() -> None:
+    """Clear the registry. **Test-only.**"""
+    with _lock:
+        _providers.clear()
@@ -124,6 +124,7 @@ class InsightsEngine:
        # Gather raw data
        sessions = self._get_sessions(cutoff, source)
        tool_usage = self._get_tool_usage(cutoff, source)
+        skill_usage = self._get_skill_usage(cutoff, source)
        message_stats = self._get_message_stats(cutoff, source)

        if not sessions:
@@ -135,6 +136,15 @@ class InsightsEngine:
                "models": [],
                "platforms": [],
                "tools": [],
+                "skills": {
+                    "summary": {
+                        "total_skill_loads": 0,
+                        "total_skill_edits": 0,
+                        "total_skill_actions": 0,
+                        "distinct_skills_used": 0,
+                    },
+                    "top_skills": [],
+                },
                "activity": {},
                "top_sessions": [],
            }
@@ -144,6 +154,7 @@ class InsightsEngine:
        models = self._compute_model_breakdown(sessions)
        platforms = self._compute_platform_breakdown(sessions)
        tools = self._compute_tool_breakdown(tool_usage)
+        skills = self._compute_skill_breakdown(skill_usage)
        activity = self._compute_activity_patterns(sessions)
        top_sessions = self._compute_top_sessions(sessions)

@@ -156,6 +167,7 @@ class InsightsEngine:
            "models": models,
            "platforms": platforms,
            "tools": tools,
+            "skills": skills,
            "activity": activity,
            "top_sessions": top_sessions,
        }
@@ -284,6 +296,82 @@ class InsightsEngine:
            for name, count in tool_counts.most_common()
        ]

+    def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
+        """Extract per-skill usage from assistant tool calls."""
+        skill_counts: Dict[str, Dict[str, Any]] = {}
+
+        if source:
+            cursor = self._conn.execute(
+                """SELECT m.tool_calls, m.timestamp
+                   FROM messages m
+                   JOIN sessions s ON s.id = m.session_id
+                   WHERE s.started_at >= ? AND s.source = ?
+                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
+                (cutoff, source),
+            )
+        else:
+            cursor = self._conn.execute(
+                """SELECT m.tool_calls, m.timestamp
+                   FROM messages m
+                   JOIN sessions s ON s.id = m.session_id
+                   WHERE s.started_at >= ?
+                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
+                (cutoff,),
+            )
+
+        for row in cursor.fetchall():
+            try:
+                calls = row["tool_calls"]
+                if isinstance(calls, str):
+                    calls = json.loads(calls)
+                if not isinstance(calls, list):
+                    continue
+            except (json.JSONDecodeError, TypeError):
+                continue
+
+            timestamp = row["timestamp"]
+            for call in calls:
+                if not isinstance(call, dict):
+                    continue
+                func = call.get("function", {})
+                tool_name = func.get("name")
+                if tool_name not in {"skill_view", "skill_manage"}:
+                    continue
+
+                args = func.get("arguments")
+                if isinstance(args, str):
+                    try:
+                        args = json.loads(args)
+                    except (json.JSONDecodeError, TypeError):
+                        continue
+                if not isinstance(args, dict):
+                    continue
+
+                skill_name = args.get("name")
+                if not isinstance(skill_name, str) or not skill_name.strip():
+                    continue
+
+                entry = skill_counts.setdefault(
+                    skill_name,
+                    {
+                        "skill": skill_name,
+                        "view_count": 0,
+                        "manage_count": 0,
+                        "last_used_at": None,
+                    },
+                )
+                if tool_name == "skill_view":
+                    entry["view_count"] += 1
+                else:
+                    entry["manage_count"] += 1
+
+                if timestamp is not None and (
+                    entry["last_used_at"] is None or timestamp > entry["last_used_at"]
+                ):
+                    entry["last_used_at"] = timestamp
+
+        return list(skill_counts.values())
+
    def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
        """Get aggregate message statistics."""
        if source:
@@ -475,6 +563,46 @@ class InsightsEngine:
            })
        return result

+    def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
+        """Process per-skill usage into summary + ranked list."""
+        total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
+        total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
+        total_skill_actions = total_skill_loads + total_skill_edits
+
+        top_skills = []
+        for skill in skill_usage:
+            total_count = skill["view_count"] + skill["manage_count"]
+            percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
+            top_skills.append({
+                "skill": skill["skill"],
+                "view_count": skill["view_count"],
+                "manage_count": skill["manage_count"],
+                "total_count": total_count,
+                "percentage": percentage,
+                "last_used_at": skill.get("last_used_at"),
+            })
+
+        top_skills.sort(
+            key=lambda s: (
+                s["total_count"],
+                s["view_count"],
+                s["manage_count"],
+                s["last_used_at"] or 0,
+                s["skill"],
+            ),
+            reverse=True,
+        )
+
+        return {
+            "summary": {
+                "total_skill_loads": total_skill_loads,
+                "total_skill_edits": total_skill_edits,
+                "total_skill_actions": total_skill_actions,
+                "distinct_skills_used": len(skill_usage),
+            },
+            "top_skills": top_skills,
+        }
+
    def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
        """Analyze activity patterns by day of week and hour."""
        day_counts = Counter()  # 0=Monday ... 6=Sunday
@@ -670,6 +798,28 @@ class InsightsEngine:
                lines.append(f"  ... and {len(report['tools']) - 15} more tools")
            lines.append("")

+        # Skill usage
+        skills = report.get("skills", {})
+        top_skills = skills.get("top_skills", [])
+        if top_skills:
+            lines.append("  🧠 Top Skills")
+            lines.append("  " + "─" * 56)
+            lines.append(f"  {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
+            for skill in top_skills[:10]:
+                last_used = "—"
+                if skill.get("last_used_at"):
+                    last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
+                lines.append(
+                    f"  {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
+                )
+            summary = skills.get("summary", {})
+            lines.append(
+                f"  Distinct skills: {summary.get('distinct_skills_used', 0)}  "
+                f"Loads: {summary.get('total_skill_loads', 0):,}  "
+                f"Edits: {summary.get('total_skill_edits', 0):,}"
+            )
+            lines.append("")
+
        # Activity patterns
        act = report.get("activity", {})
        if act.get("by_day"):
@@ -753,6 +903,18 @@ class InsightsEngine:
                lines.append(f"  {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)")
            lines.append("")

+        skills = report.get("skills", {})
+        if skills.get("top_skills"):
+            lines.append("**🧠 Top Skills:**")
+            for skill in skills["top_skills"][:5]:
+                suffix = ""
+                if skill.get("last_used_at"):
+                    suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
+                lines.append(
+                    f"  {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
+                )
+            lines.append("")
+
        # Activity summary
        act = report.get("activity", {})
        if act.get("busiest_day") and act.get("busiest_hour"):
@@ -14,6 +14,8 @@ from urllib.parse import urlparse
 import requests
 import yaml

+from utils import base_url_host_matches, base_url_hostname
+
 from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)
@@ -116,7 +118,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
-    "gpt-5.3-codex-spark": 128000,    # Spark variant has reduced 128k context
    "gpt-5.1-chat": 128000,           # Chat variant has 128k context
    "gpt-5": 400000,                  # GPT-5.x base, mini, codex variants (400k)
    "gpt-4.1": 1047576,
@@ -169,6 +170,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    "Qwen/Qwen3.5-35B-A3B": 131072,
    "deepseek-ai/DeepSeek-V3.2": 65536,
    "moonshotai/Kimi-K2.5": 262144,
+    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
    "XiaomiMiMo/MiMo-V2-Flash": 256000,
@@ -211,8 +213,15 @@ def _normalize_base_url(base_url: str) -> str:
    return (base_url or "").strip().rstrip("/")


+def _auth_headers(api_key: str = "") -> Dict[str, str]:
+    token = str(api_key or "").strip()
+    if not token:
+        return {}
+    return {"Authorization": f"Bearer {token}"}
+
+
 def _is_openrouter_base_url(base_url: str) -> bool:
-    return "openrouter.ai" in _normalize_base_url(base_url).lower()
+    return base_url_host_matches(base_url, "openrouter.ai")


 def _is_custom_endpoint(base_url: str) -> bool:
@@ -310,7 +319,7 @@ def is_local_endpoint(base_url: str) -> bool:
    return False


-def detect_local_server_type(base_url: str) -> Optional[str]:
+def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
    """Detect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -322,8 +331,10 @@ def detect_local_server_type(base_url: str) -> Optional[str]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

+    headers = _auth_headers(api_key)
+
    try:
-        with httpx.Client(timeout=2.0) as client:
+        with httpx.Client(timeout=2.0, headers=headers) as client:
            # LM Studio exposes /api/v1/models — check first (most specific)
            try:
                r = client.get(f"{server_url}/api/v1/models")
@@ -510,6 +521,59 @@ def fetch_endpoint_model_metadata(
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    last_error: Optional[Exception] = None

+    if is_local_endpoint(normalized):
+        try:
+            if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
+                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
+                response = requests.get(
+                    server_url.rstrip("/") + "/api/v1/models",
+                    headers=headers,
+                    timeout=10,
+                )
+                response.raise_for_status()
+                payload = response.json()
+                cache: Dict[str, Dict[str, Any]] = {}
+                for model in payload.get("models", []):
+                    if not isinstance(model, dict):
+                        continue
+                    model_id = model.get("key") or model.get("id")
+                    if not model_id:
+                        continue
+                    entry: Dict[str, Any] = {"name": model.get("name", model_id)}
+
+                    context_length = None
+                    for inst in model.get("loaded_instances", []) or []:
+                        if not isinstance(inst, dict):
+                            continue
+                        cfg = inst.get("config", {})
+                        ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
+                        if isinstance(ctx, int) and ctx > 0:
+                            context_length = ctx
+                            break
+                    if context_length is None:
+                        context_length = _extract_context_length(model)
+                    if context_length is not None:
+                        entry["context_length"] = context_length
+
+                    max_completion_tokens = _extract_max_completion_tokens(model)
+                    if max_completion_tokens is not None:
+                        entry["max_completion_tokens"] = max_completion_tokens
+
+                    pricing = _extract_pricing(model)
+                    if pricing:
+                        entry["pricing"] = pricing
+
+                    _add_model_aliases(cache, model_id, entry)
+                    alt_id = model.get("id")
+                    if isinstance(alt_id, str) and alt_id and alt_id != model_id:
+                        _add_model_aliases(cache, alt_id, entry)
+
+                _endpoint_model_metadata_cache[normalized] = cache
+                _endpoint_model_metadata_cache_time[normalized] = time.time()
+                return cache
+        except Exception as exc:
+            last_error = exc
+
    for candidate in candidates:
        url = candidate.rstrip("/") + "/models"
        try:
@@ -716,7 +780,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
    return False


-def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
+def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
    """Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -734,14 +798,16 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
        server_url = server_url[:-3]

    try:
-        server_type = detect_local_server_type(base_url)
+        server_type = detect_local_server_type(base_url, api_key=api_key)
    except Exception:
        return None
    if server_type != "ollama":
        return None

+    headers = _auth_headers(api_key)
+
    try:
-        with httpx.Client(timeout=3.0) as client:
+        with httpx.Client(timeout=3.0, headers=headers) as client:
            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
            if resp.status_code != 200:
                return None
@@ -769,7 +835,7 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
    return None


-def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
+def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
    """Query a local server for the model's context length."""
    import httpx

@@ -782,13 +848,15 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

+    headers = _auth_headers(api_key)
+
    try:
-        server_type = detect_local_server_type(base_url)
+        server_type = detect_local_server_type(base_url, api_key=api_key)
    except Exception:
        server_type = None

    try:
-        with httpx.Client(timeout=3.0) as client:
+        with httpx.Client(timeout=3.0, headers=headers) as client:
            # Ollama: /api/show returns model details with context info
            if server_type == "ollama":
                resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -999,7 +1067,7 @@ def get_model_context_length(
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
-                local_ctx = _query_local_context_length(model, base_url)
+                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
                    save_context_length(model, base_url, local_ctx)
                    return local_ctx
@@ -1013,7 +1081,7 @@ def get_model_context_length(

    # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
    if provider == "anthropic" or (
-        base_url and "api.anthropic.com" in base_url
+        base_url and base_url_hostname(base_url) == "api.anthropic.com"
    ):
        ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
        if ctx:
@@ -1022,7 +1090,11 @@ def get_model_context_length(
    # 4b. AWS Bedrock — use static context length table.
    # Bedrock's ListFoundationModels doesn't expose context window sizes,
    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
+    if provider == "bedrock" or (
+        base_url
+        and base_url_hostname(base_url).startswith("bedrock-runtime.")
+        and base_url_host_matches(base_url, "amazonaws.com")
+    ):
        try:
            from agent.bedrock_adapter import get_bedrock_context_length
            return get_bedrock_context_length(model)
@@ -1069,7 +1141,7 @@ def get_model_context_length(

    # 9. Query local server as last resort
    if base_url and is_local_endpoint(base_url):
-        local_ctx = _query_local_context_length(model, base_url)
+        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
            save_context_length(model, base_url, local_ctx)
            return local_ctx
@@ -350,7 +350,13 @@ PLATFORM_HINTS = {
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
-        "renderable inside a terminal."
+        "renderable inside a terminal. "
+        "File delivery: there is no attachment channel — the user reads your "
+        "response directly in their terminal. Do NOT emit MEDIA:/path tags "
+        "(those are only intercepted on messaging platforms like Telegram, "
+        "Discord, Slack, etc.; on the CLI they render as literal text). "
+        "When referring to a file you created or changed, just state its "
+        "absolute path in plain text; the user can open it from there."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
@@ -13,6 +13,48 @@ import re

 logger = logging.getLogger(__name__)

+# Sensitive query-string parameter names (case-insensitive exact match).
+# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
+# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
+_SENSITIVE_QUERY_PARAMS = frozenset({
+    "access_token",
+    "refresh_token",
+    "id_token",
+    "token",
+    "api_key",
+    "apikey",
+    "client_secret",
+    "password",
+    "auth",
+    "jwt",
+    "session",
+    "secret",
+    "key",
+    "code",           # OAuth authorization codes
+    "signature",      # pre-signed URL signatures
+    "x-amz-signature",
+})
+
+# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
+# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
+# Ported from nearai/ironclaw#2529.
+_SENSITIVE_BODY_KEYS = frozenset({
+    "access_token",
+    "refresh_token",
+    "id_token",
+    "token",
+    "api_key",
+    "apikey",
+    "client_secret",
+    "password",
+    "auth",
+    "jwt",
+    "secret",
+    "private_key",
+    "authorization",
+    "key",
+})
+
 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
 # `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
 _REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -108,6 +150,30 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")

+# URLs containing query strings — matches `scheme://...?...[# or end]`.
+# Used to scan text for URLs whose query params may contain secrets.
+# Ported from nearai/ironclaw#2529.
+_URL_WITH_QUERY_RE = re.compile(
+    r"(https?|wss?|ftp)://"          # scheme
+    r"([^\s/?#]+)"                    # authority (may include userinfo)
+    r"([^\s?#]*)"                     # path
+    r"\?([^\s#]+)"                    # query (required)
+    r"(#\S*)?",                       # optional fragment
+)
+
+# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
+# (not just DB protocols already covered by _DB_CONNSTR_RE above).
+# Catches things like `https://user:token@api.example.com/v1/foo`.
+_URL_USERINFO_RE = re.compile(
+    r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
+)
+
+# Form-urlencoded body detection: conservative — only applies when the entire
+# text looks like a query string (k=v&k=v pattern with no newlines).
+_FORM_BODY_RE = re.compile(
+    r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
+)
+
 # Compile known prefix patterns into one alternation
 _PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -121,6 +187,72 @@ def _mask_token(token: str) -> str:
    return f"{token[:6]}...{token[-4:]}"


+def _redact_query_string(query: str) -> str:
+    """Redact sensitive parameter values in a URL query string.
+
+    Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
+    replaced with `***`. Non-sensitive keys pass through unchanged.
+    Empty or malformed pairs are preserved as-is.
+    """
+    if not query:
+        return query
+    parts = []
+    for pair in query.split("&"):
+        if "=" not in pair:
+            parts.append(pair)
+            continue
+        key, _, value = pair.partition("=")
+        if key.lower() in _SENSITIVE_QUERY_PARAMS:
+            parts.append(f"{key}=***")
+        else:
+            parts.append(pair)
+    return "&".join(parts)
+
+
+def _redact_url_query_params(text: str) -> str:
+    """Scan text for URLs with query strings and redact sensitive params.
+
+    Catches opaque tokens that don't match vendor prefix regexes, e.g.
+    `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`.
+    """
+    def _sub(m: re.Match) -> str:
+        scheme = m.group(1)
+        authority = m.group(2)
+        path = m.group(3)
+        query = _redact_query_string(m.group(4))
+        fragment = m.group(5) or ""
+        return f"{scheme}://{authority}{path}?{query}{fragment}"
+    return _URL_WITH_QUERY_RE.sub(_sub, text)
+
+
+def _redact_url_userinfo(text: str) -> str:
+    """Strip `user:password@` from HTTP/WS/FTP URLs.
+
+    DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
+    separately by `_DB_CONNSTR_RE`.
+    """
+    return _URL_USERINFO_RE.sub(
+        lambda m: f"{m.group(1)}://{m.group(2)}:***@",
+        text,
+    )
+
+
+def _redact_form_body(text: str) -> str:
+    """Redact sensitive values in a form-urlencoded body.
+
+    Only applies when the entire input looks like a pure form body
+    (k=v&k=v with no newlines, no other text). Single-line non-form
+    text passes through unchanged. This is a conservative pass — the
+    `_redact_url_query_params` function handles embedded query strings.
+    """
+    if not text or "\n" in text or "&" not in text:
+        return text
+    # The body-body form check is strict: only trigger on clean k=v&k=v.
+    if not _FORM_BODY_RE.match(text.strip()):
+        return text
+    return _redact_query_string(text.strip())
+
+
 def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

@@ -173,6 +305,16 @@ def redact_sensitive_text(text: str) -> str:
    # JWT tokens (eyJ... — base64-encoded JSON headers)
    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)

+    # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
+    # DB schemes are handled above by _DB_CONNSTR_RE.
+    text = _redact_url_userinfo(text)
+
+    # URL query params containing opaque tokens (?access_token=…&code=…)
+    text = _redact_url_query_params(text)
+
+    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
+    text = _redact_form_body(text)
+
    # Discord user/role mentions (<@snowflake_id>)
    text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)

@@ -0,0 +1,831 @@
+"""
+Shell-script hooks bridge.
+
+Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
+consent on first use of each ``(event, command)`` pair, and registers
+callbacks on the existing plugin hook manager so every existing
+``invoke_hook()`` site dispatches to the configured shell scripts — with
+zero changes to call sites.
+
+Design notes
+------------
+* Python plugins and shell hooks compose naturally: both flow through
+  :func:`hermes_cli.plugins.invoke_hook` and its aggregators.  Python
+  plugins are registered first (via ``discover_and_load()``) so their
+  block decisions win ties over shell-hook blocks.
+* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
+  with ``shell=False`` — no shell injection footguns.  Users that need
+  pipes/redirection wrap their logic in a script.
+* First-use consent is gated by the allowlist under
+  ``~/.hermes/shell-hooks-allowlist.json``.  Non-TTY callers must pass
+  ``accept_hooks=True`` (resolved from ``--accept-hooks``,
+  ``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
+  for registration to succeed without a prompt.
+* Registration is idempotent — safe to invoke from both the CLI entry
+  point (``hermes_cli/main.py``) and the gateway entry point
+  (``gateway/run.py``).
+
+Wire protocol
+-------------
+**stdin** (JSON, piped to the script)::
+
+    {
+        "hook_event_name": "pre_tool_call",
+        "tool_name":       "terminal",
+        "tool_input":      {"command": "rm -rf /"},
+        "session_id":      "sess_abc123",
+        "cwd":             "/home/user/project",
+        "extra":           {...}   # event-specific kwargs
+    }
+
+**stdout** (JSON, optional — anything else is ignored)::
+
+    # Block a pre_tool_call (either shape accepted; normalised internally):
+    {"decision": "block", "reason":  "Forbidden command"}   # Claude-Code-style
+    {"action":   "block", "message": "Forbidden command"}   # Hermes-canonical
+
+    # Inject context for pre_llm_call:
+    {"context": "Today is Friday"}
+
+    # Silent no-op:
+    <empty or any non-matching JSON object>
+"""
+
+from __future__ import annotations
+
+import difflib
+import json
+import logging
+import os
+import re
+import shlex
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+from contextlib import contextmanager
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
+
+try:
+    import fcntl  # POSIX only; Windows falls back to best-effort without flock.
+except ImportError:  # pragma: no cover
+    fcntl = None  # type: ignore[assignment]
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_TIMEOUT_SECONDS = 60
+MAX_TIMEOUT_SECONDS = 300
+ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
+
+# (event, matcher, command) triples that have been wired to the plugin
+# manager in the current process.  Matcher is part of the key because
+# the same script can legitimately register for different matchers under
+# the same event (e.g. one entry per tool the user wants to gate).
+# Second registration attempts for the exact same triple become no-ops
+# so the CLI and gateway can both call register_from_config() safely.
+_registered: Set[Tuple[str, Optional[str], str]] = set()
+_registered_lock = threading.Lock()
+
+# Intra-process lock for allowlist read-modify-write on platforms that
+# lack ``fcntl`` (non-POSIX).  Kept separate from ``_registered_lock``
+# because ``register_from_config`` already holds ``_registered_lock`` when
+# it triggers ``_record_approval`` — reusing it here would self-deadlock
+# (``threading.Lock`` is non-reentrant).  POSIX callers use the sibling
+# ``.lock`` file via ``fcntl.flock`` and bypass this.
+_allowlist_write_lock = threading.Lock()
+
+
+@dataclass
+class ShellHookSpec:
+    """Parsed and validated representation of a single ``hooks:`` entry."""
+
+    event: str
+    command: str
+    matcher: Optional[str] = None
+    timeout: int = DEFAULT_TIMEOUT_SECONDS
+    compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
+
+    def __post_init__(self) -> None:
+        # Strip whitespace introduced by YAML quirks (e.g. multi-line string
+        # folding) — a matcher of " terminal" would otherwise silently fail
+        # to match "terminal" without any diagnostic.
+        if isinstance(self.matcher, str):
+            stripped = self.matcher.strip()
+            self.matcher = stripped if stripped else None
+        if self.matcher:
+            try:
+                self.compiled_matcher = re.compile(self.matcher)
+            except re.error as exc:
+                logger.warning(
+                    "shell hook matcher %r is invalid (%s) — treating as "
+                    "literal equality", self.matcher, exc,
+                )
+                self.compiled_matcher = None
+
+    def matches_tool(self, tool_name: Optional[str]) -> bool:
+        if not self.matcher:
+            return True
+        if tool_name is None:
+            return False
+        if self.compiled_matcher is not None:
+            return self.compiled_matcher.fullmatch(tool_name) is not None
+        # compiled_matcher is None only when the regex failed to compile,
+        # in which case we already warned and fall back to literal equality.
+        return tool_name == self.matcher
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def register_from_config(
+    cfg: Optional[Dict[str, Any]],
+    *,
+    accept_hooks: bool = False,
+) -> List[ShellHookSpec]:
+    """Register every configured shell hook on the plugin manager.
+
+    ``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
+    output).  The ``hooks:`` key is read out of it.  Missing, empty, or
+    non-dict ``hooks`` is treated as zero configured hooks.
+
+    ``accept_hooks=True`` skips the TTY consent prompt — the caller is
+    promising that the user has opted in via a flag, env var, or config
+    setting.  ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
+    also honored inside this function so either CLI or gateway call sites
+    pick them up.
+
+    Returns the list of :class:`ShellHookSpec` entries that ended up wired
+    up on the plugin manager.  Skipped entries (unknown events, malformed,
+    not allowlisted, already registered) are logged but not returned.
+    """
+    if not isinstance(cfg, dict):
+        return []
+
+    effective_accept = _resolve_effective_accept(cfg, accept_hooks)
+
+    specs = _parse_hooks_block(cfg.get("hooks"))
+    if not specs:
+        return []
+
+    registered: List[ShellHookSpec] = []
+
+    # Import lazily — avoids circular imports at module-load time.
+    from hermes_cli.plugins import get_plugin_manager
+
+    manager = get_plugin_manager()
+
+    # Idempotence + allowlist read happen under the lock; the TTY
+    # prompt runs outside so other threads aren't parked on a blocking
+    # input().  Mutation re-takes the lock with a defensive idempotence
+    # re-check in case two callers ever race through the prompt.
+    for spec in specs:
+        key = (spec.event, spec.matcher, spec.command)
+        with _registered_lock:
+            if key in _registered:
+                continue
+            already_allowlisted = _is_allowlisted(spec.event, spec.command)
+
+        if not already_allowlisted:
+            if not _prompt_and_record(
+                spec.event, spec.command, accept_hooks=effective_accept,
+            ):
+                logger.warning(
+                    "shell hook for %s (%s) not allowlisted — skipped. "
+                    "Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
+                    "hooks_auto_accept: true, or approve at the TTY "
+                    "prompt next run.",
+                    spec.event, spec.command,
+                )
+                continue
+
+        with _registered_lock:
+            if key in _registered:
+                continue
+            manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
+            _registered.add(key)
+            registered.append(spec)
+            logger.info(
+                "shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
+                spec.event, spec.command, spec.matcher, spec.timeout,
+            )
+
+    return registered
+
+
+def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
+    """Return the parsed ``ShellHookSpec`` entries from config without
+    registering anything.  Used by ``hermes hooks list`` and ``doctor``."""
+    if not isinstance(cfg, dict):
+        return []
+    return _parse_hooks_block(cfg.get("hooks"))
+
+
+def reset_for_tests() -> None:
+    """Clear the idempotence set.  Test-only helper."""
+    with _registered_lock:
+        _registered.clear()
+
+
+# ---------------------------------------------------------------------------
+# Config parsing
+# ---------------------------------------------------------------------------
+
+def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
+    """Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
+
+    Malformed entries warn-and-skip — we never raise from config parsing
+    because a broken hook must not crash the agent.
+    """
+    from hermes_cli.plugins import VALID_HOOKS
+
+    if not isinstance(hooks_cfg, dict):
+        return []
+
+    specs: List[ShellHookSpec] = []
+
+    for event_name, entries in hooks_cfg.items():
+        if event_name not in VALID_HOOKS:
+            suggestion = difflib.get_close_matches(
+                str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
+            )
+            if suggestion:
+                logger.warning(
+                    "unknown hook event %r in hooks: config — did you mean %r?",
+                    event_name, suggestion[0],
+                )
+            else:
+                logger.warning(
+                    "unknown hook event %r in hooks: config (valid: %s)",
+                    event_name, ", ".join(sorted(VALID_HOOKS)),
+                )
+            continue
+
+        if entries is None:
+            continue
+
+        if not isinstance(entries, list):
+            logger.warning(
+                "hooks.%s must be a list of hook definitions; got %s",
+                event_name, type(entries).__name__,
+            )
+            continue
+
+        for i, raw in enumerate(entries):
+            spec = _parse_single_entry(event_name, i, raw)
+            if spec is not None:
+                specs.append(spec)
+
+    return specs
+
+
+def _parse_single_entry(
+    event: str, index: int, raw: Any,
+) -> Optional[ShellHookSpec]:
+    if not isinstance(raw, dict):
+        logger.warning(
+            "hooks.%s[%d] must be a mapping with a 'command' key; got %s",
+            event, index, type(raw).__name__,
+        )
+        return None
+
+    command = raw.get("command")
+    if not isinstance(command, str) or not command.strip():
+        logger.warning(
+            "hooks.%s[%d] is missing a non-empty 'command' field",
+            event, index,
+        )
+        return None
+
+    matcher = raw.get("matcher")
+    if matcher is not None and not isinstance(matcher, str):
+        logger.warning(
+            "hooks.%s[%d].matcher must be a string regex; ignoring",
+            event, index,
+        )
+        matcher = None
+
+    if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
+        logger.warning(
+            "hooks.%s[%d].matcher=%r will be ignored at runtime — the "
+            "matcher field is only honored for pre_tool_call / "
+            "post_tool_call.  The hook will fire on every %s event.",
+            event, index, matcher, event,
+        )
+        matcher = None
+
+    timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
+    try:
+        timeout = int(timeout_raw)
+    except (TypeError, ValueError):
+        logger.warning(
+            "hooks.%s[%d].timeout must be an int (got %r); using default %ds",
+            event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
+        )
+        timeout = DEFAULT_TIMEOUT_SECONDS
+
+    if timeout < 1:
+        logger.warning(
+            "hooks.%s[%d].timeout must be >=1; using default %ds",
+            event, index, DEFAULT_TIMEOUT_SECONDS,
+        )
+        timeout = DEFAULT_TIMEOUT_SECONDS
+
+    if timeout > MAX_TIMEOUT_SECONDS:
+        logger.warning(
+            "hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
+            event, index, timeout, MAX_TIMEOUT_SECONDS,
+        )
+        timeout = MAX_TIMEOUT_SECONDS
+
+    return ShellHookSpec(
+        event=event,
+        command=command.strip(),
+        matcher=matcher,
+        timeout=timeout,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Subprocess callback
+# ---------------------------------------------------------------------------
+
+_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
+
+
+def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
+    """Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
+
+    Returns a diagnostic dict with the same keys for every outcome
+    (``returncode``, ``stdout``, ``stderr``, ``timed_out``,
+    ``elapsed_seconds``, ``error``).  This is the single place the
+    subprocess is actually invoked — both the live callback path
+    (:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
+    go through it.
+    """
+    result: Dict[str, Any] = {
+        "returncode": None,
+        "stdout": "",
+        "stderr": "",
+        "timed_out": False,
+        "elapsed_seconds": 0.0,
+        "error": None,
+    }
+    try:
+        argv = shlex.split(os.path.expanduser(spec.command))
+    except ValueError as exc:
+        result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
+        return result
+    if not argv:
+        result["error"] = "empty command"
+        return result
+
+    t0 = time.monotonic()
+    try:
+        proc = subprocess.run(
+            argv,
+            input=stdin_json,
+            capture_output=True,
+            timeout=spec.timeout,
+            text=True,
+            shell=False,
+        )
+    except subprocess.TimeoutExpired:
+        result["timed_out"] = True
+        result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
+        return result
+    except FileNotFoundError:
+        result["error"] = "command not found"
+        return result
+    except PermissionError:
+        result["error"] = "command not executable"
+        return result
+    except Exception as exc:  # pragma: no cover — defensive
+        result["error"] = str(exc)
+        return result
+
+    result["returncode"] = proc.returncode
+    result["stdout"] = proc.stdout or ""
+    result["stderr"] = proc.stderr or ""
+    result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
+    return result
+
+
+def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
+    """Build the closure that ``invoke_hook()`` will call per firing."""
+
+    def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
+        # Matcher gate — only meaningful for tool-scoped events.
+        if spec.event in ("pre_tool_call", "post_tool_call"):
+            if not spec.matches_tool(kwargs.get("tool_name")):
+                return None
+
+        r = _spawn(spec, _serialize_payload(spec.event, kwargs))
+
+        if r["error"]:
+            logger.warning(
+                "shell hook failed (event=%s command=%s): %s",
+                spec.event, spec.command, r["error"],
+            )
+            return None
+        if r["timed_out"]:
+            logger.warning(
+                "shell hook timed out after %.2fs (event=%s command=%s)",
+                r["elapsed_seconds"], spec.event, spec.command,
+            )
+            return None
+
+        stderr = r["stderr"].strip()
+        if stderr:
+            logger.debug(
+                "shell hook stderr (event=%s command=%s): %s",
+                spec.event, spec.command, stderr[:400],
+            )
+        # Non-zero exits: log but still parse stdout so scripts that
+        # signal failure via exit code can also return a block directive.
+        if r["returncode"] != 0:
+            logger.warning(
+                "shell hook exited %d (event=%s command=%s); stderr=%s",
+                r["returncode"], spec.event, spec.command, stderr[:400],
+            )
+        return _parse_response(spec.event, r["stdout"])
+
+    _callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
+    _callback.__qualname__ = _callback.__name__
+    return _callback
+
+
+def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
+    """Render the stdin JSON payload.  Unserialisable values are
+    stringified via ``default=str`` rather than dropped."""
+    extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
+    try:
+        cwd = str(Path.cwd())
+    except OSError:
+        cwd = ""
+    payload = {
+        "hook_event_name": event,
+        "tool_name": kwargs.get("tool_name"),
+        "tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
+        "session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
+        "cwd": cwd,
+        "extra": extras,
+    }
+    return json.dumps(payload, ensure_ascii=False, default=str)
+
+
+def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
+    """Translate stdout JSON into a Hermes wire-shape dict.
+
+    For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
+    "reason": "..."}`` payload is translated into the canonical Hermes
+    ``{"action": "block", "message": "..."}`` shape expected by
+    :func:`hermes_cli.plugins.get_pre_tool_call_block_message`.  This is
+    the single most important correctness invariant in this module —
+    skipping the translation silently breaks every ``pre_tool_call``
+    block directive.
+
+    For ``pre_llm_call``, ``{"context": "..."}`` is passed through
+    unchanged to match the existing plugin-hook contract.
+
+    Anything else returns ``None``.
+    """
+    stdout = (stdout or "").strip()
+    if not stdout:
+        return None
+
+    try:
+        data = json.loads(stdout)
+    except json.JSONDecodeError:
+        logger.warning(
+            "shell hook stdout was not valid JSON (event=%s): %s",
+            event, stdout[:200],
+        )
+        return None
+
+    if not isinstance(data, dict):
+        return None
+
+    if event == "pre_tool_call":
+        if data.get("action") == "block":
+            message = data.get("message") or data.get("reason") or ""
+            if isinstance(message, str) and message:
+                return {"action": "block", "message": message}
+        if data.get("decision") == "block":
+            message = data.get("reason") or data.get("message") or ""
+            if isinstance(message, str) and message:
+                return {"action": "block", "message": message}
+        return None
+
+    context = data.get("context")
+    if isinstance(context, str) and context.strip():
+        return {"context": context}
+
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Allowlist / consent
+# ---------------------------------------------------------------------------
+
+def allowlist_path() -> Path:
+    """Path to the per-user shell-hook allowlist file."""
+    return get_hermes_home() / ALLOWLIST_FILENAME
+
+
+def load_allowlist() -> Dict[str, Any]:
+    """Return the parsed allowlist, or an empty skeleton if absent."""
+    try:
+        raw = json.loads(allowlist_path().read_text())
+    except (FileNotFoundError, json.JSONDecodeError, OSError):
+        return {"approvals": []}
+    if not isinstance(raw, dict):
+        return {"approvals": []}
+    approvals = raw.get("approvals")
+    if not isinstance(approvals, list):
+        raw["approvals"] = []
+    return raw
+
+
+def save_allowlist(data: Dict[str, Any]) -> None:
+    """Atomically persist the allowlist via per-process ``mkstemp`` +
+    ``os.replace``.  Cross-process read-modify-write races are handled
+    by :func:`_locked_update_approvals` (``fcntl.flock``).  On OSError
+    the failure is logged; the in-process hook still registers but
+    the approval won't survive across runs."""
+    p = allowlist_path()
+    try:
+        p.parent.mkdir(parents=True, exist_ok=True)
+        fd, tmp_path = tempfile.mkstemp(
+            prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
+        )
+        try:
+            with os.fdopen(fd, "w") as fh:
+                fh.write(json.dumps(data, indent=2, sort_keys=True))
+            os.replace(tmp_path, p)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
+    except OSError as exc:
+        logger.warning(
+            "Failed to persist shell hook allowlist to %s: %s. "
+            "The approval is in-memory for this run, but the next "
+            "startup will re-prompt (or skip registration on non-TTY "
+            "runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
+            p, exc,
+        )
+
+
+def _is_allowlisted(event: str, command: str) -> bool:
+    data = load_allowlist()
+    return any(
+        isinstance(e, dict)
+        and e.get("event") == event
+        and e.get("command") == command
+        for e in data.get("approvals", [])
+    )
+
+
+@contextmanager
+def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
+    """Serialise read-modify-write on the allowlist across processes.
+
+    Holds an exclusive ``flock`` on a sibling lock file for the duration
+    of the update so concurrent ``_record_approval``/``revoke`` callers
+    cannot clobber each other's changes (the race Codex reproduced with
+    20–50 simultaneous writers).  Falls back to an in-process lock on
+    platforms without ``fcntl``.
+    """
+    p = allowlist_path()
+    p.parent.mkdir(parents=True, exist_ok=True)
+    lock_path = p.with_suffix(p.suffix + ".lock")
+
+    if fcntl is None:  # pragma: no cover — non-POSIX fallback
+        with _allowlist_write_lock:
+            data = load_allowlist()
+            yield data
+            save_allowlist(data)
+        return
+
+    with open(lock_path, "a+") as lock_fh:
+        fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
+        try:
+            data = load_allowlist()
+            yield data
+            save_allowlist(data)
+        finally:
+            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
+
+
+def _prompt_and_record(
+    event: str, command: str, *, accept_hooks: bool,
+) -> bool:
+    """Decide whether to approve an unseen ``(event, command)`` pair.
+    Returns ``True`` iff the approval was granted and recorded.
+    """
+    if accept_hooks:
+        _record_approval(event, command)
+        logger.info(
+            "shell hook auto-approved via --accept-hooks / env / config: "
+            "%s -> %s", event, command,
+        )
+        return True
+
+    if not sys.stdin.isatty():
+        return False
+
+    print(
+        f"\n⚠ Hermes is about to register a shell hook that will run a\n"
+        f"  command on your behalf.\n\n"
+        f"    Event:   {event}\n"
+        f"    Command: {command}\n\n"
+        f"  Commands run with your full user credentials.  Only approve\n"
+        f"  commands you trust."
+    )
+    try:
+        answer = input("Allow this hook to run? [y/N]: ").strip().lower()
+    except (EOFError, KeyboardInterrupt):
+        print()  # keep the terminal tidy after ^C
+        return False
+
+    if answer in ("y", "yes"):
+        _record_approval(event, command)
+        return True
+
+    return False
+
+
+def _record_approval(event: str, command: str) -> None:
+    entry = {
+        "event": event,
+        "command": command,
+        "approved_at": _utc_now_iso(),
+        "script_mtime_at_approval": script_mtime_iso(command),
+    }
+    with _locked_update_approvals() as data:
+        data["approvals"] = [
+            e for e in data.get("approvals", [])
+            if not (
+                isinstance(e, dict)
+                and e.get("event") == event
+                and e.get("command") == command
+            )
+        ] + [entry]
+
+
+def _utc_now_iso() -> str:
+    return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
+
+
+def revoke(command: str) -> int:
+    """Remove every allowlist entry matching ``command``.
+
+    Returns the number of entries removed.  Does not unregister any
+    callbacks that are already live on the plugin manager in the current
+    process — restart the CLI / gateway to drop them.
+    """
+    with _locked_update_approvals() as data:
+        before = len(data.get("approvals", []))
+        data["approvals"] = [
+            e for e in data.get("approvals", [])
+            if not (isinstance(e, dict) and e.get("command") == command)
+        ]
+        after = len(data["approvals"])
+    return before - after
+
+
+_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
+    ".sh", ".bash", ".zsh", ".fish",
+    ".py", ".pyw",
+    ".rb", ".pl", ".lua",
+    ".js", ".mjs", ".cjs", ".ts",
+)
+
+
+def _command_script_path(command: str) -> str:
+    """Return the script path from ``command`` for doctor / drift checks.
+
+    Prefers a token ending in a known script extension, then a token
+    containing ``/`` or leading ``~``, then the first token.  Handles
+    ``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
+    common bare-path form.
+    """
+    try:
+        parts = shlex.split(command)
+    except ValueError:
+        return command
+    if not parts:
+        return command
+    for part in parts:
+        if part.lower().endswith(_SCRIPT_EXTENSIONS):
+            return part
+    for part in parts:
+        if "/" in part or part.startswith("~"):
+            return part
+    return parts[0]
+
+
+# ---------------------------------------------------------------------------
+# Helpers for accept-hooks resolution
+# ---------------------------------------------------------------------------
+
+def _resolve_effective_accept(
+    cfg: Dict[str, Any], accept_hooks_arg: bool,
+) -> bool:
+    """Combine all three opt-in channels into a single boolean.
+
+    Precedence (any truthy source flips us on):
+      1. ``--accept-hooks`` flag (CLI) / explicit argument
+      2. ``HERMES_ACCEPT_HOOKS`` env var
+      3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
+    """
+    if accept_hooks_arg:
+        return True
+    env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
+    if env in ("1", "true", "yes", "on"):
+        return True
+    cfg_val = cfg.get("hooks_auto_accept", False)
+    return bool(cfg_val)
+
+
+# ---------------------------------------------------------------------------
+# Introspection (used by `hermes hooks` CLI)
+# ---------------------------------------------------------------------------
+
+def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
+    """Return the allowlist record for this pair, if any."""
+    for e in load_allowlist().get("approvals", []):
+        if (
+            isinstance(e, dict)
+            and e.get("event") == event
+            and e.get("command") == command
+        ):
+            return e
+    return None
+
+
+def script_mtime_iso(command: str) -> Optional[str]:
+    """ISO-8601 mtime of the resolved script path, or ``None`` if the
+    script is missing."""
+    path = _command_script_path(command)
+    if not path:
+        return None
+    try:
+        expanded = os.path.expanduser(path)
+        return datetime.fromtimestamp(
+            os.path.getmtime(expanded), tz=timezone.utc,
+        ).isoformat().replace("+00:00", "Z")
+    except OSError:
+        return None
+
+
+def script_is_executable(command: str) -> bool:
+    """Return ``True`` iff ``command`` is runnable as configured.
+
+    For a bare invocation (``/path/hook.sh``) the script itself must be
+    executable.  For interpreter-prefixed commands (``python3
+    /path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
+    to be readable — the interpreter doesn't care about the ``X_OK``
+    bit.  Mirrors what ``_spawn`` would actually do at runtime."""
+    path = _command_script_path(command)
+    if not path:
+        return False
+    expanded = os.path.expanduser(path)
+    if not os.path.isfile(expanded):
+        return False
+    try:
+        argv = shlex.split(command)
+    except ValueError:
+        return False
+    is_bare_invocation = bool(argv) and argv[0] == path
+    required = os.X_OK if is_bare_invocation else os.R_OK
+    return os.access(expanded, required)
+
+
+def run_once(
+    spec: ShellHookSpec, kwargs: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Fire a single shell-hook invocation with a synthetic payload.
+    Used by ``hermes hooks test`` and ``hermes hooks doctor``.
+
+    ``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
+    would pass at runtime.  It is routed through :func:`_serialize_payload`
+    so the synthetic stdin exactly matches what a real hook firing would
+    produce — otherwise scripts tested via ``hermes hooks test`` could
+    diverge silently from production behaviour.
+
+    Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
+    holding the canonical Hermes-wire-shape response."""
+    stdin_json = _serialize_payload(spec.event, kwargs)
+    result = _spawn(spec, stdin_json)
+    result["parsed"] = _parse_response(spec.event, result["stdout"])
+    return result
@@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
 import json
 import logging
 import re
+import subprocess
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

+# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
+# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
+# left as-is so the user can debug them.
+_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
+
+# Matches inline shell snippets like:  !`date +%Y-%m-%d`
+# Non-greedy, single-line only — no newlines inside the backticks.
+_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
+
+# Cap inline-shell output so a runaway command can't blow out the context.
+_INLINE_SHELL_MAX_OUTPUT = 4000
+
+
+def _load_skills_config() -> dict:
+    """Load the ``skills`` section of config.yaml (best-effort)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config() or {}
+        skills_cfg = cfg.get("skills")
+        if isinstance(skills_cfg, dict):
+            return skills_cfg
+    except Exception:
+        logger.debug("Could not read skills config", exc_info=True)
+    return {}
+
+
+def _substitute_template_vars(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None,
+) -> str:
+    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
+
+    Only substitutes tokens for which a concrete value is available —
+    unresolved tokens are left in place so the author can spot them.
+    """
+    if not content:
+        return content
+
+    skill_dir_str = str(skill_dir) if skill_dir else None
+
+    def _replace(match: re.Match) -> str:
+        token = match.group(1)
+        if token == "HERMES_SKILL_DIR" and skill_dir_str:
+            return skill_dir_str
+        if token == "HERMES_SESSION_ID" and session_id:
+            return str(session_id)
+        return match.group(0)
+
+    return _SKILL_TEMPLATE_RE.sub(_replace, content)
+
+
+def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
+    """Execute a single inline-shell snippet and return its stdout (trimmed).
+
+    Failures return a short ``[inline-shell error: ...]`` marker instead of
+    raising, so one bad snippet can't wreck the whole skill message.
+    """
+    try:
+        completed = subprocess.run(
+            ["bash", "-c", command],
+            cwd=str(cwd) if cwd else None,
+            capture_output=True,
+            text=True,
+            timeout=max(1, int(timeout)),
+            check=False,
+        )
+    except subprocess.TimeoutExpired:
+        return f"[inline-shell timeout after {timeout}s: {command}]"
+    except FileNotFoundError:
+        return f"[inline-shell error: bash not found]"
+    except Exception as exc:
+        return f"[inline-shell error: {exc}]"
+
+    output = (completed.stdout or "").rstrip("\n")
+    if not output and completed.stderr:
+        output = completed.stderr.rstrip("\n")
+    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
+        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
+    return output
+
+
+def _expand_inline_shell(
+    content: str,
+    skill_dir: Path | None,
+    timeout: int,
+) -> str:
+    """Replace every !`cmd` snippet in ``content`` with its stdout.
+
+    Runs each snippet with the skill directory as CWD so relative paths in
+    the snippet work the way the author expects.
+    """
+    if "!`" not in content:
+        return content
+
+    def _replace(match: re.Match) -> str:
+        cmd = match.group(1).strip()
+        if not cmd:
+            return ""
+        return _run_inline_shell(cmd, skill_dir, timeout)
+
+    return _INLINE_SHELL_RE.sub(_replace, content)
+

 def build_plan_path(
    user_instruction: str = "",
@@ -133,14 +238,36 @@ def _build_skill_message(
    activation_note: str,
    user_instruction: str = "",
    runtime_note: str = "",
+    session_id: str | None = None,
 ) -> str:
    """Format a loaded skill into a user/system message payload."""
    from tools.skills_tool import SKILLS_DIR

    content = str(loaded_skill.get("content") or "")

+    # ── Template substitution and inline-shell expansion ──
+    # Done before anything else so downstream blocks (setup notes,
+    # supporting-file hints) see the expanded content.
+    skills_cfg = _load_skills_config()
+    if skills_cfg.get("template_vars", True):
+        content = _substitute_template_vars(content, skill_dir, session_id)
+    if skills_cfg.get("inline_shell", False):
+        timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
+        content = _expand_inline_shell(content, skill_dir, timeout)
+
    parts = [activation_note, "", content.strip()]

+    # ── Inject the absolute skill directory so the agent can reference
+    #    bundled scripts without an extra skill_view() round-trip. ──
+    if skill_dir:
+        parts.append("")
+        parts.append(f"[Skill directory: {skill_dir}]")
+        parts.append(
+            "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
+            "`templates/config.yaml`) against that directory, then run them "
+            "with the terminal tool using the absolute path."
+        )
+
    # ── Inject resolved skill config values ──
    _inject_skill_config(loaded_skill, parts)

@@ -188,11 +315,13 @@ def _build_skill_message(
            # Skill is from an external dir — use the skill name instead
            skill_view_target = skill_dir.name
        parts.append("")
-        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
+        parts.append("[This skill has supporting files:]")
        for sf in supporting:
-            parts.append(f"- {sf}")
+            parts.append(f"- {sf}  ->  {skill_dir / sf}")
        parts.append(
-            f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
+            f'\nLoad any of these with skill_view(name="{skill_view_target}", '
+            f'file_path="<path>"), or run scripts directly by absolute path '
+            f"(e.g. `node {skill_dir}/scripts/foo.js`)."
        )

    if user_instruction:
@@ -332,6 +461,7 @@ def build_skill_invocation_message(
        activation_note,
        user_instruction=user_instruction,
        runtime_note=runtime_note,
+        session_id=task_id,
    )


@@ -370,6 +500,7 @@ def build_preloaded_skills_prompt(
                loaded_skill,
                skill_dir,
                activation_note,
+                session_id=task_id,
            )
        )
        loaded_names.append(skill_name)
@@ -0,0 +1,51 @@
+"""Transport layer types and registry for provider response normalization.
+
+Usage:
+    from agent.transports import get_transport
+    transport = get_transport("anthropic_messages")
+    result = transport.normalize_response(raw_response)
+"""
+
+from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401
+
+_REGISTRY: dict = {}
+
+
+def register_transport(api_mode: str, transport_cls: type) -> None:
+    """Register a transport class for an api_mode string."""
+    _REGISTRY[api_mode] = transport_cls
+
+
+def get_transport(api_mode: str):
+    """Get a transport instance for the given api_mode.
+
+    Returns None if no transport is registered for this api_mode.
+    This allows gradual migration — call sites can check for None
+    and fall back to the legacy code path.
+    """
+    if not _REGISTRY:
+        _discover_transports()
+    cls = _REGISTRY.get(api_mode)
+    if cls is None:
+        return None
+    return cls()
+
+
+def _discover_transports() -> None:
+    """Import all transport modules to trigger auto-registration."""
+    try:
+        import agent.transports.anthropic  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.codex  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.chat_completions  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.bedrock  # noqa: F401
+    except ImportError:
+        pass
@@ -0,0 +1,150 @@
+"""Anthropic Messages API transport.
+
+Delegates to the existing adapter functions in agent/anthropic_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse
+
+
+class AnthropicTransport(ProviderTransport):
+    """Transport for api_mode='anthropic_messages'.
+
+    Wraps the existing functions in anthropic_adapter.py behind the
+    ProviderTransport ABC.  Each method delegates — no logic is duplicated.
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "anthropic_messages"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI messages to Anthropic (system, messages) tuple.
+
+        kwargs:
+            base_url: Optional[str] — affects thinking signature handling.
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        base_url = kwargs.get("base_url")
+        return convert_messages_to_anthropic(messages, base_url=base_url)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Anthropic input_schema format."""
+        from agent.anthropic_adapter import convert_tools_to_anthropic
+
+        return convert_tools_to_anthropic(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Anthropic messages.create() kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params (all optional):
+            max_tokens: int
+            reasoning_config: dict | None
+            tool_choice: str | None
+            is_oauth: bool
+            preserve_dots: bool
+            context_length: int | None
+            base_url: str | None
+            fast_mode: bool
+        """
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        return build_anthropic_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=params.get("max_tokens", 16384),
+            reasoning_config=params.get("reasoning_config"),
+            tool_choice=params.get("tool_choice"),
+            is_oauth=params.get("is_oauth", False),
+            preserve_dots=params.get("preserve_dots", False),
+            context_length=params.get("context_length"),
+            base_url=params.get("base_url"),
+            fast_mode=params.get("fast_mode", False),
+        )
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Anthropic response to NormalizedResponse.
+
+        Calls the adapter's v1 normalize and maps the (SimpleNamespace, finish_reason)
+        tuple to the shared NormalizedResponse type.
+        """
+        from agent.anthropic_adapter import normalize_anthropic_response
+        from agent.transports.types import build_tool_call
+
+        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
+        assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
+
+        tool_calls = None
+        if assistant_msg.tool_calls:
+            tool_calls = [
+                build_tool_call(id=tc.id, name=tc.function.name, arguments=tc.function.arguments)
+                for tc in assistant_msg.tool_calls
+            ]
+
+        provider_data = {}
+        if getattr(assistant_msg, "reasoning_details", None):
+            provider_data["reasoning_details"] = assistant_msg.reasoning_details
+
+        return NormalizedResponse(
+            content=assistant_msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=getattr(assistant_msg, "reasoning", None),
+            usage=None,
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Anthropic response structure is valid."""
+        if response is None:
+            return False
+        content_blocks = getattr(response, "content", None)
+        if not isinstance(content_blocks, list):
+            return False
+        if not content_blocks:
+            return False
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Extract Anthropic cache_read and cache_creation token counts."""
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        cached = getattr(usage, "cache_read_input_tokens", 0) or 0
+        written = getattr(usage, "cache_creation_input_tokens", 0) or 0
+        if cached or written:
+            return {"cached_tokens": cached, "creation_tokens": written}
+        return None
+
+    # Promote the adapter's canonical mapping to module level so it's shared
+    _STOP_REASON_MAP = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Anthropic stop_reason to OpenAI finish_reason."""
+        return self._STOP_REASON_MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("anthropic_messages", AnthropicTransport)
@@ -0,0 +1,89 @@
+"""Abstract base for provider transports.
+
+A transport owns the data path for one api_mode:
+  convert_messages → convert_tools → build_kwargs → normalize_response
+
+It does NOT own: client construction, streaming, credential refresh,
+prompt caching, interrupt handling, or retry logic.  Those stay on AIAgent.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+from agent.transports.types import NormalizedResponse
+
+
+class ProviderTransport(ABC):
+    """Base class for provider-specific format conversion and normalization."""
+
+    @property
+    @abstractmethod
+    def api_mode(self) -> str:
+        """The api_mode string this transport handles (e.g. 'anthropic_messages')."""
+        ...
+
+    @abstractmethod
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI-format messages to provider-native format.
+
+        Returns provider-specific structure (e.g. (system, messages) for Anthropic,
+        or the messages list unchanged for chat_completions).
+        """
+        ...
+
+    @abstractmethod
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI-format tool definitions to provider-native format.
+
+        Returns provider-specific tool list (e.g. Anthropic input_schema format).
+        """
+        ...
+
+    @abstractmethod
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build the complete API call kwargs dict.
+
+        This is the primary entry point — it typically calls convert_messages()
+        and convert_tools() internally, then adds model-specific config.
+
+        Returns a dict ready to be passed to the provider's SDK client.
+        """
+        ...
+
+    @abstractmethod
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize a raw provider response to the shared NormalizedResponse type.
+
+        This is the only method that returns a transport-layer type.
+        """
+        ...
+
+    def validate_response(self, response: Any) -> bool:
+        """Optional: check if the raw response is structurally valid.
+
+        Returns True if valid, False if the response should be treated as invalid.
+        Default implementation always returns True.
+        """
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Optional: extract provider-specific cache hit/creation stats.
+
+        Returns dict with 'cached_tokens' and 'creation_tokens', or None.
+        Default returns None.
+        """
+        return None
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Optional: map provider-specific stop reason to OpenAI equivalent.
+
+        Default returns the raw reason unchanged.  Override for providers
+        with different stop reason vocabularies.
+        """
+        return raw_reason
@@ -0,0 +1,154 @@
+"""AWS Bedrock Converse API transport.
+
+Delegates to the existing adapter functions in agent/bedrock_adapter.py.
+Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
+owns format conversion and normalization, while client construction and
+boto3 calls stay on AIAgent.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class BedrockTransport(ProviderTransport):
+    """Transport for api_mode='bedrock_converse'."""
+
+    @property
+    def api_mode(self) -> str:
+        return "bedrock_converse"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI messages to Bedrock Converse format."""
+        from agent.bedrock_adapter import convert_messages_to_converse
+        return convert_messages_to_converse(messages)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
+        from agent.bedrock_adapter import convert_tools_to_converse
+        return convert_tools_to_converse(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Bedrock converse() kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params:
+            max_tokens: int — output token limit (default 4096)
+            temperature: float | None
+            guardrail_config: dict | None — Bedrock guardrails
+            region: str — AWS region (default 'us-east-1')
+        """
+        from agent.bedrock_adapter import build_converse_kwargs
+
+        region = params.get("region", "us-east-1")
+        guardrail = params.get("guardrail_config")
+
+        kwargs = build_converse_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=params.get("max_tokens", 4096),
+            temperature=params.get("temperature"),
+            guardrail_config=guardrail,
+        )
+        # Sentinel keys for dispatch — agent pops these before the boto3 call
+        kwargs["__bedrock_converse__"] = True
+        kwargs["__bedrock_region__"] = region
+        return kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Bedrock response to NormalizedResponse.
+
+        Handles two shapes:
+        1. Raw boto3 dict (from direct converse() calls)
+        2. Already-normalized SimpleNamespace with .choices (from dispatch site)
+        """
+        from agent.bedrock_adapter import normalize_converse_response
+
+        # Normalize to OpenAI-compatible SimpleNamespace
+        if hasattr(response, "choices") and response.choices:
+            # Already normalized at dispatch site
+            ns = response
+        else:
+            # Raw boto3 dict
+            ns = normalize_converse_response(response)
+
+        choice = ns.choices[0]
+        msg = choice.message
+        finish_reason = choice.finish_reason or "stop"
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = [
+                ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                )
+                for tc in msg.tool_calls
+            ]
+
+        usage = None
+        if hasattr(ns, "usage") and ns.usage:
+            u = ns.usage
+            usage = Usage(
+                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
+                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
+                total_tokens=getattr(u, "total_tokens", 0) or 0,
+            )
+
+        reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
+
+        return NormalizedResponse(
+            content=msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=reasoning,
+            usage=usage,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Bedrock response structure.
+
+        After normalize_converse_response, the response has OpenAI-compatible
+        .choices — same check as chat_completions.
+        """
+        if response is None:
+            return False
+        # Raw Bedrock dict response — check for 'output' key
+        if isinstance(response, dict):
+            return "output" in response
+        # Already-normalized SimpleNamespace
+        if hasattr(response, "choices"):
+            return bool(response.choices)
+        return False
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Bedrock stop reason to OpenAI finish_reason.
+
+        The adapter already does this mapping inside normalize_converse_response,
+        so this is only used for direct access to raw responses.
+        """
+        _MAP = {
+            "end_turn": "stop",
+            "tool_use": "tool_calls",
+            "max_tokens": "length",
+            "stop_sequence": "stop",
+            "guardrail_intervened": "content_filter",
+            "content_filtered": "content_filter",
+        }
+        return _MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("bedrock_converse", BedrockTransport)
@@ -0,0 +1,387 @@
+"""OpenAI Chat Completions transport.
+
+Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
+providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
+
+Messages and tools are already in OpenAI format — convert_messages and
+convert_tools are near-identity.  The complexity lives in build_kwargs
+which has provider-specific conditionals for max_tokens defaults,
+reasoning configuration, temperature handling, and extra_body assembly.
+"""
+
+import copy
+from typing import Any, Dict, List, Optional
+
+from agent.prompt_builder import DEVELOPER_ROLE_MODELS
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class ChatCompletionsTransport(ProviderTransport):
+    """Transport for api_mode='chat_completions'.
+
+    The default path for OpenAI-compatible providers.
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "chat_completions"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
+        """Messages are already in OpenAI format — sanitize Codex leaks only.
+
+        Strips Codex Responses API fields (``codex_reasoning_items`` on the
+        message, ``call_id``/``response_item_id`` on tool_calls) that strict
+        chat-completions providers reject with 400/422.
+        """
+        needs_sanitize = False
+        for msg in messages:
+            if not isinstance(msg, dict):
+                continue
+            if "codex_reasoning_items" in msg:
+                needs_sanitize = True
+                break
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tc in tool_calls:
+                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
+                        needs_sanitize = True
+                        break
+                if needs_sanitize:
+                    break
+
+        if not needs_sanitize:
+            return messages
+
+        sanitized = copy.deepcopy(messages)
+        for msg in sanitized:
+            if not isinstance(msg, dict):
+                continue
+            msg.pop("codex_reasoning_items", None)
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tc in tool_calls:
+                    if isinstance(tc, dict):
+                        tc.pop("call_id", None)
+                        tc.pop("response_item_id", None)
+        return sanitized
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Tools are already in OpenAI format — identity."""
+        return tools
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build chat.completions.create() kwargs.
+
+        This is the most complex transport method — it handles ~16 providers
+        via params rather than subclasses.
+
+        params:
+            timeout: float — API call timeout
+            max_tokens: int | None — user-configured max tokens
+            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
+            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
+            reasoning_config: dict | None
+            request_overrides: dict | None
+            session_id: str | None
+            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
+            model_lower: str — lowercase model name for pattern matching
+            # Provider detection flags (all optional, default False)
+            is_openrouter: bool
+            is_nous: bool
+            is_qwen_portal: bool
+            is_github_models: bool
+            is_nvidia_nim: bool
+            is_kimi: bool
+            is_custom_provider: bool
+            ollama_num_ctx: int | None
+            # Provider routing
+            provider_preferences: dict | None
+            # Qwen-specific
+            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
+            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
+            # Temperature
+            fixed_temperature: Any — from _fixed_temperature_for_model()
+            omit_temperature: bool
+            # Reasoning
+            supports_reasoning: bool
+            github_reasoning_extra: dict | None
+            # Claude on OpenRouter/Nous max output
+            anthropic_max_output: int | None
+            # Extra
+            extra_body_additions: dict | None — pre-built extra_body entries
+        """
+        # Codex sanitization: drop reasoning_items / call_id / response_item_id
+        sanitized = self.convert_messages(messages)
+
+        # Qwen portal prep AFTER codex sanitization.  If sanitize already
+        # deepcopied, reuse that copy via the in-place variant to avoid a
+        # second deepcopy.
+        is_qwen = params.get("is_qwen_portal", False)
+        if is_qwen:
+            qwen_prep = params.get("qwen_prepare_fn")
+            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
+            if sanitized is messages:
+                if qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+            else:
+                # Already deepcopied — transform in place
+                if qwen_prep_inplace is not None:
+                    qwen_prep_inplace(sanitized)
+                elif qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+
+        # Developer role swap for GPT-5/Codex models
+        model_lower = params.get("model_lower", (model or "").lower())
+        if (
+            sanitized
+            and isinstance(sanitized[0], dict)
+            and sanitized[0].get("role") == "system"
+            and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            sanitized = list(sanitized)
+            sanitized[0] = {**sanitized[0], "role": "developer"}
+
+        api_kwargs: Dict[str, Any] = {
+            "model": model,
+            "messages": sanitized,
+        }
+
+        timeout = params.get("timeout")
+        if timeout is not None:
+            api_kwargs["timeout"] = timeout
+
+        # Temperature
+        fixed_temp = params.get("fixed_temperature")
+        omit_temp = params.get("omit_temperature", False)
+        if omit_temp:
+            api_kwargs.pop("temperature", None)
+        elif fixed_temp is not None:
+            api_kwargs["temperature"] = fixed_temp
+
+        # Qwen metadata (caller precomputes {sessionId, promptId})
+        qwen_meta = params.get("qwen_session_metadata")
+        if qwen_meta and is_qwen:
+            api_kwargs["metadata"] = qwen_meta
+
+        # Tools
+        if tools:
+            api_kwargs["tools"] = tools
+
+        # max_tokens resolution — priority: ephemeral > user > provider default
+        max_tokens_fn = params.get("max_tokens_param_fn")
+        ephemeral = params.get("ephemeral_max_output_tokens")
+        max_tokens = params.get("max_tokens")
+        anthropic_max_out = params.get("anthropic_max_output")
+        is_nvidia_nim = params.get("is_nvidia_nim", False)
+        is_kimi = params.get("is_kimi", False)
+        reasoning_config = params.get("reasoning_config")
+
+        if ephemeral is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(ephemeral))
+        elif max_tokens is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(max_tokens))
+        elif is_nvidia_nim and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(16384))
+        elif is_qwen and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(65536))
+        elif is_kimi and max_tokens_fn:
+            # Kimi/Moonshot: 32000 matches Kimi CLI's default
+            api_kwargs.update(max_tokens_fn(32000))
+        elif anthropic_max_out is not None:
+            api_kwargs["max_tokens"] = anthropic_max_out
+
+        # Kimi: top-level reasoning_effort (unless thinking disabled)
+        if is_kimi:
+            _kimi_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _kimi_thinking_off:
+                _kimi_effort = "medium"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _kimi_effort = _e
+                api_kwargs["reasoning_effort"] = _kimi_effort
+
+        # extra_body assembly
+        extra_body: Dict[str, Any] = {}
+
+        is_openrouter = params.get("is_openrouter", False)
+        is_nous = params.get("is_nous", False)
+        is_github_models = params.get("is_github_models", False)
+
+        provider_prefs = params.get("provider_preferences")
+        if provider_prefs and is_openrouter:
+            extra_body["provider"] = provider_prefs
+
+        # Kimi extra_body.thinking
+        if is_kimi:
+            _kimi_thinking_enabled = True
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _kimi_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _kimi_thinking_enabled else "disabled",
+            }
+
+        # Reasoning
+        if params.get("supports_reasoning", False):
+            if is_github_models:
+                gh_reasoning = params.get("github_reasoning_extra")
+                if gh_reasoning is not None:
+                    extra_body["reasoning"] = gh_reasoning
+            else:
+                if reasoning_config is not None:
+                    rc = dict(reasoning_config)
+                    if is_nous and rc.get("enabled") is False:
+                        pass  # omit for Nous when disabled
+                    else:
+                        extra_body["reasoning"] = rc
+                else:
+                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+
+        if is_nous:
+            extra_body["tags"] = ["product=hermes-agent"]
+
+        # Ollama num_ctx
+        ollama_ctx = params.get("ollama_num_ctx")
+        if ollama_ctx:
+            options = extra_body.get("options", {})
+            options["num_ctx"] = ollama_ctx
+            extra_body["options"] = options
+
+        # Ollama/custom think=false
+        if params.get("is_custom_provider", False):
+            if reasoning_config and isinstance(reasoning_config, dict):
+                _effort = (reasoning_config.get("effort") or "").strip().lower()
+                _enabled = reasoning_config.get("enabled", True)
+                if _effort == "none" or _enabled is False:
+                    extra_body["think"] = False
+
+        if is_qwen:
+            extra_body["vl_high_resolution_images"] = True
+
+        # Merge any pre-built extra_body additions
+        additions = params.get("extra_body_additions")
+        if additions:
+            extra_body.update(additions)
+
+        if extra_body:
+            api_kwargs["extra_body"] = extra_body
+
+        # Request overrides last (service_tier etc.)
+        overrides = params.get("request_overrides")
+        if overrides:
+            api_kwargs.update(overrides)
+
+        return api_kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize OpenAI ChatCompletion to NormalizedResponse.
+
+        For chat_completions, this is near-identity — the response is already
+        in OpenAI format.  extra_content on tool_calls (Gemini thought_signature)
+        is preserved via ToolCall.provider_data.  reasoning_details (OpenRouter
+        unified format) and reasoning_content (DeepSeek/Moonshot) are also
+        preserved for downstream replay.
+        """
+        choice = response.choices[0]
+        msg = choice.message
+        finish_reason = choice.finish_reason or "stop"
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = []
+            for tc in msg.tool_calls:
+                # Preserve provider-specific extras on the tool call.
+                # Gemini 3 thinking models attach extra_content with
+                # thought_signature — without replay on the next turn the API
+                # rejects the request with 400.
+                tc_provider_data: Dict[str, Any] = {}
+                extra = getattr(tc, "extra_content", None)
+                if extra is None and hasattr(tc, "model_extra"):
+                    extra = (tc.model_extra or {}).get("extra_content")
+                if extra is not None:
+                    if hasattr(extra, "model_dump"):
+                        try:
+                            extra = extra.model_dump()
+                        except Exception:
+                            pass
+                    tc_provider_data["extra_content"] = extra
+                tool_calls.append(ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                    provider_data=tc_provider_data or None,
+                ))
+
+        usage = None
+        if hasattr(response, "usage") and response.usage:
+            u = response.usage
+            usage = Usage(
+                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
+                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
+                total_tokens=getattr(u, "total_tokens", 0) or 0,
+            )
+
+        # Preserve reasoning fields separately.  DeepSeek/Moonshot use
+        # ``reasoning_content``; others use ``reasoning``.  Downstream code
+        # (_extract_reasoning, thinking-prefill retry) reads both distinctly,
+        # so keep them apart in provider_data rather than merging.
+        reasoning = getattr(msg, "reasoning", None)
+        reasoning_content = getattr(msg, "reasoning_content", None)
+
+        provider_data: Dict[str, Any] = {}
+        if reasoning_content:
+            provider_data["reasoning_content"] = reasoning_content
+        rd = getattr(msg, "reasoning_details", None)
+        if rd:
+            provider_data["reasoning_details"] = rd
+
+        return NormalizedResponse(
+            content=msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=reasoning,
+            usage=usage,
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check that response has valid choices."""
+        if response is None:
+            return False
+        if not hasattr(response, "choices") or response.choices is None:
+            return False
+        if not response.choices:
+            return False
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        details = getattr(usage, "prompt_tokens_details", None)
+        if details is None:
+            return None
+        cached = getattr(details, "cached_tokens", 0) or 0
+        written = getattr(details, "cache_write_tokens", 0) or 0
+        if cached or written:
+            return {"cached_tokens": cached, "creation_tokens": written}
+        return None
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("chat_completions", ChatCompletionsTransport)
@@ -0,0 +1,217 @@
+"""OpenAI Responses API (Codex) transport.
+
+Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle,
+streaming, or the _run_codex_stream() call path.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class ResponsesApiTransport(ProviderTransport):
+    """Transport for api_mode='codex_responses'.
+
+    Wraps the functions extracted into codex_responses_adapter.py (PR 1).
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "codex_responses"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI chat messages to Responses API input items."""
+        from agent.codex_responses_adapter import _chat_messages_to_responses_input
+        return _chat_messages_to_responses_input(messages)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Responses API function definitions."""
+        from agent.codex_responses_adapter import _responses_tools
+        return _responses_tools(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Responses API kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params:
+            instructions: str — system prompt (extracted from messages[0] if not given)
+            reasoning_config: dict | None — {effort, enabled}
+            session_id: str | None — used for prompt_cache_key + xAI conv header
+            max_tokens: int | None — max_output_tokens
+            request_overrides: dict | None — extra kwargs merged in
+            provider: str | None — provider name for backend-specific logic
+            base_url: str | None — endpoint URL
+            base_url_hostname: str | None — hostname for backend detection
+            is_github_responses: bool — Copilot/GitHub models backend
+            is_codex_backend: bool — chatgpt.com/backend-api/codex
+            is_xai_responses: bool — xAI/Grok backend
+            github_reasoning_extra: dict | None — Copilot reasoning params
+        """
+        from agent.codex_responses_adapter import (
+            _chat_messages_to_responses_input,
+            _responses_tools,
+        )
+
+        from run_agent import DEFAULT_AGENT_IDENTITY
+
+        instructions = params.get("instructions", "")
+        payload_messages = messages
+        if not instructions:
+            if messages and messages[0].get("role") == "system":
+                instructions = str(messages[0].get("content") or "").strip()
+                payload_messages = messages[1:]
+        if not instructions:
+            instructions = DEFAULT_AGENT_IDENTITY
+
+        is_github_responses = params.get("is_github_responses", False)
+        is_codex_backend = params.get("is_codex_backend", False)
+        is_xai_responses = params.get("is_xai_responses", False)
+
+        # Resolve reasoning effort
+        reasoning_effort = "medium"
+        reasoning_enabled = True
+        reasoning_config = params.get("reasoning_config")
+        if reasoning_config and isinstance(reasoning_config, dict):
+            if reasoning_config.get("enabled") is False:
+                reasoning_enabled = False
+            elif reasoning_config.get("effort"):
+                reasoning_effort = reasoning_config["effort"]
+
+        _effort_clamp = {"minimal": "low"}
+        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
+
+        kwargs = {
+            "model": model,
+            "instructions": instructions,
+            "input": _chat_messages_to_responses_input(payload_messages),
+            "tools": _responses_tools(tools),
+            "tool_choice": "auto",
+            "parallel_tool_calls": True,
+            "store": False,
+        }
+
+        session_id = params.get("session_id")
+        if not is_github_responses and session_id:
+            kwargs["prompt_cache_key"] = session_id
+
+        if reasoning_enabled and is_xai_responses:
+            kwargs["include"] = ["reasoning.encrypted_content"]
+        elif reasoning_enabled:
+            if is_github_responses:
+                github_reasoning = params.get("github_reasoning_extra")
+                if github_reasoning is not None:
+                    kwargs["reasoning"] = github_reasoning
+            else:
+                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+                kwargs["include"] = ["reasoning.encrypted_content"]
+        elif not is_github_responses and not is_xai_responses:
+            kwargs["include"] = []
+
+        request_overrides = params.get("request_overrides")
+        if request_overrides:
+            kwargs.update(request_overrides)
+
+        max_tokens = params.get("max_tokens")
+        if max_tokens is not None and not is_codex_backend:
+            kwargs["max_output_tokens"] = max_tokens
+
+        if is_xai_responses and session_id:
+            kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
+
+        return kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Codex Responses API response to NormalizedResponse."""
+        from agent.codex_responses_adapter import (
+            _normalize_codex_response,
+            _extract_responses_message_text,
+            _extract_responses_reasoning_text,
+        )
+
+        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
+        msg, finish_reason = _normalize_codex_response(response)
+
+        tool_calls = None
+        if msg and msg.tool_calls:
+            tool_calls = []
+            for tc in msg.tool_calls:
+                provider_data = {}
+                if hasattr(tc, "call_id") and tc.call_id:
+                    provider_data["call_id"] = tc.call_id
+                if hasattr(tc, "response_item_id") and tc.response_item_id:
+                    provider_data["response_item_id"] = tc.response_item_id
+                tool_calls.append(ToolCall(
+                    id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
+                    name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
+                    arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
+                    provider_data=provider_data or None,
+                ))
+
+        # Extract reasoning items for provider_data
+        provider_data = {}
+        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
+            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
+        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
+            provider_data["reasoning_details"] = msg.reasoning_details
+
+        return NormalizedResponse(
+            content=msg.content if msg else None,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason or "stop",
+            reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
+            usage=None,  # Codex usage is extracted separately in normalize_usage()
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Codex Responses API response has valid output structure.
+
+        Returns True only if response.output is a non-empty list.
+        Does NOT check output_text fallback — the caller handles that
+        with diagnostic logging for stream backfill recovery.
+        """
+        if response is None:
+            return False
+        output = getattr(response, "output", None)
+        if not isinstance(output, list) or not output:
+            return False
+        return True
+
+    def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
+        """Validate and sanitize Codex API kwargs before the call.
+
+        Normalizes input items, strips unsupported fields, validates structure.
+        """
+        from agent.codex_responses_adapter import _preflight_codex_api_kwargs
+        return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Codex response.status to OpenAI finish_reason.
+
+        Codex uses response.status ('completed', 'incomplete') +
+        response.incomplete_details.reason for granular mapping.
+        This method handles the simple status string; the caller
+        should check incomplete_details separately for 'max_output_tokens'.
+        """
+        _MAP = {
+            "completed": "stop",
+            "incomplete": "length",
+            "failed": "stop",
+            "cancelled": "stop",
+        }
+        return _MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("codex_responses", ResponsesApiTransport)
@@ -0,0 +1,100 @@
+"""Shared types for normalized provider responses.
+
+These dataclasses define the canonical shape that all provider adapters
+normalize responses to.  The shared surface is intentionally minimal —
+only fields that every downstream consumer reads are top-level.
+Protocol-specific state goes in ``provider_data`` dicts (response-level
+and per-tool-call) so that protocol-aware code paths can access it
+without polluting the shared type.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+
+@dataclass
+class ToolCall:
+    """A normalized tool call from any provider.
+
+    ``id`` is the protocol's canonical identifier — what gets used in
+    ``tool_call_id`` / ``tool_use_id`` when constructing tool result
+    messages.  May be ``None`` when the provider omits it; the agent
+    fills it via ``_deterministic_call_id()`` before storing in history.
+
+    ``provider_data`` carries per-tool-call protocol metadata that only
+    protocol-aware code reads:
+
+    * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
+    * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
+    * Others: ``None``
+    """
+
+    id: Optional[str]
+    name: str
+    arguments: str  # JSON string
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+
+@dataclass
+class Usage:
+    """Token usage from an API response."""
+
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    cached_tokens: int = 0
+
+
+@dataclass
+class NormalizedResponse:
+    """Normalized API response from any provider.
+
+    Shared fields are truly cross-provider — every caller can rely on
+    them without branching on api_mode.  Protocol-specific state goes in
+    ``provider_data`` so that only protocol-aware code paths read it.
+
+    Response-level ``provider_data`` examples:
+
+    * Anthropic: ``{"reasoning_details": [...]}``
+    * Codex: ``{"codex_reasoning_items": [...]}``
+    * Others: ``None``
+    """
+
+    content: Optional[str]
+    tool_calls: Optional[List[ToolCall]]
+    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
+    reasoning: Optional[str] = None
+    usage: Optional[Usage] = None
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+
+# ---------------------------------------------------------------------------
+# Factory helpers
+# ---------------------------------------------------------------------------
+
+def build_tool_call(
+    id: Optional[str],
+    name: str,
+    arguments: Any,
+    **provider_fields: Any,
+) -> ToolCall:
+    """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
+
+    Any extra keyword arguments are collected into ``provider_data``.
+    """
+    args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
+    pd = dict(provider_fields) if provider_fields else None
+    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
+
+
+def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
+    """Translate a provider-specific stop reason to the normalised set.
+
+    Falls back to ``"stop"`` for unknown or ``None`` reasons.
+    """
+    if reason is None:
+        return "stop"
+    return mapping.get(reason, "stop")
@@ -6,6 +6,7 @@ from decimal import Decimal
 from typing import Any, Dict, Literal, Optional

 from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
+from utils import base_url_host_matches

 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

@@ -393,7 +394,7 @@ def resolve_billing_route(

    if provider_name == "openai-codex":
        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
-    if provider_name == "openrouter" or "openrouter.ai" in base:
+    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
    if provider_name == "anthropic":
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
@@ -444,6 +444,7 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
            if not reasoning.get("has_any_reasoning", True):
                print(f"   🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
                discarded_no_reasoning += 1
+                completed_in_batch.append(prompt_index)
                continue
            
            # Get and normalize tool stats for consistent schema across all entries
@@ -1189,12 +1190,12 @@ def main(
    """
    # Handle list distributions
    if list_distributions:
-        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
-        
+        from toolset_distributions import print_distribution_info
+
        print("📊 Available Toolset Distributions")
        print("=" * 70)
-        
-        all_dists = get_all_dists()
+
+        all_dists = list_distributions()
        for dist_name in sorted(all_dists.keys()):
            print_distribution_info(dist_name)
        
@@ -770,10 +770,12 @@ code_execution:
 # Subagent Delegation
 # =============================================================================
 # The delegate_task tool spawns child agents with isolated context.
-# Supports single tasks and batch mode (up to 3 parallel).
+# Supports single tasks and batch mode (default 3 parallel, configurable).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
+  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
+  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -917,3 +919,39 @@ display:
 #   # Names and usernames are NOT affected (user-chosen, publicly visible).
 #   # Routing/delivery still uses the original values internally.
 #   redact_pii: false
+
+# =============================================================================
+# Shell-script hooks
+# =============================================================================
+# Register shell scripts as plugin-hook callbacks.  Each entry is executed as
+# a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
+# stdout the script may return JSON that either blocks the tool call or
+# injects context into the next LLM call.
+#
+# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
+#   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
+#   pre_api_request, post_api_request, on_session_start, on_session_end,
+#   on_session_finalize, on_session_reset, subagent_stop
+#
+# First-use consent: each (event, command) pair prompts once on a TTY, then
+# is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
+# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
+# hooks_auto_accept key below.
+#
+# See website/docs/user-guide/features/hooks.md for the full JSON wire
+# protocol and worked examples.
+#
+# hooks:
+#   pre_tool_call:
+#     - matcher: "terminal"
+#       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
+#       timeout: 10
+#   post_tool_call:
+#     - matcher: "write_file|patch"
+#       command: "~/.hermes/agent-hooks/auto-format.sh"
+#   pre_llm_call:
+#     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
+#   subagent_stop:
+#     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
+#
+# hooks_auto_accept: false
@@ -19,12 +19,14 @@ import shutil
 import sys
 import json
 import re
+import concurrent.futures
 import base64
 import atexit
 import tempfile
 import time
 import uuid
 import textwrap
+from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
@@ -65,6 +67,7 @@ from agent.usage_pricing import (
    format_duration_compact,
    format_token_count_compact,
 )
+from agent.account_usage import fetch_account_usage, render_account_usage_lines
 from hermes_cli.banner import _format_context_length, format_banner_version_label

 _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
@@ -74,6 +77,7 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
 # User-managed env files should override stale shell exports on restart.
 from hermes_constants import get_hermes_home, display_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv
+from utils import base_url_host_matches

 _hermes_home = get_hermes_home()
 _project_env = Path(__file__).parent / '.env'
@@ -367,7 +371,6 @@ def load_cli_config() -> Dict[str, Any]:
        },
        "delegation": {
            "max_iterations": 45,  # Max tool-calling turns per child agent
-            "default_toolsets": ["terminal", "file", "web"],  # Default toolsets for subagents
            "model": "",       # Subagent model override (empty = inherit parent model)
            "provider": "",    # Subagent provider override (empty = inherit parent provider)
            "base_url": "",    # Direct OpenAI-compatible endpoint for subagents
@@ -528,7 +531,6 @@ def load_cli_config() -> Dict[str, Any]:
            if _file_has_terminal_config or env_var not in os.environ:
                val = terminal_config[config_key]
                if isinstance(val, list):
-                    import json
                    os.environ[env_var] = json.dumps(val)
                else:
                    os.environ[env_var] = str(val)
@@ -1143,8 +1145,6 @@ def _rich_text_from_ansi(text: str) -> _RichText:

 def _strip_markdown_syntax(text: str) -> str:
    """Best-effort markdown marker removal for plain-text display."""
-    import re
-
    plain = _rich_text_from_ansi(text or "").plain
    plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE)
    plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE)
@@ -1154,11 +1154,11 @@ def _strip_markdown_syntax(text: str) -> str:
    plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain)
    plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain)
    plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain)
-    plain = re.sub(r"___([^_]+)___", r"\1", plain)
+    plain = re.sub(r"(?<!\w)___([^_]+)___(?!\w)", r"\1", plain)
    plain = re.sub(r"\*\*([^*]+)\*\*", r"\1", plain)
-    plain = re.sub(r"__([^_]+)__", r"\1", plain)
+    plain = re.sub(r"(?<!\w)__([^_]+)__(?!\w)", r"\1", plain)
    plain = re.sub(r"\*([^*]+)\*", r"\1", plain)
-    plain = re.sub(r"_([^_]+)_", r"\1", plain)
+    plain = re.sub(r"(?<!\w)_([^_]+)_(?!\w)", r"\1", plain)
    plain = re.sub(r"~~([^~]+)~~", r"\1", plain)
    plain = re.sub(r"\n{3,}", "\n\n", plain)
    return plain.strip("\n")
@@ -1271,10 +1271,21 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:

    if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")):
        token = token[1:-1].strip()
+    token = token.replace('\\ ', ' ')
    if not token:
        return None

-    expanded = os.path.expandvars(os.path.expanduser(token))
+    expanded = token
+    if token.startswith("file://"):
+        try:
+            parsed = urlparse(token)
+            if parsed.scheme == "file":
+                expanded = unquote(parsed.path or "")
+                if parsed.netloc and os.name == "nt":
+                    expanded = f"//{parsed.netloc}{expanded}"
+        except Exception:
+            expanded = token
+    expanded = os.path.expandvars(os.path.expanduser(expanded))
    if os.name != "nt":
        normalized = expanded.replace("\\", "/")
        if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha():
@@ -1361,6 +1372,7 @@ def _detect_file_drop(user_input: str) -> "dict | None":
        or stripped.startswith("~")
        or stripped.startswith("./")
        or stripped.startswith("../")
+        or stripped.startswith("file://")
        or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha())
        or stripped.startswith('"/')
        or stripped.startswith('"~')
@@ -1371,8 +1383,25 @@ def _detect_file_drop(user_input: str) -> "dict | None":
    if not starts_like_path:
        return None

+    direct_path = _resolve_attachment_path(stripped)
+    if direct_path is not None:
+        return {
+            "path": direct_path,
+            "is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS,
+            "remainder": "",
+        }
+
    first_token, remainder = _split_path_input(stripped)
    drop_path = _resolve_attachment_path(first_token)
+    if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}:
+        space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "]
+        for pos in reversed(space_positions):
+            candidate = stripped[:pos].rstrip()
+            resolved = _resolve_attachment_path(candidate)
+            if resolved is not None:
+                drop_path = resolved
+                remainder = stripped[pos + 1 :].strip()
+                break
    if drop_path is None:
        return None

@@ -1836,7 +1865,7 @@ class HermesCLI:
        # Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY,
        # custom endpoint → prefer OPENAI_API_KEY (issue #560).
        # Note: _ensure_runtime_credentials() re-resolves this before first use.
-        if self.base_url and "openrouter.ai" in self.base_url:
+        if self.base_url and base_url_host_matches(self.base_url, "openrouter.ai"):
            self.api_key = api_key or os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY")
        else:
            self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
@@ -2001,8 +2030,7 @@ class HermesCLI:

    def _invalidate(self, min_interval: float = 0.25) -> None:
        """Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
-        import time as _time
-        now = _time.monotonic()
+        now = time.monotonic()
        if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval:
            self._last_invalidate = now
            self._app.invalidate()
@@ -2220,8 +2248,7 @@ class HermesCLI:
            return ""
        t0 = getattr(self, "_tool_start_time", 0) or 0
        if t0 > 0:
-            import time as _time
-            elapsed = _time.monotonic() - t0
+            elapsed = time.monotonic() - t0
            if elapsed >= 60:
                _m, _s = int(elapsed // 60), int(elapsed % 60)
                elapsed_str = f"{_m}m {_s}s"
@@ -2476,9 +2503,6 @@ class HermesCLI:

    def _emit_reasoning_preview(self, reasoning_text: str) -> None:
        """Render a buffered reasoning preview as a single [thinking] block."""
-        import re
-        import textwrap
-
        preview_text = reasoning_text.strip()
        if not preview_text:
            return
@@ -2597,9 +2621,7 @@ class HermesCLI:
        """Expand [Pasted text #N -> file] placeholders into file contents."""
        if not isinstance(text, str) or "[Pasted text #" not in text:
            return text or ""
-        import re as _re
-
-        paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
+        paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')

        def _expand_ref(match):
            path = Path(match.group(1))
@@ -2922,9 +2944,7 @@ class HermesCLI:

    def _command_spinner_frame(self) -> str:
        """Return the current spinner frame for slow slash commands."""
-        import time as _time
-
-        frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
+        frame_idx = int(time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
        return _COMMAND_SPINNER_FRAMES[frame_idx]

    @contextmanager
@@ -3935,7 +3955,6 @@ class HermesCLI:
        image later with ``vision_analyze`` if needed.
        """
        import asyncio as _asyncio
-        import json as _json
        from tools.vision_tools import vision_analyze_tool

        analysis_prompt = (
@@ -3955,7 +3974,7 @@ class HermesCLI:
                result_json = _asyncio.run(
                    vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt)
                )
-                result = _json.loads(result_json)
+                result = json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
                    enriched_parts.append(
@@ -4210,8 +4229,37 @@ class HermesCLI:
        """
        import shlex
        from argparse import Namespace
+        from contextlib import redirect_stdout
+        from io import StringIO
        from hermes_cli.tools_config import tools_disable_enable_command

+        def _run_capture(ns: Namespace) -> None:
+            """Run tools_disable_enable_command, routing its ANSI-colored
+            print() output through _cprint when inside the interactive TUI
+            so escapes aren't mangled by patch_stdout's StdoutProxy into
+            garbled '?[32m...?[0m' text.
+
+            Outside the TUI (standalone mode, tests), call straight through
+            so real stdout / pytest capture works as expected.
+            """
+            # Standalone/tests, run as usual
+            if getattr(self, "_app", None) is None:
+                tools_disable_enable_command(ns)
+                return
+
+            # Buffer reports isatty()=True so color() in hermes_cli/colors.py
+            # still emits ANSI escapes. StringIO.isatty() is False, which
+            # would otherwise strip all colors before we re-render them.
+            class _TTYBuf(StringIO):
+                def isatty(self) -> bool:
+                    return True
+
+            buf = _TTYBuf()
+            with redirect_stdout(buf):
+                tools_disable_enable_command(ns)
+            for line in buf.getvalue().splitlines():
+                _cprint(line)
+
        try:
            parts = shlex.split(cmd)
        except ValueError:
@@ -4223,8 +4271,7 @@ class HermesCLI:
            return

        if subcommand == "list":
-            tools_disable_enable_command(
-                Namespace(tools_action="list", platform="cli"))
+            _run_capture(Namespace(tools_action="list", platform="cli"))
            return

        names = parts[2:]
@@ -4241,8 +4288,7 @@ class HermesCLI:
        label = ", ".join(names)
        _cprint(f"{_ACCENT}{verb} {label}...{_RST}")

-        tools_disable_enable_command(
-            Namespace(tools_action=subcommand, names=names, platform="cli"))
+        _run_capture(Namespace(tools_action=subcommand, names=names, platform="cli"))

        # Reset session so the new tool config is picked up from a clean state
        from hermes_cli.tools_config import _get_platform_tools
@@ -4969,7 +5015,7 @@ class HermesCLI:
                pass

        cache_enabled = (
-            ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
+            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
            or result.api_mode == "anthropic_messages"
        )
        if cache_enabled:
@@ -5197,7 +5243,7 @@ class HermesCLI:

        # Cache notice
        cache_enabled = (
-            ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
+            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
            or result.api_mode == "anthropic_messages"
        )
        if cache_enabled:
@@ -5228,6 +5274,30 @@ class HermesCLI:
        except Exception:
            return False

+    def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool:
+        """Return True when /steer should be dispatched immediately while the agent is running.
+
+        /steer MUST bypass the normal _pending_input → process_loop path when
+        the agent is active, because process_loop is blocked inside
+        self.chat() for the duration of the run.  By the time the queued
+        command is pulled from _pending_input, _agent_running has already
+        flipped back to False, and process_command() takes the idle
+        fallback — delivering the steer as a next-turn message instead of
+        injecting it mid-run.  Dispatching inline on the UI thread calls
+        agent.steer() directly, which is thread-safe (uses _pending_steer_lock).
+        """
+        if not text or has_images or not _looks_like_slash_command(text):
+            return False
+        if not getattr(self, "_agent_running", False):
+            return False
+        try:
+            from hermes_cli.commands import resolve_command
+            base = text.split(None, 1)[0].lower().lstrip('/')
+            cmd = resolve_command(base)
+            return bool(cmd and cmd.name == "steer")
+        except Exception:
+            return False
+
    def _show_model_and_providers(self):
        """Show current model + provider and list all authenticated providers.

@@ -6230,8 +6300,7 @@ class HermesCLI:
                # with the output (fixes #2718).
                if self._app:
                    self._app.invalidate()
-                    import time as _tmod
-                    _tmod.sleep(0.05)  # brief pause for refresh
+                    time.sleep(0.05)  # brief pause for refresh
                print()
                ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]")
                _cprint(f"  ✅ Background task #{task_num} complete")
@@ -6271,8 +6340,7 @@ class HermesCLI:
                # Same TUI refresh pattern as success path (#2718)
                if self._app:
                    self._app.invalidate()
-                    import time as _tmod
-                    _tmod.sleep(0.05)
+                    time.sleep(0.05)
                print()
                _cprint(f"  ❌ Background task #{task_num} failed: {e}")
            finally:
@@ -6492,7 +6560,6 @@ class HermesCLI:
                _launched = self._try_launch_chrome_debug(_port, _plat.system())
                if _launched:
                    # Wait for the port to come up
-                    import time as _time
                    for _wait in range(10):
                        try:
                            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -6502,7 +6569,7 @@ class HermesCLI:
                            _already_open = True
                            break
                        except (OSError, socket.timeout):
-                            _time.sleep(0.5)
+                            time.sleep(0.5)
                    if _already_open:
                        print(f"   ✓ Chrome launched and listening on port {_port}")
                    else:
@@ -6982,6 +7049,27 @@ class HermesCLI:
        if cost_result.status == "unknown":
            print(f"  Note:             Pricing unknown for {agent.model}")

+        # Account limits -- fetched off-thread with a hard timeout so slow
+        # provider APIs don't hang the prompt.
+        provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
+        base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
+        api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
+        account_snapshot = None
+        if provider:
+            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
+                try:
+                    account_snapshot = _pool.submit(
+                        fetch_account_usage, provider,
+                        base_url=base_url, api_key=api_key,
+                    ).result(timeout=10.0)
+                except (concurrent.futures.TimeoutError, Exception):
+                    account_snapshot = None
+        account_lines = [f"  {line}" for line in render_account_usage_lines(account_snapshot)]
+        if account_lines:
+            print()
+            for line in account_lines:
+                print(line)
+
        if self.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@@ -7032,7 +7120,6 @@ class HermesCLI:
        known state.  When a change is detected, triggers _reload_mcp() and
        informs the user so they know the tool list has been refreshed.
        """
-        import time
        import yaml as _yaml

        CONFIG_WATCH_INTERVAL = 5.0  # seconds between config.yaml stat() calls
@@ -7124,7 +7211,6 @@ class HermesCLI:

            # Refresh the agent's tool list so the model can call new tools
            if self.agent is not None:
-                from model_tools import get_tool_definitions
                self.agent.tools = get_tool_definitions(
                    enabled_toolsets=self.agent.enabled_toolsets
                    if hasattr(self.agent, "enabled_toolsets") else None,
@@ -7207,7 +7293,6 @@ class HermesCLI:
        full history of tool calls (not just the current one in the spinner).
        """
        if event_type == "tool.completed":
-            import time as _time
            self._tool_start_time = 0.0
            # Print stacked scrollback line for "all" / "new" modes
            if function_name and self.tool_progress_mode in ("all", "new"):
@@ -7236,7 +7321,6 @@ class HermesCLI:
        if event_type != "tool.started":
            return
        if function_name and not function_name.startswith("_"):
-            import time as _time
            from agent.display import get_tool_emoji
            emoji = get_tool_emoji(function_name)
            label = preview or function_name
@@ -7245,7 +7329,7 @@ class HermesCLI:
            if _pl > 0 and len(label) > _pl:
                label = label[:_pl - 3] + "..."
            self._spinner_text = f"{emoji} {label}"
-            self._tool_start_time = _time.monotonic()
+            self._tool_start_time = time.monotonic()
            # Store args for stacked scrollback line on completion
            self._pending_tool_info.setdefault(function_name, []).append(
                function_args if function_args is not None else {}
@@ -7362,11 +7446,12 @@ class HermesCLI:
            self._voice_stop_and_transcribe()

        # Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict)
-        try:
-            from tools.voice_mode import play_beep
-            play_beep(frequency=880, count=1)
-        except Exception:
-            pass
+        if self._voice_beeps_enabled():
+            try:
+                from tools.voice_mode import play_beep
+                play_beep(frequency=880, count=1)
+            except Exception:
+                pass

        try:
            self._voice_recorder.start(on_silence_stop=_on_silence)
@@ -7414,11 +7499,12 @@ class HermesCLI:
            wav_path = self._voice_recorder.stop()

            # Audio cue: double beep after stream stopped (no CoreAudio conflict)
-            try:
-                from tools.voice_mode import play_beep
-                play_beep(frequency=660, count=2)
-            except Exception:
-                pass
+            if self._voice_beeps_enabled():
+                try:
+                    from tools.voice_mode import play_beep
+                    play_beep(frequency=660, count=2)
+                except Exception:
+                    pass

            if wav_path is None:
                _cprint(f"{_DIM}No speech detected.{_RST}")
@@ -7501,7 +7587,6 @@ class HermesCLI:
        try:
            from tools.tts_tool import text_to_speech_tool
            from tools.voice_mode import play_audio_file
-            import re

            # Strip markdown and non-speech content for cleaner TTS
            tts_text = text[:4000] if len(text) > 4000 else text
@@ -7569,6 +7654,17 @@ class HermesCLI:
            _cprint(f"Unknown voice subcommand: {subcommand}")
            _cprint("Usage: /voice [on|off|tts|status]")

+    def _voice_beeps_enabled(self) -> bool:
+        """Return whether CLI voice mode should play record start/stop beeps."""
+        try:
+            from hermes_cli.config import load_config
+            voice_cfg = load_config().get("voice", {})
+            if isinstance(voice_cfg, dict):
+                return bool(voice_cfg.get("beep_enabled", True))
+        except Exception:
+            pass
+        return True
+
    def _enable_voice_mode(self):
        """Enable voice mode after checking requirements."""
        if self._voice_mode:
@@ -7878,7 +7974,9 @@ class HermesCLI:
            return

        selected = state.get("selected", 0)
-        choices = state.get("choices") or []
+        choices = state.get("choices")
+        if not isinstance(choices, list):
+            choices = []
        if not (0 <= selected < len(choices)):
            return

@@ -7970,8 +8068,18 @@ class HermesCLI:
        choice_wrapped: list[tuple[int, str]] = []
        for i, choice in enumerate(choices):
            label = choice_labels.get(choice, choice)
-            prefix = '❯ ' if i == selected else '  '
-            for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent="  "):
+            # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
+            if i < 9:
+                num_prefix = str(i + 1)
+            elif i == 9:
+                num_prefix = '0'
+            else:
+                num_prefix = ' '  # No number for items beyond 10th
+            if i == selected:
+                prefix = f'❯ {num_prefix}. '
+            else:
+                prefix = f'  {num_prefix}. '
+            for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent="    "):
                choice_wrapped.append((i, wrapped))

        # Budget vertical space so HSplit never clips the command or choices.
@@ -8262,6 +8370,17 @@ class HermesCLI:

            def run_agent():
                nonlocal result
+                # Set callbacks inside the agent thread so thread-local storage
+                # in terminal_tool is populated for this thread.  The main thread
+                # registration (run() line ~9046) is invisible here because
+                # _callback_tls is threading.local().  Matches the pattern used
+                # by acp_adapter/server.py for ACP sessions.
+                set_sudo_password_callback(self._sudo_password_callback)
+                set_approval_callback(self._approval_callback)
+                try:
+                    set_secret_capture_callback(self._secret_capture_callback)
+                except Exception:
+                    pass
                agent_message = _voice_prefix + message if _voice_prefix else message
                # Prepend pending model switch note so the model knows about the switch
                _msn = getattr(self, '_pending_model_switch_note', None)
@@ -8287,6 +8406,15 @@ class HermesCLI:
                        "failed": True,
                        "error": _summary,
                    }
+                finally:
+                    # Clear thread-local callbacks so a reused thread doesn't
+                    # hold stale references to a disposed CLI instance.
+                    try:
+                        set_sudo_password_callback(None)
+                        set_approval_callback(None)
+                        set_secret_capture_callback(None)
+                    except Exception:
+                        pass

            # Start agent in background thread (daemon so it cannot keep the
            # process alive when the user closes the terminal tab — SIGHUP
@@ -8324,8 +8452,7 @@ class HermesCLI:
                            try:
                                _dbg = _hermes_home / "interrupt_debug.log"
                                with open(_dbg, "a") as _f:
-                                    import time as _t
-                                    _f.write(f"{_t.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, "
+                                    _f.write(f"{time.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, "
                                             f"children={len(self.agent._active_children)}, "
                                             f"parent._interrupt={self.agent._interrupt_requested}\n")
                                    for _ci, _ch in enumerate(self.agent._active_children):
@@ -8401,9 +8528,8 @@ class HermesCLI:
            # buffer so tool/status lines render ABOVE our response box.
            # The flush pushes data into the renderer queue; the short
            # sleep lets the renderer actually paint it before we draw.
-            import time as _time
            sys.stdout.flush()
-            _time.sleep(0.15)
+            time.sleep(0.15)

            # Update history with full conversation
            self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history
@@ -9040,6 +9166,17 @@ class HermesCLI:
                    event.app.current_buffer.reset(append_to_history=True)
                    return

+                # Handle /steer while the agent is running immediately on the
+                # UI thread.  Queuing through _pending_input would deadlock the
+                # steer until after the agent loop finishes (process_loop is
+                # blocked inside self.chat()), which turns /steer into a
+                # post-run next-turn message — defeating mid-run injection.
+                # agent.steer() is thread-safe (holds _pending_steer_lock).
+                if self._should_handle_steer_command_inline(text, has_images=has_images):
+                    self.process_command(text)
+                    event.app.current_buffer.reset(append_to_history=True)
+                    return
+
                # Snapshot and clear attached images
                images = list(self._attached_images)
                self._attached_images.clear()
@@ -9058,8 +9195,7 @@ class HermesCLI:
                        try:
                            _dbg = _hermes_home / "interrupt_debug.log"
                            with open(_dbg, "a") as _f:
-                                import time as _t
-                                _f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, "
+                                _f.write(f"{time.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, "
                                         f"agent_running={self._agent_running}\n")
                        except Exception:
                            pass
@@ -9138,6 +9274,29 @@ class HermesCLI:
                self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1)
                event.app.invalidate()

+        # Number keys for quick clarify selection (1-9, 0 for 10th item)
+        def _make_clarify_number_handler(idx):
+            def handler(event):
+                if self._clarify_state and not self._clarify_freetext:
+                    choices = self._clarify_state.get("choices") or []
+                    # Map index to choice (treating "Other" as the last option)
+                    if idx < len(choices):
+                        # Select a numbered choice
+                        self._clarify_state["response_queue"].put(choices[idx])
+                        self._clarify_state = None
+                        self._clarify_freetext = False
+                        event.app.invalidate()
+                    elif idx == len(choices):
+                        # Select "Other" option
+                        self._clarify_freetext = True
+                        event.app.invalidate()
+            return handler
+
+        for _num in range(10):
+            # 1-9 select items 0-8, 0 selects item 9 (10thitem)
+            _idx = 9 if _num == 0 else _num - 1
+            kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx))
+
        # --- Dangerous command approval: arrow-key navigation ---

        @kb.add('up', filter=Condition(lambda: bool(self._approval_state)))
@@ -9179,6 +9338,20 @@ class HermesCLI:
            event.app.current_buffer.reset()
            event.app.invalidate()

+        # Number keys for quick approval selection (1-9, 0 for 10th item)
+        def _make_approval_number_handler(idx):
+            def handler(event):
+                if self._approval_state and idx < len(self._approval_state["choices"]):
+                    self._approval_state["selected"] = idx
+                    self._handle_approval_selection()
+                    event.app.invalidate()
+            return handler
+
+        for _num in range(10):
+            # 1-9 select items 0-8, 0 selects item 9 (10th item)
+            _idx = 9 if _num == 0 else _num - 1
+            kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx))
+
        # --- History navigation: up/down browse history in normal input mode ---
        # The TextArea is multiline, so by default up/down only move the cursor.
        # Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@@ -9207,8 +9380,7 @@ class HermesCLI:
            2. Interrupt the running agent (first press)
            3. Force exit (second press within 2s, or when idle)
            """
-            import time as _time
-            now = _time.time()
+            now = time.time()

            # Cancel active voice recording.
            # Run cancel() in a background thread to prevent blocking the
@@ -9316,12 +9488,11 @@ class HermesCLI:
        @kb.add('c-z')
        def handle_ctrl_z(event):
            """Handle Ctrl+Z - suspend process to background (Unix only)."""
-            import sys
            if sys.platform == 'win32':
                _cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}")
                event.app.invalidate()
                return
-            import os, signal as _sig
+            import signal as _sig
            from prompt_toolkit.application import run_in_terminal
            from hermes_cli.skin_engine import get_active_skin
            agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent")
@@ -9635,31 +9806,29 @@ class HermesCLI:
        # extra instructions (sudo countdown, approval navigation, clarify).
        # The agent-running interrupt hint is now an inline placeholder above.
        def get_hint_text():
-            import time as _time
-
            if cli_ref._sudo_state:
-                remaining = max(0, int(cli_ref._sudo_deadline - _time.monotonic()))
+                remaining = max(0, int(cli_ref._sudo_deadline - time.monotonic()))
                return [
                    ('class:hint', '  password hidden · Enter to skip'),
                    ('class:clarify-countdown', f'  ({remaining}s)'),
                ]

            if cli_ref._secret_state:
-                remaining = max(0, int(cli_ref._secret_deadline - _time.monotonic()))
+                remaining = max(0, int(cli_ref._secret_deadline - time.monotonic()))
                return [
                    ('class:hint', '  secret hidden · Enter to skip'),
                    ('class:clarify-countdown', f'  ({remaining}s)'),
                ]

            if cli_ref._approval_state:
-                remaining = max(0, int(cli_ref._approval_deadline - _time.monotonic()))
+                remaining = max(0, int(cli_ref._approval_deadline - time.monotonic()))
                return [
                    ('class:hint', '  ↑/↓ to select, Enter to confirm'),
                    ('class:clarify-countdown', f'  ({remaining}s)'),
                ]

            if cli_ref._clarify_state:
-                remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic()))
+                remaining = max(0, int(cli_ref._clarify_deadline - time.monotonic()))
                countdown = f'  ({remaining}s)' if cli_ref._clarify_deadline else ''
                if cli_ref._clarify_freetext:
                    return [
@@ -9751,14 +9920,32 @@ class HermesCLI:
            selected = state.get("selected", 0)
            preview_lines = _wrap_panel_text(question, 60)
            for i, choice in enumerate(choices):
-                prefix = "❯ " if i == selected and not cli_ref._clarify_freetext else "  "
-                preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent="  "))
+                # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
+                if i < 9:
+                    num_prefix = str(i + 1)
+                elif i == 9:
+                    num_prefix = '0'
+                else:
+                    num_prefix = ' '
+                if i == selected and not cli_ref._clarify_freetext:
+                    prefix = f"❯ {num_prefix}. "
+                else:
+                    prefix = f"  {num_prefix}. "
+                preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent="    "))
+            # "Other" option in preview
+            other_num = len(choices) + 1
+            if other_num < 10:
+                other_num_prefix = str(other_num)
+            elif other_num == 10:
+                other_num_prefix = '0'
+            else:
+                other_num_prefix = ' '
            other_label = (
-                "❯ Other (type below)" if cli_ref._clarify_freetext
-                else "❯ Other (type your answer)" if selected == len(choices)
-                else "  Other (type your answer)"
+                f"❯ {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext
+                else f"❯ {other_num_prefix}. Other (type your answer)" if selected == len(choices)
+                else f"  {other_num_prefix}. Other (type your answer)"
            )
-            preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent="  "))
+            preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent="    "))
            box_width = _panel_box_width("Hermes needs your input", preview_lines)
            inner_text_width = max(8, box_width - 2)

@@ -9766,18 +9953,35 @@ class HermesCLI:
            choice_wrapped: list[tuple[int, str]] = []
            if choices:
                for i, choice in enumerate(choices):
-                    prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else '  '
-                    for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent="  "):
+                    # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
+                    if i < 9:
+                        num_prefix = str(i + 1)
+                    elif i == 9:
+                        num_prefix = '0'
+                    else:
+                        num_prefix = ' '
+                    if i == selected and not cli_ref._clarify_freetext:
+                        prefix = f'❯ {num_prefix}. '
+                    else:
+                        prefix = f'  {num_prefix}. '
+                    for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent="    "):
                        choice_wrapped.append((i, wrapped))
                # Trailing Other row(s)
                other_idx = len(choices)
-                if selected == other_idx and not cli_ref._clarify_freetext:
-                    other_label_mand = '❯ Other (type your answer)'
-                elif cli_ref._clarify_freetext:
-                    other_label_mand = '❯ Other (type below)'
+                other_num = other_idx + 1
+                if other_num < 10:
+                    other_num_prefix = str(other_num)
+                elif other_num == 10:
+                    other_num_prefix = '0'
                else:
-                    other_label_mand = '  Other (type your answer)'
-                other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent="  ")
+                    other_num_prefix = ' '
+                if selected == other_idx and not cli_ref._clarify_freetext:
+                    other_label_mand = f'❯ {other_num_prefix}. Other (type your answer)'
+                elif cli_ref._clarify_freetext:
+                    other_label_mand = f'❯ {other_num_prefix}. Other (type below)'
+                else:
+                    other_label_mand = f'  {other_num_prefix}. Other (type your answer)'
+                other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent="    ")
            elif cli_ref._clarify_freetext:
                # Freetext-only mode: the guidance line takes the place of choices.
                other_wrapped = _wrap_panel_text(
@@ -9842,6 +10046,15 @@ class HermesCLI:

                # "Other" option (trailing row(s), only shown when choices exist)
                other_idx = len(choices)
+                # Calculate number prefix for "Other" option
+                other_num = other_idx + 1
+                if other_num < 10:
+                    other_num_prefix = str(other_num)
+                elif other_num == 10:
+                    other_num_prefix = '0'
+                else:
+                    other_num_prefix = ' '
+                
                if selected == other_idx and not cli_ref._clarify_freetext:
                    other_style = 'class:clarify-selected'
                elif cli_ref._clarify_freetext:
@@ -9949,7 +10162,8 @@ class HermesCLI:
            if stage == "provider":
                title = "⚙ Model Picker — Select Provider"
                choices = []
-                for p in state.get("providers") or []:
+                _providers = state.get("providers")
+                for p in _providers if isinstance(_providers, list) else []:
                    count = p.get("total_models", len(p.get("models", [])))
                    label = f"{p['name']} ({count} model{'s' if count != 1 else ''})"
                    if p.get("is_current"):
@@ -10206,22 +10420,20 @@ class HermesCLI:
        app._on_resize = _resize_clear_ghosts

        def spinner_loop():
-            import time as _time
-
            last_idle_refresh = 0.0
            while not self._should_exit:
                if not self._app:
-                    _time.sleep(0.1)
+                    time.sleep(0.1)
                    continue
                if self._command_running:
                    self._invalidate(min_interval=0.1)
-                    _time.sleep(0.1)
+                    time.sleep(0.1)
                else:
-                    now = _time.monotonic()
+                    now = time.monotonic()
                    if now - last_idle_refresh >= 1.0:
                        last_idle_refresh = now
                        self._invalidate(min_interval=1.0)
-                    _time.sleep(0.2)
+                    time.sleep(0.2)

        spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
        spinner_thread.start()
@@ -10290,8 +10502,7 @@ class HermesCLI:
                        continue
                    
                    # Expand paste references back to full content
-                    import re as _re
-                    _paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
+                    _paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
                    paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else []
                    if paste_refs:
                        user_input = self._expand_paste_references(user_input)
@@ -10383,13 +10594,12 @@ class HermesCLI:
            try:
                if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
                    self.agent.interrupt(f"received signal {signum}")
-                    import time as _t
                    try:
                        _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
                    except (TypeError, ValueError):
                        _grace = 1.5
                    if _grace > 0:
-                        _t.sleep(_grace)
+                        time.sleep(_grace)
            except Exception:
                pass  # never block signal handling
            raise KeyboardInterrupt()
@@ -10422,8 +10632,7 @@ class HermesCLI:
        # uv-managed Python, fd 0 can be invalid or unregisterable with the
        # asyncio selector, causing "KeyError: '0 is not registered'" (#6393).
        try:
-            import os as _os
-            _os.fstat(0)
+            os.fstat(0)
        except OSError:
            print(
                "Error: stdin (fd 0) is not available.\n"
@@ -10716,13 +10925,12 @@ def main(
            _agent = getattr(cli, "agent", None)
            if _agent is not None:
                _agent.interrupt(f"received signal {signum}")
-                import time as _t
                try:
                    _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
                except (TypeError, ValueError):
                    _grace = 1.5
                if _grace > 0:
-                    _t.sleep(_grace)
+                    time.sleep(_grace)
        except Exception:
            pass  # never block signal handling
        raise KeyboardInterrupt()
@@ -9,6 +9,7 @@ import copy
 import json
 import logging
 import tempfile
+import threading
 import os
 import re
 import uuid
@@ -34,6 +35,11 @@ except ImportError:
 HERMES_DIR = get_hermes_home().resolve()
 CRON_DIR = HERMES_DIR / "cron"
 JOBS_FILE = CRON_DIR / "jobs.json"
+
+# In-process lock protecting load_jobs→modify→save_jobs cycles.
+# Required when tick() runs jobs in parallel threads — without this,
+# concurrent mark_job_run / advance_next_run calls can clobber each other.
+_jobs_file_lock = threading.Lock()
 OUTPUT_DIR = CRON_DIR / "output"
 ONESHOT_GRACE_SECONDS = 120

@@ -594,43 +600,44 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
    ``delivery_error`` is tracked separately from the agent error — a job
    can succeed (agent produced output) but fail delivery (platform down).
    """
-    jobs = load_jobs()
-    for i, job in enumerate(jobs):
-        if job["id"] == job_id:
-            now = _hermes_now().isoformat()
-            job["last_run_at"] = now
-            job["last_status"] = "ok" if success else "error"
-            job["last_error"] = error if not success else None
-            # Track delivery failures separately — cleared on successful delivery
-            job["last_delivery_error"] = delivery_error
-            
-            # Increment completed count
-            if job.get("repeat"):
-                job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
+    with _jobs_file_lock:
+        jobs = load_jobs()
+        for i, job in enumerate(jobs):
+            if job["id"] == job_id:
+                now = _hermes_now().isoformat()
+                job["last_run_at"] = now
+                job["last_status"] = "ok" if success else "error"
+                job["last_error"] = error if not success else None
+                # Track delivery failures separately — cleared on successful delivery
+                job["last_delivery_error"] = delivery_error
                
-                # Check if we've hit the repeat limit
-                times = job["repeat"].get("times")
-                completed = job["repeat"]["completed"]
-                if times is not None and times > 0 and completed >= times:
-                    # Remove the job (limit reached)
-                    jobs.pop(i)
-                    save_jobs(jobs)
-                    return
-            
-            # Compute next run
-            job["next_run_at"] = compute_next_run(job["schedule"], now)
+                # Increment completed count
+                if job.get("repeat"):
+                    job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
+                    
+                    # Check if we've hit the repeat limit
+                    times = job["repeat"].get("times")
+                    completed = job["repeat"]["completed"]
+                    if times is not None and times > 0 and completed >= times:
+                        # Remove the job (limit reached)
+                        jobs.pop(i)
+                        save_jobs(jobs)
+                        return
+                
+                # Compute next run
+                job["next_run_at"] = compute_next_run(job["schedule"], now)

-            # If no next run (one-shot completed), disable
-            if job["next_run_at"] is None:
-                job["enabled"] = False
-                job["state"] = "completed"
-            elif job.get("state") != "paused":
-                job["state"] = "scheduled"
+                # If no next run (one-shot completed), disable
+                if job["next_run_at"] is None:
+                    job["enabled"] = False
+                    job["state"] = "completed"
+                elif job.get("state") != "paused":
+                    job["state"] = "scheduled"

-            save_jobs(jobs)
-            return
+                save_jobs(jobs)
+                return

-    logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
+        logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)


 def advance_next_run(job_id: str) -> bool:
@@ -645,20 +652,21 @@ def advance_next_run(job_id: str) -> bool:

    Returns True if next_run_at was advanced, False otherwise.
    """
-    jobs = load_jobs()
-    for job in jobs:
-        if job["id"] == job_id:
-            kind = job.get("schedule", {}).get("kind")
-            if kind not in ("cron", "interval"):
+    with _jobs_file_lock:
+        jobs = load_jobs()
+        for job in jobs:
+            if job["id"] == job_id:
+                kind = job.get("schedule", {}).get("kind")
+                if kind not in ("cron", "interval"):
+                    return False
+                now = _hermes_now().isoformat()
+                new_next = compute_next_run(job["schedule"], now)
+                if new_next and new_next != job.get("next_run_at"):
+                    job["next_run_at"] = new_next
+                    save_jobs(jobs)
+                    return True
                return False
-            now = _hermes_now().isoformat()
-            new_next = compute_next_run(job["schedule"], now)
-            if new_next and new_next != job.get("next_run_at"):
-                job["next_run_at"] = new_next
-                save_jobs(jobs)
-                return True
-            return False
-    return False
+        return False


 def get_due_jobs() -> List[Dict[str, Any]]:
@@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
                coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)

            future = asyncio.run_coroutine_threadsafe(coro, loop)
-            result = future.result(timeout=30)
+            try:
+                result = future.result(timeout=30)
+            except TimeoutError:
+                future.cancel()
+                raise
            if result and not getattr(result, "success", True):
                logger.warning(
                    "Job '%s': media send failed for %s: %s",
@@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                        runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
                        loop,
                    )
-                    send_result = future.result(timeout=60)
+                    try:
+                        send_result = future.result(timeout=60)
+                    except TimeoutError:
+                        future.cancel()
+                        raise
                    if send_result and not getattr(send_result, "success", True):
                        err = getattr(send_result, "error", "unknown")
                        logger.warning(
@@ -422,7 +430,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
                # fresh thread that has no running loop.
                coro.close()
-                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                    future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
                    result = future.result(timeout=30)
@@ -747,14 +754,17 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    # scheduler process — every job this process runs is a cron job.
    os.environ["HERMES_CRON_SESSION"] = "1"

+    # Use ContextVars for per-job session/delivery state so parallel jobs
+    # don't clobber each other's targets (os.environ is process-global).
+    from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
+
+    _ctx_tokens = set_session_vars(
+        platform=origin["platform"] if origin else "",
+        chat_id=str(origin["chat_id"]) if origin else "",
+        chat_name=origin.get("chat_name", "") if origin else "",
+    )
+
    try:
-        # Inject origin context so the agent's send_message tool knows the chat.
-        # Must be INSIDE the try block so the finally cleanup always runs.
-        if origin:
-            os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
-            os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
-            if origin.get("chat_name"):
-                os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
        from dotenv import load_dotenv
@@ -765,10 +775,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        delivery_target = _resolve_delivery_target(job)
        if delivery_target:
-            os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
-            os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
+            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
+            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
            if delivery_target.get("thread_id") is not None:
-                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
+                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -807,14 +817,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        prefill_messages = None
        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
        if prefill_file:
-            import json as _json
            pfpath = Path(prefill_file).expanduser()
            if not pfpath.is_absolute():
                pfpath = _hermes_home / pfpath
            if pfpath.exists():
                try:
                    with open(pfpath, "r", encoding="utf-8") as _pf:
-                        prefill_messages = _json.load(_pf)
+                        prefill_messages = json.load(_pf)
                    if not isinstance(prefill_messages, list):
                        prefill_messages = None
                except Exception as e:
@@ -1012,16 +1021,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        return False, output, "", error_msg

    finally:
-        # Clean up injected env vars so they don't leak to other jobs
-        for key in (
-            "HERMES_SESSION_PLATFORM",
-            "HERMES_SESSION_CHAT_ID",
-            "HERMES_SESSION_CHAT_NAME",
-            "HERMES_CRON_AUTO_DELIVER_PLATFORM",
-            "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
-            "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
-        ):
-            os.environ.pop(key, None)
+        # Clean up ContextVar session/delivery state for this job.
+        clear_session_vars(_ctx_tokens)
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -1074,15 +1075,41 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
        if verbose:
            logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))

-        executed = 0
+        # Advance next_run_at for all recurring jobs FIRST, under the file lock,
+        # before any execution begins.  This preserves at-most-once semantics.
        for job in due_jobs:
-            try:
-                # For recurring jobs (cron/interval), advance next_run_at to the
-                # next future occurrence BEFORE execution.  This way, if the
-                # process crashes mid-run, the job won't re-fire on restart.
-                # One-shot jobs are left alone so they can retry on restart.
-                advance_next_run(job["id"])
+            advance_next_run(job["id"])

+        # Resolve max parallel workers: env var > config.yaml > unbounded.
+        # Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
+        _max_workers: Optional[int] = None
+        try:
+            _env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
+            if _env_par:
+                _max_workers = int(_env_par) or None
+        except (ValueError, TypeError):
+            logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
+        if _max_workers is None:
+            try:
+                _ucfg = load_config() or {}
+                _cfg_par = (
+                    _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
+                ).get("max_parallel_jobs")
+                if _cfg_par is not None:
+                    _max_workers = int(_cfg_par) or None
+            except Exception:
+                pass
+
+        if verbose:
+            logger.info(
+                "Running %d job(s) in parallel (max_workers=%s)",
+                len(due_jobs),
+                _max_workers if _max_workers else "unbounded",
+            )
+
+        def _process_job(job: dict) -> bool:
+            """Run one due job end-to-end: execute, save, deliver, mark."""
+            try:
                success, output, final_response, error = run_job(job)

                output_file = save_job_output(job["id"], output)
@@ -1114,13 +1141,23 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                    error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"

                mark_job_run(job["id"], success, error, delivery_error=delivery_error)
-                executed += 1
+                return True

            except Exception as e:
                logger.error("Error processing job %s: %s", job['id'], e)
                mark_job_run(job["id"], False, str(e))
+                return False

-        return executed
+        # Run all due jobs concurrently, each in its own ContextVar copy
+        # so session/delivery state stays isolated per-thread.
+        with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
+            _futures = []
+            for job in due_jobs:
+                _ctx = contextvars.copy_context()
+                _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
+            _results = [f.result() for f in _futures]
+
+        return sum(_results)
    finally:
        if fcntl:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
@@ -53,7 +53,6 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
    try:
        loop = asyncio.get_running_loop()
        # We're in an async context -- need to run in thread
-        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(
                handle_function_call, tool_name, arguments, task_id
@@ -576,6 +576,14 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["free_response_channels"] = platform_cfg["free_response_channels"]
                if "mention_patterns" in platform_cfg:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
+                if "dm_policy" in platform_cfg:
+                    bridged["dm_policy"] = platform_cfg["dm_policy"]
+                if "allow_from" in platform_cfg:
+                    bridged["allow_from"] = platform_cfg["allow_from"]
+                if "group_policy" in platform_cfg:
+                    bridged["group_policy"] = platform_cfg["group_policy"]
+                if "group_allow_from" in platform_cfg:
+                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
@@ -662,8 +670,7 @@ def load_gateway_config() -> GatewayConfig:
                if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
                    os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
                if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
-                    import json as _json
-                    os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
+                    os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
                frc = telegram_cfg.get("free_response_chats")
                if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
                    if isinstance(frc, list):
@@ -700,6 +707,20 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
+                if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
+                    os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
+                af = whatsapp_cfg.get("allow_from")
+                if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
+                    if isinstance(af, list):
+                        af = ",".join(str(v) for v in af)
+                    os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
+                if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
+                    os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
+                gaf = whatsapp_cfg.get("group_allow_from")
+                if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
+                    if isinstance(gaf, list):
+                        gaf = ",".join(str(v) for v in gaf)
+                    os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)

            # DingTalk settings → env vars (env vars take precedence)
            dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -1237,7 +1258,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            if legacy_home:
                qq_home = legacy_home
                qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
-                import logging
                logging.getLogger(__name__).warning(
                    "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
                    "in your .env for consistency with the platform key."
@@ -117,6 +117,160 @@ def _normalize_chat_content(
        return ""


+# Content part type aliases used by the OpenAI Chat Completions and Responses
+# APIs.  We accept both spellings on input and emit a single canonical internal
+# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the
+# rest of the agent pipeline already understands.
+_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"})
+_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"})
+_FILE_PART_TYPES = frozenset({"file", "input_file"})
+
+
+def _normalize_multimodal_content(content: Any) -> Any:
+    """Validate and normalize multimodal content for the API server.
+
+    Returns a plain string when the content is text-only, or a list of
+    ``{"type": "text"|"image_url", ...}`` parts when images are present.
+    The output shape is the native OpenAI Chat Completions vision format,
+    which the agent pipeline accepts verbatim (OpenAI-wire providers) or
+    converts (``_preprocess_anthropic_content`` for Anthropic).
+
+    Raises ``ValueError`` with an OpenAI-style code on invalid input:
+      * ``unsupported_content_type`` — file/input_file/file_id parts, or
+        non-image ``data:`` URLs.
+      * ``invalid_image_url`` — missing URL or unsupported scheme.
+      * ``invalid_content_part`` — malformed text/image objects.
+
+    Callers translate the ValueError into a 400 response.
+    """
+    # Scalar passthrough mirrors ``_normalize_chat_content``.
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
+    if not isinstance(content, list):
+        # Mirror the legacy text-normalizer's fallback so callers that
+        # pre-existed image support still get a string back.
+        return _normalize_chat_content(content)
+
+    items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
+    normalized_parts: List[Dict[str, Any]] = []
+    text_accum_len = 0
+
+    for part in items:
+        if isinstance(part, str):
+            if part:
+                trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH]
+                normalized_parts.append({"type": "text", "text": trimmed})
+                text_accum_len += len(trimmed)
+            continue
+
+        if not isinstance(part, dict):
+            # Ignore unknown scalars for forward compatibility with future
+            # Responses API additions (e.g. ``refusal``).  The same policy
+            # the text normalizer applies.
+            continue
+
+        raw_type = part.get("type")
+        part_type = str(raw_type or "").strip().lower()
+
+        if part_type in _TEXT_PART_TYPES:
+            text = part.get("text")
+            if text is None:
+                continue
+            if not isinstance(text, str):
+                text = str(text)
+            if text:
+                trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH]
+                normalized_parts.append({"type": "text", "text": trimmed})
+                text_accum_len += len(trimmed)
+            continue
+
+        if part_type in _IMAGE_PART_TYPES:
+            detail = part.get("detail")
+            image_ref = part.get("image_url")
+            # OpenAI Responses sends ``input_image`` with a top-level
+            # ``image_url`` string; Chat Completions sends ``image_url`` as
+            # ``{"url": "...", "detail": "..."}``.  Support both.
+            if isinstance(image_ref, dict):
+                url_value = image_ref.get("url")
+                detail = image_ref.get("detail", detail)
+            else:
+                url_value = image_ref
+            if not isinstance(url_value, str) or not url_value.strip():
+                raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.")
+            url_value = url_value.strip()
+            lowered = url_value.lower()
+            if lowered.startswith("data:"):
+                if not lowered.startswith("data:image/") or "," not in url_value:
+                    raise ValueError(
+                        "unsupported_content_type:Only image data URLs are supported. "
+                        "Non-image data payloads are not supported."
+                    )
+            elif not (lowered.startswith("http://") or lowered.startswith("https://")):
+                raise ValueError(
+                    "invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs."
+                )
+            image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}}
+            if detail is not None:
+                if not isinstance(detail, str) or not detail.strip():
+                    raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.")
+                image_part["image_url"]["detail"] = detail.strip()
+            normalized_parts.append(image_part)
+            continue
+
+        if part_type in _FILE_PART_TYPES:
+            raise ValueError(
+                "unsupported_content_type:Inline image inputs are supported, "
+                "but uploaded files and document inputs are not supported on this endpoint."
+            )
+
+        # Unknown part type — reject explicitly so clients get a clear error
+        # instead of a silently dropped turn.
+        raise ValueError(
+            f"unsupported_content_type:Unsupported content part type {raw_type!r}. "
+            "Only text and image_url/input_image parts are supported."
+        )
+
+    if not normalized_parts:
+        return ""
+
+    # Text-only: collapse to a plain string so downstream logging/trajectory
+    # code sees the native shape and prompt caching on text-only turns is
+    # unaffected.
+    if all(p.get("type") == "text" for p in normalized_parts):
+        return "\n".join(p["text"] for p in normalized_parts if p.get("text"))
+
+    return normalized_parts
+
+
+def _content_has_visible_payload(content: Any) -> bool:
+    """True when content has any text or image attachment.  Used to reject empty turns."""
+    if isinstance(content, str):
+        return bool(content.strip())
+    if isinstance(content, list):
+        for part in content:
+            if isinstance(part, dict):
+                ptype = str(part.get("type") or "").strip().lower()
+                if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip():
+                    return True
+                if ptype in _IMAGE_PART_TYPES:
+                    return True
+    return False
+
+
+def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response":
+    """Translate a ``_normalize_multimodal_content`` ValueError into a 400 response."""
+    raw = str(exc)
+    code, _, message = raw.partition(":")
+    if not message:
+        code, message = "invalid_content_part", raw
+    return web.json_response(
+        _openai_error(message, code=code, param=param),
+        status=400,
+    )
+
+
 def check_api_server_requirements() -> bool:
    """Check if API server dependencies are available."""
    return AIOHTTP_AVAILABLE
@@ -169,7 +323,6 @@ class ResponseStore:
        ).fetchone()
        if row is None:
            return None
-        import time
        self._conn.execute(
            "UPDATE responses SET accessed_at = ? WHERE response_id = ?",
            (time.time(), response_id),
@@ -179,7 +332,6 @@ class ResponseStore:

    def put(self, response_id: str, data: Dict[str, Any]) -> None:
        """Store a response, evicting the oldest if at capacity."""
-        import time
        self._conn.execute(
            "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
            (response_id, json.dumps(data, default=str), time.time()),
@@ -315,12 +467,12 @@ class _IdempotencyCache:
    def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
        from collections import OrderedDict
        self._store = OrderedDict()
+        self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
        self._ttl = ttl_seconds
        self._max = max_items

    def _purge(self):
-        import time as _t
-        now = _t.time()
+        now = time.time()
        expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
        for k in expired:
            self._store.pop(k, None)
@@ -332,11 +484,27 @@ class _IdempotencyCache:
        item = self._store.get(key)
        if item and item["fp"] == fingerprint:
            return item["resp"]
-        resp = await compute_coro()
-        import time as _t
-        self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
-        self._purge()
-        return resp
+
+        inflight_key = (key, fingerprint)
+        task = self._inflight.get(inflight_key)
+        if task is None:
+            async def _compute_and_store():
+                resp = await compute_coro()
+                import time as _t
+                self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
+                self._purge()
+                return resp
+
+            task = asyncio.create_task(_compute_and_store())
+            self._inflight[inflight_key] = task
+
+            def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
+                if self._inflight.get(inflight_key) is done_task:
+                    self._inflight.pop(inflight_key, None)
+
+            task.add_done_callback(_clear_inflight)
+
+        return await asyncio.shield(task)


 _idem_cache = _IdempotencyCache()
@@ -366,6 +534,30 @@ def _derive_chat_session_id(
    return f"api-{digest}"


+_CRON_AVAILABLE = False
+try:
+    from cron.jobs import (
+        list_jobs as _cron_list,
+        get_job as _cron_get,
+        create_job as _cron_create,
+        update_job as _cron_update,
+        remove_job as _cron_remove,
+        pause_job as _cron_pause,
+        resume_job as _cron_resume,
+        trigger_job as _cron_trigger,
+    )
+    _CRON_AVAILABLE = True
+except ImportError:
+    _cron_list = None
+    _cron_get = None
+    _cron_create = None
+    _cron_update = None
+    _cron_remove = None
+    _cron_pause = None
+    _cron_resume = None
+    _cron_trigger = None
+
+
 class APIServerAdapter(BasePlatformAdapter):
    """
    OpenAI-compatible HTTP API server adapter.
@@ -637,26 +829,32 @@ class APIServerAdapter(BasePlatformAdapter):
        system_prompt = None
        conversation_messages: List[Dict[str, str]] = []

-        for msg in messages:
+        for idx, msg in enumerate(messages):
            role = msg.get("role", "")
-            content = _normalize_chat_content(msg.get("content", ""))
+            raw_content = msg.get("content", "")
            if role == "system":
-                # Accumulate system messages
+                # System messages don't support images (Anthropic rejects, OpenAI
+                # text-model systems don't render them).  Flatten to text.
+                content = _normalize_chat_content(raw_content)
                if system_prompt is None:
                    system_prompt = content
                else:
                    system_prompt = system_prompt + "\n" + content
            elif role in ("user", "assistant"):
+                try:
+                    content = _normalize_multimodal_content(raw_content)
+                except ValueError as exc:
+                    return _multimodal_validation_error(exc, param=f"messages[{idx}].content")
                conversation_messages.append({"role": role, "content": content})

        # Extract the last user message as the primary input
-        user_message = ""
+        user_message: Any = ""
        history = []
        if conversation_messages:
            user_message = conversation_messages[-1].get("content", "")
            history = conversation_messages[:-1]

-        if not user_message:
+        if not _content_has_visible_payload(user_message):
            return web.json_response(
                {"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
                status=400,
@@ -1424,16 +1622,19 @@ class APIServerAdapter(BasePlatformAdapter):
            # No error if conversation doesn't exist yet — it's a new conversation

        # Normalize input to message list
-        input_messages: List[Dict[str, str]] = []
+        input_messages: List[Dict[str, Any]] = []
        if isinstance(raw_input, str):
            input_messages = [{"role": "user", "content": raw_input}]
        elif isinstance(raw_input, list):
-            for item in raw_input:
+            for idx, item in enumerate(raw_input):
                if isinstance(item, str):
                    input_messages.append({"role": "user", "content": item})
                elif isinstance(item, dict):
                    role = item.get("role", "user")
-                    content = _normalize_chat_content(item.get("content", ""))
+                    try:
+                        content = _normalize_multimodal_content(item.get("content", ""))
+                    except ValueError as exc:
+                        return _multimodal_validation_error(exc, param=f"input[{idx}].content")
                    input_messages.append({"role": role, "content": content})
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1442,7 +1643,7 @@ class APIServerAdapter(BasePlatformAdapter):
        # This lets stateless clients supply their own history instead of
        # relying on server-side response chaining via previous_response_id.
        # Precedence: explicit conversation_history > previous_response_id.
-        conversation_history: List[Dict[str, str]] = []
+        conversation_history: List[Dict[str, Any]] = []
        raw_history = body.get("conversation_history")
        if raw_history:
            if not isinstance(raw_history, list):
@@ -1456,7 +1657,11 @@ class APIServerAdapter(BasePlatformAdapter):
                        _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
                        status=400,
                    )
-                conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
+                try:
+                    entry_content = _normalize_multimodal_content(entry["content"])
+                except ValueError as exc:
+                    return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content")
+                conversation_history.append({"role": str(entry["role"]), "content": entry_content})
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

@@ -1476,8 +1681,8 @@ class APIServerAdapter(BasePlatformAdapter):
            conversation_history.append(msg)

        # Last input message is the user_message
-        user_message = input_messages[-1].get("content", "") if input_messages else ""
-        if not user_message:
+        user_message: Any = input_messages[-1].get("content", "") if input_messages else ""
+        if not _content_has_visible_payload(user_message):
            return web.json_response(_openai_error("No user message found in input"), status=400)

        # Truncation support
@@ -1682,44 +1887,16 @@ class APIServerAdapter(BasePlatformAdapter):
    # Cron jobs API
    # ------------------------------------------------------------------

-    # Check cron module availability once (not per-request)
-    _CRON_AVAILABLE = False
-    try:
-        from cron.jobs import (
-            list_jobs as _cron_list,
-            get_job as _cron_get,
-            create_job as _cron_create,
-            update_job as _cron_update,
-            remove_job as _cron_remove,
-            pause_job as _cron_pause,
-            resume_job as _cron_resume,
-            trigger_job as _cron_trigger,
-        )
-        # Wrap as staticmethod to prevent descriptor binding — these are plain
-        # module functions, not instance methods.  Without this, self._cron_*()
-        # injects ``self`` as the first positional argument and every call
-        # raises TypeError.
-        _cron_list = staticmethod(_cron_list)
-        _cron_get = staticmethod(_cron_get)
-        _cron_create = staticmethod(_cron_create)
-        _cron_update = staticmethod(_cron_update)
-        _cron_remove = staticmethod(_cron_remove)
-        _cron_pause = staticmethod(_cron_pause)
-        _cron_resume = staticmethod(_cron_resume)
-        _cron_trigger = staticmethod(_cron_trigger)
-        _CRON_AVAILABLE = True
-    except ImportError:
-        pass
-
    _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
    # Allowed fields for update — prevents clients injecting arbitrary keys
    _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
    _MAX_NAME_LENGTH = 200
    _MAX_PROMPT_LENGTH = 5000

-    def _check_jobs_available(self) -> Optional["web.Response"]:
+    @staticmethod
+    def _check_jobs_available() -> Optional["web.Response"]:
        """Return error response if cron module isn't available."""
-        if not self._CRON_AVAILABLE:
+        if not _CRON_AVAILABLE:
            return web.json_response(
                {"error": "Cron module not available"}, status=501,
            )
@@ -1744,7 +1921,7 @@ class APIServerAdapter(BasePlatformAdapter):
            return cron_err
        try:
            include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
-            jobs = self._cron_list(include_disabled=include_disabled)
+            jobs = _cron_list(include_disabled=include_disabled)
            return web.json_response({"jobs": jobs})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1792,7 +1969,7 @@ class APIServerAdapter(BasePlatformAdapter):
            if repeat is not None:
                kwargs["repeat"] = repeat

-            job = self._cron_create(**kwargs)
+            job = _cron_create(**kwargs)
            return web.json_response({"job": job})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1809,7 +1986,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_get(job_id)
+            job = _cron_get(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1842,7 +2019,7 @@ class APIServerAdapter(BasePlatformAdapter):
                return web.json_response(
                    {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
                )
-            job = self._cron_update(job_id, sanitized)
+            job = _cron_update(job_id, sanitized)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1861,7 +2038,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            success = self._cron_remove(job_id)
+            success = _cron_remove(job_id)
            if not success:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"ok": True})
@@ -1880,7 +2057,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_pause(job_id)
+            job = _cron_pause(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1899,7 +2076,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_resume(job_id)
+            job = _cron_resume(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1918,7 +2095,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_trigger(job_id)
+            job = _cron_trigger(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -19,6 +19,8 @@ import uuid
 from abc import ABC, abstractmethod
 from urllib.parse import urlsplit

+from utils import normalize_proxy_url
+
 logger = logging.getLogger(__name__)


@@ -159,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
-            return value
+            return normalize_proxy_url(value)
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
-            return value
-    return _detect_macos_system_proxy()
+            return normalize_proxy_url(value)
+    return normalize_proxy_url(_detect_macos_system_proxy())


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -391,12 +393,9 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

-    import asyncio
    import httpx
-    import logging as _logging
-    _log = _logging.getLogger(__name__)
+    _log = logging.getLogger(__name__)

-    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -414,7 +413,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                response.raise_for_status()
                return cache_image_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -430,7 +428,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise last_exc


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -510,12 +507,9 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

-    import asyncio
    import httpx
-    import logging as _logging
-    _log = _logging.getLogger(__name__)
+    _log = logging.getLogger(__name__)

-    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -533,7 +527,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                response.raise_for_status()
                return cache_audio_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -549,7 +542,39 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise last_exc
+
+
+# ---------------------------------------------------------------------------
+# Video cache utilities
+#
+# Same pattern as image/audio cache -- videos from platforms are downloaded
+# here so the agent can reference them by local file path.
+# ---------------------------------------------------------------------------
+
+VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")
+
+SUPPORTED_VIDEO_TYPES = {
+    ".mp4": "video/mp4",
+    ".mov": "video/quicktime",
+    ".webm": "video/webm",
+    ".mkv": "video/x-matroska",
+    ".avi": "video/x-msvideo",
+}
+
+
+def get_video_cache_dir() -> Path:
+    """Return the video cache directory, creating it if it doesn't exist."""
+    VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    return VIDEO_CACHE_DIR
+
+
+def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
+    """Save raw video bytes to the cache and return the absolute file path."""
+    cache_dir = get_video_cache_dir()
+    filename = f"video_{uuid.uuid4().hex[:12]}{ext}"
+    filepath = cache_dir / filename
+    filepath.write_bytes(data)
+    return str(filepath)


 # ---------------------------------------------------------------------------
@@ -1318,7 +1343,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1754,8 +1779,6 @@ class BasePlatformAdapter(ABC):
          HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
          HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
        """
-        import random
-
        mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
        if mode == "off":
            return 0.0
@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
 def check_bluebubbles_requirements() -> bool:
    try:
        import aiohttp  # noqa: F401
-        import httpx as _httpx  # noqa: F401
+        import httpx  # noqa: F401
    except ImportError:
        return False
    return True
@@ -541,7 +541,6 @@ class DiscordAdapter(BasePlatformAdapter):
            # ctypes.util.find_library fails on macOS with Homebrew-installed libs,
            # so fall back to known Homebrew paths if needed.
            if not opus_path:
-                import sys
                _homebrew_paths = (
                    "/opt/homebrew/lib/libopus.dylib",  # Apple Silicon
                    "/usr/local/lib/libopus.dylib",     # Intel Mac
@@ -1422,8 +1421,7 @@ class DiscordAdapter(BasePlatformAdapter):
        speaking_user_ids: set = set()
        receiver = self._voice_receivers.get(guild_id)
        if receiver:
-            import time as _time
-            now = _time.monotonic()
+            now = time.monotonic()
            with receiver._lock:
                for ssrc, last_t in receiver._last_packet_time.items():
                    # Consider "speaking" if audio received within last 2 seconds
@@ -2962,6 +2960,17 @@ class DiscordAdapter(BasePlatformAdapter):
            parent_channel_id = self._get_parent_channel_id(message.channel)

        is_voice_linked_channel = False
+
+        # Save mention-stripped text before auto-threading since create_thread()
+        # can clobber message.content, breaking /command detection in channels.
+        raw_content = message.content.strip()
+        normalized_content = raw_content
+        mention_prefix = False
+        if self._client.user and self._client.user in message.mentions:
+            mention_prefix = True
+            normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
+            normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
+            message.content = normalized_content
        if not isinstance(message.channel, discord.DMChannel):
            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
@@ -2999,13 +3008,8 @@ class DiscordAdapter(BasePlatformAdapter):
            in_bot_thread = is_thread and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
-                if self._client.user not in message.mentions:
+                if self._client.user not in message.mentions and not mention_prefix:
                    return
-
-            if self._client.user and self._client.user in message.mentions:
-                message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
-                message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
-
        # Auto-thread: when enabled, automatically create a thread for every
        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
@@ -3027,7 +3031,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Determine message type
        msg_type = MessageType.TEXT
-        if message.content.startswith("/"):
+        if normalized_content.startswith("/"):
            msg_type = MessageType.COMMAND
        elif message.attachments:
            # Check attachment types
@@ -3167,7 +3171,9 @@ class DiscordAdapter(BasePlatformAdapter):
                                att.filename, e, exc_info=True,
                            )

-        event_text = message.content
+        # Use normalized_content (saved before auto-threading) instead of message.content,
+        # to detect /slash commands in channel messages.
+        event_text = normalized_content
        if pending_text_injection:
            event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection

@@ -410,7 +410,6 @@ class MattermostAdapter(BasePlatformAdapter):
            logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)

-        import asyncio
        import aiohttp

        last_exc = None
@@ -1086,11 +1086,8 @@ class QQAdapter(BasePlatformAdapter):
            return MessageType.VIDEO
        if "image" in first_type or "photo" in first_type:
            return MessageType.PHOTO
-        # Unknown content type with an attachment — don't assume PHOTO
-        # to prevent non-image files from being sent to vision analysis.
        logger.debug(
-            "[%s] Unknown media content_type '%s', defaulting to TEXT",
-            self._log_tag,
+            "Unknown media content_type '%s', defaulting to TEXT",
            first_type,
        )
        return MessageType.TEXT
@@ -1826,14 +1823,12 @@ class QQAdapter(BasePlatformAdapter):
            body["file_name"] = file_name

        # Retry transient upload failures
-        last_exc = None
        for attempt in range(3):
            try:
                return await self._api_request(
                    "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
                )
            except RuntimeError as exc:
-                last_exc = exc
                err_msg = str(exc)
                if any(
                        kw in err_msg
@@ -1842,8 +1837,8 @@ class QQAdapter(BasePlatformAdapter):
                    raise
                if attempt < 2:
                    await asyncio.sleep(1.5 * (attempt + 1))
-
-        raise last_exc  # type: ignore[misc]
+                else:
+                    raise

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
@@ -1600,11 +1600,9 @@ class SlackAdapter(BasePlatformAdapter):

    async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
        """Download a Slack file using the bot token for auth, with retry."""
-        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
-        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1634,7 +1632,6 @@ class SlackAdapter(BasePlatformAdapter):
                        from gateway.platforms.base import cache_image_from_bytes
                        return cache_image_from_bytes(response.content, ext)
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1643,15 +1640,12 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise last_exc

    async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
        """Download a Slack file and return raw bytes, with retry."""
-        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
-        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1663,7 +1657,6 @@ class SlackAdapter(BasePlatformAdapter):
                    response.raise_for_status()
                    return response.content
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1672,7 +1665,6 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise last_exc

    # ── Channel mention gating ─────────────────────────────────────────────

@@ -71,8 +71,10 @@ from gateway.platforms.base import (
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
+    cache_video_from_bytes,
    cache_document_from_bytes,
    resolve_proxy_url,
+    SUPPORTED_VIDEO_TYPES,
    SUPPORTED_DOCUMENT_TYPES,
    utf16_len,
    _prefix_within_utf16_limit,
@@ -494,6 +496,13 @@ class TelegramAdapter(BasePlatformAdapter):
                    "[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
                    self.name, name, chat_id,
                )
+            elif "not a forum" in error_text or "forums_disabled" in error_text:
+                logger.warning(
+                    "[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
+                    "The user must open the DM with this bot in Telegram, tap the bot name "
+                    "at the top, and enable 'Topics' in chat settings before topics can be created.",
+                    self.name, name, chat_id,
+                )
            else:
                logger.warning(
                    "[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -785,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter):
                # Telegram pushes updates to our HTTP endpoint.  This
                # enables cloud platforms (Fly.io, Railway) to auto-wake
                # suspended machines on inbound HTTP traffic.
+                #
+                # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
+                # python-telegram-bot passes secret_token=None and the
+                # webhook endpoint accepts any HTTP POST — attackers can
+                # inject forged updates as if from Telegram. Refuse to
+                # start rather than silently run in fail-open mode.
+                # See GHSA-3vpc-7q5r-276h.
                webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
-                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
+                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
+                if not webhook_secret:
+                    raise RuntimeError(
+                        "TELEGRAM_WEBHOOK_SECRET is required when "
+                        "TELEGRAM_WEBHOOK_URL is set. Without it, the "
+                        "webhook endpoint accepts forged updates from "
+                        "anyone who can reach it — see "
+                        "https://github.com/NousResearch/hermes-agent/"
+                        "security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
+                        "Generate a secret and set it in your .env:\n"
+                        "  export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
+                        "Then register it with Telegram when setting the "
+                        "webhook via setWebhook's secret_token parameter."
+                    )
                from urllib.parse import urlparse
                webhook_path = urlparse(webhook_url).path or "/telegram"

@@ -1704,7 +1733,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")
        
        try:
-            import os
            if not os.path.exists(audio_path):
                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
            
@@ -1753,7 +1781,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")

        try:
-            import os
            if not os.path.exists(image_path):
                return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))

@@ -2066,7 +2093,7 @@ class TelegramAdapter(BasePlatformAdapter):
            url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
            return _ph(f'[{display}]({url})')

-        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
+        text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)

        # 4) Convert markdown headers (## Title) → bold *Title*
        def _convert_header(m):
@@ -2326,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter):
        DMs remain unrestricted. Group/supergroup messages are accepted when:
        - the chat is explicitly allowlisted in ``free_response_chats``
        - ``require_mention`` is disabled
-        - the message is a command
        - the message replies to the bot
        - the bot is @mentioned
        - the text/caption matches a configured regex wake-word pattern
+
+        When ``require_mention`` is enabled, slash commands are not given
+        special treatment — they must pass the same mention/reply checks
+        as any other group message.  Users can still trigger commands via
+        the Telegram bot menu (``/command@botname``) or by explicitly
+        mentioning the bot (``@botname /command``), both of which are
+        recognised as mentions by :meth:`_message_mentions_bot`.
        """
        if not self._is_group_chat(message):
            return True
@@ -2344,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return True
        if not self._telegram_require_mention():
            return True
-        if is_command:
-            return True
        if self._is_reply_to_bot(message):
            return True
        if self._message_mentions_bot(message):
@@ -2628,6 +2659,23 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception as e:
                logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)

+        elif msg.video:
+            try:
+                file_obj = await msg.video.get_file()
+                video_bytes = await file_obj.download_as_bytearray()
+                ext = ".mp4"
+                if getattr(file_obj, "file_path", None):
+                    for candidate in SUPPORTED_VIDEO_TYPES:
+                        if file_obj.file_path.lower().endswith(candidate):
+                            ext = candidate
+                            break
+                cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
+                event.media_urls = [cached_path]
+                event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")]
+                logger.info("[Telegram] Cached user video at %s", cached_path)
+            except Exception as e:
+                logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True)
+
        # Download document files to cache for agent processing
        elif msg.document:
            doc = msg.document
@@ -2644,6 +2692,21 @@ class TelegramAdapter(BasePlatformAdapter):
                    mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
                    ext = mime_to_ext.get(doc.mime_type, "")

+                if not ext and doc.mime_type:
+                    video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
+                    ext = video_mime_to_ext.get(doc.mime_type, "")
+
+                if ext in SUPPORTED_VIDEO_TYPES:
+                    file_obj = await doc.get_file()
+                    video_bytes = await file_obj.download_as_bytearray()
+                    cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
+                    event.media_urls = [cached_path]
+                    event.media_types = [SUPPORTED_VIDEO_TYPES[ext]]
+                    event.message_type = MessageType.VIDEO
+                    logger.info("[Telegram] Cached user video document at %s", cached_path)
+                    await self.handle_message(event)
+                    return
+
                # Check if supported
                if ext not in SUPPORTED_DOCUMENT_TYPES:
                    supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
@@ -2782,13 +2845,11 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.info("[Telegram] Analyzing sticker at %s", cached_path)

            from tools.vision_tools import vision_analyze_tool
-            import json as _json
-
            result_json = await vision_analyze_tool(
                image_url=cached_path,
                user_prompt=STICKER_VISION_PROMPT,
            )
-            result = _json.loads(result_json)
+            result = json.loads(result_json)

            if result.get("success"):
                description = result.get("analysis", "a sticker")
@@ -624,13 +624,16 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
-            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
+            _raw_mixed = body.get("mixed")
+            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
+            _raw_items = mixed.get("msg_item")
+            items = _raw_items if isinstance(_raw_items, list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
                if str(item.get("msgtype") or "").lower() == "text":
-                    text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
+                    _raw_text = item.get("text")
+                    text_block = _raw_text if isinstance(_raw_text, dict) else {}
                    content = str(text_block.get("content") or "").strip()
                    if content:
                        text_parts.append(content)
@@ -672,8 +675,10 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
-            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
+            _raw_mixed = body.get("mixed")
+            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
+            _raw_items = mixed.get("msg_item")
+            items = _raw_items if isinstance(_raw_items, list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
@@ -66,6 +66,37 @@ def _kill_port_process(port: int) -> None:
    except Exception:
        pass

+
+def _terminate_bridge_process(proc, *, force: bool = False) -> None:
+    """Terminate the bridge process using process-tree semantics where possible."""
+    if _IS_WINDOWS:
+        cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
+        if force:
+            cmd.append("/F")
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+        except FileNotFoundError:
+            if force:
+                proc.kill()
+            else:
+                proc.terminate()
+            return
+
+        if result.returncode != 0:
+            details = (result.stderr or result.stdout or "").strip()
+            raise OSError(details or f"taskkill failed for PID {proc.pid}")
+        return
+
+    import signal
+
+    sig = signal.SIGTERM if not force else signal.SIGKILL
+    os.killpg(os.getpgid(proc.pid), sig)
+
 import sys
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

@@ -118,6 +149,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
    - bridge_script: Path to the Node.js bridge script
    - bridge_port: Port for HTTP communication (default: 3000)
    - session_path: Path to store WhatsApp session data
+    - dm_policy: "open" | "allowlist" | "disabled" — how DMs are handled (default: "open")
+    - allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
+    - group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open")
+    - group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
    """
    
    # WhatsApp message limits — practical UX limit, not protocol max.
@@ -140,6 +175,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
            get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
        ))
        self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
+        self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
+        self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
+        self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
+        self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
        self._mention_patterns = self._compile_mention_patterns()
        self._message_queue: asyncio.Queue = asyncio.Queue()
        self._bridge_log_fh = None
@@ -163,6 +202,33 @@ class WhatsAppAdapter(BasePlatformAdapter):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

+    @staticmethod
+    def _coerce_allow_list(raw) -> set[str]:
+        """Parse allow_from / group_allow_from from config or env var."""
+        if raw is None:
+            return set()
+        if isinstance(raw, list):
+            return {str(part).strip() for part in raw if str(part).strip()}
+        return {part.strip() for part in str(raw).split(",") if part.strip()}
+
+    def _is_dm_allowed(self, sender_id: str) -> bool:
+        """Check whether a DM from the given sender should be processed."""
+        if self._dm_policy == "disabled":
+            return False
+        if self._dm_policy == "allowlist":
+            return sender_id in self._allow_from
+        # "open" — all DMs allowed
+        return True
+
+    def _is_group_allowed(self, chat_id: str) -> bool:
+        """Check whether a group chat should be processed."""
+        if self._group_policy == "disabled":
+            return False
+        if self._group_policy == "allowlist":
+            return chat_id in self._group_allow_from
+        # "open" — all groups allowed
+        return True
+
    def _compile_mention_patterns(self):
        patterns = self.config.extra.get("mention_patterns")
        if patterns is None:
@@ -255,8 +321,18 @@ class WhatsAppAdapter(BasePlatformAdapter):
        return cleaned.strip() or text

    def _should_process_message(self, data: Dict[str, Any]) -> bool:
-        if not data.get("isGroup"):
+        is_group = data.get("isGroup", False)
+        if is_group:
+            chat_id = str(data.get("chatId") or "")
+            if not self._is_group_allowed(chat_id):
+                return False
+        else:
+            sender_id = str(data.get("senderId") or data.get("from") or "")
+            if not self._is_dm_allowed(sender_id):
+                return False
+            # DMs that pass the policy gate are always processed
            return True
+        # Group messages: check mention / free-response settings
        chat_id = str(data.get("chatId") or "")
        if chat_id in self._whatsapp_free_response_chats():
            return True
@@ -323,7 +399,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
            
            # Check if bridge is already running and connected
            import aiohttp
-            import asyncio
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(
@@ -492,22 +567,14 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Stop the WhatsApp bridge and clean up any orphaned processes."""
        if self._bridge_process:
            try:
-                # Kill the entire process group so child node processes die too
-                import signal
                try:
-                    if _IS_WINDOWS:
-                        self._bridge_process.terminate()
-                    else:
-                        os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
+                    _terminate_bridge_process(self._bridge_process, force=False)
                except (ProcessLookupError, PermissionError):
                    self._bridge_process.terminate()
                await asyncio.sleep(1)
                if self._bridge_process.poll() is None:
                    try:
-                        if _IS_WINDOWS:
-                            self._bridge_process.kill()
-                        else:
-                            os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
+                        _terminate_bridge_process(self._bridge_process, force=True)
                    except (ProcessLookupError, PermissionError):
                        self._bridge_process.kill()
            except Exception as e:
@@ -773,6 +840,17 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Send a video natively via bridge — plays inline in WhatsApp."""
        return await self._send_media_to_bridge(chat_id, video_path, "video", caption)

+    async def send_voice(
+        self,
+        chat_id: str,
+        audio_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        **kwargs,
+    ) -> SendResult:
+        """Send an audio file as a WhatsApp voice message via bridge."""
+        return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption)
+
    async def send_document(
        self,
        chat_id: str,
@@ -30,6 +30,8 @@ from pathlib import Path
 from datetime import datetime
 from typing import Dict, Optional, Any, List

+from agent.account_usage import fetch_account_usage, render_account_usage_lines
+
 # --- Agent cache tuning ---------------------------------------------------
 # Bounds the per-session AIAgent cache to prevent unbounded growth in
 # long-lived gateways (each AIAgent holds LLM clients, tool schemas,
@@ -86,7 +88,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))

 # Resolve Hermes home directory (respects HERMES_HOME override)
 from hermes_constants import get_hermes_home
-from utils import atomic_yaml_write, is_truthy_value
+from utils import atomic_yaml_write, base_url_host_matches, is_truthy_value
 _hermes_home = get_hermes_home()

 # Load environment variables from ~/.hermes/.env first.
@@ -279,6 +281,7 @@ from gateway.session import (
    build_session_context,
    build_session_context_prompt,
    build_session_key,
+    is_shared_multi_user_session,
 )
 from gateway.delivery import DeliveryRouter
 from gateway.platforms.base import (
@@ -1266,7 +1269,6 @@ class GatewayRunner:
        the prefill_messages_file key in ~/.hermes/config.yaml.
        Relative paths are resolved from ~/.hermes/.
        """
-        import json as _json
        file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
        if not file_path:
            try:
@@ -1288,7 +1290,7 @@ class GatewayRunner:
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
-                data = _json.load(f)
+                data = json.load(f)
            if not isinstance(data, list):
                logger.warning("Prefill messages file must contain a JSON array: %s", path)
                return []
@@ -1667,12 +1669,32 @@ class GatewayRunner:

        notified: set = set()
        for session_key in active:
-            # Parse platform + chat_id from the session key.
-            _parsed = _parse_session_key(session_key)
-            if not _parsed:
-                continue
-            platform_str = _parsed["platform"]
-            chat_id = _parsed["chat_id"]
+            source = None
+            try:
+                if getattr(self, "session_store", None) is not None:
+                    self.session_store._ensure_loaded()
+                    entry = self.session_store._entries.get(session_key)
+                    source = getattr(entry, "origin", None) if entry else None
+            except Exception as e:
+                logger.debug(
+                    "Failed to load session origin for shutdown notification %s: %s",
+                    session_key,
+                    e,
+                )
+
+            if source is not None:
+                platform_str = source.platform.value
+                chat_id = source.chat_id
+                thread_id = source.thread_id
+            else:
+                # Fall back to parsing the session key when no persisted
+                # origin is available (legacy sessions/tests).
+                _parsed = _parse_session_key(session_key)
+                if not _parsed:
+                    continue
+                platform_str = _parsed["platform"]
+                chat_id = _parsed["chat_id"]
+                thread_id = _parsed.get("thread_id")

            # Deduplicate: one notification per chat, even if multiple
            # sessions (different users/threads) share the same chat.
@@ -1688,7 +1710,6 @@ class GatewayRunner:

                # Include thread_id if present so the message lands in the
                # correct forum topic / thread.
-                thread_id = _parsed.get("thread_id")
                metadata = {"thread_id": thread_id} if thread_id else None

                await adapter.send(chat_id, msg, metadata=metadata)
@@ -1941,6 +1962,39 @@ class GatewayRunner:
                "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
            )
        
+        # Discover Python plugins before shell hooks so plugin block
+        # decisions take precedence in tie cases.  The CLI startup path
+        # does this via an explicit call in hermes_cli/main.py; the
+        # gateway lazily imports run_agent inside per-request handlers,
+        # so the discover_plugins() side-effect in model_tools.py is NOT
+        # guaranteed to have run by the time we reach this point.
+        try:
+            from hermes_cli.plugins import discover_plugins
+            discover_plugins()
+        except Exception:
+            logger.debug(
+                "plugin discovery failed at gateway startup", exc_info=True,
+            )
+
+        # Register declarative shell hooks from cli-config.yaml.  Gateway
+        # has no TTY, so consent has to come from one of the three opt-in
+        # channels (--accept-hooks on launch, HERMES_ACCEPT_HOOKS env var,
+        # or hooks_auto_accept: true in config.yaml).  We pass
+        # accept_hooks=False here and let register_from_config resolve
+        # the effective value from env + config itself — the CLI-side
+        # registration already honored --accept-hooks, and re-reading
+        # hooks_auto_accept here would just duplicate that lookup.
+        # Failures are logged but must never block gateway startup.
+        try:
+            from hermes_cli.config import load_config
+            from agent.shell_hooks import register_from_config
+            register_from_config(load_config(), accept_hooks=False)
+        except Exception:
+            logger.debug(
+                "shell-hook registration failed at gateway startup",
+                exc_info=True,
+            )
+
        # Discover and load event hooks
        self.hooks.discover_and_load()
        
@@ -3223,10 +3277,9 @@ class GatewayRunner:
                    return "Usage: /queue <prompt>"
                adapter = self.adapters.get(source.platform)
                if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                        text=queued_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                        source=event.source,
                        message_id=event.message_id,
                        channel_prompt=event.channel_prompt,
@@ -3248,10 +3301,9 @@ class GatewayRunner:
                    # Agent hasn't started yet — queue as turn-boundary fallback.
                    adapter = self.adapters.get(source.platform)
                    if adapter:
-                        from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                        queued_event = _ME(
+                        queued_event = MessageEvent(
                            text=steer_text,
-                            message_type=_MT.TEXT,
+                            message_type=MessageType.TEXT,
                            source=event.source,
                            message_id=event.message_id,
                            channel_prompt=event.channel_prompt,
@@ -3271,10 +3323,9 @@ class GatewayRunner:
                # Running agent is missing or lacks steer() — fall back to queue.
                adapter = self.adapters.get(source.platform)
                if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                        text=steer_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                        source=event.source,
                        message_id=event.message_id,
                        channel_prompt=event.channel_prompt,
@@ -3623,9 +3674,8 @@ class GatewayRunner:
                plugin_handler = get_plugin_command_handler(command.replace("_", "-"))
                if plugin_handler:
                    user_args = event.get_command_args().strip()
-                    import asyncio as _aio
                    result = plugin_handler(user_args)
-                    if _aio.iscoroutine(result):
+                    if asyncio.iscoroutine(result):
                        result = await result
                    return str(result) if result else None
            except Exception as e:
@@ -3742,12 +3792,12 @@ class GatewayRunner:
        history = history or []
        message_text = event.text or ""

-        _is_shared_thread = (
-            source.chat_type != "dm"
-            and source.thread_id
-            and not getattr(self.config, "thread_sessions_per_user", False)
+        _is_shared_multi_user = is_shared_multi_user_session(
+            source,
+            group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
+            thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
        )
-        if _is_shared_thread and source.user_name:
+        if _is_shared_multi_user and source.user_name:
            message_text = f"[{source.user_name}] {message_text}"

        if event.media_urls:
@@ -3807,9 +3857,7 @@ class GatewayRunner:
            for i, path in enumerate(event.media_urls):
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                if mtype in ("", "application/octet-stream"):
-                    import os as _os2
-
-                    _ext = _os2.path.splitext(path)[1].lower()
+                    _ext = os.path.splitext(path)[1].lower()
                    if _ext in _TEXT_EXTENSIONS:
                        mtype = "text/plain"
                    else:
@@ -3819,13 +3867,10 @@ class GatewayRunner:
                if not mtype.startswith(("application/", "text/")):
                    continue

-                import os as _os
-                import re as _re
-
-                basename = _os.path.basename(path)
+                basename = os.path.basename(path)
                parts = basename.split("_", 2)
                display_name = parts[2] if len(parts) >= 3 else basename
-                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
+                display_name = re.sub(r'[^\w.\- ]', '_', display_name)

                if mtype.startswith("text/"):
                    context_note = (
@@ -3842,14 +3887,14 @@ class GatewayRunner:
                message_text = f"{context_note}\n\n{message_text}"

        if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
+            # Always inject the reply-to pointer — even when the quoted text
+            # already appears in history. The prefix isn't deduplication, it's
+            # disambiguation: it tells the agent *which* prior message the user
+            # is referencing. History can contain the same or similar text
+            # multiple times, and without an explicit pointer the agent has to
+            # guess (or answer for both subjects). Token overhead is minimal.
            reply_snippet = event.reply_to_text[:500]
-            found_in_history = any(
-                reply_snippet[:200] in (msg.get("content") or "")
-                for msg in history
-                if msg.get("role") in ("assistant", "user", "tool")
-            )
-            if not found_in_history:
-                message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
+            message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'

        if "@" in message_text:
            try:
@@ -3857,9 +3902,11 @@ class GatewayRunner:
                from agent.model_metadata import get_model_context_length

                _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~"))
+                _msg_runtime = _resolve_runtime_agent_kwargs()
                _msg_ctx_len = get_model_context_length(
                    self._model,
-                    base_url=self._base_url or "",
+                    base_url=self._base_url or _msg_runtime.get("base_url") or "",
+                    api_key=_msg_runtime.get("api_key") or "",
                )
                _ctx_result = await preprocess_context_references_async(
                    message_text,
@@ -5121,7 +5168,6 @@ class GatewayRunner:
        # Save the requester's routing info so the new gateway process can
        # notify them once it comes back online.
        try:
-            import json as _json
            notify_data = {
                "platform": event.source.platform.value if event.source.platform else None,
                "chat_id": event.source.chat_id,
@@ -5129,7 +5175,7 @@ class GatewayRunner:
            if event.source.thread_id:
                notify_data["thread_id"] = event.source.thread_id
            (_hermes_home / ".restart_notify.json").write_text(
-                _json.dumps(notify_data)
+                json.dumps(notify_data)
            )
        except Exception as e:
            logger.debug("Failed to write restart notify file: %s", e)
@@ -5140,16 +5186,14 @@ class GatewayRunner:
        # marker persists so the new gateway can still detect a delayed
        # /restart redelivery from Telegram.  Overwritten on every /restart.
        try:
-            import json as _json
-            import time as _time
            dedup_data = {
                "platform": event.source.platform.value if event.source.platform else None,
-                "requested_at": _time.time(),
+                "requested_at": time.time(),
            }
            if event.platform_update_id is not None:
                dedup_data["update_id"] = event.platform_update_id
            (_hermes_home / ".restart_last_processed.json").write_text(
-                _json.dumps(dedup_data)
+                json.dumps(dedup_data)
            )
        except Exception as e:
            logger.debug("Failed to write restart dedup marker: %s", e)
@@ -5197,12 +5241,10 @@ class GatewayRunner:
            return False

        try:
-            import json as _json
-            import time as _time
            marker_path = _hermes_home / ".restart_last_processed.json"
            if not marker_path.exists():
                return False
-            data = _json.loads(marker_path.read_text())
+            data = json.loads(marker_path.read_text())
        except Exception:
            return False

@@ -5216,7 +5258,7 @@ class GatewayRunner:
        # swallow a fresh /restart from the user.
        requested_at = data.get("requested_at")
        if isinstance(requested_at, (int, float)):
-            if _time.time() - requested_at > 300:
+            if time.time() - requested_at > 300:
                return False
        return event.platform_update_id <= recorded_uid

@@ -5607,7 +5649,7 @@ class GatewayRunner:

        # Cache notice
        cache_enabled = (
-            ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
+            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
            or result.api_mode == "anthropic_messages"
        )
        if cache_enabled:
@@ -7222,6 +7264,38 @@ class GatewayRunner:
                    if cached:
                        agent = cached[0]

+        # Resolve provider/base_url/api_key for the account-usage fetch.
+        # Prefer the live agent; fall back to persisted billing data on the
+        # SessionDB row so `/usage` still returns account info between turns
+        # when no agent is resident.
+        provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
+        base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
+        api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
+        if not provider and getattr(self, "_session_db", None) is not None:
+            try:
+                _entry_for_billing = self.session_store.get_or_create_session(source)
+                persisted = self._session_db.get_session(_entry_for_billing.session_id) or {}
+            except Exception:
+                persisted = {}
+            provider = provider or persisted.get("billing_provider")
+            base_url = base_url or persisted.get("billing_base_url")
+
+        # Fetch account usage off the event loop so slow provider APIs don't
+        # block the gateway. Failures are non-fatal -- account_lines stays [].
+        account_lines: list[str] = []
+        if provider:
+            try:
+                account_snapshot = await asyncio.to_thread(
+                    fetch_account_usage,
+                    provider,
+                    base_url=base_url,
+                    api_key=api_key,
+                )
+            except Exception:
+                account_snapshot = None
+            if account_snapshot:
+                account_lines = render_account_usage_lines(account_snapshot, markdown=True)
+
        if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
            lines = []

@@ -7279,6 +7353,10 @@ class GatewayRunner:
            if ctx.compression_count:
                lines.append(f"Compressions: {ctx.compression_count}")

+            if account_lines:
+                lines.append("")
+                lines.extend(account_lines)
+
            return "\n".join(lines)

        # No agent at all -- check session history for a rough count
@@ -7288,23 +7366,26 @@ class GatewayRunner:
            from agent.model_metadata import estimate_messages_tokens_rough
            msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
            approx = estimate_messages_tokens_rough(msgs)
-            return (
-                f"📊 **Session Info**\n"
-                f"Messages: {len(msgs)}\n"
-                f"Estimated context: ~{approx:,} tokens\n"
-                f"_(Detailed usage available after the first agent response)_"
-            )
+            lines = [
+                "📊 **Session Info**",
+                f"Messages: {len(msgs)}",
+                f"Estimated context: ~{approx:,} tokens",
+                "_(Detailed usage available after the first agent response)_",
+            ]
+            if account_lines:
+                lines.append("")
+                lines.extend(account_lines)
+            return "\n".join(lines)
+        if account_lines:
+            return "\n".join(account_lines)
        return "No usage data available for this session."

    async def _handle_insights_command(self, event: MessageEvent) -> str:
        """Handle /insights command -- show usage insights and analytics."""
-        import asyncio as _asyncio
-
        args = event.get_command_args().strip()

        # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
-        import re as _re
-        args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)
+        args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)

        days = 30
        source = None
@@ -7333,7 +7414,7 @@ class GatewayRunner:
            from hermes_state import SessionDB
            from agent.insights import InsightsEngine

-            loop = _asyncio.get_running_loop()
+            loop = asyncio.get_running_loop()

            def _run_insights():
                db = SessionDB()
@@ -7691,9 +7772,6 @@ class GatewayRunner:
        the messenger.  The user's next message is intercepted by
        ``_handle_message`` and written to ``.update_response``.
        """
-        import json
-        import re as _re
-
        pending_path = _hermes_home / ".update_pending.json"
        claimed_path = _hermes_home / ".update_pending.claimed.json"
        output_path = _hermes_home / ".update_output.txt"
@@ -7738,7 +7816,7 @@ class GatewayRunner:
            return

        def _strip_ansi(text: str) -> str:
-            return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)
+            return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)

        bytes_sent = 0
        last_stream_time = loop.time()
@@ -7886,9 +7964,6 @@ class GatewayRunner:
        cannot resolve the adapter (e.g. after a gateway restart where the
        platform hasn't reconnected yet).
        """
-        import json
-        import re as _re
-
        pending_path = _hermes_home / ".update_pending.json"
        claimed_path = _hermes_home / ".update_pending.claimed.json"
        output_path = _hermes_home / ".update_output.txt"
@@ -7934,7 +8009,7 @@ class GatewayRunner:

            if adapter and chat_id:
                # Strip ANSI escape codes for clean display
-                output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
+                output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
                if output:
                    if len(output) > 3500:
                        output = "…" + output[-3500:]
@@ -7967,14 +8042,12 @@ class GatewayRunner:

    async def _send_restart_notification(self) -> None:
        """Notify the chat that initiated /restart that the gateway is back."""
-        import json as _json
-
        notify_path = _hermes_home / ".restart_notify.json"
        if not notify_path.exists():
            return

        try:
-            data = _json.loads(notify_path.read_text())
+            data = json.loads(notify_path.read_text())
            platform_str = data.get("platform")
            chat_id = data.get("chat_id")
            thread_id = data.get("thread_id")
@@ -8060,7 +8133,6 @@ class GatewayRunner:
            The enriched message string with vision descriptions prepended.
        """
        from tools.vision_tools import vision_analyze_tool
-        import json as _json

        analysis_prompt = (
            "Describe everything visible in this image in thorough detail. "
@@ -8076,7 +8148,7 @@ class GatewayRunner:
                    image_url=path,
                    user_prompt=analysis_prompt,
                )
-                result = _json.loads(result_json)
+                result = json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
                    enriched_parts.append(
@@ -8135,7 +8207,6 @@ class GatewayRunner:
            return disabled_note

        from tools.transcription_tools import transcribe_audio
-        import asyncio

        enriched_parts = []
        for path in audio_paths:
@@ -8271,7 +8342,6 @@ class GatewayRunner:
        if not adapter:
            return
        try:
-            from gateway.platforms.base import MessageEvent, MessageType
            synth_event = MessageEvent(
                text=synth_text,
                message_type=MessageType.TEXT,
@@ -8376,7 +8446,6 @@ class GatewayRunner:
                            break
                    if adapter and source.chat_id:
                        try:
-                            from gateway.platforms.base import MessageEvent, MessageType
                            synth_event = MessageEvent(
                                text=synth_text,
                                message_type=MessageType.TEXT,
@@ -8898,7 +8967,6 @@ class GatewayRunner:
        if _streaming_enabled:
            try:
                from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
-                from gateway.config import Platform
                _adapter = self.adapters.get(source.platform)
                if _adapter:
                    _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
@@ -9182,8 +9250,7 @@ class GatewayRunner:
                if args:
                    from agent.display import get_tool_preview_max_len
                    _pl = get_tool_preview_max_len()
-                    import json as _json
-                    args_str = _json.dumps(args, ensure_ascii=False, default=str)
+                    args_str = json.dumps(args, ensure_ascii=False, default=str)
                    # When tool_preview_length is 0 (default), don't truncate
                    # in verbose mode — the user explicitly asked for full
                    # detail.  Platform message-length limits handle the rest.
@@ -9249,8 +9316,7 @@ class GatewayRunner:
            # Skip tool progress for platforms that don't support message
            # editing (e.g. iMessage/BlueBubbles) — each progress update
            # would become a separate message bubble, which is noisy.
-            from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter
-            if type(adapter).edit_message is _BaseAdapter.edit_message:
+            if type(adapter).edit_message is BasePlatformAdapter.edit_message:
                while not progress_queue.empty():
                    try:
                        progress_queue.get_nowait()
@@ -10698,7 +10764,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    import time as _time
    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
@@ -10738,7 +10803,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            for _ in range(20):
                try:
                    os.kill(existing_pid, 0)
-                    _time.sleep(0.5)
+                    time.sleep(0.5)
                except (ProcessLookupError, PermissionError):
                    break  # Process is gone
            else:
@@ -10749,10 +10814,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
                )
                try:
                    terminate_pid(existing_pid, force=True)
-                    _time.sleep(0.5)
+                    time.sleep(0.5)
                except (ProcessLookupError, PermissionError, OSError):
                    pass
            remove_pid_file()
+            # remove_pid_file() is a no-op when the PID doesn't match.
+            # Force-unlink to cover the old-process-crashed case.
+            try:
+                (get_hermes_home() / "gateway.pid").unlink(missing_ok=True)
+            except Exception:
+                pass
            # Clean up any takeover marker the old process didn't consume
            # (e.g. SIGKILL'd before its shutdown handler could read it).
            try:
@@ -10891,6 +10962,30 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    else:
        logger.info("Skipping signal handlers (not running in main thread).")
    
+    # Claim the PID file BEFORE bringing up any platform adapters.
+    # This closes the --replace race window: two concurrent `gateway run
+    # --replace` invocations both pass the termination-wait above, but
+    # only the winner of the O_CREAT|O_EXCL race below will ever open
+    # Telegram polling, Discord gateway sockets, etc. The loser exits
+    # cleanly before touching any external service.
+    import atexit
+    from gateway.status import write_pid_file, remove_pid_file, get_running_pid
+    _current_pid = get_running_pid()
+    if _current_pid is not None and _current_pid != os.getpid():
+        logger.error(
+            "Another gateway instance (PID %d) started during our startup. "
+            "Exiting to avoid double-running.", _current_pid
+        )
+        return False
+    try:
+        write_pid_file()
+    except FileExistsError:
+        logger.error(
+            "PID file race lost to another gateway instance. Exiting."
+        )
+        return False
+    atexit.register(remove_pid_file)
+
    # Start the gateway
    success = await runner.start()
    if not success:
@@ -10900,12 +10995,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
        return True
    
-    # Write PID file so CLI can detect gateway is running
-    import atexit
-    from gateway.status import write_pid_file, remove_pid_file
-    write_pid_file()
-    atexit.register(remove_pid_file)
-    
    # Start background cron ticker so scheduled jobs fire automatically.
    # Pass the event loop so cron delivery can use live adapters (E2EE support).
    cron_stop = threading.Event()
@@ -152,6 +152,7 @@ class SessionContext:
    source: SessionSource
    connected_platforms: List[Platform]
    home_channels: Dict[Platform, HomeChannel]
+    shared_multi_user_session: bool = False
    
    # Session metadata
    session_key: str = ""
@@ -166,6 +167,7 @@ class SessionContext:
            "home_channels": {
                p.value: hc.to_dict() for p, hc in self.home_channels.items()
            },
+            "shared_multi_user_session": self.shared_multi_user_session,
            "session_key": self.session_key,
            "session_id": self.session_id,
            "created_at": self.created_at.isoformat() if self.created_at else None,
@@ -240,18 +242,16 @@ def build_session_context_prompt(
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")

    # User identity.
-    # In shared thread sessions (non-DM with thread_id), multiple users
-    # contribute to the same conversation.  Don't pin a single user name
-    # in the system prompt — it changes per-turn and would bust the prompt
-    # cache.  Instead, note that this is a multi-user thread; individual
-    # sender names are prefixed on each user message by the gateway.
-    _is_shared_thread = (
-        context.source.chat_type != "dm"
-        and context.source.thread_id
-    )
-    if _is_shared_thread:
+    # In shared multi-user sessions (shared threads OR shared non-thread groups
+    # when group_sessions_per_user=False), multiple users contribute to the same
+    # conversation.  Don't pin a single user name in the system prompt — it
+    # changes per-turn and would bust the prompt cache.  Instead, note that
+    # this is a multi-user session; individual sender names are prefixed on
+    # each user message by the gateway.
+    if context.shared_multi_user_session:
+        session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
        lines.append(
-            "**Session type:** Multi-user thread — messages are prefixed "
+            f"**Session type:** {session_label} — messages are prefixed "
            "with [sender name]. Multiple users may participate."
        )
    elif context.source.user_name:
@@ -467,6 +467,27 @@ class SessionEntry:
        )


+def is_shared_multi_user_session(
+    source: SessionSource,
+    *,
+    group_sessions_per_user: bool = True,
+    thread_sessions_per_user: bool = False,
+) -> bool:
+    """Return True when a non-DM session is shared across participants.
+
+    Mirrors the isolation rules in :func:`build_session_key`:
+      - DMs are never shared.
+      - Threads are shared unless ``thread_sessions_per_user`` is True.
+      - Non-thread group/channel sessions are shared unless
+        ``group_sessions_per_user`` is True (default: True = isolated).
+    """
+    if source.chat_type == "dm":
+        return False
+    if source.thread_id:
+        return not thread_sessions_per_user
+    return not group_sessions_per_user
+
+
 def build_session_key(
    source: SessionSource,
    group_sessions_per_user: bool = True,
@@ -1238,6 +1259,11 @@ def build_session_context(
        source=source,
        connected_platforms=connected,
        home_channels=home_channels,
+        shared_multi_user_session=is_shared_multi_user_session(
+            source,
+            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
+            thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
+        ),
    )
    
    if session_entry:
@@ -56,6 +56,12 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS
 _SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
 _SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)

+# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
+# don't clobber each other's delivery targets.
+_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET)
+_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
+_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)
+
 _VAR_MAP = {
    "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
    "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
@@ -64,6 +70,9 @@ _VAR_MAP = {
    "HERMES_SESSION_USER_ID": _SESSION_USER_ID,
    "HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
    "HERMES_SESSION_KEY": _SESSION_KEY,
+    "HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
+    "HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
+    "HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
 }


@@ -225,8 +225,28 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:


 def write_pid_file() -> None:
-    """Write the current process PID and metadata to the gateway PID file."""
-    _write_json_file(_get_pid_path(), _build_pid_record())
+    """Write the current process PID and metadata to the gateway PID file.
+
+    Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
+    invocations race: exactly one process wins and the rest get
+    FileExistsError.
+    """
+    path = _get_pid_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    record = json.dumps(_build_pid_record())
+    try:
+        fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
+    except FileExistsError:
+        raise  # Let caller decide: another gateway is racing us
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            f.write(record)
+    except Exception:
+        try:
+            path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise


 def write_runtime_status(
@@ -168,8 +168,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="kimi-coding",
        name="Kimi / Moonshot",
        auth_type="api_key",
+        # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat).
+        # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding
+        # by _resolve_kimi_base_url() below.
        inference_base_url="https://api.moonshot.ai/v1",
-        api_key_env_vars=("KIMI_API_KEY",),
+        api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
        base_url_env_var="KIMI_BASE_URL",
    ),
    "kimi-coding-cn": ProviderConfig(
@@ -340,10 +343,16 @@ def get_anthropic_key() -> str:
 # =============================================================================

 # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
-# on api.kimi.com/coding/v1.  Legacy keys from platform.moonshot.ai work on
-# api.moonshot.ai/v1 (the default).  Auto-detect when user hasn't set
+# on api.kimi.com/coding.  Legacy keys from platform.moonshot.ai work on
+# api.moonshot.ai/v1 (the old default).  Auto-detect when user hasn't set
 # KIMI_BASE_URL explicitly.
-KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"
+#
+# Note: the base URL intentionally has NO /v1 suffix.  The /coding endpoint
+# speaks the Anthropic Messages protocol, and the anthropic SDK appends
+# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages"
+# (the correct target). Using "/coding/v1" here would produce
+# "/coding/v1/v1/messages" (a 404).
+KIMI_CODE_BASE_URL = "https://api.kimi.com/coding"


 def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
@@ -3375,7 +3384,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
+                _PROVIDER_MODELS, get_pricing_for_provider,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
            model_ids = _PROVIDER_MODELS.get("nous", [])
@@ -3384,7 +3393,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            unavailable_models: list = []
            if model_ids:
                pricing = get_pricing_for_provider("nous")
-                model_ids = filter_nous_free_models(model_ids, pricing)
                free_tier = check_nous_free_tier()
                if free_tier:
                    model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -152,6 +152,23 @@ def auth_add_command(args) -> None:

    pool = load_pool(provider)

+    # Clear ALL suppressions for this provider — re-adding a credential is
+    # a strong signal the user wants auth re-enabled.  This covers env:*
+    # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
+    # device_code (codex), etc.  One consistent re-engagement pattern.
+    # Matches the Codex device_code re-link pattern that predates this.
+    if not provider.startswith(CUSTOM_POOL_PREFIX):
+        try:
+            from hermes_cli.auth import (
+                _load_auth_store,
+                unsuppress_credential_source,
+            )
+            suppressed = _load_auth_store().get("suppressed_sources", {})
+            for src in list(suppressed.get(provider, []) or []):
+                unsuppress_credential_source(provider, src)
+        except Exception:
+            pass
+
    if requested_type == AUTH_TYPE_API_KEY:
        token = (getattr(args, "api_key", None) or "").strip()
        if not token:
@@ -338,71 +355,28 @@ def auth_remove_command(args) -> None:
        raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
    print(f"Removed {provider} credential #{index} ({removed.label})")

-    # If this was an env-seeded credential, also clear the env var from .env
-    # so it doesn't get re-seeded on the next load_pool() call.
-    if removed.source.startswith("env:"):
-        env_var = removed.source[len("env:"):]
-        if env_var:
-            from hermes_cli.config import remove_env_value
-            cleared = remove_env_value(env_var)
-            if cleared:
-                print(f"Cleared {env_var} from .env")
+    # Unified removal dispatch.  Every credential source Hermes reads from
+    # (env vars, external OAuth files, auth.json blocks, custom config)
+    # has a RemovalStep registered in agent.credential_sources.  The step
+    # handles its source-specific cleanup and we centralise suppression +
+    # user-facing output here so every source behaves identically from
+    # the user's perspective.
+    from agent.credential_sources import find_removal_step
+    from hermes_cli.auth import suppress_credential_source

-    # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
-    # clear the underlying auth store / credential file so it doesn't get
-    # re-seeded on the next load_pool() call.
-    elif provider == "openai-codex" and (
-        removed.source == "device_code" or removed.source.endswith(":device_code")
-    ):
-        # Codex tokens live in TWO places: the Hermes auth store and
-        # ~/.codex/auth.json (the Codex CLI shared file).  On every refresh,
-        # refresh_codex_oauth_pure() writes to both.  So clearing only the
-        # Hermes auth store is not enough — _seed_from_singletons() will
-        # auto-import from ~/.codex/auth.json on the next load_pool() and
-        # the removal is instantly undone.  Mark the source as suppressed
-        # so auto-import is skipped; leave ~/.codex/auth.json untouched so
-        # the Codex CLI itself keeps working.
-        from hermes_cli.auth import (
-            _load_auth_store, _save_auth_store, _auth_store_lock,
-            suppress_credential_source,
-        )
-        with _auth_store_lock():
-            auth_store = _load_auth_store()
-            providers_dict = auth_store.get("providers")
-            if isinstance(providers_dict, dict) and provider in providers_dict:
-                del providers_dict[provider]
-                _save_auth_store(auth_store)
-                print(f"Cleared {provider} OAuth tokens from auth store")
-        suppress_credential_source(provider, "device_code")
-        print("Suppressed openai-codex device_code source — it will not be re-seeded.")
-        print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
-        print("Run `hermes auth add openai-codex` to re-enable if needed.")
+    step = find_removal_step(provider, removed.source)
+    if step is None:
+        # Unregistered source — e.g. "manual", which has nothing external
+        # to clean up.  The pool entry is already gone; we're done.
+        return

-    elif removed.source == "device_code" and provider == "nous":
-        from hermes_cli.auth import (
-            _load_auth_store, _save_auth_store, _auth_store_lock,
-        )
-        with _auth_store_lock():
-            auth_store = _load_auth_store()
-            providers_dict = auth_store.get("providers")
-            if isinstance(providers_dict, dict) and provider in providers_dict:
-                del providers_dict[provider]
-                _save_auth_store(auth_store)
-                print(f"Cleared {provider} OAuth tokens from auth store")
-
-    elif removed.source == "hermes_pkce" and provider == "anthropic":
-        from hermes_constants import get_hermes_home
-        oauth_file = get_hermes_home() / ".anthropic_oauth.json"
-        if oauth_file.exists():
-            oauth_file.unlink()
-            print("Cleared Hermes Anthropic OAuth credentials")
-
-    elif removed.source == "claude_code" and provider == "anthropic":
-        from hermes_cli.auth import suppress_credential_source
-        suppress_credential_source(provider, "claude_code")
-        print("Suppressed claude_code credential — it will not be re-seeded.")
-        print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
-        print("Run `hermes auth add anthropic` to re-enable if needed.")
+    result = step.remove_fn(provider, removed)
+    for line in result.cleaned:
+        print(line)
+    if result.suppress:
+        suppress_credential_source(provider, removed.source)
+    for line in result.hints:
+        print(line)


 def auth_reset_command(args) -> None:
@@ -24,7 +24,6 @@ _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
    ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.3-codex", ("gpt-5.2-codex",)),
-    ("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")),
 ]


@@ -497,9 +497,8 @@ def _collect_gateway_skill_entries(
    # --- Tier 1: Plugin slash commands (never trimmed) ---------------------
    plugin_pairs: list[tuple[str, str]] = []
    try:
-        from hermes_cli.plugins import get_plugin_manager
-        pm = get_plugin_manager()
-        plugin_cmds = getattr(pm, "_plugin_commands", {})
+        from hermes_cli.plugins import get_plugin_commands
+        plugin_cmds = get_plugin_commands()
        for cmd_name in sorted(plugin_cmds):
            name = sanitize_name(cmd_name) if sanitize_name else cmd_name
            if not name:
@@ -925,12 +924,22 @@ class SlashCommandCompleter(Completer):
                    display_meta=meta,
                )

-        # If the user typed @file: or @folder:, delegate to path completions
+        # If the user typed @file: / @folder: (or just @file / @folder with
+        # no colon yet), delegate to path completions.  Accepting the bare
+        # form lets the picker surface directories as soon as the user has
+        # typed `@folder`, without requiring them to first accept the static
+        # `@folder:` hint and re-trigger completion.
        for prefix in ("@file:", "@folder:"):
-            if word.startswith(prefix):
-                path_part = word[len(prefix):] or "."
+            bare = prefix[:-1]
+
+            if word == bare or word.startswith(prefix):
+                want_dir = prefix == "@folder:"
+                path_part = '' if word == bare else word[len(prefix):]
                expanded = os.path.expanduser(path_part)
-                if expanded.endswith("/"):
+
+                if not expanded or expanded == ".":
+                    search_dir, match_prefix = ".", ""
+                elif expanded.endswith("/"):
                    search_dir, match_prefix = expanded, ""
                else:
                    search_dir = os.path.dirname(expanded) or "."
@@ -946,15 +955,21 @@ class SlashCommandCompleter(Completer):
                for entry in sorted(entries):
                    if match_prefix and not entry.lower().startswith(prefix_lower):
                        continue
-                    if count >= limit:
-                        break
                    full_path = os.path.join(search_dir, entry)
                    is_dir = os.path.isdir(full_path)
+                    # `@folder:` must only surface directories; `@file:` only
+                    # regular files.  Without this filter `@folder:` listed
+                    # every .env / .gitignore in the cwd, defeating the
+                    # explicit prefix and confusing users expecting a
+                    # directory picker.
+                    if want_dir != is_dir:
+                        continue
+                    if count >= limit:
+                        break
                    display_path = os.path.relpath(full_path)
                    suffix = "/" if is_dir else ""
-                    kind = "folder" if is_dir else "file"
                    meta = "dir" if is_dir else _file_size_label(full_path)
-                    completion = f"@{kind}:{display_path}{suffix}"
+                    completion = f"{prefix}{display_path}{suffix}"
                    yield Completion(
                        completion,
                        start_position=-len(word),
@@ -13,6 +13,7 @@ This module provides:
 """

 import copy
+import logging
 import os
 import platform
 import re
@@ -24,6 +25,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple

+logger = logging.getLogger(__name__)

 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
@@ -385,6 +387,26 @@ DEFAULT_CONFIG = {
        # (terminal and execute_code).  Skill-declared required_environment_variables
        # are passed through automatically; this list is for non-skill use cases.
        "env_passthrough": [],
+        # Extra files to source in the login shell when building the
+        # per-session environment snapshot.  Use this when tools like nvm,
+        # pyenv, asdf, or custom PATH entries are registered by files that
+        # a bash login shell would skip — most commonly ``~/.bashrc``
+        # (bash doesn't source bashrc in non-interactive login mode) or
+        # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
+        # Paths support ``~`` / ``${VAR}``. Missing files are silently
+        # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
+        # snapshot shell is bash (this is the ``auto_source_bashrc``
+        # behaviour — disable with that key if you want strict login-only
+        # semantics).
+        "shell_init_files": [],
+        # When true (default), Hermes sources ``~/.bashrc`` in the login
+        # shell used to build the environment snapshot.  This captures
+        # PATH additions, shell functions, and aliases defined in the
+        # user's bashrc — which a plain ``bash -l -c`` would otherwise
+        # miss because bash skips bashrc in non-interactive login mode.
+        # Turn this off if you have a bashrc that misbehaves when sourced
+        # non-interactively (e.g. one that hard-exits on TTY checks).
+        "auto_source_bashrc": True,
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "docker_forward_env": [],
        # Explicit environment variables to set inside Docker containers.
@@ -591,6 +613,10 @@ DEFAULT_CONFIG = {
    },
    
    # Text-to-speech configuration
+    # Each provider supports an optional `max_text_length:` override for the
+    # per-request input-character cap. Omit it to use the provider's documented
+    # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
+    # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
    "tts": {
        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
        "edge": {
@@ -643,6 +669,7 @@ DEFAULT_CONFIG = {
        "record_key": "ctrl+b",
        "max_recording_seconds": 120,
        "auto_tts": False,
+        "beep_enabled": True,         # Play record start/stop beeps in CLI voice mode
        "silence_threshold": 200,     # RMS below this = silence (0-32767)
        "silence_duration": 3.0,      # Seconds of silence before auto-stop
    },
@@ -689,6 +716,12 @@ DEFAULT_CONFIG = {
                               # independent of the parent's max_iterations)
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
                                 # "low", "minimal", "none" (empty = inherit parent's level)
+        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
+        # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
+        # and _get_orchestrator_enabled).  Values are clamped to [1, 3] with a
+        # warning log if out of range.
+        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
+        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -701,6 +734,20 @@ DEFAULT_CONFIG = {
    # always goes to ~/.hermes/skills/.
    "skills": {
        "external_dirs": [],   # e.g. ["~/.agents/skills", "/shared/team-skills"]
+        # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
+        # content with the absolute skill directory and the active session id
+        # before the agent sees it.  Lets skill authors reference bundled
+        # scripts without the agent having to join paths.
+        "template_vars": True,
+        # Pre-execute inline shell snippets written as !`cmd` in SKILL.md
+        # body.  Their stdout is inlined into the skill message before the
+        # agent reads it, so skills can inject dynamic context (dates, git
+        # state, detected tool versions, …).  Off by default because any
+        # content from the skill author runs on the host without approval;
+        # only enable for skill sources you trust.
+        "inline_shell": False,
+        # Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
+        "inline_shell_timeout": 10,
    },

    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -771,6 +818,21 @@ DEFAULT_CONFIG = {
    "command_allowlist": [],
    # User-defined quick commands that bypass the agent loop (type: exec only)
    "quick_commands": {},
+
+    # Shell-script hooks — declarative bridge that invokes shell scripts
+    # on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call,
+    # subagent_stop, etc.).  Each entry maps an event name to a list of
+    # {matcher, command, timeout} dicts.  First registration of a new
+    # command prompts the user for consent; subsequent runs reuse the
+    # stored approval from ~/.hermes/shell-hooks-allowlist.json.
+    # See `website/docs/user-guide/features/hooks.md` for schema + examples.
+    "hooks": {},
+
+    # Auto-accept shell-hook registrations without a TTY prompt.  Also
+    # toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1.
+    # Gateway / cron / non-interactive runs need this (or one of the other
+    # channels) to pick up newly-added hooks.
+    "hooks_auto_accept": False,
    # Custom personalities — add your own entries here
    # Supports string format: {"name": "system prompt"}
    # Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}}
@@ -794,6 +856,11 @@ DEFAULT_CONFIG = {
        # Wrap delivered cron responses with a header (task name) and footer
        # ("The agent cannot see this message").  Set to false for clean output.
        "wrap_response": True,
+        # Maximum number of due jobs to run in parallel per tick.
+        # null/0 = unbounded (limited only by thread count).
+        # 1 = serial (pre-v0.9 behaviour).
+        # Also overridable via HERMES_CRON_MAX_PARALLEL env var.
+        "max_parallel_jobs": None,
    },

    # execute_code settings — controls the tool used for programmatic tool calls.
@@ -827,7 +894,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 20,
+    "_config_version": 22,
 }

 # =============================================================================
@@ -1850,12 +1917,53 @@ def _normalize_custom_provider_entry(
    if not isinstance(entry, dict):
        return None

+    # Accept camelCase aliases commonly used in hand-written configs.
+    _CAMEL_ALIASES: Dict[str, str] = {
+        "apiKey": "api_key",
+        "baseUrl": "base_url",
+        "apiMode": "api_mode",
+        "keyEnv": "key_env",
+        "defaultModel": "default_model",
+        "contextLength": "context_length",
+        "rateLimitDelay": "rate_limit_delay",
+    }
+    _KNOWN_KEYS = {
+        "name", "api", "url", "base_url", "api_key", "key_env",
+        "api_mode", "transport", "model", "default_model", "models",
+        "context_length", "rate_limit_delay",
+    }
+    for camel, snake in _CAMEL_ALIASES.items():
+        if camel in entry and snake not in entry:
+            logger.warning(
+                "providers.%s: camelCase key '%s' auto-mapped to '%s' "
+                "(use snake_case to avoid this warning)",
+                provider_key or "?", camel, snake,
+            )
+            entry[snake] = entry[camel]
+    unknown = set(entry.keys()) - _KNOWN_KEYS - set(_CAMEL_ALIASES.keys())
+    if unknown:
+        logger.warning(
+            "providers.%s: unknown config keys ignored: %s",
+            provider_key or "?", ", ".join(sorted(unknown)),
+        )
+
+    from urllib.parse import urlparse
+
    base_url = ""
-    for url_key in ("api", "url", "base_url"):
+    for url_key in ("base_url", "url", "api"):
        raw_url = entry.get(url_key)
        if isinstance(raw_url, str) and raw_url.strip():
-            base_url = raw_url.strip()
-            break
+            candidate = raw_url.strip()
+            parsed = urlparse(candidate)
+            if parsed.scheme and parsed.netloc:
+                base_url = candidate
+                break
+            else:
+                logger.warning(
+                    "providers.%s: '%s' value '%s' is not a valid URL "
+                    "(no scheme or host) — skipped",
+                    provider_key or "?", url_key, candidate,
+                )
    if not base_url:
        return None

@@ -2151,7 +2259,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
    if not issues:
        return

-    import sys
    lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
    for ci in issues:
        marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
@@ -2166,7 +2273,6 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
    These env vars are deprecated — the canonical setting is terminal.cwd
    in config.yaml.  Prints a migration hint to stderr.
    """
-    import os, sys
    messaging_cwd = os.environ.get("MESSAGING_CWD")
    terminal_cwd_env = os.environ.get("TERMINAL_CWD")

@@ -2484,6 +2590,71 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                    else:
                        print("  ✓ Removed unused compression.summary_* keys")

+    # ── Version 20 → 21: plugins are now opt-in; grandfather existing user plugins ──
+    # The loader now requires plugins to appear in ``plugins.enabled`` before
+    # loading. Existing installs had all discovered plugins loading by default
+    # (minus anything in ``plugins.disabled``). To avoid silently breaking
+    # those setups on upgrade, populate ``plugins.enabled`` with the set of
+    # currently-installed user plugins that aren't already disabled.
+    #
+    # Bundled plugins (shipped in the repo itself) are NOT grandfathered —
+    # they ship off for everyone, including existing users, so any user who
+    # wants one has to opt in explicitly.
+    if current_ver < 21:
+        config = read_raw_config()
+        plugins_cfg = config.get("plugins")
+        if not isinstance(plugins_cfg, dict):
+            plugins_cfg = {}
+        # Only migrate if the enabled allow-list hasn't been set yet.
+        if "enabled" not in plugins_cfg:
+            disabled = plugins_cfg.get("disabled", []) or []
+            if not isinstance(disabled, list):
+                disabled = []
+            disabled_set = set(disabled)
+
+            # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins.
+            grandfathered: List[str] = []
+            try:
+                user_plugins_dir = get_hermes_home() / "plugins"
+                if user_plugins_dir.is_dir():
+                    for child in sorted(user_plugins_dir.iterdir()):
+                        if not child.is_dir():
+                            continue
+                        manifest_file = child / "plugin.yaml"
+                        if not manifest_file.exists():
+                            manifest_file = child / "plugin.yml"
+                        if not manifest_file.exists():
+                            continue
+                        try:
+                            with open(manifest_file) as _mf:
+                                manifest = yaml.safe_load(_mf) or {}
+                        except Exception:
+                            manifest = {}
+                        name = manifest.get("name") or child.name
+                        if name in disabled_set:
+                            continue
+                        grandfathered.append(name)
+            except Exception:
+                grandfathered = []
+
+            plugins_cfg["enabled"] = grandfathered
+            config["plugins"] = plugins_cfg
+            save_config(config)
+            results["config_added"].append(
+                f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)"
+            )
+            if not quiet:
+                if grandfathered:
+                    print(
+                        f"  ✓ Plugins now opt-in: grandfathered "
+                        f"{len(grandfathered)} existing plugin(s) into plugins.enabled"
+                    )
+                else:
+                    print(
+                        "  ✓ Plugins now opt-in: no existing plugins to grandfather. "
+                        "Use `hermes plugins enable <name>` to activate."
+                    )
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -3109,7 +3280,6 @@ def _check_non_ascii_credential(key: str, value: str) -> str:
            bad_chars.append(f"  position {i}: {ch!r} (U+{ord(ch):04X})")
    sanitized = value.encode("ascii", errors="ignore").decode("ascii")

-    import sys
    print(
        f"\n  Warning: {key} contains non-ASCII characters that will break API requests.\n"
        f"  This usually happens when copy-pasting from a PDF, rich-text editor,\n"
@@ -30,6 +30,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

 from hermes_cli.colors import Colors, color
 from hermes_constants import OPENROUTER_MODELS_URL
+from utils import base_url_host_matches


 _PROVIDER_ENV_HINTS = (
@@ -942,18 +943,22 @@ def run_doctor(args):
            try:
                import httpx
                _base = os.getenv(_base_env, "") if _base_env else ""
-                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
+                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
+                # (OpenAI-compat surface, which exposes /models for health check).
                if not _base and _key.startswith("sk-kimi-"):
                    _base = "https://api.kimi.com/coding/v1"
-                # Anthropic-compat endpoints (/anthropic) don't support /models.
-                # Rewrite to the OpenAI-compat /v1 surface for health checks.
+                # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
+                # with no /v1) don't support /models.  Rewrite to the OpenAI-compat
+                # /v1 surface for health checks.
                if _base and _base.rstrip("/").endswith("/anthropic"):
                    from agent.auxiliary_client import _to_openai_base_url
                    _base = _to_openai_base_url(_base)
+                if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
+                    _base = _base.rstrip("/") + "/v1"
                _url = (_base.rstrip("/") + "/models") if _base else _default_url
                _headers = {"Authorization": f"Bearer {_key}"}
-                if "api.kimi.com" in _url.lower():
-                    _headers["User-Agent"] = "KimiCLI/1.30.0"
+                if base_url_host_matches(_base, "api.kimi.com"):
+                    _headers["User-Agent"] = "claude-code/0.1.0"
                _resp = httpx.get(
                    _url,
                    headers=_headers,
@@ -3,6 +3,7 @@
 from __future__ import annotations

 import os
+import sys
 from pathlib import Path

 from dotenv import load_dotenv
@@ -14,6 +15,26 @@ from dotenv import load_dotenv
 # pure ASCII (they become HTTP header values).
 _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")

+# Names we've already warned about during this process, so repeated
+# load_hermes_dotenv() calls (user env + project env, gateway hot-reload,
+# tests) don't spam the same warning multiple times.
+_WARNED_KEYS: set[str] = set()
+
+
+def _format_offending_chars(value: str, limit: int = 3) -> str:
+    """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
+    seen: list[str] = []
+    for ch in value:
+        if ord(ch) > 127:
+            label = f"U+{ord(ch):04X}"
+            if ch.isprintable():
+                label += f" ({ch!r})"
+            if label not in seen:
+                seen.append(label)
+            if len(seen) >= limit:
+                break
+    return ", ".join(seen)
+

 def _sanitize_loaded_credentials() -> None:
    """Strip non-ASCII characters from credential env vars in os.environ.
@@ -21,14 +42,42 @@ def _sanitize_loaded_credentials() -> None:
    Called after dotenv loads so the rest of the codebase never sees
    non-ASCII API keys.  Only touches env vars whose names end with
    known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
+
+    Emits a one-line warning to stderr when characters are stripped.
+    Silent stripping would mask copy-paste corruption (Unicode lookalike
+    glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque
+    provider-side "invalid API key" errors (see #6843).
    """
    for key, value in list(os.environ.items()):
        if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
            continue
        try:
            value.encode("ascii")
+            continue
        except UnicodeEncodeError:
-            os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
+            pass
+        cleaned = value.encode("ascii", errors="ignore").decode("ascii")
+        os.environ[key] = cleaned
+        if key in _WARNED_KEYS:
+            continue
+        _WARNED_KEYS.add(key)
+        stripped = len(value) - len(cleaned)
+        detail = _format_offending_chars(value) or "non-printable"
+        print(
+            f"  Warning: {key} contained {stripped} non-ASCII character"
+            f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the "
+            f"key can be sent as an HTTP header.",
+            file=sys.stderr,
+        )
+        print(
+            "  This usually means the key was copy-pasted from a PDF, "
+            "rich-text editor, or web page that substituted lookalike\n"
+            "  Unicode glyphs for ASCII letters. If authentication fails "
+            "(e.g. \"API key not valid\"), re-copy the key from the\n"
+            "  provider's dashboard and run `hermes setup` (or edit the "
+            ".env file in a plain-text editor).",
+            file=sys.stderr,
+        )


 def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
@@ -994,8 +994,6 @@ def get_systemd_linger_status() -> tuple[bool | None, str]:
    if not is_linux():
        return None, "not supported on this platform"

-    import shutil
-
    if not shutil.which("loginctl"):
        return None, "loginctl not found"

@@ -1347,7 +1345,6 @@ def _ensure_linger_enabled() -> None:
        return

    import getpass
-    import shutil

    username = getpass.getuser()
    linger_file = Path(f"/var/lib/systemd/linger/{username}")
@@ -1656,7 +1653,6 @@ def get_launchd_label() -> str:


 def _launchd_domain() -> str:
-    import os
    return f"gui/{os.getuid()}"


@@ -0,0 +1,385 @@
+"""hermes hooks — inspect and manage shell-script hooks.
+
+Usage::
+
+    hermes hooks list
+    hermes hooks test <event> [--for-tool X] [--payload-file F]
+    hermes hooks revoke <command>
+    hermes hooks doctor
+
+Consent records live under ``~/.hermes/shell-hooks-allowlist.json`` and
+hook definitions come from the ``hooks:`` block in ``~/.hermes/config.yaml``
+(the same config read by the CLI / gateway at startup).
+
+This module is a thin CLI shell over :mod:`agent.shell_hooks`; every
+shared concern (payload serialisation, response parsing, allowlist
+format) lives there.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+
+def hooks_command(args) -> None:
+    """Entry point for ``hermes hooks`` — dispatches to the requested action."""
+    sub = getattr(args, "hooks_action", None)
+
+    if not sub:
+        print("Usage: hermes hooks {list|test|revoke|doctor}")
+        print("Run 'hermes hooks --help' for details.")
+        return
+
+    if sub in ("list", "ls"):
+        _cmd_list(args)
+    elif sub == "test":
+        _cmd_test(args)
+    elif sub in ("revoke", "remove", "rm"):
+        _cmd_revoke(args)
+    elif sub == "doctor":
+        _cmd_doctor(args)
+    else:
+        print(f"Unknown hooks subcommand: {sub}")
+
+
+# ---------------------------------------------------------------------------
+# list
+# ---------------------------------------------------------------------------
+
+def _cmd_list(_args) -> None:
+    from hermes_cli.config import load_config
+    from agent import shell_hooks
+
+    specs = shell_hooks.iter_configured_hooks(load_config())
+
+    if not specs:
+        print("No shell hooks configured in ~/.hermes/config.yaml.")
+        print("See `hermes hooks --help` or")
+        print("    website/docs/user-guide/features/hooks.md")
+        print("for the config schema and worked examples.")
+        return
+
+    by_event: Dict[str, List] = {}
+    for spec in specs:
+        by_event.setdefault(spec.event, []).append(spec)
+
+    allowlist = shell_hooks.load_allowlist()
+    approved = {
+        (e.get("event"), e.get("command"))
+        for e in allowlist.get("approvals", [])
+        if isinstance(e, dict)
+    }
+
+    print(f"Configured shell hooks ({len(specs)} total):\n")
+
+    for event in sorted(by_event.keys()):
+        print(f"  [{event}]")
+        for spec in by_event[event]:
+            is_approved = (spec.event, spec.command) in approved
+            status = "✓ allowed" if is_approved else "✗ not allowlisted"
+            matcher_part = f" matcher={spec.matcher!r}" if spec.matcher else ""
+            print(
+                f"    - {spec.command}{matcher_part} "
+                f"(timeout={spec.timeout}s, {status})"
+            )
+
+            if is_approved:
+                entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
+                if entry and entry.get("approved_at"):
+                    print(f"      approved_at: {entry['approved_at']}")
+                    mtime_now = shell_hooks.script_mtime_iso(spec.command)
+                    mtime_at = entry.get("script_mtime_at_approval")
+                    if mtime_now and mtime_at and mtime_now > mtime_at:
+                        print(
+                            f"      ⚠ script modified since approval "
+                            f"(was {mtime_at}, now {mtime_now}) — "
+                            f"run `hermes hooks doctor` to re-validate"
+                        )
+        print()
+
+
+# ---------------------------------------------------------------------------
+# test
+# ---------------------------------------------------------------------------
+
+# Synthetic kwargs matching the real invoke_hook() call sites — these are
+# passed verbatim to agent.shell_hooks.run_once(), which routes them through
+# the same _serialize_payload() that production firings use.  That way the
+# stdin a script sees under `hermes hooks test` and `hermes hooks doctor`
+# is identical in shape to what it will see at runtime.
+_DEFAULT_PAYLOADS = {
+    "pre_tool_call": {
+        "tool_name": "terminal",
+        "args": {"command": "echo hello"},
+        "session_id": "test-session",
+        "task_id": "test-task",
+        "tool_call_id": "test-call",
+    },
+    "post_tool_call": {
+        "tool_name": "terminal",
+        "args": {"command": "echo hello"},
+        "session_id": "test-session",
+        "task_id": "test-task",
+        "tool_call_id": "test-call",
+        "result": '{"output": "hello"}',
+    },
+    "pre_llm_call": {
+        "session_id": "test-session",
+        "user_message": "What is the weather?",
+        "conversation_history": [],
+        "is_first_turn": True,
+        "model": "gpt-4",
+        "platform": "cli",
+    },
+    "post_llm_call": {
+        "session_id": "test-session",
+        "model": "gpt-4",
+        "platform": "cli",
+    },
+    "on_session_start": {"session_id": "test-session"},
+    "on_session_end": {"session_id": "test-session"},
+    "on_session_finalize": {"session_id": "test-session"},
+    "on_session_reset": {"session_id": "test-session"},
+    "pre_api_request": {
+        "session_id": "test-session",
+        "task_id": "test-task",
+        "platform": "cli",
+        "model": "claude-sonnet-4-6",
+        "provider": "anthropic",
+        "base_url": "https://api.anthropic.com",
+        "api_mode": "anthropic_messages",
+        "api_call_count": 1,
+        "message_count": 4,
+        "tool_count": 12,
+        "approx_input_tokens": 2048,
+        "request_char_count": 8192,
+        "max_tokens": 4096,
+    },
+    "post_api_request": {
+        "session_id": "test-session",
+        "task_id": "test-task",
+        "platform": "cli",
+        "model": "claude-sonnet-4-6",
+        "provider": "anthropic",
+        "base_url": "https://api.anthropic.com",
+        "api_mode": "anthropic_messages",
+        "api_call_count": 1,
+        "api_duration": 1.234,
+        "finish_reason": "stop",
+        "message_count": 4,
+        "response_model": "claude-sonnet-4-6",
+        "usage": {"input_tokens": 2048, "output_tokens": 512},
+        "assistant_content_chars": 1200,
+        "assistant_tool_call_count": 0,
+    },
+    "subagent_stop": {
+        "parent_session_id": "parent-sess",
+        "child_role": None,
+        "child_summary": "Synthetic summary for hooks test",
+        "child_status": "completed",
+        "duration_ms": 1234,
+    },
+}
+
+
+def _cmd_test(args) -> None:
+    from hermes_cli.config import load_config
+    from hermes_cli.plugins import VALID_HOOKS
+    from agent import shell_hooks
+
+    event = args.event
+    if event not in VALID_HOOKS:
+        print(f"Unknown event: {event!r}")
+        print(f"Valid events: {', '.join(sorted(VALID_HOOKS))}")
+        return
+
+    # Synthetic kwargs in the same shape invoke_hook() would pass.  Merged
+    # with --for-tool (overrides tool_name) and --payload-file (extra kwargs).
+    payload = dict(_DEFAULT_PAYLOADS.get(event, {"session_id": "test-session"}))
+
+    if getattr(args, "for_tool", None):
+        payload["tool_name"] = args.for_tool
+
+    if getattr(args, "payload_file", None):
+        try:
+            custom = json.loads(Path(args.payload_file).read_text())
+            if isinstance(custom, dict):
+                payload.update(custom)
+            else:
+                print(f"Warning: {args.payload_file} is not a JSON object; ignoring")
+        except Exception as exc:
+            print(f"Error reading payload file: {exc}")
+            return
+
+    specs = shell_hooks.iter_configured_hooks(load_config())
+    specs = [s for s in specs if s.event == event]
+
+    if getattr(args, "for_tool", None):
+        specs = [
+            s for s in specs
+            if s.event not in ("pre_tool_call", "post_tool_call")
+            or s.matches_tool(args.for_tool)
+        ]
+
+    if not specs:
+        print(f"No shell hooks configured for event: {event}")
+        if getattr(args, "for_tool", None):
+            print(f"(with matcher filter --for-tool={args.for_tool})")
+        return
+
+    print(f"Firing {len(specs)} hook(s) for event '{event}':\n")
+    for spec in specs:
+        print(f"  → {spec.command}")
+        result = shell_hooks.run_once(spec, payload)
+        _print_run_result(result)
+        print()
+
+
+def _print_run_result(result: Dict[str, Any]) -> None:
+    if result.get("error"):
+        print(f"      ✗ error: {result['error']}")
+        return
+    if result.get("timed_out"):
+        print(f"      ✗ timed out after {result['elapsed_seconds']}s")
+        return
+
+    rc = result.get("returncode")
+    elapsed = result.get("elapsed_seconds", 0)
+    print(f"      exit={rc}  elapsed={elapsed}s")
+
+    stdout = (result.get("stdout") or "").strip()
+    stderr = (result.get("stderr") or "").strip()
+    if stdout:
+        print(f"      stdout: {_truncate(stdout, 400)}")
+    if stderr:
+        print(f"      stderr: {_truncate(stderr, 400)}")
+
+    parsed = result.get("parsed")
+    if parsed:
+        print(f"      parsed (Hermes wire shape): {json.dumps(parsed)}")
+    else:
+        print("      parsed: <none — hook contributed nothing to the dispatcher>")
+
+
+def _truncate(s: str, n: int) -> str:
+    return s if len(s) <= n else s[: n - 3] + "..."
+
+
+# ---------------------------------------------------------------------------
+# revoke
+# ---------------------------------------------------------------------------
+
+def _cmd_revoke(args) -> None:
+    from agent import shell_hooks
+
+    removed = shell_hooks.revoke(args.command)
+    if removed == 0:
+        print(f"No allowlist entry found for command: {args.command}")
+        return
+    print(f"Removed {removed} allowlist entry/entries for: {args.command}")
+    print(
+        "Note: currently running CLI / gateway processes keep their "
+        "already-registered callbacks until they restart."
+    )
+
+
+# ---------------------------------------------------------------------------
+# doctor
+# ---------------------------------------------------------------------------
+
+def _cmd_doctor(_args) -> None:
+    from hermes_cli.config import load_config
+    from agent import shell_hooks
+
+    specs = shell_hooks.iter_configured_hooks(load_config())
+
+    if not specs:
+        print("No shell hooks configured — nothing to check.")
+        return
+
+    print(f"Checking {len(specs)} configured shell hook(s)...\n")
+
+    problems = 0
+    for spec in specs:
+        print(f"  [{spec.event}] {spec.command}")
+        problems += _doctor_one(spec, shell_hooks)
+        print()
+
+    if problems:
+        print(f"{problems} issue(s) found.  Fix before relying on these hooks.")
+    else:
+        print("All shell hooks look healthy.")
+
+
+def _doctor_one(spec, shell_hooks) -> int:
+    problems = 0
+
+    # 1. Script exists and is executable
+    if shell_hooks.script_is_executable(spec.command):
+        print("      ✓ script exists and is executable")
+    else:
+        problems += 1
+        print("      ✗ script missing or not executable "
+              "(chmod +x the file, or fix the path)")
+
+    # 2. Allowlist status
+    entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
+    if entry:
+        print(f"      ✓ allowlisted (approved {entry.get('approved_at', '?')})")
+    else:
+        problems += 1
+        print("      ✗ not allowlisted — hook will NOT fire at runtime "
+              "(run with --accept-hooks once, or confirm at the TTY prompt)")
+
+    # 3. Mtime drift
+    if entry and entry.get("script_mtime_at_approval"):
+        mtime_now = shell_hooks.script_mtime_iso(spec.command)
+        mtime_at = entry["script_mtime_at_approval"]
+        if mtime_now and mtime_at and mtime_now > mtime_at:
+            problems += 1
+            print(f"      ⚠ script modified since approval "
+                  f"(was {mtime_at}, now {mtime_now}) — review changes, "
+                  f"then `hermes hooks revoke` + re-approve to refresh")
+        elif mtime_now and mtime_at and mtime_now == mtime_at:
+            print("      ✓ script unchanged since approval")
+
+    # 4. Produces valid JSON for a synthetic payload — only when the entry
+    # is already allowlisted.  Otherwise `hermes hooks doctor` would execute
+    # every script listed in a freshly-pulled config before the user has
+    # reviewed them, which directly contradicts the documented workflow
+    # ("spot newly-added hooks *before they register*").
+    if not entry:
+        print("      ℹ skipped JSON smoke test — not allowlisted yet. "
+              "Approve the hook first (via TTY prompt or --accept-hooks), "
+              "then re-run `hermes hooks doctor`.")
+    elif shell_hooks.script_is_executable(spec.command):
+        payload = _DEFAULT_PAYLOADS.get(spec.event, {"extra": {}})
+        result = shell_hooks.run_once(spec, payload)
+        if result.get("timed_out"):
+            problems += 1
+            print(f"      ✗ timed out after {result['elapsed_seconds']}s "
+                  f"on synthetic payload (timeout={spec.timeout}s)")
+        elif result.get("error"):
+            problems += 1
+            print(f"      ✗ execution error: {result['error']}")
+        else:
+            rc = result.get("returncode")
+            elapsed = result.get("elapsed_seconds", 0)
+            stdout = (result.get("stdout") or "").strip()
+            if stdout:
+                try:
+                    json.loads(stdout)
+                    print(f"      ✓ produced valid JSON on synthetic payload "
+                          f"(exit={rc}, {elapsed}s)")
+                except json.JSONDecodeError:
+                    problems += 1
+                    print(f"      ✗ stdout was not valid JSON (exit={rc}, "
+                          f"{elapsed}s): {_truncate(stdout, 120)}")
+            else:
+                print(f"      ✓ ran clean with empty stdout "
+                      f"(exit={rc}, {elapsed}s) — hook is observer-only")
+
+    return problems
@@ -51,6 +51,19 @@ import sys
 from pathlib import Path
 from typing import Optional

+def _add_accept_hooks_flag(parser) -> None:
+    """Attach the ``--accept-hooks`` flag.  Shared across every agent
+    subparser so the flag works regardless of CLI position."""
+    parser.add_argument(
+        "--accept-hooks",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help=(
+            "Auto-approve unseen shell hooks without a TTY prompt "
+            "(equivalent to HERMES_ACCEPT_HOOKS=1 / hooks_auto_accept: true)."
+        ),
+    )
+

 def _require_tty(command_name: str) -> None:
    """Exit with a clear error if stdin is not a terminal.
@@ -180,7 +193,7 @@ import time as _time
 from datetime import datetime

 from hermes_cli import __version__, __release_date__
-from hermes_constants import OPENROUTER_BASE_URL
+from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)

@@ -605,7 +618,6 @@ def _exec_in_container(container_info: dict, cli_args: list):
        container_info: dict with backend, container_name, exec_user, hermes_bin
        cli_args: the original CLI arguments (everything after 'hermes')
    """
-    import shutil

    backend = container_info["backend"]
    container_name = container_info["container_name"]
@@ -1003,6 +1015,17 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
    )
    env.setdefault("HERMES_PYTHON", sys.executable)
    env.setdefault("HERMES_CWD", os.getcwd())
+    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
+    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
+    # large transcripts / reasoning blobs. Token-level merge: respect any
+    # user-supplied --max-old-space-size (they may have set it higher) and
+    # avoid duplicating --expose-gc.
+    _tokens = env.get("NODE_OPTIONS", "").split()
+    if not any(t.startswith("--max-old-space-size=") for t in _tokens):
+        _tokens.append("--max-old-space-size=8192")
+    if "--expose-gc" not in _tokens:
+        _tokens.append("--expose-gc")
+    env["NODE_OPTIONS"] = " ".join(_tokens)
    if resume_session_id:
        env["HERMES_TUI_RESUME"] = resume_session_id

@@ -1157,8 +1180,6 @@ def cmd_gateway(args):
 def cmd_whatsapp(args):
    """Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
    _require_tty("whatsapp")
-    import subprocess
-    from pathlib import Path
    from hermes_cli.config import get_env_value, save_env_value

    print()
@@ -1267,16 +1288,27 @@ def cmd_whatsapp(args):
        return

    if not (bridge_dir / "node_modules").exists():
-        print("\n→ Installing WhatsApp bridge dependencies...")
-        result = subprocess.run(
-            ["npm", "install"],
-            cwd=str(bridge_dir),
-            capture_output=True,
-            text=True,
-            timeout=120,
-        )
+        print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
+        npm = shutil.which("npm")
+        if not npm:
+            print("  ✗ npm not found on PATH — install Node.js first")
+            return
+        try:
+            result = subprocess.run(
+                [npm, "install", "--no-fund", "--no-audit", "--progress=false"],
+                cwd=str(bridge_dir),
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.PIPE,
+                text=True,
+            )
+        except KeyboardInterrupt:
+            print("\n  ✗ Install cancelled")
+            return
        if result.returncode != 0:
-            print(f"  ✗ npm install failed: {result.stderr}")
+            err = (result.stderr or "").strip()
+            preview = "\n".join(err.splitlines()[-30:]) if err else "(no output)"
+            print("  ✗ npm install failed:")
+            print(preview)
            return
        print("  ✓ Dependencies installed")
    else:
@@ -1295,8 +1327,6 @@ def cmd_whatsapp(args):
        except (EOFError, KeyboardInterrupt):
            response = "n"
        if response.lower() in ("y", "yes"):
-            import shutil
-
            shutil.rmtree(session_dir, ignore_errors=True)
            session_dir.mkdir(parents=True, exist_ok=True)
            print("  ✓ Session cleared")
@@ -1392,8 +1422,6 @@ def select_provider_and_model(args=None):

    # Read effective provider the same way the CLI does at startup:
    # config.yaml model.provider > env var > auto-detect
-    import os
-
    config_provider = None
    model_cfg = config.get("model")
    if isinstance(model_cfg, dict):
@@ -1504,6 +1532,8 @@ def select_provider_and_model(args=None):
    # Step 2: Provider-specific setup + model selection
    if selected_provider == "openrouter":
        _model_flow_openrouter(config, current_model)
+    elif selected_provider == "ai-gateway":
+        _model_flow_ai_gateway(config, current_model)
    elif selected_provider == "nous":
        _model_flow_nous(config, current_model, args=args)
    elif selected_provider == "openai-codex":
@@ -1549,7 +1579,6 @@ def select_provider_and_model(args=None):
        "kilocode",
        "opencode-zen",
        "opencode-go",
-        "ai-gateway",
        "alibaba",
        "huggingface",
        "xiaomi",
@@ -2021,6 +2050,63 @@ def _model_flow_openrouter(config, current_model=""):
        print("No change.")


+def _model_flow_ai_gateway(config, current_model=""):
+    """Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
+    from hermes_cli.auth import (
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, save_env_value
+
+    api_key = get_env_value("AI_GATEWAY_API_KEY")
+    if not api_key:
+        print("No Vercel AI Gateway API key configured.")
+        print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
+        print("Add a payment method to get $5 in free credits.")
+        print()
+        try:
+            import getpass
+
+            key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return
+        if not key:
+            print("Cancelled.")
+            return
+        save_env_value("AI_GATEWAY_API_KEY", key)
+        print("API key saved.")
+        print()
+
+    from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider
+
+    models_list = ai_gateway_model_ids(force_refresh=True)
+    pricing = get_pricing_for_provider("ai-gateway", force_refresh=True)
+
+    selected = _prompt_model_selection(
+        models_list, current_model=current_model, pricing=pricing
+    )
+    if selected:
+        _save_model_choice(selected)
+
+        from hermes_cli.config import load_config, save_config
+
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = "ai-gateway"
+        model["base_url"] = AI_GATEWAY_BASE_URL
+        model["api_mode"] = "chat_completions"
+        save_config(cfg)
+        deactivate_provider()
+        print(f"Default model set to: {selected} (via Vercel AI Gateway)")
+    else:
+        print("No change.")
+
+
 def _model_flow_nous(config, current_model="", args=None):
    """Nous Portal provider: ensure logged in, then pick model."""
    from hermes_cli.auth import (
@@ -2041,7 +2127,6 @@ def _model_flow_nous(config, current_model="", args=None):
        save_env_value,
    )
    from hermes_cli.nous_subscription import prompt_enable_tool_gateway
-    import argparse

    state = get_provider_auth_state("nous")
    if not state or not state.get("access_token"):
@@ -2080,7 +2165,6 @@ def _model_flow_nous(config, current_model="", args=None):
    from hermes_cli.models import (
        _PROVIDER_MODELS,
        get_pricing_for_provider,
-        filter_nous_free_models,
        check_nous_free_tier,
        partition_nous_models_by_tier,
    )
@@ -2123,10 +2207,8 @@ def _model_flow_nous(config, current_model="", args=None):
    # Check if user is on free tier
    free_tier = check_nous_free_tier()

-    # For both tiers: apply the allowlist filter first (removes non-allowlisted
-    # free models and allowlist models that aren't actually free).
-    # Then for free users: partition remaining models into selectable/unavailable.
-    model_ids = filter_nous_free_models(model_ids, pricing)
+    # For free users: partition models into selectable/unavailable based on
+    # whether they are free per the Portal-reported pricing.
    unavailable_models: list[str] = []
    if free_tier:
        model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -2209,7 +2291,6 @@ def _model_flow_openai_codex(config, current_model=""):
        DEFAULT_CODEX_BASE_URL,
    )
    from hermes_cli.codex_models import get_codex_model_ids
-    import argparse

    status = get_codex_auth_status()
    if not status.get("logged_in"):
@@ -3340,8 +3421,9 @@ def _model_flow_kimi(config, current_model=""):

    # Step 3: Model selection — show appropriate models for the endpoint
    if is_coding_plan:
-        # Coding Plan models (kimi-k2.5 first)
+        # Coding Plan models (kimi-k2.6 first)
        model_list = [
+            "kimi-k2.6",
            "kimi-k2.5",
            "kimi-for-coding",
            "kimi-k2-thinking",
@@ -4080,6 +4162,12 @@ def cmd_webhook(args):
    webhook_command(args)


+def cmd_hooks(args):
+    """Shell-hook inspection and management."""
+    from hermes_cli.hooks import hooks_command
+    hooks_command(args)
+
+
 def cmd_doctor(args):
    """Check configuration and dependencies."""
    from hermes_cli.doctor import run_doctor
@@ -4189,9 +4277,7 @@ def _clear_bytecode_cache(root: Path) -> int:
        ]
        if os.path.basename(dirpath) == "__pycache__":
            try:
-                import shutil as _shutil
-
-                _shutil.rmtree(dirpath)
+                shutil.rmtree(dirpath)
                removed += 1
            except OSError:
                pass
@@ -4230,8 +4316,6 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
    tmp.replace(prompt_path)

    # Poll for response
-    import time as _time
-
    deadline = _time.monotonic() + timeout
    while _time.monotonic() < deadline:
        if response_path.exists():
@@ -4263,7 +4347,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
    """
    if not (web_dir / "package.json").exists():
        return True
-    import shutil

    npm = shutil.which("npm")
    if not npm:
@@ -4300,7 +4383,6 @@ def _update_via_zip(args):
    Used on Windows when git file I/O is broken (antivirus, NTFS filter
    drivers causing 'Invalid argument' errors on file creation).
    """
-    import shutil
    import tempfile
    import zipfile
    from urllib.request import urlretrieve
@@ -4377,7 +4459,6 @@ def _update_via_zip(args):
    # breaks on this machine, keep base deps and reinstall the remaining extras
    # individually so update does not silently strip working capabilities.
    print("→ Updating Python dependencies...")
-    import subprocess

    uv_bin = shutil.which("uv")
    if uv_bin:
@@ -5128,9 +5209,11 @@ def _install_hangup_protection(gateway_mode: bool = False):
    # (2) Mirror output to update.log and wrap stdio for broken-pipe
    # tolerance.  Any failure here is non-fatal; we just skip the wrap.
    try:
-        from hermes_cli.config import get_hermes_home
+        # Late-bound import so tests can monkeypatch
+        # hermes_cli.config.get_hermes_home to simulate setup failure.
+        from hermes_cli.config import get_hermes_home as _get_hermes_home

-        logs_dir = get_hermes_home() / "logs"
+        logs_dir = _get_hermes_home() / "logs"
        logs_dir.mkdir(parents=True, exist_ok=True)
        log_path = logs_dir / "update.log"
        log_file = open(log_path, "a", buffering=1, encoding="utf-8")
@@ -5705,8 +5788,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                    # Verify the service actually survived the
                                    # restart.  systemctl restart returns 0 even
                                    # if the new process crashes immediately.
-                                    import time as _time
-
                                    _time.sleep(3)
                                    verify = subprocess.run(
                                        scope_cmd + ["is-active", svc_name],
@@ -6359,6 +6440,17 @@ For more help on a command:
        default=False,
        help="Run in an isolated git worktree (for parallel agents)",
    )
+    parser.add_argument(
+        "--accept-hooks",
+        action="store_true",
+        default=False,
+        help=(
+            "Auto-approve any unseen shell hooks declared in config.yaml "
+            "without a TTY prompt.  Equivalent to HERMES_ACCEPT_HOOKS=1 or "
+            "hooks_auto_accept: true in config.yaml.  Use on CI / headless "
+            "runs that can't prompt."
+        ),
+    )
    parser.add_argument(
        "--skills",
        "-s",
@@ -6481,6 +6573,16 @@ For more help on a command:
        default=argparse.SUPPRESS,
        help="Run in an isolated git worktree (for parallel agents on the same repo)",
    )
+    chat_parser.add_argument(
+        "--accept-hooks",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help=(
+            "Auto-approve any unseen shell hooks declared in config.yaml "
+            "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
+            "hooks_auto_accept: in config.yaml)."
+        ),
+    )
    chat_parser.add_argument(
        "--checkpoints",
        action="store_true",
@@ -6600,6 +6702,8 @@ For more help on a command:
        action="store_true",
        help="Replace any existing gateway instance (useful for systemd)",
    )
+    _add_accept_hooks_flag(gateway_run)
+    _add_accept_hooks_flag(gateway_parser)

    # gateway start
    gateway_start = gateway_subparsers.add_parser(
@@ -6964,6 +7068,7 @@ For more help on a command:
        "run", help="Run a job on the next scheduler tick"
    )
    cron_run.add_argument("job_id", help="Job ID to trigger")
+    _add_accept_hooks_flag(cron_run)

    cron_remove = cron_subparsers.add_parser(
        "remove", aliases=["rm", "delete"], help="Remove a scheduled job"
@@ -6974,8 +7079,9 @@ For more help on a command:
    cron_subparsers.add_parser("status", help="Check if cron scheduler is running")

    # cron tick (mostly for debugging)
-    cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
-
+    cron_tick = cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
+    _add_accept_hooks_flag(cron_tick)
+    _add_accept_hooks_flag(cron_parser)
    cron_parser.set_defaults(func=cmd_cron)

    # =========================================================================
@@ -7042,6 +7148,67 @@ For more help on a command:

    webhook_parser.set_defaults(func=cmd_webhook)

+    # =========================================================================
+    # hooks command — shell-hook inspection and management
+    # =========================================================================
+    hooks_parser = subparsers.add_parser(
+        "hooks",
+        help="Inspect and manage shell-script hooks",
+        description=(
+            "Inspect shell-script hooks declared in ~/.hermes/config.yaml, "
+            "test them against synthetic payloads, and manage the first-use "
+            "consent allowlist at ~/.hermes/shell-hooks-allowlist.json."
+        ),
+    )
+    hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")
+
+    hooks_subparsers.add_parser(
+        "list", aliases=["ls"],
+        help="List configured hooks with matcher, timeout, and consent status",
+    )
+
+    _hk_test = hooks_subparsers.add_parser(
+        "test",
+        help="Fire every hook matching <event> against a synthetic payload",
+    )
+    _hk_test.add_argument(
+        "event",
+        help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
+    )
+    _hk_test.add_argument(
+        "--for-tool", dest="for_tool", default=None,
+        help=(
+            "Only fire hooks whose matcher matches this tool name "
+            "(used for pre_tool_call / post_tool_call)"
+        ),
+    )
+    _hk_test.add_argument(
+        "--payload-file", dest="payload_file", default=None,
+        help=(
+            "Path to a JSON file whose contents are merged into the "
+            "synthetic payload before execution"
+        ),
+    )
+
+    _hk_revoke = hooks_subparsers.add_parser(
+        "revoke", aliases=["remove", "rm"],
+        help="Remove a command's allowlist entries (takes effect on next restart)",
+    )
+    _hk_revoke.add_argument(
+        "command",
+        help="The exact command string to revoke (as declared in config.yaml)",
+    )
+
+    hooks_subparsers.add_parser(
+        "doctor",
+        help=(
+            "Check each configured hook: exec bit, allowlist, mtime drift, "
+            "JSON validity, and synthetic run timing"
+        ),
+    )
+
+    hooks_parser.set_defaults(func=cmd_hooks)
+
    # =========================================================================
    # doctor command
    # =========================================================================
@@ -7449,6 +7616,17 @@ Examples:
        action="store_true",
        help="Remove existing plugin and reinstall",
    )
+    _install_enable_group = plugins_install.add_mutually_exclusive_group()
+    _install_enable_group.add_argument(
+        "--enable",
+        action="store_true",
+        help="Auto-enable the plugin after install (skip confirmation prompt)",
+    )
+    _install_enable_group.add_argument(
+        "--no-enable",
+        action="store_true",
+        help="Install disabled (skip confirmation prompt); enable later with `hermes plugins enable <name>`",
+    )

    plugins_update = plugins_subparsers.add_parser(
        "update", help="Pull latest changes for an installed plugin"
@@ -7496,9 +7674,7 @@ Examples:
            )
            cmd_info["setup_fn"](plugin_parser)
    except Exception as _exc:
-        import logging as _log
-
-        _log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
+        logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)

    # =========================================================================
    # memory command
@@ -7704,6 +7880,7 @@ Examples:
        action="store_true",
        help="Enable verbose logging on stderr",
    )
+    _add_accept_hooks_flag(mcp_serve_p)

    mcp_add_p = mcp_sub.add_parser(
        "add", help="Add an MCP server (discovery-first install)"
@@ -7742,6 +7919,8 @@ Examples:
    )
    mcp_login_p.add_argument("name", help="Server name to re-authenticate")

+    _add_accept_hooks_flag(mcp_parser)
+
    def cmd_mcp(args):
        from hermes_cli.mcp_config import mcp_command

@@ -7880,7 +8059,6 @@ Examples:
                    return
                line = _json.dumps(data, ensure_ascii=False) + "\n"
                if args.output == "-":
-                    import sys

                    sys.stdout.write(line)
                else:
@@ -7890,7 +8068,6 @@ Examples:
            else:
                sessions = db.export_all(source=args.source)
                if args.output == "-":
-                    import sys

                    for s in sessions:
                        sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n")
@@ -7961,8 +8138,6 @@ Examples:

            # Launch hermes --resume <id> by replacing the current process
            print(f"Resuming session: {selected_id}")
-            import shutil
-
            hermes_bin = shutil.which("hermes")
            if hermes_bin:
                os.execvp(hermes_bin, ["hermes", "--resume", selected_id])
@@ -8153,6 +8328,7 @@ Examples:
        help="Run Hermes Agent as an ACP (Agent Client Protocol) server",
        description="Start Hermes Agent in ACP mode for editor integration (VS Code, Zed, JetBrains)",
    )
+    _add_accept_hooks_flag(acp_parser)

    def cmd_acp(args):
        """Launch Hermes Agent as an ACP server."""
@@ -8426,6 +8602,42 @@ Examples:
        cmd_version(args)
        return

+    # Discover Python plugins and register shell hooks once, before any
+    # command that can fire lifecycle hooks.  Both are idempotent; gated
+    # so introspection/management commands (hermes hooks list, cron
+    # list, gateway status, mcp add, ...) don't pay discovery cost or
+    # trigger consent prompts for hooks the user is still inspecting.
+    # Groups with mixed admin/CRUD vs. agent-running entries narrow via
+    # the nested subcommand (dest varies by parser).
+    _AGENT_COMMANDS = {None, "chat", "acp", "rl"}
+    _AGENT_SUBCOMMANDS = {
+        "cron":    ("cron_command",    {"run", "tick"}),
+        "gateway": ("gateway_command", {"run"}),
+        "mcp":     ("mcp_action",      {"serve"}),
+    }
+    _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
+    if (
+        args.command in _AGENT_COMMANDS
+        or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
+    ):
+        _accept_hooks = bool(getattr(args, "accept_hooks", False))
+        try:
+            from hermes_cli.plugins import discover_plugins
+            discover_plugins()
+        except Exception:
+            logger.debug(
+                "plugin discovery failed at CLI startup", exc_info=True,
+            )
+        try:
+            from hermes_cli.config import load_config
+            from agent.shell_hooks import register_from_config
+            register_from_config(load_config(), accept_hooks=_accept_hooks)
+        except Exception:
+            logger.debug(
+                "shell-hook registration failed at CLI startup",
+                exc_info=True,
+            )
+
    # Handle top-level --resume / --continue as shortcut to chat
    if (args.resume or args.continue_last) and args.command is None:
        args.command = "chat"
@@ -678,6 +678,7 @@ def switch_model(
        _da = DIRECT_ALIASES.get(resolved_alias)
        if _da is not None and _da.base_url:
            base_url = _da.base_url
+            api_mode = ""  # clear so determine_api_mode re-detects from URL
            if not api_key:
                api_key = "no-key-required"

@@ -1095,6 +1096,7 @@ def list_authenticated_providers(
                "api_url": api_url,
            })
            seen_slugs.add(ep_name.lower())
+            seen_slugs.add(custom_provider_slug(display_name).lower())
            _pair = (
                str(display_name).strip().lower(),
                str(api_url).strip().rstrip("/").lower(),
@@ -16,6 +16,12 @@ from difflib import get_close_matches
 from pathlib import Path
 from typing import Any, NamedTuple, Optional

+from hermes_cli import __version__ as _HERMES_VERSION
+
+# Identify ourselves so endpoints fronted by Cloudflare's Browser Integrity
+# Check (error 1010) don't reject the default ``Python-urllib/*`` signature.
+_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
+
 COPILOT_BASE_URL = "https://api.githubcopilot.com"
 COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
 COPILOT_EDITOR_VERSION = "vscode/1.104.1"
@@ -26,7 +32,7 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
-    ("moonshotai/kimi-k2.5",            "recommended"),
+    ("moonshotai/kimi-k2.6",            "recommended"),
    ("anthropic/claude-opus-4.7",       ""),
    ("anthropic/claude-opus-4.6",       ""),
    ("anthropic/claude-sonnet-4.6",     ""),
@@ -47,6 +53,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("stepfun/step-3.5-flash",          ""),
    ("minimax/minimax-m2.7",            ""),
    ("minimax/minimax-m2.5",            ""),
+    ("minimax/minimax-m2.5:free",       "free"),
    ("z-ai/glm-5.1",                    ""),
    ("z-ai/glm-5v-turbo",               ""),
    ("z-ai/glm-5-turbo",                ""),
@@ -62,6 +69,31 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
 _openrouter_catalog_cache: list[tuple[str, str]] | None = None


+# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
+# OSS / open-weight models prioritized first, then closed-source by family.
+# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen,
+# zai/ and xai/ without hyphens).
+VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
+    ("moonshotai/kimi-k2.6",                 "recommended"),
+    ("alibaba/qwen3.6-plus",                 ""),
+    ("zai/glm-5.1",                          ""),
+    ("minimax/minimax-m2.7",                 ""),
+    ("anthropic/claude-sonnet-4.6",          ""),
+    ("anthropic/claude-opus-4.7",            ""),
+    ("anthropic/claude-opus-4.6",            ""),
+    ("anthropic/claude-haiku-4.5",           ""),
+    ("openai/gpt-5.4",                       ""),
+    ("openai/gpt-5.4-mini",                  ""),
+    ("openai/gpt-5.3-codex",                 ""),
+    ("google/gemini-3.1-pro-preview",        ""),
+    ("google/gemini-3-flash",                ""),
+    ("google/gemini-3.1-flash-lite-preview", ""),
+    ("xai/grok-4.20-reasoning",              ""),
+]
+
+_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None
+
+
 def _codex_curated_models() -> list[str]:
    """Derive the openai-codex curated list from codex_models.py.

@@ -75,7 +107,7 @@ def _codex_curated_models() -> list[str]:

 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
-        "moonshotai/kimi-k2.5",
+        "moonshotai/kimi-k2.6",
        "xiaomi/mimo-v2-pro",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
@@ -94,17 +126,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "stepfun/step-3.5-flash",
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
+        "minimax/minimax-m2.5:free",
        "z-ai/glm-5.1",
        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
-        "nvidia/nemotron-3-super-120b-a12b:free",
-        "arcee-ai/trinity-large-preview:free",
        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
-        "openrouter/elephant-alpha",
    ],
    "openai-codex": _codex_curated_models(),
    "copilot-acp": [
@@ -159,12 +189,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        # (map to OpenRouter defaults — users get familiar picks on NIM)
        "qwen/qwen3.5-397b-a17b",
        "deepseek-ai/deepseek-v3.2",
-        "moonshotai/kimi-k2.5",
+        "moonshotai/kimi-k2.6",
        "minimaxai/minimax-m2.5",
        "z-ai/glm5",
        "openai/gpt-oss-120b",
    ],
    "kimi-coding": [
+        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-for-coding",
        "kimi-k2-thinking",
@@ -173,12 +204,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-0905-preview",
    ],
    "kimi-coding-cn": [
+        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
    "moonshot": [
+        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
@@ -225,7 +258,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gpt-5.4-pro",
        "gpt-5.4",
        "gpt-5.3-codex",
-        "gpt-5.3-codex-spark",
        "gpt-5.2",
        "gpt-5.2-codex",
        "gpt-5.1",
@@ -259,6 +291,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "big-pickle",
    ],
    "opencode-go": [
+        "kimi-k2.6",
        "kimi-k2.5",
        "glm-5.1",
        "glm-5",
@@ -266,20 +299,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "minimax-m2.7",
        "minimax-m2.5",
-    ],
-    "ai-gateway": [
-        "anthropic/claude-opus-4.6",
-        "anthropic/claude-sonnet-4.6",
-        "anthropic/claude-sonnet-4.5",
-        "anthropic/claude-haiku-4.5",
-        "openai/gpt-5",
-        "openai/gpt-4.1",
-        "openai/gpt-4.1-mini",
-        "google/gemini-3-pro-preview",
-        "google/gemini-3-flash",
-        "google/gemini-2.5-pro",
-        "google/gemini-2.5-flash",
-        "deepseek/deepseek-v3.2",
+        "qwen3.6-plus",
+        "qwen3.5-plus",
    ],
    "kilocode": [
        "anthropic/claude-opus-4.6",
@@ -313,6 +334,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "zai-org/GLM-5",
        "XiaomiMiMo/MiMo-V2-Flash",
        "moonshotai/Kimi-K2-Thinking",
+        "moonshotai/Kimi-K2.6",
    ],
    # AWS Bedrock — static fallback list used when dynamic discovery is
    # unavailable (no boto3, no credentials, or API error).  The agent
@@ -332,18 +354,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    ],
 }

+# Vercel AI Gateway: derive the bare-model-id catalog from the curated
+# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions)
+# and the static fallback catalog (bare ids) stay in sync from a single
+# source of truth.
+_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
+
 # ---------------------------------------------------------------------------
-# Nous Portal free-model filtering
+# Nous Portal free-model helper
 # ---------------------------------------------------------------------------
-# Models that are ALLOWED to appear when priced as free on Nous Portal.
-# Any other free model is hidden — prevents promotional/temporary free models
-# from cluttering the selection when users are paying subscribers.
-# Models in this list are ALSO filtered out if they are NOT free (i.e. they
-# should only appear in the menu when they are genuinely free).
-_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
-    "xiaomi/mimo-v2-pro",
-    "xiaomi/mimo-v2-omni",
-})
+# The Nous Portal models endpoint is the source of truth for which models
+# are currently offered (free or paid). We trust whatever it returns and
+# surface it to users as-is — no local allowlist filtering.


 def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
@@ -357,35 +379,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
        return False


-def filter_nous_free_models(
-    model_ids: list[str],
-    pricing: dict[str, dict[str, str]],
-) -> list[str]:
-    """Filter the Nous Portal model list according to free-model policy.
-
-    Rules:
-      • Paid models that are NOT in the allowlist → keep (normal case).
-      • Free models that are NOT in the allowlist → drop.
-      • Allowlist models that ARE free → keep.
-      • Allowlist models that are NOT free → drop.
-    """
-    if not pricing:
-        return model_ids  # no pricing data — can't filter, show everything
-
-    result: list[str] = []
-    for mid in model_ids:
-        free = _is_model_free(mid, pricing)
-        if mid in _NOUS_ALLOWED_FREE_MODELS:
-            # Allowlist model: only show when it's actually free
-            if free:
-                result.append(mid)
-        else:
-            # Regular model: keep only when it's NOT free
-            if not free:
-                result.append(mid)
-    return result
-
-
 # ---------------------------------------------------------------------------
 # Nous Portal account tier detection
 # ---------------------------------------------------------------------------
@@ -449,8 +442,7 @@ def partition_nous_models_by_tier(
 ) -> tuple[list[str], list[str]]:
    """Split Nous models into (selectable, unavailable) based on user tier.

-    For paid-tier users: all models are selectable, none unavailable
-    (free-model filtering is handled separately by ``filter_nous_free_models``).
+    For paid-tier users: all models are selectable, none unavailable.

    For free-tier users: only free models are selectable; paid models
    are returned as unavailable (shown grayed out in the menu).
@@ -489,8 +481,6 @@ def check_nous_free_tier() -> bool:
    Returns False (assume paid) on any error — never blocks paying users.
    """
    global _free_tier_cache
-    import time
-
    now = time.monotonic()
    if _free_tier_cache is not None:
        cached_result, cached_at = _free_tier_cache
@@ -522,6 +512,157 @@ def check_nous_free_tier() -> bool:
        return False  # default to paid on error — don't block users


+# ---------------------------------------------------------------------------
+# Nous Portal recommended models
+#
+# The Portal publishes a curated list of suggested models (separated into
+# paid and free tiers) plus dedicated recommendations for compaction (text
+# summarisation / auxiliary) and vision tasks. We fetch it once per process
+# with a TTL cache so callers can ask "what's the best aux model right now?"
+# without hitting the network on every lookup.
+#
+# Shape of the response (fields we care about):
+#   {
+#     "paidRecommendedModels":     [ {modelName, ...}, ... ],
+#     "freeRecommendedModels":     [ {modelName, ...}, ... ],
+#     "paidRecommendedCompactionModel":  {modelName, ...} | null,
+#     "paidRecommendedVisionModel":      {modelName, ...} | null,
+#     "freeRecommendedCompactionModel":  {modelName, ...} | null,
+#     "freeRecommendedVisionModel":      {modelName, ...} | null,
+#   }
+# ---------------------------------------------------------------------------
+
+NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
+_NOUS_RECOMMENDED_CACHE_TTL: int = 600  # seconds (10 minutes)
+# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
+_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
+
+
+def fetch_nous_recommended_models(
+    portal_base_url: str = "",
+    timeout: float = 5.0,
+    *,
+    force_refresh: bool = False,
+) -> dict[str, Any]:
+    """Fetch the Nous Portal's curated recommended-models payload.
+
+    Hits ``<portal>/api/nous/recommended-models``. The endpoint is public —
+    no auth is required. Results are cached per portal URL for
+    ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
+    bypass the cache.
+
+    Returns the parsed JSON dict on success, or ``{}`` on any failure
+    (network, parse, non-2xx). Callers must treat missing/null fields as
+    "no recommendation" and fall back to their own default.
+    """
+    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
+    now = time.monotonic()
+    cached = _nous_recommended_cache.get(base)
+    if not force_refresh and cached is not None:
+        payload, cached_at = cached
+        if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
+            return payload
+
+    url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
+    try:
+        req = urllib.request.Request(
+            url,
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+        if not isinstance(data, dict):
+            data = {}
+    except Exception:
+        data = {}
+
+    _nous_recommended_cache[base] = (data, now)
+    return data
+
+
+def _resolve_nous_portal_url() -> str:
+    """Best-effort lookup of the Portal base URL the user is authed against."""
+    try:
+        from hermes_cli.auth import (
+            DEFAULT_NOUS_PORTAL_URL,
+            get_provider_auth_state,
+        )
+        state = get_provider_auth_state("nous") or {}
+        portal = str(state.get("portal_base_url") or "").strip()
+        if portal:
+            return portal.rstrip("/")
+        return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+    except Exception:
+        return "https://portal.nousresearch.com"
+
+
+def _extract_model_name(entry: Any) -> Optional[str]:
+    """Pull the ``modelName`` field from a recommended-model entry, else None."""
+    if not isinstance(entry, dict):
+        return None
+    model_name = entry.get("modelName")
+    if isinstance(model_name, str) and model_name.strip():
+        return model_name.strip()
+    return None
+
+
+def get_nous_recommended_aux_model(
+    *,
+    vision: bool = False,
+    free_tier: Optional[bool] = None,
+    portal_base_url: str = "",
+    force_refresh: bool = False,
+) -> Optional[str]:
+    """Return the Portal's recommended model name for an auxiliary task.
+
+    Picks the best field from the Portal's recommended-models payload:
+
+    * ``vision=True``  → ``paidRecommendedVisionModel``  (paid tier) or
+                         ``freeRecommendedVisionModel``  (free tier)
+    * ``vision=False`` → ``paidRecommendedCompactionModel`` or
+                         ``freeRecommendedCompactionModel``
+
+    When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
+    via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
+    detection — useful for tests or when the caller already knows the tier.
+
+    For paid-tier users we prefer the paid recommendation but gracefully fall
+    back to the free recommendation if the Portal returned ``null`` for the
+    paid field (common during the staged rollout of new paid models).
+
+    Returns ``None`` when every candidate is missing, null, or the fetch
+    fails — callers should fall back to their own default (currently
+    ``google/gemini-3-flash-preview``).
+    """
+    base = portal_base_url or _resolve_nous_portal_url()
+    payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
+    if not payload:
+        return None
+
+    if free_tier is None:
+        try:
+            free_tier = check_nous_free_tier()
+        except Exception:
+            # On any detection error, assume paid — paid users see both fields
+            # anyway so this is a safe default that maximises model quality.
+            free_tier = False
+
+    if vision:
+        paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
+    else:
+        paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"
+
+    # Preference order:
+    #   free tier  → free only
+    #   paid tier  → paid, then free (if paid field is null)
+    candidates = [free_key] if free_tier else [paid_key, free_key]
+    for key in candidates:
+        name = _extract_model_name(payload.get(key))
+        if name:
+            return name
+    return None
+
+
 # ---------------------------------------------------------------------------
 # Canonical provider list — single source of truth for provider identity.
 # Every code path that lists, displays, or iterates providers derives from
@@ -542,6 +683,7 @@ class ProviderEntry(NamedTuple):
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
+    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
@@ -565,7 +707,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
-    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, pay-per-use)"),
    ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
 ]

@@ -661,6 +802,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool:
        return False


+def _openrouter_model_supports_tools(item: Any) -> bool:
+    """Return True when the model's ``supported_parameters`` advertise tool calling.
+
+    hermes-agent is tool-calling-first — every provider path assumes the model
+    can invoke tools. Models that don't advertise ``tools`` in their
+    ``supported_parameters`` (e.g. image-only or completion-only models) cannot
+    be driven by the agent loop and would fail at the first tool call.
+
+    **Permissive when the field is missing.** Some OpenRouter-compatible gateways
+    (Nous Portal, private mirrors, older catalog snapshots) don't populate
+    ``supported_parameters`` at all. Treat that as "unknown capability → allow"
+    so the picker doesn't silently empty for those users. Only hide models
+    whose ``supported_parameters`` is an explicit list that omits ``tools``.
+
+    Ported from Kilo-Org/kilocode#9068.
+    """
+    if not isinstance(item, dict):
+        return True
+    params = item.get("supported_parameters")
+    if not isinstance(params, list):
+        # Field absent / malformed / None — be permissive.
+        return True
+    return "tools" in params
+
+
 def fetch_openrouter_models(
    timeout: float = 8.0,
    *,
@@ -703,6 +869,11 @@ def fetch_openrouter_models(
        live_item = live_by_id.get(preferred_id)
        if live_item is None:
            continue
+        # Hide models that don't advertise tool-calling support — hermes-agent
+        # requires it and surfacing them leads to immediate runtime failures
+        # when the user selects them. Ported from Kilo-Org/kilocode#9068.
+        if not _openrouter_model_supports_tools(live_item):
+            continue
        desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
        curated.append((preferred_id, desc))

@@ -720,6 +891,93 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
    return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]


+def _ai_gateway_model_is_free(pricing: Any) -> bool:
+    """Return True if an AI Gateway model has $0 input AND output pricing."""
+    if not isinstance(pricing, dict):
+        return False
+    try:
+        return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0
+    except (TypeError, ValueError):
+        return False
+
+
+def fetch_ai_gateway_models(
+    timeout: float = 8.0,
+    *,
+    force_refresh: bool = False,
+) -> list[tuple[str, str]]:
+    """Return the curated AI Gateway picker list, refreshed from the live catalog when possible."""
+    global _ai_gateway_catalog_cache
+
+    if _ai_gateway_catalog_cache is not None and not force_refresh:
+        return list(_ai_gateway_catalog_cache)
+
+    from hermes_constants import AI_GATEWAY_BASE_URL
+
+    fallback = list(VERCEL_AI_GATEWAY_MODELS)
+    preferred_ids = [mid for mid, _ in fallback]
+
+    try:
+        req = urllib.request.Request(
+            f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    live_items = payload.get("data", [])
+    if not isinstance(live_items, list):
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    live_by_id: dict[str, dict[str, Any]] = {}
+    for item in live_items:
+        if not isinstance(item, dict):
+            continue
+        mid = str(item.get("id") or "").strip()
+        if not mid:
+            continue
+        live_by_id[mid] = item
+
+    curated: list[tuple[str, str]] = []
+    for preferred_id in preferred_ids:
+        live_item = live_by_id.get(preferred_id)
+        if live_item is None:
+            continue
+        desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
+        curated.append((preferred_id, desc))
+
+    if not curated:
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    # If the live catalog offers a free Moonshot model, auto-promote it to
+    # position #1 as "recommended" — dynamic discovery without a PR.
+    free_moonshot = next(
+        (
+            mid
+            for mid, item in live_by_id.items()
+            if mid.startswith("moonshotai/")
+            and _ai_gateway_model_is_free(item.get("pricing"))
+        ),
+        None,
+    )
+    if free_moonshot:
+        curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
+        curated.insert(0, (free_moonshot, "recommended"))
+    else:
+        first_id, _ = curated[0]
+        curated[0] = (first_id, "recommended")
+
+    _ai_gateway_catalog_cache = curated
+    return list(curated)
+
+
+def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
+    """Return just the AI Gateway model-id strings."""
+    return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)]
+
+


 # ---------------------------------------------------------------------------
@@ -864,6 +1122,56 @@ def fetch_models_with_pricing(
    return result


+def fetch_ai_gateway_pricing(
+    timeout: float = 8.0,
+    *,
+    force_refresh: bool = False,
+) -> dict[str, dict[str, str]]:
+    """Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing.
+
+    Vercel uses ``input`` / ``output`` field names; hermes's picker expects
+    ``prompt`` / ``completion``. This translates. Cache read/write field names
+    already match.
+    """
+    from hermes_constants import AI_GATEWAY_BASE_URL
+
+    cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
+    if not force_refresh and cache_key in _pricing_cache:
+        return _pricing_cache[cache_key]
+
+    try:
+        req = urllib.request.Request(
+            f"{cache_key}/models",
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        _pricing_cache[cache_key] = {}
+        return {}
+
+    result: dict[str, dict[str, str]] = {}
+    for item in payload.get("data", []):
+        if not isinstance(item, dict):
+            continue
+        mid = item.get("id")
+        pricing = item.get("pricing")
+        if not (mid and isinstance(pricing, dict)):
+            continue
+        entry: dict[str, str] = {
+            "prompt": str(pricing.get("input", "")),
+            "completion": str(pricing.get("output", "")),
+        }
+        if pricing.get("input_cache_read"):
+            entry["input_cache_read"] = str(pricing["input_cache_read"])
+        if pricing.get("input_cache_write"):
+            entry["input_cache_write"] = str(pricing["input_cache_write"])
+        result[mid] = entry
+
+    _pricing_cache[cache_key] = result
+    return result
+
+
 def _resolve_openrouter_api_key() -> str:
    """Best-effort OpenRouter API key for pricing fetch."""
    return os.getenv("OPENROUTER_API_KEY", "").strip()
@@ -882,7 +1190,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:


 def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
-    """Return live pricing for providers that support it (openrouter, nous)."""
+    """Return live pricing for providers that support it (openrouter, nous, ai-gateway)."""
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
        return fetch_models_with_pricing(
@@ -890,6 +1198,8 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
            base_url="https://openrouter.ai/api",
            force_refresh=force_refresh,
        )
+    if normalized == "ai-gateway":
+        return fetch_ai_gateway_pricing(force_refresh=force_refresh)
    if normalized == "nous":
        api_key, base_url = _resolve_nous_pricing_credentials()
        if base_url:
@@ -1094,7 +1404,6 @@ def detect_provider_for_model(
            from hermes_cli.auth import PROVIDER_REGISTRY
            pconfig = PROVIDER_REGISTRY.get(direct_match)
            if pconfig:
-                import os
                for env_var in pconfig.api_key_env_vars:
                    if os.getenv(env_var, "").strip():
                        has_creds = True
@@ -1769,7 +2078,7 @@ def probe_api_models(
        candidates.append((alternate_base, True))

    tried: list[str] = []
-    headers: dict[str, str] = {}
+    headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    if normalized.startswith(COPILOT_BASE_URL):
@@ -2231,13 +2540,70 @@ def validate_requested_model(
        except Exception:
            pass  # Fall through to generic warning

+    # Static-catalog fallback: when the /models probe was unreachable,
+    # validate against the curated list from provider_model_ids() — same
+    # pattern as the openai-codex and minimax branches above.  This fixes
+    # /model switches in the gateway for providers like opencode-go and
+    # opencode-zen whose /models endpoint returns 404 against the HTML
+    # marketing site.  Without this block, validate_requested_model would
+    # reject every model on such providers, switch_model() would return
+    # success=False, and the gateway would never write to
+    # _session_model_overrides.
    provider_label = _PROVIDER_LABELS.get(normalized, normalized)
+    try:
+        catalog_models = provider_model_ids(normalized)
+    except Exception:
+        catalog_models = []
+
+    if catalog_models:
+        catalog_lower = {m.lower(): m for m in catalog_models}
+        if requested_for_lookup.lower() in catalog_lower:
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": True,
+                "message": None,
+            }
+        catalog_lower_list = list(catalog_lower.keys())
+        auto = get_close_matches(
+            requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9
+        )
+        if auto:
+            corrected = catalog_lower[auto[0]]
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": True,
+                "corrected_model": corrected,
+                "message": f"Auto-corrected `{requested}` → `{corrected}`",
+            }
+        suggestions = get_close_matches(
+            requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5
+        )
+        suggestion_text = ""
+        if suggestions:
+            suggestion_text = "\n  Similar models: " + ", ".join(
+                f"`{catalog_lower[s]}`" for s in suggestions
+            )
+        return {
+            "accepted": True,
+            "persist": True,
+            "recognized": False,
+            "message": (
+                f"Note: `{requested}` was not found in the {provider_label} curated catalog "
+                f"and the /models endpoint was unreachable.{suggestion_text}"
+                f"\n  The model may still work if it exists on the provider."
+            ),
+        }
+
+    # No catalog available — accept with a warning, matching the comment's
+    # stated intent ("Accept and persist, but warn").
    return {
-        "accepted": False,
-        "persist": False,
+        "accepted": True,
+        "persist": True,
        "recognized": False,
        "message": (
-            f"Could not reach the {provider_label} API to validate `{requested}`. "
+            f"Note: could not reach the {provider_label} API to validate `{requested}`. "
            f"If the service isn't down, this model may not be valid."
        ),
    }
@@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status
 from hermes_cli.config import get_env_value, load_config
 from tools.managed_tool_gateway import is_managed_tool_gateway_ready
 from tools.tool_backend_helpers import (
+    fal_key_is_configured,
    has_direct_modal_credentials,
    managed_nous_tools_enabled,
    normalize_browser_cloud_provider,
@@ -271,7 +272,7 @@ def get_nous_subscription_features(
    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
    direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
-    direct_fal = bool(get_env_value("FAL_KEY"))
+    direct_fal = fal_key_is_configured()
    direct_openai_tts = bool(resolve_openai_audio_api_key())
    direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
    direct_camofox = bool(get_env_value("CAMOFOX_URL"))
@@ -520,7 +521,7 @@ def apply_nous_managed_defaults(
        browser_cfg["cloud_provider"] = "browser-use"
        changed.add("browser")

-    if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
+    if "image_gen" in selected_toolsets and not fal_key_is_configured():
        changed.add("image_gen")

    return changed
@@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
            or get_env_value("TAVILY_API_KEY")
            or get_env_value("EXA_API_KEY")
        ),
-        "image_gen": bool(get_env_value("FAL_KEY")),
+        "image_gen": fal_key_is_configured(),
        "tts": bool(
            resolve_openai_audio_api_key()
            or get_env_value("ELEVENLABS_API_KEY")
@@ -586,7 +587,6 @@ def get_gateway_eligible_tools(
        return [], [], []

    if config is None:
-        from hermes_cli.config import load_config
        config = load_config() or {}

    # Quick provider check without the heavy get_nous_subscription_features call
@@ -2,14 +2,20 @@
 Hermes Plugin System
 ====================

-Discovers, loads, and manages plugins from three sources:
+Discovers, loads, and manages plugins from four sources:

-1. **User plugins**   – ``~/.hermes/plugins/<name>/``
-2. **Project plugins** – ``./.hermes/plugins/<name>/`` (opt-in via
+1. **Bundled plugins** – ``<repo>/plugins/<name>/`` (shipped with hermes-agent;
+   ``memory/`` and ``context_engine/`` subdirs are excluded — they have their
+   own discovery paths)
+2. **User plugins**   – ``~/.hermes/plugins/<name>/``
+3. **Project plugins** – ``./.hermes/plugins/<name>/`` (opt-in via
   ``HERMES_ENABLE_PROJECT_PLUGINS``)
-3. **Pip plugins**     – packages that expose the ``hermes_agent.plugins``
+4. **Pip plugins**     – packages that expose the ``hermes_agent.plugins``
   entry-point group.

+Later sources override earlier ones on name collision, so a user or project
+plugin with the same name as a bundled plugin replaces it.
+
 Each directory plugin must contain a ``plugin.yaml`` manifest **and** an
 ``__init__.py`` with a ``register(ctx)`` function.

@@ -64,6 +70,7 @@ VALID_HOOKS: Set[str] = {
    "on_session_end",
    "on_session_finalize",
    "on_session_reset",
+    "subagent_stop",
 }

 ENTRY_POINTS_GROUP = "hermes_agent.plugins"
@@ -77,7 +84,12 @@ def _env_enabled(name: str) -> bool:


 def _get_disabled_plugins() -> set:
-    """Read the disabled plugins list from config.yaml."""
+    """Read the disabled plugins list from config.yaml.
+
+    Kept for backward compat and explicit deny-list semantics. A plugin
+    name in this set will never load, even if it appears in
+    ``plugins.enabled``.
+    """
    try:
        from hermes_cli.config import load_config
        config = load_config()
@@ -87,10 +99,43 @@ def _get_disabled_plugins() -> set:
        return set()


+def _get_enabled_plugins() -> Optional[set]:
+    """Read the enabled-plugins allow-list from config.yaml.
+
+    Plugins are opt-in by default — only plugins whose name appears in
+    this set are loaded. Returns:
+
+    * ``None`` — the key is missing or malformed. Callers should treat
+      this as "nothing enabled yet" (the opt-in default); the first
+      ``migrate_config`` run populates the key with a grandfathered set
+      of currently-installed user plugins so existing setups don't
+      break on upgrade.
+    * ``set()`` — an empty list was explicitly set; nothing loads.
+    * ``set(...)`` — the concrete allow-list.
+    """
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        plugins_cfg = config.get("plugins")
+        if not isinstance(plugins_cfg, dict):
+            return None
+        if "enabled" not in plugins_cfg:
+            return None
+        enabled = plugins_cfg.get("enabled")
+        if not isinstance(enabled, list):
+            return None
+        return set(enabled)
+    except Exception:
+        return None
+
+
 # ---------------------------------------------------------------------------
 # Data classes
 # ---------------------------------------------------------------------------

+_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"}
+
+
@dataclass
 class PluginManifest:
    """Parsed representation of a plugin.yaml manifest."""
@@ -104,6 +149,23 @@ class PluginManifest:
    provides_hooks: List[str] = field(default_factory=list)
    source: str = ""        # "user", "project", or "entrypoint"
    path: Optional[str] = None
+    # Plugin kind — see plugins.py module docstring for semantics.
+    # ``standalone`` (default): hooks/tools of its own; opt-in via
+    #                           ``plugins.enabled``.
+    # ``backend``: pluggable backend for an existing core tool (e.g.
+    #              image_gen). Built-in (bundled) backends auto-load;
+    #              user-installed still gated by ``plugins.enabled``.
+    # ``exclusive``: category with exactly one active provider (memory).
+    #              Selection via ``<category>.provider`` config key; the
+    #              category's own discovery system handles loading and the
+    #              general scanner skips these.
+    kind: str = "standalone"
+    # Registry key — path-derived, used by ``plugins.enabled``/``disabled``
+    # lookups and by ``hermes plugins list``. For a flat plugin at
+    # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested
+    # category plugin at ``plugins/image_gen/openai/`` the key is
+    # ``image_gen/openai``. When empty, falls back to ``name``.
+    key: str = ""


@dataclass
@@ -324,6 +386,33 @@ class PluginContext:
            self.manifest.name, engine.name,
        )

+    # -- image gen provider registration ------------------------------------
+
+    def register_image_gen_provider(self, provider) -> None:
+        """Register an image generation backend.
+
+        ``provider`` must be an instance of
+        :class:`agent.image_gen_provider.ImageGenProvider`. The
+        ``provider.name`` attribute is what ``image_gen.provider`` in
+        ``config.yaml`` matches against when routing ``image_generate``
+        tool calls.
+        """
+        from agent.image_gen_provider import ImageGenProvider
+        from agent.image_gen_registry import register_provider
+
+        if not isinstance(provider, ImageGenProvider):
+            logger.warning(
+                "Plugin '%s' tried to register an image_gen provider that does "
+                "not inherit from ImageGenProvider. Ignoring.",
+                self.manifest.name,
+            )
+            return
+        register_provider(provider)
+        logger.info(
+            "Plugin '%s' registered image_gen provider: %s",
+            self.manifest.name, provider.name,
+        )
+
    # -- hook registration --------------------------------------------------

    def register_hook(self, hook_name: str, callback: Callable) -> None:
@@ -422,26 +511,103 @@ class PluginManager:

        manifests: List[PluginManifest] = []

-        # 1. User plugins (~/.hermes/plugins/)
+        # 1. Bundled plugins (<repo>/plugins/<name>/)
+        #
+        # Repo-shipped plugins live next to hermes_cli/. Two layouts are
+        # supported (see ``_scan_directory`` for details):
+        #
+        #   - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone)
+        #   - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
+        #
+        # ``memory/`` and ``context_engine/`` are skipped at the top level —
+        # they have their own discovery systems. Porting those to the
+        # category-namespace ``kind: exclusive`` model is a future PR.
+        repo_plugins = Path(__file__).resolve().parent.parent / "plugins"
+        manifests.extend(
+            self._scan_directory(
+                repo_plugins,
+                source="bundled",
+                skip_names={"memory", "context_engine"},
+            )
+        )
+
+        # 2. User plugins (~/.hermes/plugins/)
        user_dir = get_hermes_home() / "plugins"
        manifests.extend(self._scan_directory(user_dir, source="user"))

-        # 2. Project plugins (./.hermes/plugins/)
+        # 3. Project plugins (./.hermes/plugins/)
        if _env_enabled("HERMES_ENABLE_PROJECT_PLUGINS"):
            project_dir = Path.cwd() / ".hermes" / "plugins"
            manifests.extend(self._scan_directory(project_dir, source="project"))

-        # 3. Pip / entry-point plugins
+        # 4. Pip / entry-point plugins
        manifests.extend(self._scan_entry_points())

-        # Load each manifest (skip user-disabled plugins)
+        # Load each manifest (skip user-disabled plugins).
+        # Later sources override earlier ones on key collision — user
+        # plugins take precedence over bundled, project plugins take
+        # precedence over user. Dedup here so we only load the final
+        # winner. Keys are path-derived (``image_gen/openai``,
+        # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai``
+        # don't collide even when both manifests say ``name: openai``.
        disabled = _get_disabled_plugins()
+        enabled = _get_enabled_plugins()  # None = opt-in default (nothing enabled)
+        winners: Dict[str, PluginManifest] = {}
        for manifest in manifests:
-            if manifest.name in disabled:
+            winners[manifest.key or manifest.name] = manifest
+        for manifest in winners.values():
+            lookup_key = manifest.key or manifest.name
+
+            # Explicit disable always wins (matches on key or on legacy
+            # bare name for back-compat with existing user configs).
+            if lookup_key in disabled or manifest.name in disabled:
                loaded = LoadedPlugin(manifest=manifest, enabled=False)
                loaded.error = "disabled via config"
-                self._plugins[manifest.name] = loaded
-                logger.debug("Skipping disabled plugin '%s'", manifest.name)
+                self._plugins[lookup_key] = loaded
+                logger.debug("Skipping disabled plugin '%s'", lookup_key)
+                continue
+
+            # Exclusive plugins (memory providers) have their own
+            # discovery/activation path. The general loader records the
+            # manifest for introspection but does not load the module.
+            if manifest.kind == "exclusive":
+                loaded = LoadedPlugin(manifest=manifest, enabled=False)
+                loaded.error = (
+                    "exclusive plugin — activate via <category>.provider config"
+                )
+                self._plugins[lookup_key] = loaded
+                logger.debug(
+                    "Skipping '%s' (exclusive, handled by category discovery)",
+                    lookup_key,
+                )
+                continue
+
+            # Built-in backends auto-load — they ship with hermes and must
+            # just work. Selection among them (e.g. which image_gen backend
+            # services calls) is driven by ``<category>.provider`` config,
+            # enforced by the tool wrapper.
+            if manifest.kind == "backend" and manifest.source == "bundled":
+                self._load_plugin(manifest)
+                continue
+
+            # Everything else (standalone, user-installed backends,
+            # entry-point plugins) is opt-in via plugins.enabled.
+            # Accept both the path-derived key and the legacy bare name
+            # so existing configs keep working.
+            is_enabled = (
+                enabled is not None
+                and (lookup_key in enabled or manifest.name in enabled)
+            )
+            if not is_enabled:
+                loaded = LoadedPlugin(manifest=manifest, enabled=False)
+                loaded.error = (
+                    "not enabled in config (run `hermes plugins enable {}` to activate)"
+                    .format(lookup_key)
+                )
+                self._plugins[lookup_key] = loaded
+                logger.debug(
+                    "Skipping '%s' (not in plugins.enabled)", lookup_key
+                )
                continue
            self._load_plugin(manifest)

@@ -456,8 +622,46 @@ class PluginManager:
    # Directory scanning
    # -----------------------------------------------------------------------

-    def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]:
-        """Read ``plugin.yaml`` manifests from subdirectories of *path*."""
+    def _scan_directory(
+        self,
+        path: Path,
+        source: str,
+        skip_names: Optional[Set[str]] = None,
+    ) -> List[PluginManifest]:
+        """Read ``plugin.yaml`` manifests from subdirectories of *path*.
+
+        Supports two layouts, mixed freely:
+
+        * **Flat** — ``<root>/<plugin-name>/plugin.yaml``. Key is
+          ``<plugin-name>`` (e.g. ``disk-cleanup``).
+        * **Category** — ``<root>/<category>/<plugin-name>/plugin.yaml``,
+          where the ``<category>`` directory itself has no ``plugin.yaml``.
+          Key is ``<category>/<plugin-name>`` (e.g. ``image_gen/openai``).
+          Depth is capped at two segments.
+
+        *skip_names* is an optional allow-list of names to ignore at the
+        top level (kept for back-compat; the current call sites no longer
+        pass it now that categories are first-class).
+        """
+        return self._scan_directory_level(
+            path, source, skip_names=skip_names, prefix="", depth=0
+        )
+
+    def _scan_directory_level(
+        self,
+        path: Path,
+        source: str,
+        *,
+        skip_names: Optional[Set[str]],
+        prefix: str,
+        depth: int,
+    ) -> List[PluginManifest]:
+        """Recursive implementation of :meth:`_scan_directory`.
+
+        ``prefix`` is the category path already accumulated ("" at root,
+        "image_gen" one level in). ``depth`` is the recursion depth; we
+        cap at 2 so ``<root>/a/b/c/`` is ignored.
+        """
        manifests: List[PluginManifest] = []
        if not path.is_dir():
            return manifests
@@ -465,35 +669,88 @@ class PluginManager:
        for child in sorted(path.iterdir()):
            if not child.is_dir():
                continue
+            if depth == 0 and skip_names and child.name in skip_names:
+                continue
            manifest_file = child / "plugin.yaml"
            if not manifest_file.exists():
                manifest_file = child / "plugin.yml"
-            if not manifest_file.exists():
-                logger.debug("Skipping %s (no plugin.yaml)", child)
+
+            if manifest_file.exists():
+                manifest = self._parse_manifest(
+                    manifest_file, child, source, prefix
+                )
+                if manifest is not None:
+                    manifests.append(manifest)
                continue

-            try:
-                if yaml is None:
-                    logger.warning("PyYAML not installed – cannot load %s", manifest_file)
-                    continue
-                data = yaml.safe_load(manifest_file.read_text()) or {}
-                manifest = PluginManifest(
-                    name=data.get("name", child.name),
-                    version=str(data.get("version", "")),
-                    description=data.get("description", ""),
-                    author=data.get("author", ""),
-                    requires_env=data.get("requires_env", []),
-                    provides_tools=data.get("provides_tools", []),
-                    provides_hooks=data.get("provides_hooks", []),
-                    source=source,
-                    path=str(child),
+            # No manifest at this level. If we're still within the depth
+            # cap, treat this directory as a category namespace and recurse
+            # one level in looking for children with manifests.
+            if depth >= 1:
+                logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child)
+                continue
+
+            sub_prefix = f"{prefix}/{child.name}" if prefix else child.name
+            manifests.extend(
+                self._scan_directory_level(
+                    child,
+                    source,
+                    skip_names=None,
+                    prefix=sub_prefix,
+                    depth=depth + 1,
                )
-                manifests.append(manifest)
-            except Exception as exc:
-                logger.warning("Failed to parse %s: %s", manifest_file, exc)
+            )

        return manifests

+    def _parse_manifest(
+        self,
+        manifest_file: Path,
+        plugin_dir: Path,
+        source: str,
+        prefix: str,
+    ) -> Optional[PluginManifest]:
+        """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`.
+
+        Returns ``None`` on parse failure (logs a warning).
+        """
+        try:
+            if yaml is None:
+                logger.warning("PyYAML not installed – cannot load %s", manifest_file)
+                return None
+            data = yaml.safe_load(manifest_file.read_text()) or {}
+
+            name = data.get("name", plugin_dir.name)
+            key = f"{prefix}/{plugin_dir.name}" if prefix else name
+
+            raw_kind = data.get("kind", "standalone")
+            if not isinstance(raw_kind, str):
+                raw_kind = "standalone"
+            kind = raw_kind.strip().lower()
+            if kind not in _VALID_PLUGIN_KINDS:
+                logger.warning(
+                    "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'",
+                    key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)),
+                )
+                kind = "standalone"
+
+            return PluginManifest(
+                name=name,
+                version=str(data.get("version", "")),
+                description=data.get("description", ""),
+                author=data.get("author", ""),
+                requires_env=data.get("requires_env", []),
+                provides_tools=data.get("provides_tools", []),
+                provides_hooks=data.get("provides_hooks", []),
+                source=source,
+                path=str(plugin_dir),
+                kind=kind,
+                key=key,
+            )
+        except Exception as exc:
+            logger.warning("Failed to parse %s: %s", manifest_file, exc)
+            return None
+
    # -----------------------------------------------------------------------
    # Entry-point scanning
    # -----------------------------------------------------------------------
@@ -516,6 +773,7 @@ class PluginManager:
                    name=ep.name,
                    source="entrypoint",
                    path=ep.value,
+                    key=ep.name,
                )
                manifests.append(manifest)
        except Exception as exc:
@@ -532,7 +790,7 @@ class PluginManager:
        loaded = LoadedPlugin(manifest=manifest)

        try:
-            if manifest.source in ("user", "project"):
+            if manifest.source in ("user", "project", "bundled"):
                module = self._load_directory_module(manifest)
            else:
                module = self._load_entrypoint_module(manifest)
@@ -577,10 +835,16 @@ class PluginManager:
            loaded.error = str(exc)
            logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)

-        self._plugins[manifest.name] = loaded
+        self._plugins[manifest.key or manifest.name] = loaded

    def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType:
-        """Import a directory-based plugin as ``hermes_plugins.<name>``."""
+        """Import a directory-based plugin as ``hermes_plugins.<slug>``.
+
+        The module slug is derived from ``manifest.key`` so category-namespaced
+        plugins (``image_gen/openai``) import as
+        ``hermes_plugins.image_gen__openai`` without colliding with any
+        future ``tts/openai``.
+        """
        plugin_dir = Path(manifest.path)  # type: ignore[arg-type]
        init_file = plugin_dir / "__init__.py"
        if not init_file.exists():
@@ -593,7 +857,9 @@ class PluginManager:
            ns_pkg.__package__ = _NS_PARENT
            sys.modules[_NS_PARENT] = ns_pkg

-        module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}"
+        key = manifest.key or manifest.name
+        slug = key.replace("/", "__").replace("-", "_")
+        module_name = f"{_NS_PARENT}.{slug}"
        spec = importlib.util.spec_from_file_location(
            module_name,
            init_file,
@@ -674,10 +940,12 @@ class PluginManager:
    def list_plugins(self) -> List[Dict[str, Any]]:
        """Return a list of info dicts for all discovered plugins."""
        result: List[Dict[str, Any]] = []
-        for name, loaded in sorted(self._plugins.items()):
+        for key, loaded in sorted(self._plugins.items()):
            result.append(
                {
-                    "name": name,
+                    "name": loaded.manifest.name,
+                    "key": loaded.manifest.key or loaded.manifest.name,
+                    "kind": loaded.manifest.kind,
                    "version": loaded.manifest.version,
                    "description": loaded.manifest.description,
                    "source": loaded.manifest.source,
@@ -781,23 +1049,31 @@ def get_pre_tool_call_block_message(
    return None


+def _ensure_plugins_discovered() -> PluginManager:
+    """Return the global manager after running idempotent plugin discovery."""
+    manager = get_plugin_manager()
+    manager.discover_and_load()
+    return manager
+
+
 def get_plugin_context_engine():
    """Return the plugin-registered context engine, or None."""
-    return get_plugin_manager()._context_engine
+    return _ensure_plugins_discovered()._context_engine


 def get_plugin_command_handler(name: str) -> Optional[Callable]:
    """Return the handler for a plugin-registered slash command, or ``None``."""
-    entry = get_plugin_manager()._plugin_commands.get(name)
+    entry = _ensure_plugins_discovered()._plugin_commands.get(name)
    return entry["handler"] if entry else None


 def get_plugin_commands() -> Dict[str, dict]:
    """Return the full plugin commands dict (name → {handler, description, plugin}).

-    Safe to call before discovery — returns an empty dict if no plugins loaded.
+    Triggers idempotent plugin discovery so callers can use plugin commands
+    before any explicit discover_plugins() call.
    """
-    return get_plugin_manager()._plugin_commands
+    return _ensure_plugins_discovered()._plugin_commands


 def get_plugin_toolsets() -> List[tuple]:
@@ -15,6 +15,7 @@ import shutil
 import subprocess
 import sys
 from pathlib import Path
+from typing import Optional

 from hermes_constants import get_hermes_home

@@ -281,8 +282,16 @@ def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
 # ---------------------------------------------------------------------------


-def cmd_install(identifier: str, force: bool = False) -> None:
-    """Install a plugin from a Git URL or owner/repo shorthand."""
+def cmd_install(
+    identifier: str,
+    force: bool = False,
+    enable: Optional[bool] = None,
+) -> None:
+    """Install a plugin from a Git URL or owner/repo shorthand.
+
+    After install, prompt "Enable now? [y/N]" unless *enable* is provided
+    (True = auto-enable without prompting, False = install disabled).
+    """
    import tempfile
    from rich.console import Console

@@ -391,6 +400,40 @@ def cmd_install(identifier: str, force: bool = False) -> None:

    _display_after_install(target, identifier)

+    # Determine the canonical plugin name for enable-list bookkeeping.
+    installed_name = installed_manifest.get("name") or target.name
+
+    # Decide whether to enable: explicit flag > interactive prompt > default off
+    should_enable = enable
+    if should_enable is None:
+        # Interactive prompt unless stdin isn't a TTY (scripted install).
+        if sys.stdin.isatty() and sys.stdout.isatty():
+            try:
+                answer = input(
+                    f"  Enable '{installed_name}' now? [y/N]: "
+                ).strip().lower()
+                should_enable = answer in ("y", "yes")
+            except (EOFError, KeyboardInterrupt):
+                should_enable = False
+        else:
+            should_enable = False
+
+    if should_enable:
+        enabled = _get_enabled_set()
+        disabled = _get_disabled_set()
+        enabled.add(installed_name)
+        disabled.discard(installed_name)
+        _save_enabled_set(enabled)
+        _save_disabled_set(disabled)
+        console.print(
+            f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled."
+        )
+    else:
+        console.print(
+            f"[dim]Plugin installed but not enabled. "
+            f"Run `hermes plugins enable {installed_name}` to activate.[/dim]"
+        )
+
    console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]")
    console.print("[dim]  hermes gateway restart[/dim]")
    console.print()
@@ -468,7 +511,11 @@ def cmd_remove(name: str) -> None:


 def _get_disabled_set() -> set:
-    """Read the disabled plugins set from config.yaml."""
+    """Read the disabled plugins set from config.yaml.
+
+    An explicit deny-list. A plugin name here never loads, even if also
+    listed in ``plugins.enabled``.
+    """
    try:
        from hermes_cli.config import load_config
        config = load_config()
@@ -488,103 +535,196 @@ def _save_disabled_set(disabled: set) -> None:
    save_config(config)


+def _get_enabled_set() -> set:
+    """Read the enabled plugins allow-list from config.yaml.
+
+    Plugins are opt-in: only names here are loaded. Returns ``set()`` if
+    the key is missing (same behaviour as "nothing enabled yet").
+    """
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        plugins_cfg = config.get("plugins", {})
+        if not isinstance(plugins_cfg, dict):
+            return set()
+        enabled = plugins_cfg.get("enabled", [])
+        return set(enabled) if isinstance(enabled, list) else set()
+    except Exception:
+        return set()
+
+
+def _save_enabled_set(enabled: set) -> None:
+    """Write the enabled plugins list to config.yaml."""
+    from hermes_cli.config import load_config, save_config
+    config = load_config()
+    if "plugins" not in config:
+        config["plugins"] = {}
+    config["plugins"]["enabled"] = sorted(enabled)
+    save_config(config)
+
+
 def cmd_enable(name: str) -> None:
-    """Enable a previously disabled plugin."""
+    """Add a plugin to the enabled allow-list (and remove it from disabled)."""
    from rich.console import Console

    console = Console()
-    plugins_dir = _plugins_dir()
-
-    # Verify the plugin exists
-    target = plugins_dir / name
-    if not target.is_dir():
-        console.print(f"[red]Plugin '{name}' is not installed.[/red]")
+    # Discover the plugin — check installed (user) AND bundled.
+    if not _plugin_exists(name):
+        console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
        sys.exit(1)

+    enabled = _get_enabled_set()
    disabled = _get_disabled_set()
-    if name not in disabled:
+
+    if name in enabled and name not in disabled:
        console.print(f"[dim]Plugin '{name}' is already enabled.[/dim]")
        return

+    enabled.add(name)
    disabled.discard(name)
+    _save_enabled_set(enabled)
    _save_disabled_set(disabled)
-    console.print(f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. Takes effect on next session.")
+    console.print(
+        f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. "
+        "Takes effect on next session."
+    )


 def cmd_disable(name: str) -> None:
-    """Disable a plugin without removing it."""
+    """Remove a plugin from the enabled allow-list (and add to disabled)."""
    from rich.console import Console

    console = Console()
-    plugins_dir = _plugins_dir()
-
-    # Verify the plugin exists
-    target = plugins_dir / name
-    if not target.is_dir():
-        console.print(f"[red]Plugin '{name}' is not installed.[/red]")
+    if not _plugin_exists(name):
+        console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
        sys.exit(1)

+    enabled = _get_enabled_set()
    disabled = _get_disabled_set()
-    if name in disabled:
+
+    if name not in enabled and name in disabled:
        console.print(f"[dim]Plugin '{name}' is already disabled.[/dim]")
        return

+    enabled.discard(name)
    disabled.add(name)
+    _save_enabled_set(enabled)
    _save_disabled_set(disabled)
-    console.print(f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")
+    console.print(
+        f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. "
+        "Takes effect on next session."
+    )


-def cmd_list() -> None:
-    """List installed plugins."""
-    from rich.console import Console
-    from rich.table import Table
+def _plugin_exists(name: str) -> bool:
+    """Return True if a plugin with *name* is installed (user) or bundled."""
+    # Installed: directory name or manifest name match in user plugins dir
+    user_dir = _plugins_dir()
+    if user_dir.is_dir():
+        if (user_dir / name).is_dir():
+            return True
+        for child in user_dir.iterdir():
+            if not child.is_dir():
+                continue
+            manifest = _read_manifest(child)
+            if manifest.get("name") == name:
+                return True
+    # Bundled: <repo>/plugins/<name>/
+    from pathlib import Path as _P
+    import hermes_cli
+    repo_plugins = _P(hermes_cli.__file__).resolve().parent.parent / "plugins"
+    if repo_plugins.is_dir():
+        candidate = repo_plugins / name
+        if candidate.is_dir() and (
+            (candidate / "plugin.yaml").exists()
+            or (candidate / "plugin.yml").exists()
+        ):
+            return True
+    return False

+
+def _discover_all_plugins() -> list:
+    """Return a list of (name, version, description, source, dir_path) for
+    every plugin the loader can see — user + bundled + project.
+
+    Matches the ordering/dedup of ``PluginManager.discover_and_load``:
+    bundled first, then user, then project; user overrides bundled on
+    name collision.
+    """
    try:
        import yaml
    except ImportError:
        yaml = None

-    console = Console()
-    plugins_dir = _plugins_dir()
+    seen: dict = {}  # name -> (name, version, description, source, path)

-    dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir())
-    if not dirs:
+    # Bundled (<repo>/plugins/<name>/), excluding memory/ and context_engine/
+    import hermes_cli
+    repo_plugins = Path(hermes_cli.__file__).resolve().parent.parent / "plugins"
+    for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")):
+        if not base.is_dir():
+            continue
+        for d in sorted(base.iterdir()):
+            if not d.is_dir():
+                continue
+            if source == "bundled" and d.name in ("memory", "context_engine"):
+                continue
+            manifest_file = d / "plugin.yaml"
+            if not manifest_file.exists():
+                manifest_file = d / "plugin.yml"
+            if not manifest_file.exists():
+                continue
+            name = d.name
+            version = ""
+            description = ""
+            if yaml:
+                try:
+                    with open(manifest_file) as f:
+                        manifest = yaml.safe_load(f) or {}
+                    name = manifest.get("name", d.name)
+                    version = manifest.get("version", "")
+                    description = manifest.get("description", "")
+                except Exception:
+                    pass
+            # User plugins override bundled on name collision.
+            if name in seen and source == "bundled":
+                continue
+            src_label = source
+            if source == "user" and (d / ".git").exists():
+                src_label = "git"
+            seen[name] = (name, version, description, src_label, d)
+    return list(seen.values())
+
+
+def cmd_list() -> None:
+    """List all plugins (bundled + user) with enabled/disabled state."""
+    from rich.console import Console
+    from rich.table import Table
+
+    console = Console()
+    entries = _discover_all_plugins()
+    if not entries:
        console.print("[dim]No plugins installed.[/dim]")
        console.print("[dim]Install with:[/dim] hermes plugins install owner/repo")
        return

+    enabled = _get_enabled_set()
    disabled = _get_disabled_set()

-    table = Table(title="Installed Plugins", show_lines=False)
+    table = Table(title="Plugins", show_lines=False)
    table.add_column("Name", style="bold")
    table.add_column("Status")
    table.add_column("Version", style="dim")
    table.add_column("Description")
    table.add_column("Source", style="dim")

-    for d in dirs:
-        manifest_file = d / "plugin.yaml"
-        name = d.name
-        version = ""
-        description = ""
-        source = "local"
-
-        if manifest_file.exists() and yaml:
-            try:
-                with open(manifest_file) as f:
-                    manifest = yaml.safe_load(f) or {}
-                name = manifest.get("name", d.name)
-                version = manifest.get("version", "")
-                description = manifest.get("description", "")
-            except Exception:
-                pass
-
-        # Check if it's a git repo (installed via hermes plugins install)
-        if (d / ".git").exists():
-            source = "git"
-
-        is_disabled = name in disabled or d.name in disabled
-        status = "[red]disabled[/red]" if is_disabled else "[green]enabled[/green]"
+    for name, version, description, source, _dir in entries:
+        if name in disabled:
+            status = "[red]disabled[/red]"
+        elif name in enabled:
+            status = "[green]enabled[/green]"
+        else:
+            status = "[yellow]not enabled[/yellow]"
        table.add_row(name, status, str(version), description, source)

    console.print()
@@ -592,6 +732,7 @@ def cmd_list() -> None:
    console.print()
    console.print("[dim]Interactive toggle:[/dim] hermes plugins")
    console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable <name>")
+    console.print("[dim]Plugins are opt-in by default — only 'enabled' plugins load.[/dim]")


 # ---------------------------------------------------------------------------
@@ -742,41 +883,25 @@ def cmd_toggle() -> None:
    """Interactive composite UI — general plugins + provider plugin categories."""
    from rich.console import Console

-    try:
-        import yaml
-    except ImportError:
-        yaml = None
-
    console = Console()
-    plugins_dir = _plugins_dir()

-    # -- General plugins discovery --
-    dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir())
-    disabled = _get_disabled_set()
+    # -- General plugins discovery (bundled + user) --
+    entries = _discover_all_plugins()
+    enabled_set = _get_enabled_set()
+    disabled_set = _get_disabled_set()

    plugin_names = []
    plugin_labels = []
    plugin_selected = set()

-    for i, d in enumerate(dirs):
-        manifest_file = d / "plugin.yaml"
-        name = d.name
-        description = ""
-
-        if manifest_file.exists() and yaml:
-            try:
-                with open(manifest_file) as f:
-                    manifest = yaml.safe_load(f) or {}
-                name = manifest.get("name", d.name)
-                description = manifest.get("description", "")
-            except Exception:
-                pass
-
-        plugin_names.append(name)
+    for i, (name, _version, description, source, _d) in enumerate(entries):
        label = f"{name} \u2014 {description}" if description else name
+        if source == "bundled":
+            label = f"{label} [bundled]"
+        plugin_names.append(name)
        plugin_labels.append(label)
-
-        if name not in disabled and d.name not in disabled:
+        # Selected (enabled) when in enabled-set AND not in disabled-set
+        if name in enabled_set and name not in disabled_set:
            plugin_selected.add(i)

    # -- Provider categories --
@@ -804,10 +929,10 @@ def cmd_toggle() -> None:
    try:
        import curses
        _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
-                          disabled, categories, console)
+                          disabled_set, categories, console)
    except ImportError:
        _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
-                                disabled, categories, console)
+                                disabled_set, categories, console)


 def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
@@ -1020,18 +1145,29 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
    curses.wrapper(_draw)
    flush_stdin()

-    # Persist general plugin changes
-    new_disabled = set()
+    # Persist general plugin changes. The new allow-list is the set of
+    # plugin names that were checked; anything not checked is explicitly
+    # disabled (written to disabled-list) so it remains off even if the
+    # plugin code does something clever like auto-enable in the future.
+    new_enabled: set = set()
+    new_disabled: set = set(disabled)  # preserve existing disabled state for unseen plugins
    for i, name in enumerate(plugin_names):
-        if i not in chosen:
+        if i in chosen:
+            new_enabled.add(name)
+            new_disabled.discard(name)
+        else:
            new_disabled.add(name)

-    if new_disabled != disabled:
+    prev_enabled = _get_enabled_set()
+    enabled_changed = new_enabled != prev_enabled
+    disabled_changed = new_disabled != disabled
+
+    if enabled_changed or disabled_changed:
+        _save_enabled_set(new_enabled)
        _save_disabled_set(new_disabled)
-        enabled_count = len(plugin_names) - len(new_disabled)
        console.print(
-            f"\n[green]\u2713[/green] General plugins: {enabled_count} enabled, "
-            f"{len(new_disabled)} disabled."
+            f"\n[green]\u2713[/green] General plugins: {len(new_enabled)} enabled, "
+            f"{len(plugin_names) - len(new_enabled)} disabled."
        )
    elif n_plugins > 0:
        console.print("\n[dim]General plugins unchanged.[/dim]")
@@ -1078,11 +1214,17 @@ def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
                return
            print()

-        new_disabled = set()
+        new_enabled: set = set()
+        new_disabled: set = set(disabled)
        for i, name in enumerate(plugin_names):
-            if i not in chosen:
+            if i in chosen:
+                new_enabled.add(name)
+                new_disabled.discard(name)
+            else:
                new_disabled.add(name)
-        if new_disabled != disabled:
+        prev_enabled = _get_enabled_set()
+        if new_enabled != prev_enabled or new_disabled != disabled:
+            _save_enabled_set(new_enabled)
            _save_disabled_set(new_disabled)

    # Provider categories
@@ -1108,7 +1250,17 @@ def plugins_command(args) -> None:
    action = getattr(args, "plugins_action", None)

    if action == "install":
-        cmd_install(args.identifier, force=getattr(args, "force", False))
+        # Map argparse tri-state: --enable=True, --no-enable=False, neither=None (prompt)
+        enable_arg = None
+        if getattr(args, "enable", False):
+            enable_arg = True
+        elif getattr(args, "no_enable", False):
+            enable_arg = False
+        cmd_install(
+            args.identifier,
+            force=getattr(args, "force", False),
+            enable=enable_arg,
+        )
    elif action == "update":
        cmd_update(args.name)
    elif action in ("remove", "rm", "uninstall"):
@@ -23,6 +23,8 @@ import logging
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple

+from utils import base_url_host_matches, base_url_hostname
+
 logger = logging.getLogger(__name__)


@@ -425,6 +427,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
    """
    pdef = get_provider(provider)
    if pdef is not None:
+        # Even for known providers, check URL heuristics for special endpoints
+        # (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom')
+        if base_url:
+            url_lower = base_url.rstrip("/").lower()
+            if "api.kimi.com/coding" in url_lower:
+                return "anthropic_messages"
+            if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
+                return "anthropic_messages"
+            if "api.openai.com" in url_lower:
+                return "codex_responses"
        return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")

    # Direct provider checks for providers not in HERMES_OVERLAYS
@@ -434,11 +446,14 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
    # URL-based heuristics for custom / unknown providers
    if base_url:
        url_lower = base_url.rstrip("/").lower()
-        if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
+        hostname = base_url_hostname(base_url)
+        if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
            return "anthropic_messages"
-        if "api.openai.com" in url_lower:
+        if hostname == "api.kimi.com" and "/coding" in url_lower:
+            return "anthropic_messages"
+        if hostname == "api.openai.com":
            return "codex_responses"
-        if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
+        if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"):
            return "bedrock_converse"

    return "chat_completions"
@@ -29,6 +29,7 @@ from hermes_cli.auth import (
 )
 from hermes_cli.config import get_compatible_custom_providers, load_config
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import base_url_host_matches, base_url_hostname


 def _normalize_custom_provider_name(value: str) -> str:
@@ -45,14 +46,20 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
      protocol under a ``/anthropic`` suffix — treat those as
      ``anthropic_messages`` transport instead of the default
      ``chat_completions``.
+    - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the
+      Anthropic Messages protocol (the /coding route accepts Claude
+      Code's native request shape).
    """
    normalized = (base_url or "").strip().lower().rstrip("/")
-    if "api.x.ai" in normalized:
+    hostname = base_url_hostname(base_url)
+    if hostname == "api.x.ai":
        return "codex_responses"
-    if "api.openai.com" in normalized and "openrouter" not in normalized:
+    if hostname == "api.openai.com":
        return "codex_responses"
    if normalized.endswith("/anthropic"):
        return "anthropic_messages"
+    if hostname == "api.kimi.com" and "/coding" in normalized:
+        return "anthropic_messages"
    return None


@@ -203,7 +210,8 @@ def _resolve_runtime_from_pool_entry(
            api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
-            # api.openai.com → codex_responses, api.x.ai → codex_responses).
+            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
+            # codex_responses).
            detected = _detect_api_mode_for_url(base_url)
            if detected:
                api_mode = detected
@@ -480,7 +488,7 @@ def _resolve_openrouter_runtime(
    # When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer
    # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
    # provider (issues #420, #560).
-    _is_openrouter_url = "openrouter.ai" in base_url
+    _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
    if _is_openrouter_url:
        api_key_candidates = [
            explicit_api_key,
@@ -490,8 +498,12 @@ def _resolve_openrouter_runtime(
    else:
        # Custom endpoint: use api_key from config when using config base_url (#1760).
        # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
-        # the canonical env var for ollama.com authentication.
-        _is_ollama_url = "ollama.com" in base_url.lower()
+        # the canonical env var for ollama.com authentication. Match on
+        # HOST, not substring — a custom base_url whose path contains
+        # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
+        # hostname is a look-alike (ollama.com.attacker.test) must not
+        # receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
+        _is_ollama_url = base_url_host_matches(base_url, "ollama.com")
        api_key_candidates = [
            explicit_api_key,
            (cfg_api_key if use_config_base_url else ""),
@@ -654,7 +666,8 @@ def _resolve_explicit_runtime(
            if configured_mode:
                api_mode = configured_mode
            else:
-                # Auto-detect Anthropic-compatible endpoints (/anthropic suffix).
+                # Auto-detect from URL (Anthropic /anthropic suffix,
+                # api.openai.com → Responses, Kimi /coding, etc.).
                detected = _detect_api_mode_for_url(base_url)
                if detected:
                    api_mode = detected
@@ -904,8 +917,7 @@ def resolve_runtime_provider(
                code="no_aws_credentials",
            )
        # Read bedrock-specific config from config.yaml
-        from hermes_cli.config import load_config as _load_bedrock_config
-        _bedrock_cfg = _load_bedrock_config().get("bedrock", {})
+        _bedrock_cfg = load_config().get("bedrock", {})
        # Region priority: config.yaml bedrock.region → env var → us-east-1
        region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
        auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
@@ -22,6 +22,7 @@ from typing import Optional, Dict, Any

 from hermes_cli.nous_subscription import get_nous_subscription_features
 from tools.tool_backend_helpers import managed_nous_tools_enabled
+from utils import base_url_hostname
 from hermes_constants import get_optional_skills_dir

 logger = logging.getLogger(__name__)
@@ -93,15 +94,15 @@ _DEFAULT_PROVIDER_MODELS = {
        "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
    ],
    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
-    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
-    "kimi-coding-cn": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
    "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
-    "opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
+    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -407,13 +408,36 @@ def _print_setup_summary(config: dict, hermes_home):
            ("Browser Automation", False, missing_browser_hint)
        )

-    # FAL (image generation)
+    # Image generation — FAL (direct or via Nous), or any plugin-registered
+    # provider (OpenAI, etc.)
    if subscription_features.image_gen.managed_by_nous:
        tool_status.append(("Image Generation (Nous subscription)", True, None))
    elif subscription_features.image_gen.available:
        tool_status.append(("Image Generation", True, None))
    else:
-        tool_status.append(("Image Generation", False, "FAL_KEY"))
+        # Fall back to probing plugin-registered providers so OpenAI-only
+        # setups don't show as "missing FAL_KEY".
+        _img_backend = None
+        try:
+            from agent.image_gen_registry import list_providers
+            from hermes_cli.plugins import _ensure_plugins_discovered
+
+            _ensure_plugins_discovered()
+            for _p in list_providers():
+                if _p.name == "fal":
+                    continue
+                try:
+                    if _p.is_available():
+                        _img_backend = _p.display_name
+                        break
+                except Exception:
+                    continue
+        except Exception:
+            pass
+        if _img_backend:
+            tool_status.append((f"Image Generation ({_img_backend})", True, None))
+        else:
+            tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY"))

    # TTS — show configured provider
    tts_provider = config.get("tts", {}).get("provider", "edge")
@@ -433,7 +457,6 @@ def _print_setup_summary(config: dict, hermes_home):
        tool_status.append(("Text-to-Speech (Google Gemini)", True, None))
    elif tts_provider == "neutts":
        try:
-            import importlib.util
            neutts_ok = importlib.util.find_spec("neutts") is not None
        except Exception:
            neutts_ok = False
@@ -441,6 +464,16 @@ def _print_setup_summary(config: dict, hermes_home):
            tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
        else:
            tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
+    elif tts_provider == "kittentts":
+        try:
+            import importlib.util
+            kittentts_ok = importlib.util.find_spec("kittentts") is not None
+        except Exception:
+            kittentts_ok = False
+        if kittentts_ok:
+            tool_status.append(("Text-to-Speech (KittenTTS local)", True, None))
+        else:
+            tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'"))
    else:
        tool_status.append(("Text-to-Speech (Edge TTS)", True, None))

@@ -803,7 +836,8 @@ def setup_model_provider(config: dict, *, quick: bool = False):
        elif _vision_idx == 1:  # OpenAI-compatible endpoint
            _base_url = prompt("  Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
            _api_key_label = "  API key"
-            if "api.openai.com" in _base_url.lower():
+            _is_native_openai = base_url_hostname(_base_url) == "api.openai.com"
+            if _is_native_openai:
                _api_key_label = "  OpenAI API key"
            _oai_key = prompt(_api_key_label, password=True).strip()
            if _oai_key:
@@ -811,7 +845,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
                # Save vision base URL to config (not .env — only secrets go there)
                _vaux = config.setdefault("auxiliary", {}).setdefault("vision", {})
                _vaux["base_url"] = _base_url
-                if "api.openai.com" in _base_url.lower():
+                if _is_native_openai:
                    _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
                    _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
                    _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
@@ -847,7 +881,6 @@ def setup_model_provider(config: dict, *, quick: bool = False):

 def _check_espeak_ng() -> bool:
    """Check if espeak-ng is installed."""
-    import shutil
    return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None


@@ -901,6 +934,31 @@ def _install_neutts_deps() -> bool:
        return False


+def _install_kittentts_deps() -> bool:
+    """Install KittenTTS dependencies with user approval. Returns True on success."""
+    import subprocess
+    import sys
+
+    wheel_url = (
+        "https://github.com/KittenML/KittenTTS/releases/download/"
+        "0.8.1/kittentts-0.8.1-py3-none-any.whl"
+    )
+    print()
+    print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...")
+    print()
+    try:
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
+            check=True, timeout=300,
+        )
+        print_success("kittentts installed successfully")
+        return True
+    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+        print_error(f"Failed to install kittentts: {e}")
+        print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile")
+        return False
+
+
 def _setup_tts_provider(config: dict):
    """Interactive TTS provider selection with install flow for NeuTTS."""
    tts_config = config.get("tts", {})
@@ -916,6 +974,7 @@ def _setup_tts_provider(config: dict):
        "mistral": "Mistral Voxtral TTS",
        "gemini": "Google Gemini TTS",
        "neutts": "NeuTTS",
+        "kittentts": "KittenTTS",
    }
    current_label = provider_labels.get(current_provider, current_provider)

@@ -939,9 +998,10 @@ def _setup_tts_provider(config: dict):
            "Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
            "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
            "NeuTTS (local on-device, free, ~300MB model download)",
+            "KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)",
        ]
    )
-    providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
+    providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"])
    choices.append(f"Keep current ({current_label})")
    keep_current_idx = len(choices) - 1
    idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@@ -962,7 +1022,6 @@ def _setup_tts_provider(config: dict):
    if selected == "neutts":
        # Check if already installed
        try:
-            import importlib.util
            already_installed = importlib.util.find_spec("neutts") is not None
        except Exception:
            already_installed = False
@@ -1061,6 +1120,29 @@ def _setup_tts_provider(config: dict):
                print_warning("No API key provided. Falling back to Edge TTS.")
                selected = "edge"

+    elif selected == "kittentts":
+        # Check if already installed
+        try:
+            import importlib.util
+            already_installed = importlib.util.find_spec("kittentts") is not None
+        except Exception:
+            already_installed = False
+
+        if already_installed:
+            print_success("KittenTTS is already installed")
+        else:
+            print()
+            print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).")
+            print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
+            print()
+            if prompt_yes_no("Install KittenTTS now?", True):
+                if not _install_kittentts_deps():
+                    print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.")
+                    selected = "edge"
+            else:
+                print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.")
+                selected = "edge"
+
    # Save the selection
    if "tts" not in config:
        config["tts"] = {}
@@ -1082,8 +1164,6 @@ def setup_tts(config: dict):
 def setup_terminal_backend(config: dict):
    """Configure the terminal execution backend."""
    import platform as _platform
-    import shutil
-
    print_header("Terminal Backend")
    print_info("Choose where Hermes runs shell commands and code.")
    print_info("This affects tool execution, file access, and isolation.")
@@ -2358,6 +2438,74 @@ def setup_tools(config: dict, first_install: bool = False):
 # =============================================================================


+def _model_section_has_credentials(config: dict) -> bool:
+    """Return True when any known inference provider has usable credentials.
+
+    Sources of truth:
+      * ``PROVIDER_REGISTRY`` in ``hermes_cli.auth`` — lists every supported
+        provider along with its ``api_key_env_vars``.
+      * ``active_provider`` in the auth store — covers OAuth device-code /
+        external-OAuth providers (Nous, Codex, Qwen, Gemini CLI, ...).
+      * The legacy OpenRouter aggregator env vars, which route generic
+        ``OPENAI_API_KEY`` / ``OPENROUTER_API_KEY`` values through OpenRouter.
+    """
+    try:
+        from hermes_cli.auth import get_active_provider
+        if get_active_provider():
+            return True
+    except Exception:
+        pass
+
+    try:
+        from hermes_cli.auth import PROVIDER_REGISTRY
+    except Exception:
+        PROVIDER_REGISTRY = {}  # type: ignore[assignment]
+
+    def _has_key(pconfig) -> bool:
+        for env_var in pconfig.api_key_env_vars:
+            # CLAUDE_CODE_OAUTH_TOKEN is set by Claude Code itself, not by
+            # the user — mirrors is_provider_explicitly_configured in auth.py.
+            if env_var == "CLAUDE_CODE_OAUTH_TOKEN":
+                continue
+            if get_env_value(env_var):
+                return True
+        return False
+
+    # Prefer the provider declared in config.yaml, avoids false positives
+    # from stray env vars (GH_TOKEN, etc.) when the user has already picked
+    # a different provider.
+    model_cfg = config.get("model") if isinstance(config, dict) else None
+    if isinstance(model_cfg, dict):
+        provider_id = (model_cfg.get("provider") or "").strip().lower()
+        if provider_id in PROVIDER_REGISTRY:
+            if _has_key(PROVIDER_REGISTRY[provider_id]):
+                return True
+        if provider_id == "openrouter":
+            for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
+                if get_env_value(env_var):
+                    return True
+
+    # OpenRouter aggregator fallback (no provider declared in config).
+    for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
+        if get_env_value(env_var):
+            return True
+
+    for pid, pconfig in PROVIDER_REGISTRY.items():
+        # Skip copilot in auto-detect: GH_TOKEN / GITHUB_TOKEN are
+        # commonly set for git tooling.  Mirrors resolve_provider in auth.py.
+        if pid == "copilot":
+            continue
+        if _has_key(pconfig):
+            return True
+    return False
+
+
+def _gateway_platform_short_label(label: str) -> str:
+    """Strip trailing parenthetical qualifiers from a gateway platform label."""
+    base = label.split("(", 1)[0].strip()
+    return base or label
+
+
 def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]:
    """Return a short summary if a setup section is already configured, else None.

@@ -2366,20 +2514,7 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
    so that test patches on ``setup_mod.get_env_value`` take effect.
    """
    if section_key == "model":
-        has_key = bool(
-            get_env_value("OPENROUTER_API_KEY")
-            or get_env_value("OPENAI_API_KEY")
-            or get_env_value("ANTHROPIC_API_KEY")
-        )
-        if not has_key:
-            # Check for OAuth providers
-            try:
-                from hermes_cli.auth import get_active_provider
-                if get_active_provider():
-                    has_key = True
-            except Exception:
-                pass
-        if not has_key:
+        if not _model_section_has_credentials(config):
            return None
        model = config.get("model")
        if isinstance(model, str) and model.strip():
@@ -2397,37 +2532,11 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
        return f"max turns: {max_turns}"

    elif section_key == "gateway":
-        platforms = []
-        if get_env_value("TELEGRAM_BOT_TOKEN"):
-            platforms.append("Telegram")
-        if get_env_value("DISCORD_BOT_TOKEN"):
-            platforms.append("Discord")
-        if get_env_value("SLACK_BOT_TOKEN"):
-            platforms.append("Slack")
-        if get_env_value("SIGNAL_ACCOUNT"):
-            platforms.append("Signal")
-        if get_env_value("EMAIL_ADDRESS"):
-            platforms.append("Email")
-        if get_env_value("TWILIO_ACCOUNT_SID"):
-            platforms.append("SMS")
-        if get_env_value("MATRIX_ACCESS_TOKEN") or get_env_value("MATRIX_PASSWORD"):
-            platforms.append("Matrix")
-        if get_env_value("MATTERMOST_TOKEN"):
-            platforms.append("Mattermost")
-        if get_env_value("WHATSAPP_PHONE_NUMBER_ID"):
-            platforms.append("WhatsApp")
-        if get_env_value("DINGTALK_CLIENT_ID"):
-            platforms.append("DingTalk")
-        if get_env_value("FEISHU_APP_ID"):
-            platforms.append("Feishu")
-        if get_env_value("WECOM_BOT_ID"):
-            platforms.append("WeCom")
-        if get_env_value("WEIXIN_ACCOUNT_ID"):
-            platforms.append("Weixin")
-        if get_env_value("BLUEBUBBLES_SERVER_URL"):
-            platforms.append("BlueBubbles")
-        if get_env_value("WEBHOOK_ENABLED"):
-            platforms.append("Webhooks")
+        platforms = [
+            _gateway_platform_short_label(label)
+            for label, env_var, _ in _GATEWAY_PLATFORMS
+            if get_env_value(env_var)
+        ]
        if platforms:
            return ", ".join(platforms)
        return None  # No platforms configured — section must run
@@ -127,7 +127,7 @@ TIPS = [

    # --- Tools & Capabilities ---
    "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
-    "delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.",
+    "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
    "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
    "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
    "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
@@ -24,7 +24,8 @@ from hermes_cli.nous_subscription import (
    apply_nous_managed_defaults,
    get_nous_subscription_features,
 )
-from tools.tool_backend_helpers import managed_nous_tools_enabled
+from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
+from utils import base_url_hostname

 logger = logging.getLogger(__name__)

@@ -181,6 +182,14 @@ TOOL_CATEGORIES = {
                ],
                "tts_provider": "gemini",
            },
+            {
+                "name": "KittenTTS",
+                "badge": "local · free",
+                "tag": "Lightweight local ONNX TTS (~25MB), no API key",
+                "env_vars": [],
+                "tts_provider": "kittentts",
+                "post_setup": "kittentts",
+            },
        ],
    },
    "web": {
@@ -422,6 +431,36 @@ def _run_post_setup(post_setup_key: str):
            _print_warning("    Node.js not found. Install Camofox via Docker:")
            _print_info("      docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")

+    elif post_setup_key == "kittentts":
+        try:
+            __import__("kittentts")
+            _print_success("    kittentts is already installed")
+            return
+        except ImportError:
+            pass
+        import subprocess
+        _print_info("    Installing kittentts (~25-80MB model, CPU-only)...")
+        wheel_url = (
+            "https://github.com/KittenML/KittenTTS/releases/download/"
+            "0.8.1/kittentts-0.8.1-py3-none-any.whl"
+        )
+        try:
+            result = subprocess.run(
+                [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
+                capture_output=True, text=True, timeout=300,
+            )
+            if result.returncode == 0:
+                _print_success("    kittentts installed")
+                _print_info("    Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
+                _print_info("    Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)")
+            else:
+                _print_warning("    kittentts install failed:")
+                _print_info(f"      {result.stderr.strip()[:300]}")
+                _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")
+        except subprocess.TimeoutExpired:
+            _print_warning("    kittentts install timed out (>5min)")
+            _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")
+
    elif post_setup_key == "rl_training":
        try:
            __import__("tinker_atropos")
@@ -546,6 +585,10 @@ def _get_platform_tools(
            ts_tools = set(resolve_toolset(ts_key))
            if ts_tools and ts_tools.issubset(all_tool_names):
                enabled_toolsets.add(ts_key)
+        default_off = set(_DEFAULT_OFF_TOOLSETS)
+        if platform in default_off:
+            default_off.remove(platform)
+        enabled_toolsets -= default_off

    # Plugin toolsets: enabled by default unless explicitly disabled.
    # A plugin toolset is "known" for a platform once `hermes tools`
@@ -804,6 +847,51 @@ def _configure_toolset(ts_key: str, config: dict):
        _configure_simple_requirements(ts_key)


+def _plugin_image_gen_providers() -> list[dict]:
+    """Build picker-row dicts from plugin-registered image gen providers.
+
+    Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
+    row but carries an ``image_gen_plugin_name`` marker so downstream
+    code (config writing, model picker) knows to route through the
+    plugin registry instead of the in-tree FAL backend.
+
+    FAL is skipped — it's already exposed by the hardcoded
+    ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
+    a plugin in a follow-up PR, the hardcoded entries go away and this
+    function surfaces it alongside OpenAI automatically.
+    """
+    try:
+        from agent.image_gen_registry import list_providers
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+        providers = list_providers()
+    except Exception:
+        return []
+
+    rows: list[dict] = []
+    for provider in providers:
+        if getattr(provider, "name", None) == "fal":
+            # FAL has its own hardcoded rows today.
+            continue
+        try:
+            schema = provider.get_setup_schema()
+        except Exception:
+            continue
+        if not isinstance(schema, dict):
+            continue
+        rows.append(
+            {
+                "name": schema.get("name", provider.display_name),
+                "badge": schema.get("badge", ""),
+                "tag": schema.get("tag", ""),
+                "env_vars": schema.get("env_vars", []),
+                "image_gen_plugin_name": provider.name,
+            }
+        )
+    return rows
+
+
 def _visible_providers(cat: dict, config: dict) -> list[dict]:
    """Return provider entries visible for the current auth/config state."""
    features = get_nous_subscription_features(config)
@@ -814,6 +902,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
        if provider.get("requires_nous_auth") and not features.nous_auth_present:
            continue
        visible.append(provider)
+
+    # Inject plugin-registered image_gen backends (OpenAI today, more
+    # later) so the picker lists them alongside FAL / Nous Subscription.
+    if cat.get("name") == "Image Generation":
+        visible.extend(_plugin_image_gen_providers())
+
    return visible


@@ -833,7 +927,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
        browser_cfg = config.get("browser", {})
        return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
    if ts_key == "image_gen":
-        return not get_env_value("FAL_KEY")
+        # Satisfied when the in-tree FAL backend is configured OR any
+        # plugin-registered image gen provider is available.
+        if fal_key_is_configured():
+            return False
+        try:
+            from agent.image_gen_registry import list_providers
+            from hermes_cli.plugins import _ensure_plugins_discovered
+
+            _ensure_plugins_discovered()
+            for provider in list_providers():
+                try:
+                    if provider.is_available():
+                        return False
+                except Exception:
+                    continue
+        except Exception:
+            pass
+        return True

    return not _toolset_has_keys(ts_key, config)

@@ -1052,6 +1163,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None:
    _print_success(f"  Model set to: {chosen}")


+def _plugin_image_gen_catalog(plugin_name: str):
+    """Return ``(catalog_dict, default_model_id)`` for a plugin provider.
+
+    ``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table —
+    ``{model_id: {"display", "speed", "strengths", "price", ...}}`` —
+    so the existing picker code paths work without change. Returns
+    ``({}, None)`` if the provider isn't registered or has no models.
+    """
+    try:
+        from agent.image_gen_registry import get_provider
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+        provider = get_provider(plugin_name)
+    except Exception:
+        return {}, None
+    if provider is None:
+        return {}, None
+    try:
+        models = provider.list_models() or []
+        default = provider.default_model()
+    except Exception:
+        return {}, None
+    catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m}
+    return catalog, default
+
+
+def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None:
+    """Prompt the user to pick a model for a plugin-registered backend.
+
+    Writes selection to ``image_gen.model``. Mirrors
+    :func:`_configure_imagegen_model` but sources its catalog from the
+    plugin registry instead of :data:`IMAGEGEN_BACKENDS`.
+    """
+    catalog, default_model = _plugin_image_gen_catalog(plugin_name)
+    if not catalog:
+        return
+
+    cur_cfg = config.setdefault("image_gen", {})
+    if not isinstance(cur_cfg, dict):
+        cur_cfg = {}
+        config["image_gen"] = cur_cfg
+    current_model = cur_cfg.get("model") or default_model
+    if current_model not in catalog:
+        current_model = default_model
+
+    model_ids = list(catalog.keys())
+    ordered = [current_model] + [m for m in model_ids if m != current_model]
+
+    widths = {
+        "model": max(len(m) for m in model_ids),
+        "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6),
+        "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0),
+    }
+
+    print()
+    header = (
+        f"  {'Model':<{widths['model']}}  "
+        f"{'Speed':<{widths['speed']}}  "
+        f"{'Strengths':<{widths['strengths']}}  "
+        f"Price"
+    )
+    print(color(header, Colors.CYAN))
+
+    rows = []
+    for mid in ordered:
+        row = _format_imagegen_model_row(mid, catalog[mid], widths)
+        if mid == current_model:
+            row += "  ← currently in use"
+        rows.append(row)
+
+    idx = _prompt_choice(
+        f"  Choose {plugin_name} model:",
+        rows,
+        default=0,
+    )
+
+    chosen = ordered[idx]
+    cur_cfg["model"] = chosen
+    _print_success(f"  Model set to: {chosen}")
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -1108,10 +1301,28 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+        # Plugin-registered image_gen provider: write image_gen.provider
+        # and route model selection to the plugin's own catalog.
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            img_cfg = config.setdefault("image_gen", {})
+            if not isinstance(img_cfg, dict):
+                img_cfg = {}
+                config["image_gen"] = img_cfg
+            img_cfg["provider"] = plugin_name
+            _print_success(f"  image_gen.provider set to: {plugin_name}")
+            _configure_imagegen_model_for_plugin(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection after backend pick.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            # In-tree FAL is the only non-plugin backend today. Keep
+            # image_gen.provider clear so the dispatch shim falls through
+            # to the legacy FAL path.
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
+                img_cfg["provider"] = "fal"
        return

    # Prompt for each required env var
@@ -1146,10 +1357,23 @@ def _configure_provider(provider: dict, config: dict):

    if all_configured:
        _print_success(f"  {provider['name']} configured!")
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            img_cfg = config.setdefault("image_gen", {})
+            if not isinstance(img_cfg, dict):
+                img_cfg = {}
+                config["image_gen"] = img_cfg
+            img_cfg["provider"] = plugin_name
+            _print_success(f"  image_gen.provider set to: {plugin_name}")
+            _configure_imagegen_model_for_plugin(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection after env vars are in.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
+                img_cfg["provider"] = "fal"


 def _configure_simple_requirements(ts_key: str):
@@ -1175,17 +1399,17 @@ def _configure_simple_requirements(ts_key: str):
                _print_warning("    Skipped")
        elif idx == 1:
            base_url = _prompt("    OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
-            key_label = "    OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else "    API key"
+            is_native_openai = base_url_hostname(base_url) == "api.openai.com"
+            key_label = "    OPENAI_API_KEY" if is_native_openai else "    API key"
            api_key = _prompt(key_label, password=True)
            if api_key and api_key.strip():
                save_env_value("OPENAI_API_KEY", api_key.strip())
                # Save vision base URL to config (not .env — only secrets go there)
-                from hermes_cli.config import load_config, save_config
                _cfg = load_config()
                _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
                _aux["base_url"] = base_url
                save_config(_cfg)
-                if "api.openai.com" in base_url.lower():
+                if is_native_openai:
                    save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
                _print_success("    Saved")
            else:
@@ -16,6 +16,7 @@ import json
 import logging
 import os
 import secrets
+import subprocess
 import sys
 import threading
 import time
@@ -114,6 +115,91 @@ def _require_token(request: Request) -> None:
        raise HTTPException(status_code=401, detail="Unauthorized")


+# Accepted Host header values for loopback binds. DNS rebinding attacks
+# point a victim browser at an attacker-controlled hostname (evil.test)
+# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin
+# checks because the browser now considers evil.test and our dashboard
+# "same origin". Validating the Host header at the app layer rejects any
+# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7.
+_LOOPBACK_HOST_VALUES: frozenset = frozenset({
+    "localhost", "127.0.0.1", "::1",
+})
+
+
+def _is_accepted_host(host_header: str, bound_host: str) -> bool:
+    """True if the Host header targets the interface we bound to.
+
+    Accepts:
+    - Exact bound host (with or without port suffix)
+    - Loopback aliases when bound to loopback
+    - Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback,
+      no protection possible at this layer)
+    """
+    if not host_header:
+        return False
+    # Strip port suffix. IPv6 addresses use bracket notation:
+    #   [::1]         — no port
+    #   [::1]:9119    — with port
+    # Plain hosts/v4:
+    #   localhost:9119
+    #   127.0.0.1:9119
+    h = host_header.strip()
+    if h.startswith("["):
+        # IPv6 bracketed — port (if any) follows "]:"
+        close = h.find("]")
+        if close != -1:
+            host_only = h[1:close]  # strip brackets
+        else:
+            host_only = h.strip("[]")
+    else:
+        host_only = h.rsplit(":", 1)[0] if ":" in h else h
+    host_only = host_only.lower()
+
+    # 0.0.0.0 bind means operator explicitly opted into all-interfaces
+    # (requires --insecure per web_server.start_server). No Host-layer
+    # defence can protect that mode; rely on operator network controls.
+    if bound_host in ("0.0.0.0", "::"):
+        return True
+
+    # Loopback bind: accept the loopback names
+    bound_lc = bound_host.lower()
+    if bound_lc in _LOOPBACK_HOST_VALUES:
+        return host_only in _LOOPBACK_HOST_VALUES
+
+    # Explicit non-loopback bind: require exact host match
+    return host_only == bound_lc
+
+
+@app.middleware("http")
+async def host_header_middleware(request: Request, call_next):
+    """Reject requests whose Host header doesn't match the bound interface.
+
+    Defends against DNS rebinding: a victim browser on a localhost
+    dashboard is tricked into fetching from an attacker hostname that
+    TTL-flips to 127.0.0.1. CORS and same-origin checks don't help —
+    the browser now treats the attacker origin as same-origin with the
+    dashboard. Host-header validation at the app layer catches it.
+
+    See GHSA-ppp5-vxwm-4cf7.
+    """
+    # Store the bound host on app.state so this middleware can read it —
+    # set by start_server() at listen time.
+    bound_host = getattr(app.state, "bound_host", None)
+    if bound_host:
+        host_header = request.headers.get("host", "")
+        if not _is_accepted_host(host_header, bound_host):
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "detail": (
+                        "Invalid Host header. Dashboard requests must use "
+                        "the hostname the server was bound to."
+                    ),
+                },
+            )
+    return await call_next(request)
+
+
@app.middleware("http")
 async def auth_middleware(request: Request, call_next):
    """Require the session token on all /api/ routes except the public list."""
@@ -476,6 +562,138 @@ async def get_status():
    }


+# ---------------------------------------------------------------------------
+# Gateway + update actions (invoked from the Status page).
+#
+# Both commands are spawned as detached subprocesses so the HTTP request
+# returns immediately.  stdin is closed (``DEVNULL``) so any stray ``input()``
+# calls fail fast with EOF rather than hanging forever.  stdout/stderr are
+# streamed to a per-action log file under ``~/.hermes/logs/<action>.log`` so
+# the dashboard can tail them back to the user.
+# ---------------------------------------------------------------------------
+
+_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
+
+# Short ``name`` (from the URL) → absolute log file path.
+_ACTION_LOG_FILES: Dict[str, str] = {
+    "gateway-restart": "gateway-restart.log",
+    "hermes-update": "hermes-update.log",
+}
+
+# ``name`` → most recently spawned Popen handle.  Used so ``status`` can
+# report liveness and exit code without shelling out to ``ps``.
+_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
+
+
+def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
+    """Spawn ``hermes <subcommand>`` detached and record the Popen handle.
+
+    Uses the running interpreter's ``hermes_cli.main`` module so the action
+    inherits the same venv/PYTHONPATH the web server is using.
+    """
+    log_file_name = _ACTION_LOG_FILES[name]
+    _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True)
+    log_path = _ACTION_LOG_DIR / log_file_name
+    log_file = open(log_path, "ab", buffering=0)
+    log_file.write(
+        f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode()
+    )
+
+    cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand]
+
+    popen_kwargs: Dict[str, Any] = {
+        "cwd": str(PROJECT_ROOT),
+        "stdin": subprocess.DEVNULL,
+        "stdout": log_file,
+        "stderr": subprocess.STDOUT,
+        "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"},
+    }
+    if sys.platform == "win32":
+        popen_kwargs["creationflags"] = (
+            subprocess.CREATE_NEW_PROCESS_GROUP  # type: ignore[attr-defined]
+            | getattr(subprocess, "DETACHED_PROCESS", 0)
+        )
+    else:
+        popen_kwargs["start_new_session"] = True
+
+    proc = subprocess.Popen(cmd, **popen_kwargs)
+    _ACTION_PROCS[name] = proc
+    return proc
+
+
+def _tail_lines(path: Path, n: int) -> List[str]:
+    """Return the last ``n`` lines of ``path``.  Reads the whole file — fine
+    for our small per-action logs.  Binary-decoded with ``errors='replace'``
+    so log corruption doesn't 500 the endpoint."""
+    if not path.exists():
+        return []
+    try:
+        text = path.read_text(errors="replace")
+    except OSError:
+        return []
+    lines = text.splitlines()
+    return lines[-n:] if n > 0 else lines
+
+
+@app.post("/api/gateway/restart")
+async def restart_gateway():
+    """Kick off a ``hermes gateway restart`` in the background."""
+    try:
+        proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
+    except Exception as exc:
+        _log.exception("Failed to spawn gateway restart")
+        raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}")
+    return {
+        "ok": True,
+        "pid": proc.pid,
+        "name": "gateway-restart",
+    }
+
+
+@app.post("/api/hermes/update")
+async def update_hermes():
+    """Kick off ``hermes update`` in the background."""
+    try:
+        proc = _spawn_hermes_action(["update"], "hermes-update")
+    except Exception as exc:
+        _log.exception("Failed to spawn hermes update")
+        raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}")
+    return {
+        "ok": True,
+        "pid": proc.pid,
+        "name": "hermes-update",
+    }
+
+
+@app.get("/api/actions/{name}/status")
+async def get_action_status(name: str, lines: int = 200):
+    """Tail an action log and report whether the process is still running."""
+    log_file_name = _ACTION_LOG_FILES.get(name)
+    if log_file_name is None:
+        raise HTTPException(status_code=404, detail=f"Unknown action: {name}")
+
+    log_path = _ACTION_LOG_DIR / log_file_name
+    tail = _tail_lines(log_path, min(max(lines, 1), 2000))
+
+    proc = _ACTION_PROCS.get(name)
+    if proc is None:
+        running = False
+        exit_code: Optional[int] = None
+        pid: Optional[int] = None
+    else:
+        exit_code = proc.poll()
+        running = exit_code is None
+        pid = proc.pid
+
+    return {
+        "name": name,
+        "running": running,
+        "exit_code": exit_code,
+        "pid": pid,
+        "lines": tail,
+    }
+
+
@app.get("/api/sessions")
 async def get_sessions(limit: int = 20, offset: int = 0):
    try:
@@ -1958,6 +2176,8 @@ async def update_config_raw(body: RawConfigUpdate):
@app.get("/api/analytics/usage")
 async def get_usage_analytics(days: int = 30):
    from hermes_state import SessionDB
+    from agent.insights import InsightsEngine
+
    db = SessionDB()
    try:
        cutoff = time.time() - (days * 86400)
@@ -1997,8 +2217,24 @@ async def get_usage_analytics(days: int = 30):
            FROM sessions WHERE started_at > ?
        """, (cutoff,))
        totals = dict(cur3.fetchone())
+        insights_report = InsightsEngine(db).generate(days=days)
+        skills = insights_report.get("skills", {
+            "summary": {
+                "total_skill_loads": 0,
+                "total_skill_edits": 0,
+                "total_skill_actions": 0,
+                "distinct_skills_used": 0,
+            },
+            "top_skills": [],
+        })

-        return {"daily": daily, "by_model": by_model, "totals": totals, "period_days": days}
+        return {
+            "daily": daily,
+            "by_model": by_model,
+            "totals": totals,
+            "period_days": days,
+            "skills": skills,
+        }
    finally:
        db.close()

@@ -2305,13 +2541,15 @@ def start_server(
            "authentication. Only use on trusted networks.", host,
        )

+    # Record the bound host so host_header_middleware can validate incoming
+    # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
+    app.state.bound_host = host
+
    if open_browser:
-        import threading
        import webbrowser

        def _open():
-            import time as _t
-            _t.sleep(1.0)
+            time.sleep(1.0)
            webbrowser.open(f"http://{host}:{port}")

        threading.Thread(target=_open, daemon=True).start()
@@ -1249,10 +1249,37 @@ class SessionDB:
            try:
                with self._lock:
                    ctx_cursor = self._conn.execute(
-                        """SELECT role, content FROM messages
-                           WHERE session_id = ? AND id >= ? - 1 AND id <= ? + 1
-                           ORDER BY id""",
-                        (match["session_id"], match["id"], match["id"]),
+                        """WITH target AS (
+                               SELECT session_id, timestamp, id
+                               FROM messages
+                               WHERE id = ?
+                           )
+                           SELECT role, content
+                           FROM (
+                               SELECT m.id, m.timestamp, m.role, m.content
+                               FROM messages m
+                               JOIN target t ON t.session_id = m.session_id
+                               WHERE (m.timestamp < t.timestamp)
+                                  OR (m.timestamp = t.timestamp AND m.id < t.id)
+                               ORDER BY m.timestamp DESC, m.id DESC
+                               LIMIT 1
+                           )
+                           UNION ALL
+                           SELECT role, content
+                           FROM messages
+                           WHERE id = ?
+                           UNION ALL
+                           SELECT role, content
+                           FROM (
+                               SELECT m.id, m.timestamp, m.role, m.content
+                               FROM messages m
+                               JOIN target t ON t.session_id = m.session_id
+                               WHERE (m.timestamp > t.timestamp)
+                                  OR (m.timestamp = t.timestamp AND m.id > t.id)
+                               ORDER BY m.timestamp ASC, m.id ASC
+                               LIMIT 1
+                           )""",
+                        (match["id"], match["id"]),
                    )
                    context_msgs = [
                        {"role": r["role"], "content": (r["content"] or "")[:200]}
@@ -47,12 +47,19 @@ def _effective_temperature_for_model(
    model: str,
    base_url: Optional[str] = None,
 ) -> Optional[float]:
-    """Return a fixed temperature for models with strict sampling contracts."""
+    """Return a fixed temperature for models with strict sampling contracts.
+
+    Returns ``None`` when the model manages temperature server-side (Kimi);
+    callers must omit the ``temperature`` kwarg entirely in that case.
+    """
    try:
-        from agent.auxiliary_client import _fixed_temperature_for_model
+        from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
    except Exception:
        return None
-    return _fixed_temperature_for_model(model, base_url)
+    result = _fixed_temperature_for_model(model, base_url)
+    if result is OMIT_TEMPERATURE:
+        return None  # caller must omit temperature
+    return result



@@ -7,7 +7,8 @@
    let
      hermes-agent = inputs.self.packages.${system}.default;
      hermes-tui = inputs.self.packages.${system}.tui;
-      packages = [ hermes-agent hermes-tui ];
+      hermes-web = inputs.self.packages.${system}.web;
+      packages = [ hermes-agent hermes-tui hermes-web ];
    in {
      devShells.default = pkgs.mkShell {
        inputsFrom = packages;
@@ -0,0 +1,217 @@
+# nix/lib.nix — Shared helpers for nix stuff
+{ pkgs, npm-lockfile-fix }:
+{
+  # Returns a buildNpmPackage-compatible attrs set that provides:
+  #   patchPhase          — ensures lockfile has exactly one trailing newline
+  #   nativeBuildInputs   — [ updateLockfileScript ] (list, prepend with ++ for more)
+  #   passthru.devShellHook  — stamp-checked npm install + hash auto-update
+  #   passthru.npmLockfile   — metadata for mkFixLockfiles
+  #
+  # NOTE: npmConfigHook runs `diff` between the source lockfile and the
+  # npm-deps cache lockfile. fetchNpmDeps preserves whatever trailing
+  # newlines the lockfile has. The patchPhase normalizes to exactly one
+  # trailing newline so both sides always match.
+  #
+  # Usage:
+  #   npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
+  #   pkgs.buildNpmPackage (npm // { ... } # or:
+  #   pkgs.buildNpmPackage ({ ... } // npm)
+  mkNpmPassthru =
+    {
+      folder, # repo-relative folder with package.json, e.g. "ui-tui"
+      attr, # flake package attr, e.g. "tui"
+      pname, # e.g. "hermes-tui"
+      nixFile ? "nix/${attr}.nix", # defaults to nix/<attr>.nix
+    }:
+    {
+      patchPhase = ''
+        runHook prePatch
+        # Normalize trailing newlines so source and npm-deps always match,
+        # regardless of what fetchNpmDeps preserves.
+        sed -i -z 's/\n*$/\n/' package-lock.json
+
+        # Make npmConfigHook's byte-for-byte diff newline-agnostic by
+        # replacing its hardcoded /nix/store/.../diff with a wrapper that
+        # normalizes trailing newlines on both sides before comparing.
+        mkdir -p "$TMPDIR/bin"
+        cat > "$TMPDIR/bin/diff" << DIFFWRAP
+        #!/bin/sh
+        f1=\$(mktemp) && sed -z 's/\n*$/\n/' "\$1" > "\$f1"
+        f2=\$(mktemp) && sed -z 's/\n*$/\n/' "\$2" > "\$f2"
+        ${pkgs.diffutils}/bin/diff "\$f1" "\$f2" && rc=0 || rc=\$?
+        rm -f "\$f1" "\$f2"
+        exit \$rc
+        DIFFWRAP
+        chmod +x "$TMPDIR/bin/diff"
+        export PATH="$TMPDIR/bin:$PATH"
+
+        runHook postPatch
+      '';
+
+      nativeBuildInputs = [
+        (pkgs.writeShellScriptBin "update_${attr}_lockfile" ''
+          set -euox pipefail
+
+          REPO_ROOT=$(git rev-parse --show-toplevel)
+
+          cd "$REPO_ROOT/${folder}"
+          rm -rf node_modules/
+          npm cache clean --force
+          CI=true npm install
+          ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
+
+          NIX_FILE="$REPO_ROOT/${nixFile}"
+          sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
+          NIX_OUTPUT=$(nix build .#${attr} 2>&1 || true)
+          NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
+          echo got new hash $NEW_HASH
+          sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
+          nix build .#${attr}
+          echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
+        '')
+      ];
+
+      passthru = {
+        devShellHook = pkgs.writeShellScript "npm-dev-hook-${pname}" ''
+          REPO_ROOT=$(git rev-parse --show-toplevel)
+
+          _hermes_npm_stamp() {
+            sha256sum "${folder}/package.json" "${folder}/package-lock.json" \
+              2>/dev/null | sha256sum | awk '{print $1}'
+          }
+          STAMP=".nix-stamps/${pname}"
+          STAMP_VALUE="$(_hermes_npm_stamp)"
+          if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+            echo "${pname}: installing npm dependencies..."
+            ( cd ${folder} && CI=true npm install --silent --no-fund --no-audit 2>/dev/null )
+
+            # Auto-update the nix hash so it stays in sync with the lockfile
+            echo "${pname}: prefetching npm deps..."
+            NIX_FILE="$REPO_ROOT/${nixFile}"
+            if NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "${folder}/package-lock.json" 2>/dev/null); then
+              sed -i "s|hash = \"sha256-[A-Za-z0-9+/=]+\"|hash = \"$NEW_HASH\";|" "$NIX_FILE"
+              echo "${pname}: updated hash to $NEW_HASH"
+            else
+              echo "${pname}: warning: prefetch failed, run 'nix run .#fix-lockfiles -- --apply' manually" >&2
+            fi
+
+            mkdir -p .nix-stamps
+            _hermes_npm_stamp > "$STAMP"
+          fi
+          unset -f _hermes_npm_stamp
+        '';
+
+        npmLockfile = {
+          inherit attr folder nixFile;
+        };
+      };
+    };
+
+  # Aggregate `fix-lockfiles` bin from a list of packages carrying
+  #   passthru.npmLockfile = { attr; folder; nixFile; };
+  # Invocations:
+  #   fix-lockfiles --check   # exit 1 if any hash is stale
+  #   fix-lockfiles --apply   # rewrite stale hashes in place
+  # Writes machine-readable fields (stale, changed, report) to $GITHUB_OUTPUT
+  # when set, so CI workflows can post a sticky PR comment directly.
+  mkFixLockfiles =
+    {
+      packages, # list of packages with passthru.npmLockfile
+    }:
+    let
+      entries = map (p: p.passthru.npmLockfile) packages;
+      entryArgs = pkgs.lib.concatMapStringsSep " " (e: "\"${e.attr}:${e.folder}:${e.nixFile}\"") entries;
+    in
+    pkgs.writeShellScriptBin "fix-lockfiles" ''
+      set -uox pipefail
+      MODE="''${1:---check}"
+      case "$MODE" in
+        --check|--apply) ;;
+        -h|--help)
+          echo "usage: fix-lockfiles [--check|--apply]"
+          exit 0 ;;
+        *)
+          echo "usage: fix-lockfiles [--check|--apply]" >&2
+          exit 2 ;;
+      esac
+
+      ENTRIES=(${entryArgs})
+
+      REPO_ROOT="$(git rev-parse --show-toplevel)"
+      cd "$REPO_ROOT"
+
+      # When running in GH Actions, emit Markdown links in the report pointing
+      # at the offending line of the nix file (and the lockfile) at the exact
+      # commit that was checked. LINK_SHA should be set by the workflow to the
+      # PR head SHA; falls back to GITHUB_SHA (which on pull_request is the
+      # test-merge commit, still browseable).
+      LINK_SERVER="''${GITHUB_SERVER_URL:-https://github.com}"
+      LINK_REPO="''${GITHUB_REPOSITORY:-}"
+      LINK_SHA="''${LINK_SHA:-''${GITHUB_SHA:-}}"
+
+      STALE=0
+      FIXED=0
+      REPORT=""
+
+      for entry in "''${ENTRIES[@]}"; do
+        IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
+        echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
+        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
+        STATUS=$?
+        if [ "$STATUS" -eq 0 ]; then
+          echo "    ok"
+          continue
+        fi
+
+        NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
+        if [ -z "$NEW_HASH" ]; then
+          echo "    build failed with no hash mismatch:" >&2
+          echo "$OUTPUT" | tail -40 >&2
+          exit 1
+        fi
+
+        HASH_LINE=$(grep -n 'hash = "sha256-' "$NIX_FILE" | head -1 | cut -d: -f1)
+        OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
+          | sed -E 's/hash = "(.*)"/\1/')
+        LOCK_FILE="$FOLDER/package-lock.json"
+        echo "    stale: $NIX_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
+        STALE=1
+
+        if [ -n "$LINK_REPO" ] && [ -n "$LINK_SHA" ]; then
+          NIX_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$NIX_FILE#L$HASH_LINE"
+          LOCK_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LOCK_FILE"
+          REPORT+="- [\`$NIX_FILE:$HASH_LINE\`]($NIX_URL) (\`.#$ATTR\`): \`$OLD_HASH\` → \`$NEW_HASH\` — lockfile: [\`$LOCK_FILE\`]($LOCK_URL)"$'\n'
+        else
+          REPORT+="- \`$NIX_FILE:$HASH_LINE\` (\`.#$ATTR\`): \`$OLD_HASH\` → \`$NEW_HASH\`"$'\n'
+        fi
+
+        if [ "$MODE" = "--apply" ]; then
+          sed -i "s|hash = \"sha256-[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
+          nix build ".#$ATTR.npmDeps" --no-link --print-build-logs
+          FIXED=1
+          echo "    fixed"
+        fi
+      done
+
+      if [ -n "''${GITHUB_OUTPUT:-}" ]; then
+        {
+          [ "$STALE" -eq 1 ] && echo "stale=true" || echo "stale=false"
+          [ "$FIXED" -eq 1 ] && echo "changed=true" || echo "changed=false"
+          if [ -n "$REPORT" ]; then
+            echo "report<<REPORT_EOF"
+            printf "%s" "$REPORT"
+            echo "REPORT_EOF"
+          fi
+        } >> "$GITHUB_OUTPUT"
+      fi
+
+      if [ "$STALE" -eq 1 ] && [ "$MODE" = "--check" ]; then
+        echo
+        echo "Stale lockfile hashes detected. Run:"
+        echo "  nix run .#fix-lockfiles -- --apply"
+        exit 1
+      fi
+
+      exit 0
+    '';
+}
@@ -8,10 +8,14 @@
        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
      };

-      hermesTui = pkgs.callPackage ./tui.nix {
+      hermesNpmLib = pkgs.callPackage ./lib.nix {
        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
      };

+      hermesTui = pkgs.callPackage ./tui.nix {
+        inherit hermesNpmLib;
+      };
+
      # Import bundled skills, excluding runtime caches
      bundledSkills = pkgs.lib.cleanSourceWith {
        src = ../skills;
@@ -19,7 +23,7 @@
      };

      hermesWeb = pkgs.callPackage ./web.nix {
-        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
+        inherit hermesNpmLib;
      };

      runtimeDeps = with pkgs; [
@@ -111,6 +115,10 @@

        tui = hermesTui;
        web = hermesWeb;
+
+        fix-lockfiles = hermesNpmLib.mkFixLockfiles {
+          packages = [ hermesTui hermesWeb ];
+        };
      };
    };
 }
@@ -1,18 +1,18 @@
 # nix/tui.nix — Hermes TUI (Ink/React) compiled with tsc and bundled
-{ pkgs, npm-lockfile-fix, ... }:
+{ pkgs, hermesNpmLib, ... }:
 let
  src = ../ui-tui;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90=";
+    hash = "sha256-RU4qSHgJPMyfRSEJDzkG4+MReDZDc6QbTD2wisa5QE0=";
  };

+  npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
+
  packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json"));
  version = packageJson.version;
-
-  npmLockHash = builtins.hashString "sha256" (builtins.readFile ../ui-tui/package-lock.json);
 in
-pkgs.buildNpmPackage {
+pkgs.buildNpmPackage (npm // {
  pname = "hermes-tui";
  inherit src npmDeps version;

@@ -37,41 +37,4 @@ pkgs.buildNpmPackage {

    runHook postInstall
  '';
-
-  nativeBuildInputs = [
-    (pkgs.writeShellScriptBin "update_tui_lockfile" ''
-      set -euox pipefail
-
-      # get root of repo
-      REPO_ROOT=$(git rev-parse --show-toplevel)
-
-      # cd into ui-tui and reinstall
-      cd "$REPO_ROOT/ui-tui"
-      rm -rf node_modules/
-      npm cache clean --force
-      CI=true npm install # ci env var to suppress annoying unicode install banner lag
-      ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
-
-      NIX_FILE="$REPO_ROOT/nix/tui.nix"
-      # compute the new hash
-      sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
-      NIX_OUTPUT=$(nix build .#tui 2>&1 || true)
-      NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') 
-      echo got new hash $NEW_HASH
-      sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
-      nix build .#tui
-      echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
-    '')
-  ];
-
-  passthru.devShellHook = ''
-    STAMP=".nix-stamps/hermes-tui"
-    STAMP_VALUE="${npmLockHash}"
-    if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
-      echo "hermes-tui: installing npm dependencies..."
-      cd ui-tui && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd ..
-      mkdir -p .nix-stamps
-      echo "$STAMP_VALUE" > "$STAMP"
-    fi
-  '';
-}
+})
@@ -1,15 +1,15 @@
 # nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build
-{ pkgs, npm-lockfile-fix, ... }:
+{ pkgs, hermesNpmLib, ... }:
 let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4=";
+    hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
  };

-  npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json);
+  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
 in
-pkgs.buildNpmPackage {
+pkgs.buildNpmPackage (npm // {
  pname = "hermes-web";
  version = "0.0.0";
  inherit src npmDeps;
@@ -26,38 +26,4 @@ pkgs.buildNpmPackage {
    cp -r dist $out
    runHook postInstall
  '';
-
-  nativeBuildInputs = [
-    (pkgs.writeShellScriptBin "update_web_lockfile" ''
-      set -euox pipefail
-
-      REPO_ROOT=$(git rev-parse --show-toplevel)
-
-      cd "$REPO_ROOT/web"
-      rm -rf node_modules/
-      npm cache clean --force
-      CI=true npm install
-      ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
-
-      NIX_FILE="$REPO_ROOT/nix/web.nix"
-      sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
-      NIX_OUTPUT=$(nix build .#web 2>&1 || true)
-      NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
-      echo got new hash $NEW_HASH
-      sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
-      nix build .#web
-      echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
-    '')
-  ];
-
-  passthru.devShellHook = ''
-    STAMP=".nix-stamps/hermes-web"
-    STAMP_VALUE="${npmLockHash}"
-    if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
-      echo "hermes-web: installing npm dependencies..."
-      cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd ..
-      mkdir -p .nix-stamps
-      echo "$STAMP_VALUE" > "$STAMP"
-    fi
-  '';
-}
+})
@@ -0,0 +1,3 @@
+# Dogfood — Advanced QA & Testing Skills
+
+Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses.
@@ -0,0 +1,190 @@
+---
+name: adversarial-ux-test
+description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only.
+version: 1.0.0
+author: Omni @ Comelse
+license: MIT
+metadata:
+  hermes:
+    tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing]
+    related_skills: [dogfood]
+---
+
+# Adversarial UX Test
+
+Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise.
+
+Think of it as an automated "mom test" — but angry.
+
+## Why This Works
+
+Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches:
+- Confusing terminology that makes sense to developers but not users
+- Too many steps to accomplish basic tasks
+- Missing onboarding or "aha moments"
+- Accessibility issues (font size, contrast, click targets)
+- Cold-start problems (empty states, no demo content)
+- Paywall/signup friction that kills conversion
+
+The **pragmatism filter** (Phase 3) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs.
+
+## How to Use
+
+Tell the agent:
+```
+"Run an adversarial UX test on [URL]"
+"Be a grumpy [persona type] and test [app name]"
+"Do an asshole user test on my staging site"
+```
+
+You can provide a persona or let the agent generate one based on your product's target audience.
+
+## Step 1: Define the Persona
+
+If no persona is provided, generate one by answering:
+
+1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way")
+2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email)
+3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list)
+4. **What would make them give up?** (too many clicks, jargon, slow, confusing)
+5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing)
+
+### Good Persona Example
+> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords.
+
+### Bad Persona Example
+> "A user who doesn't like the app" — too vague, no constraints, no voice.
+
+The persona must be **specific enough to stay in character** for 20 minutes of testing.
+
+## Step 2: Become the Asshole (Browse as the Persona)
+
+1. Read any available project docs for app context and URLs
+2. **Fully inhabit the persona** — their frustrations, limitations, goals
+3. Navigate to the app using browser tools
+4. **Attempt the persona's ACTUAL TASKS** (not a feature tour):
+   - Can they do what they came to do?
+   - How many clicks/screens to accomplish it?
+   - What confuses them?
+   - What makes them angry?
+   - Where do they get lost?
+   - What would make them give up and go back to their old way?
+
+5. Test these friction categories:
+   - **First impression** — would they even bother past the landing page?
+   - **Core workflow** — the ONE thing they need to do most often
+   - **Error recovery** — what happens when they do something wrong?
+   - **Readability** — text size, contrast, information density
+   - **Speed** — does it feel faster than their current method?
+   - **Terminology** — any jargon they wouldn't understand?
+   - **Navigation** — can they find their way back? do they know where they are?
+
+6. Take screenshots of every pain point
+7. Check browser console for JS errors on every page
+
+## Step 3: The Rant (Write Feedback in Character)
+
+Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting.
+
+```
+[PERSONA NAME]'s Review of [PRODUCT]
+
+Overall: [Would they keep using it? Yes/No/Maybe with conditions]
+
+THE GOOD (grudging admission):
+- [things even they have to admit work]
+
+THE BAD (legitimate UX issues):
+- [real problems that would stop them from using the product]
+
+THE UGLY (showstoppers):
+- [things that would make them uninstall/cancel immediately]
+
+SPECIFIC COMPLAINTS:
+1. [Page/feature]: "[quote in persona voice]" — [what happened, expected]
+2. ...
+
+VERDICT: "[one-line persona quote summarizing their experience]"
+```
+
+## Step 4: The Pragmatism Filter (Critical — Do Not Skip)
+
+Step OUT of the persona. Evaluate each complaint as a product person:
+
+- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it.
+- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it.
+- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it.
+- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it.
+
+### Filter Criteria
+1. Would a 35-year-old competent-but-busy user have the same complaint? → RED
+2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED
+3. Is this "I want it to work like paper" resistance to digital? → WHITE
+4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED
+5. Would fixing this add complexity for the 80% who are fine? → WHITE
+6. Does the complaint reveal a missing onboarding moment? → GREEN
+
+**This filter is MANDATORY.** Never ship raw persona complaints as tickets.
+
+## Step 5: Create Tickets
+
+For **RED** and **GREEN** items only:
+- Clear, actionable title
+- Include the persona's verbatim quote (entertaining + memorable)
+- The real UX issue underneath (objective)
+- A suggested fix (actionable)
+- Tag/label: "ux-review"
+
+For **YELLOW** items: one catch-all ticket with all notes.
+
+**WHITE** items appear in the report only. No tickets.
+
+**Max 10 tickets per session** — focus on the worst issues.
+
+## Step 6: Report
+
+Deliver:
+1. The persona rant (Step 3) — entertaining and visceral
+2. The filtered assessment (Step 4) — pragmatic and actionable
+3. Tickets created (Step 5) — with links
+4. Screenshots of key issues
+
+## Tips
+
+- **One persona per session.** Don't mix perspectives.
+- **Stay in character during Steps 2-3.** Break character only at Step 4.
+- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages.
+- **Empty states are gold.** New user experience reveals the most friction.
+- **The best findings are RED items the persona found accidentally** while trying to do something else.
+- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways.
+- **Run this before demos, launches, or after shipping a batch of features.**
+- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives.
+- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona.
+- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain.
+- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage.
+- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level.
+
+## Example Personas by Industry
+
+These are starting points — customize for your specific product:
+
+| Product Type | Persona | Age | Key Trait |
+|-------------|---------|-----|-----------|
+| CRM | Retirement home director | 68 | Filing cabinet is the current CRM |
+| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper |
+| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups |
+| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes |
+| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions |
+| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls |
+| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer |
+| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders |
+
+## Rules
+
+- Stay in character during Steps 2-3
+- Be genuinely mean but fair — find real problems, not manufactured ones
+- The pragmatism filter (Step 4) is **MANDATORY**
+- Screenshots required for every complaint
+- Max 10 tickets per session
+- Test on staging/deployed app, not local dev
+- One persona, one session, one report
@@ -57,32 +57,32 @@ Use the `ddgs` command via `terminal` when it exists. This is the preferred path

 ```bash
 # Text search
-ddgs text -k "python async programming" -m 5
+ddgs text -q "python async programming" -m 5

 # News search
-ddgs news -k "artificial intelligence" -m 5
+ddgs news -q "artificial intelligence" -m 5

 # Image search
-ddgs images -k "landscape photography" -m 10
+ddgs images -q "landscape photography" -m 10

 # Video search
-ddgs videos -k "python tutorial" -m 5
+ddgs videos -q "python tutorial" -m 5

 # With region filter
-ddgs text -k "best restaurants" -m 5 -r us-en
+ddgs text -q "best restaurants" -m 5 -r us-en

 # Recent results only (d=day, w=week, m=month, y=year)
-ddgs text -k "latest AI news" -m 5 -t w
+ddgs text -q "latest AI news" -m 5 -t w

 # JSON output for parsing
-ddgs text -k "fastapi tutorial" -m 5 -o json
+ddgs text -q "fastapi tutorial" -m 5 -o json
 ```

 ### CLI Flags

 | Flag | Description | Example |
 |------|-------------|---------|
-| `-k` | Keywords (query) — **required** | `-k "search terms"` |
+| `-q` | Query — **required** | `-q "search terms"` |
 | `-m` | Max results | `-m 5` |
 | `-r` | Region | `-r us-en` |
 | `-t` | Time limit | `-t w` (week) |
@@ -189,7 +189,7 @@ DuckDuckGo returns titles, URLs, and snippets — not full page content. To get
 CLI example:

 ```bash
-ddgs text -k "fastapi deployment guide" -m 3 -o json
+ddgs text -q "fastapi deployment guide" -m 3 -o json
 ```

 Python example, only after verifying `ddgs` is installed in that runtime:
@@ -229,7 +229,7 @@ Then extract the best URL with `web_extract` or another content-retrieval tool.
 - **Do not assume the CLI exists**: Check `command -v ddgs` before using it.
 - **Do not assume `execute_code` can import `ddgs`**: `from ddgs import DDGS` may fail with `ModuleNotFoundError` unless that runtime was prepared separately.
 - **Package name**: The package is `ddgs` (previously `duckduckgo-search`). Install with `pip install ddgs`.
- **Don't confuse `-k` and `-m`** (CLI): `-k` is for keywords, `-m` is for max results count.
+- **Don't confuse `-q` and `-m`** (CLI): `-q` is for the query, `-m` is for max results count.
 - **Empty results**: If `ddgs` returns nothing, it may be rate-limited. Wait a few seconds and retry.

 ## Validated With
@@ -25,4 +25,4 @@ if ! command -v ddgs &> /dev/null; then
    exit 1
 fi

-ddgs text -k "$QUERY" -m "$MAX_RESULTS"
+ddgs text -q "$QUERY" -m "$MAX_RESULTS"
@@ -1069,6 +1069,7 @@
        }
      ],
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "baseline-browser-mapping": "^2.10.12",
        "caniuse-lite": "^1.0.30001782",
@@ -3911,6 +3912,7 @@
      "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz",
      "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "playwright-core": "1.59.1"
      },
@@ -3929,6 +3931,7 @@
      "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz",
      "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==",
      "license": "Apache-2.0",
+      "peer": true,
      "bin": {
        "playwright-core": "cli.js"
      },
@@ -0,0 +1,51 @@
+# disk-cleanup
+
+Auto-tracks and cleans up ephemeral files created during Hermes Agent
+sessions — test scripts, temp outputs, cron logs, stale chrome profiles.
+Scoped strictly to `$HERMES_HOME` and `/tmp/hermes-*`.
+
+Originally contributed by [@LVT382009](https://github.com/LVT382009) as a
+skill in PR #12212.  Ported to the plugin system so the behaviour runs
+automatically via `post_tool_call` and `on_session_end` hooks — the agent
+never needs to remember to call a tool.
+
+## How it works
+
+| Hook | Behaviour |
+|---|---|
+| `post_tool_call` | When `write_file` / `terminal` / `patch` creates a file matching `test_*`, `tmp_*`, or `*.test.*` inside `HERMES_HOME`, track it silently as `test` / `temp` / `cron-output`. |
+| `on_session_end` | If any test files were auto-tracked during this turn, run `quick` cleanup (no prompts). |
+
+Deletion rules (same as the original PR):
+
+| Category | Threshold | Confirmation |
+|---|---|---|
+| `test` | every session end | Never |
+| `temp` | >7 days since tracked | Never |
+| `cron-output` | >14 days since tracked | Never |
+| empty dirs under HERMES_HOME | always | Never |
+| `research` | >30 days, beyond 10 newest | Always (deep only) |
+| `chrome-profile` | >14 days since tracked | Always (deep only) |
+| files >500 MB | never auto | Always (deep only) |
+
+## Slash command
+
+```
+/disk-cleanup status                     # breakdown + top-10 largest
+/disk-cleanup dry-run                    # preview without deleting
+/disk-cleanup quick                      # run safe cleanup now
+/disk-cleanup deep                       # quick + list items needing prompt
+/disk-cleanup track <path> <category>    # manual tracking
+/disk-cleanup forget <path>              # stop tracking
+```
+
+## Safety
+
+- `is_safe_path()` rejects anything outside `HERMES_HOME` or `/tmp/hermes-*`
+- Windows mounts (`/mnt/c` etc.) are rejected
+- The state directory `$HERMES_HOME/disk-cleanup/` is itself excluded
+- `$HERMES_HOME/logs/`, `memories/`, `sessions/`, `skills/`, `plugins/`,
+  and config files are never tracked
+- Backup/restore is scoped to `tracked.json` — the plugin never touches
+  agent logs
+- Atomic writes: `.tmp` → backup → rename
@@ -0,0 +1,316 @@
+"""disk-cleanup plugin — auto-cleanup of ephemeral Hermes session files.
+
+Wires three behaviours:
+
+1. ``post_tool_call`` hook — inspects ``write_file`` and ``terminal``
+   tool results for newly-created paths matching test/temp patterns
+   under ``HERMES_HOME`` and tracks them silently.  Zero agent
+   compliance required.
+
+2. ``on_session_end`` hook — when any test files were auto-tracked
+   during the just-finished turn, runs :func:`disk_cleanup.quick` and
+   logs a single line to ``$HERMES_HOME/disk-cleanup/cleanup.log``.
+
+3. ``/disk-cleanup`` slash command — manual ``status``, ``dry-run``,
+   ``quick``, ``deep``, ``track``, ``forget``.
+
+Replaces PR #12212's skill-plus-script design: the agent no longer
+needs to remember to run commands.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import shlex
+import threading
+from pathlib import Path
+from typing import Any, Dict, Optional, Set
+
+from . import disk_cleanup as dg
+
+logger = logging.getLogger(__name__)
+
+
+# Per-task set of "test files newly tracked this turn".  Keyed by task_id
+# (or session_id as fallback) so on_session_end can decide whether to run
+# cleanup.  Guarded by a lock — post_tool_call can fire concurrently on
+# parallel tool calls.
+_recent_test_tracks: Dict[str, Set[str]] = {}
+_lock = threading.Lock()
+
+
+# Tool-call result shapes we can parse
+_WRITE_FILE_PATH_KEY = "path"
+_TERMINAL_PATH_REGEX = re.compile(r"(?:^|\s)(/[^\s'\"`]+|\~/[^\s'\"`]+)")
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _tracker_key(task_id: str, session_id: str) -> str:
+    return task_id or session_id or "default"
+
+
+def _record_track(task_id: str, session_id: str, path: Path, category: str) -> None:
+    """Record that we tracked *path* as *category* during this turn."""
+    if category != "test":
+        return
+    key = _tracker_key(task_id, session_id)
+    with _lock:
+        _recent_test_tracks.setdefault(key, set()).add(str(path))
+
+
+def _drain(task_id: str, session_id: str) -> Set[str]:
+    """Pop the set of test paths tracked during this turn."""
+    key = _tracker_key(task_id, session_id)
+    with _lock:
+        return _recent_test_tracks.pop(key, set())
+
+
+def _attempt_track(path_str: str, task_id: str, session_id: str) -> None:
+    """Best-effort auto-track. Never raises."""
+    try:
+        p = Path(path_str).expanduser()
+    except Exception:
+        return
+    if not p.exists():
+        return
+    category = dg.guess_category(p)
+    if category is None:
+        return
+    newly = dg.track(str(p), category, silent=True)
+    if newly:
+        _record_track(task_id, session_id, p, category)
+
+
+def _extract_paths_from_write_file(args: Dict[str, Any]) -> Set[str]:
+    path = args.get(_WRITE_FILE_PATH_KEY)
+    return {path} if isinstance(path, str) and path else set()
+
+
+def _extract_paths_from_patch(args: Dict[str, Any]) -> Set[str]:
+    # The patch tool creates new files via the `mode="patch"` path too, but
+    # most of its use is editing existing files — we only care about new
+    # ephemeral creations, so treat patch conservatively and only pick up
+    # the single-file `path` arg.  Track-then-cleanup is idempotent, so
+    # re-tracking an already-tracked file is a no-op (dedup in track()).
+    path = args.get("path")
+    return {path} if isinstance(path, str) and path else set()
+
+
+def _extract_paths_from_terminal(args: Dict[str, Any], result: str) -> Set[str]:
+    """Best-effort: pull candidate filesystem paths from a terminal command
+    and its output, then let ``guess_category`` / ``is_safe_path`` filter.
+    """
+    paths: Set[str] = set()
+    cmd = args.get("command") or ""
+    if isinstance(cmd, str) and cmd:
+        # Tokenise the command — catches `touch /tmp/hermes-x/test_foo.py`
+        try:
+            for tok in shlex.split(cmd, posix=True):
+                if tok.startswith(("/", "~")):
+                    paths.add(tok)
+        except ValueError:
+            pass
+    # Only scan the result text if it's a reasonable size (avoid 50KB dumps).
+    if isinstance(result, str) and len(result) < 4096:
+        for match in _TERMINAL_PATH_REGEX.findall(result):
+            paths.add(match)
+    return paths
+
+
+# ---------------------------------------------------------------------------
+# Hooks
+# ---------------------------------------------------------------------------
+
+def _on_post_tool_call(
+    tool_name: str = "",
+    args: Optional[Dict[str, Any]] = None,
+    result: Any = None,
+    task_id: str = "",
+    session_id: str = "",
+    tool_call_id: str = "",
+    **_: Any,
+) -> None:
+    """Auto-track ephemeral files created by recent tool calls."""
+    if not isinstance(args, dict):
+        return
+
+    candidates: Set[str] = set()
+    if tool_name == "write_file":
+        candidates = _extract_paths_from_write_file(args)
+    elif tool_name == "patch":
+        candidates = _extract_paths_from_patch(args)
+    elif tool_name == "terminal":
+        candidates = _extract_paths_from_terminal(args, result if isinstance(result, str) else "")
+    else:
+        return
+
+    for path_str in candidates:
+        _attempt_track(path_str, task_id, session_id)
+
+
+def _on_session_end(
+    session_id: str = "",
+    completed: bool = True,
+    interrupted: bool = False,
+    **_: Any,
+) -> None:
+    """Run quick cleanup if any test files were tracked during this turn."""
+    # Drain both task-level and session-level buckets.  In practice only one
+    # is populated per turn; the other is empty.
+    drained_session = _drain("", session_id)
+    # Also drain any task-scoped buckets that happen to exist.  This is a
+    # cheap sweep: if an agent spawned subagents (each with their own
+    # task_id) they'll have recorded into separate buckets; we want to
+    # cleanup them all at session end.
+    with _lock:
+        task_buckets = list(_recent_test_tracks.keys())
+    for key in task_buckets:
+        if key and key != session_id:
+            _recent_test_tracks.pop(key, None)
+
+    if not drained_session and not task_buckets:
+        return
+
+    try:
+        summary = dg.quick()
+    except Exception as exc:
+        logger.debug("disk-cleanup quick cleanup failed: %s", exc)
+        return
+
+    if summary["deleted"] or summary["empty_dirs"]:
+        dg._log(
+            f"AUTO_QUICK (session_end): deleted={summary['deleted']} "
+            f"dirs={summary['empty_dirs']} freed={dg.fmt_size(summary['freed'])}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Slash command
+# ---------------------------------------------------------------------------
+
+_HELP_TEXT = """\
+/disk-cleanup — ephemeral-file cleanup
+
+Subcommands:
+  status                     Per-category breakdown + top-10 largest
+  dry-run                    Preview what quick/deep would delete
+  quick                      Run safe cleanup now (no prompts)
+  deep                       Run quick, then list items that need prompts
+  track <path> <category>    Manually add a path to tracking
+  forget <path>              Stop tracking a path (does not delete)
+
+Categories: temp | test | research | download | chrome-profile | cron-output | other
+
+All operations are scoped to HERMES_HOME and /tmp/hermes-*.
+Test files are auto-tracked on write_file / terminal and auto-cleaned at session end.
+"""
+
+
+def _fmt_summary(summary: Dict[str, Any]) -> str:
+    base = (
+        f"[disk-cleanup] Cleaned {summary['deleted']} files + "
+        f"{summary['empty_dirs']} empty dirs, freed {dg.fmt_size(summary['freed'])}."
+    )
+    if summary.get("errors"):
+        base += f"\n  {len(summary['errors'])} error(s); see cleanup.log."
+    return base
+
+
+def _handle_slash(raw_args: str) -> Optional[str]:
+    argv = raw_args.strip().split()
+    if not argv or argv[0] in ("help", "-h", "--help"):
+        return _HELP_TEXT
+
+    sub = argv[0]
+
+    if sub == "status":
+        return dg.format_status(dg.status())
+
+    if sub == "dry-run":
+        auto, prompt = dg.dry_run()
+        auto_size = sum(i["size"] for i in auto)
+        prompt_size = sum(i["size"] for i in prompt)
+        lines = [
+            "Dry-run preview (nothing deleted):",
+            f"  Auto-delete : {len(auto)} files ({dg.fmt_size(auto_size)})",
+        ]
+        for item in auto:
+            lines.append(f"    [{item['category']}] {item['path']}")
+        lines.append(
+            f"  Needs prompt: {len(prompt)} files ({dg.fmt_size(prompt_size)})"
+        )
+        for item in prompt:
+            lines.append(f"    [{item['category']}] {item['path']}")
+        lines.append(
+            f"\n  Total potential: {dg.fmt_size(auto_size + prompt_size)}"
+        )
+        return "\n".join(lines)
+
+    if sub == "quick":
+        return _fmt_summary(dg.quick())
+
+    if sub == "deep":
+        # In-session deep can't prompt the user interactively — show what
+        # quick cleaned plus the items that WOULD need confirmation.
+        quick_summary = dg.quick()
+        _auto, prompt_items = dg.dry_run()
+        lines = [_fmt_summary(quick_summary)]
+        if prompt_items:
+            size = sum(i["size"] for i in prompt_items)
+            lines.append(
+                f"\n{len(prompt_items)} item(s) need confirmation "
+                f"({dg.fmt_size(size)}):"
+            )
+            for item in prompt_items:
+                lines.append(f"  [{item['category']}] {item['path']}")
+            lines.append(
+                "\nRun `/disk-cleanup forget <path>` to skip, or delete "
+                "manually via terminal."
+            )
+        return "\n".join(lines)
+
+    if sub == "track":
+        if len(argv) < 3:
+            return "Usage: /disk-cleanup track <path> <category>"
+        path_arg = argv[1]
+        category = argv[2]
+        if category not in dg.ALLOWED_CATEGORIES:
+            return (
+                f"Unknown category '{category}'. "
+                f"Allowed: {sorted(dg.ALLOWED_CATEGORIES)}"
+            )
+        if dg.track(path_arg, category, silent=True):
+            return f"Tracked {path_arg} as '{category}'."
+        return (
+            f"Not tracked (already present, missing, or outside HERMES_HOME): "
+            f"{path_arg}"
+        )
+
+    if sub == "forget":
+        if len(argv) < 2:
+            return "Usage: /disk-cleanup forget <path>"
+        n = dg.forget(argv[1])
+        return (
+            f"Removed {n} tracking entr{'y' if n == 1 else 'ies'} for {argv[1]}."
+            if n else f"Not found in tracking: {argv[1]}"
+        )
+
+    return f"Unknown subcommand: {sub}\n\n{_HELP_TEXT}"
+
+
+# ---------------------------------------------------------------------------
+# Plugin registration
+# ---------------------------------------------------------------------------
+
+def register(ctx) -> None:
+    ctx.register_hook("post_tool_call", _on_post_tool_call)
+    ctx.register_hook("on_session_end", _on_session_end)
+    ctx.register_command(
+        "disk-cleanup",
+        handler=_handle_slash,
+        description="Track and clean up ephemeral Hermes session files.",
+    )
@@ -0,0 +1,496 @@
+"""disk_cleanup — ephemeral file cleanup for Hermes Agent.
+
+Library module wrapping the deterministic cleanup rules written by
+@LVT382009 in PR #12212. The plugin ``__init__.py`` wires these
+functions into ``post_tool_call`` and ``on_session_end`` hooks so
+tracking and cleanup happen automatically — the agent never needs to
+call a tool or remember a skill.
+
+Rules:
+  - test files    → delete immediately at task end (age >= 0)
+  - temp files    → delete after 7 days
+  - cron-output   → delete after 14 days
+  - empty dirs    → always delete (under HERMES_HOME)
+  - research      → keep 10 newest, prompt for older (deep only)
+  - chrome-profile→ prompt after 14 days (deep only)
+  - >500 MB files → prompt always (deep only)
+
+Scope: strictly HERMES_HOME and /tmp/hermes-*
+Never touches: ~/.hermes/logs/ or any system directory.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import shutil
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+try:
+    from hermes_constants import get_hermes_home
+except Exception:  # pragma: no cover — plugin may load before constants resolves
+    import os
+
+    def get_hermes_home() -> Path:  # type: ignore[no-redef]
+        val = (os.environ.get("HERMES_HOME") or "").strip()
+        return Path(val).resolve() if val else (Path.home() / ".hermes").resolve()
+
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Paths
+# ---------------------------------------------------------------------------
+
+def get_state_dir() -> Path:
+    """State dir — separate from ``$HERMES_HOME/logs/``."""
+    return get_hermes_home() / "disk-cleanup"
+
+
+def get_tracked_file() -> Path:
+    return get_state_dir() / "tracked.json"
+
+
+def get_log_file() -> Path:
+    """Audit log — intentionally NOT under ``$HERMES_HOME/logs/``."""
+    return get_state_dir() / "cleanup.log"
+
+
+# ---------------------------------------------------------------------------
+# Path safety
+# ---------------------------------------------------------------------------
+
+def is_safe_path(path: Path) -> bool:
+    """Accept only paths under HERMES_HOME or ``/tmp/hermes-*``.
+
+    Rejects Windows mounts (``/mnt/c`` etc.) and any system directory.
+    """
+    hermes_home = get_hermes_home()
+    try:
+        path.resolve().relative_to(hermes_home)
+        return True
+    except (ValueError, OSError):
+        pass
+    # Allow /tmp/hermes-* explicitly
+    parts = path.parts
+    if len(parts) >= 3 and parts[1] == "tmp" and parts[2].startswith("hermes-"):
+        return True
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Audit log
+# ---------------------------------------------------------------------------
+
+def _log(message: str) -> None:
+    try:
+        log_file = get_log_file()
+        log_file.parent.mkdir(parents=True, exist_ok=True)
+        ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+        with open(log_file, "a") as f:
+            f.write(f"[{ts}] {message}\n")
+    except OSError:
+        # Never let the audit log break the agent loop.
+        pass
+
+
+# ---------------------------------------------------------------------------
+# tracked.json — atomic read/write, backup scoped to tracked.json only
+# ---------------------------------------------------------------------------
+
+def load_tracked() -> List[Dict[str, Any]]:
+    """Load tracked.json.  Restores from ``.bak`` on corruption."""
+    tf = get_tracked_file()
+    tf.parent.mkdir(parents=True, exist_ok=True)
+
+    if not tf.exists():
+        return []
+
+    try:
+        return json.loads(tf.read_text())
+    except (json.JSONDecodeError, ValueError):
+        bak = tf.with_suffix(".json.bak")
+        if bak.exists():
+            try:
+                data = json.loads(bak.read_text())
+                _log("WARN: tracked.json corrupted — restored from .bak")
+                return data
+            except Exception:
+                pass
+        _log("WARN: tracked.json corrupted, no backup — starting fresh")
+        return []
+
+
+def save_tracked(tracked: List[Dict[str, Any]]) -> None:
+    """Atomic write: ``.tmp`` → backup old → rename."""
+    tf = get_tracked_file()
+    tf.parent.mkdir(parents=True, exist_ok=True)
+    tmp = tf.with_suffix(".json.tmp")
+    tmp.write_text(json.dumps(tracked, indent=2))
+    if tf.exists():
+        shutil.copy2(tf, tf.with_suffix(".json.bak"))
+    tmp.replace(tf)
+
+
+# ---------------------------------------------------------------------------
+# Categories
+# ---------------------------------------------------------------------------
+
+ALLOWED_CATEGORIES = {
+    "temp", "test", "research", "download",
+    "chrome-profile", "cron-output", "other",
+}
+
+
+def fmt_size(n: float) -> str:
+    for unit in ("B", "KB", "MB", "GB", "TB"):
+        if n < 1024:
+            return f"{n:.1f} {unit}"
+        n /= 1024
+    return f"{n:.1f} PB"
+
+
+# ---------------------------------------------------------------------------
+# Track / forget
+# ---------------------------------------------------------------------------
+
+def track(path_str: str, category: str, silent: bool = False) -> bool:
+    """Register a file for tracking. Returns True if newly tracked."""
+    if category not in ALLOWED_CATEGORIES:
+        _log(f"WARN: unknown category '{category}', using 'other'")
+        category = "other"
+
+    path = Path(path_str).resolve()
+
+    if not path.exists():
+        _log(f"SKIP: {path} (does not exist)")
+        return False
+
+    if not is_safe_path(path):
+        _log(f"REJECT: {path} (outside HERMES_HOME)")
+        return False
+
+    size = path.stat().st_size if path.is_file() else 0
+    tracked = load_tracked()
+
+    # Deduplicate
+    if any(item["path"] == str(path) for item in tracked):
+        return False
+
+    tracked.append({
+        "path": str(path),
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "category": category,
+        "size": size,
+    })
+    save_tracked(tracked)
+    _log(f"TRACKED: {path} ({category}, {fmt_size(size)})")
+    if not silent:
+        print(f"Tracked: {path} ({category}, {fmt_size(size)})")
+    return True
+
+
+def forget(path_str: str) -> int:
+    """Remove a path from tracking without deleting the file."""
+    p = Path(path_str).resolve()
+    tracked = load_tracked()
+    before = len(tracked)
+    tracked = [i for i in tracked if Path(i["path"]).resolve() != p]
+    removed = before - len(tracked)
+    if removed:
+        save_tracked(tracked)
+        _log(f"FORGOT: {p} ({removed} entries)")
+    return removed
+
+
+# ---------------------------------------------------------------------------
+# Dry run
+# ---------------------------------------------------------------------------
+
+def dry_run() -> Tuple[List[Dict], List[Dict]]:
+    """Return (auto_delete_list, needs_prompt_list) without touching files."""
+    tracked = load_tracked()
+    now = datetime.now(timezone.utc)
+
+    auto: List[Dict] = []
+    prompt: List[Dict] = []
+
+    for item in tracked:
+        p = Path(item["path"])
+        if not p.exists():
+            continue
+        age = (now - datetime.fromisoformat(item["timestamp"])).days
+        cat = item["category"]
+        size = item["size"]
+
+        if cat == "test":
+            auto.append(item)
+        elif cat == "temp" and age > 7:
+            auto.append(item)
+        elif cat == "cron-output" and age > 14:
+            auto.append(item)
+        elif cat == "research" and age > 30:
+            prompt.append(item)
+        elif cat == "chrome-profile" and age > 14:
+            prompt.append(item)
+        elif size > 500 * 1024 * 1024:
+            prompt.append(item)
+
+    return auto, prompt
+
+
+# ---------------------------------------------------------------------------
+# Quick cleanup
+# ---------------------------------------------------------------------------
+
+def quick() -> Dict[str, Any]:
+    """Safe deterministic cleanup — no prompts.
+
+    Returns: ``{"deleted": N, "empty_dirs": N, "freed": bytes,
+               "errors": [str, ...]}``.
+    """
+    tracked = load_tracked()
+    now = datetime.now(timezone.utc)
+    deleted = 0
+    freed = 0
+    new_tracked: List[Dict] = []
+    errors: List[str] = []
+
+    for item in tracked:
+        p = Path(item["path"])
+        cat = item["category"]
+
+        if not p.exists():
+            _log(f"STALE: {p} (removed from tracking)")
+            continue
+
+        age = (now - datetime.fromisoformat(item["timestamp"])).days
+
+        should_delete = (
+            cat == "test"
+            or (cat == "temp" and age > 7)
+            or (cat == "cron-output" and age > 14)
+        )
+
+        if should_delete:
+            try:
+                if p.is_file():
+                    p.unlink()
+                elif p.is_dir():
+                    shutil.rmtree(p)
+                freed += item["size"]
+                deleted += 1
+                _log(f"DELETED: {p} ({cat}, {fmt_size(item['size'])})")
+            except OSError as e:
+                _log(f"ERROR deleting {p}: {e}")
+                errors.append(f"{p}: {e}")
+                new_tracked.append(item)
+        else:
+            new_tracked.append(item)
+
+    # Remove empty dirs under HERMES_HOME (but leave HERMES_HOME itself and
+    # a short list of well-known top-level state dirs alone — a fresh install
+    # has these empty, and deleting them would surprise the user).
+    hermes_home = get_hermes_home()
+    _PROTECTED_TOP_LEVEL = {
+        "logs", "memories", "sessions", "cron", "cronjobs",
+        "cache", "skills", "plugins", "disk-cleanup", "optional-skills",
+        "hermes-agent", "backups", "profiles", ".worktrees",
+    }
+    empty_removed = 0
+    try:
+        for dirpath in sorted(hermes_home.rglob("*"), reverse=True):
+            if not dirpath.is_dir() or dirpath == hermes_home:
+                continue
+            try:
+                rel_parts = dirpath.relative_to(hermes_home).parts
+            except ValueError:
+                continue
+            # Skip the well-known top-level state dirs themselves.
+            if len(rel_parts) == 1 and rel_parts[0] in _PROTECTED_TOP_LEVEL:
+                continue
+            try:
+                if not any(dirpath.iterdir()):
+                    dirpath.rmdir()
+                    empty_removed += 1
+                    _log(f"DELETED: {dirpath} (empty dir)")
+            except OSError:
+                pass
+    except OSError:
+        pass
+
+    save_tracked(new_tracked)
+    _log(
+        f"QUICK_SUMMARY: {deleted} files, {empty_removed} dirs, "
+        f"{fmt_size(freed)}"
+    )
+    return {
+        "deleted": deleted,
+        "empty_dirs": empty_removed,
+        "freed": freed,
+        "errors": errors,
+    }
+
+
+# ---------------------------------------------------------------------------
+# Deep cleanup (interactive — not called from plugin hooks)
+# ---------------------------------------------------------------------------
+
+def deep(
+    confirm: Optional[callable] = None,
+) -> Dict[str, Any]:
+    """Deep cleanup.
+
+    Runs :func:`quick` first, then asks the *confirm* callable for each
+    risky item (research > 30d beyond 10 newest, chrome-profile > 14d,
+    any file > 500 MB).  *confirm(item)* must return True to delete.
+
+    Returns: ``{"quick": {...}, "deep_deleted": N, "deep_freed": bytes}``.
+    """
+    quick_result = quick()
+
+    if confirm is None:
+        # No interactive confirmer — deep stops after the quick pass.
+        return {"quick": quick_result, "deep_deleted": 0, "deep_freed": 0}
+
+    tracked = load_tracked()
+    now = datetime.now(timezone.utc)
+    research, chrome, large = [], [], []
+
+    for item in tracked:
+        p = Path(item["path"])
+        if not p.exists():
+            continue
+        age = (now - datetime.fromisoformat(item["timestamp"])).days
+        cat = item["category"]
+
+        if cat == "research" and age > 30:
+            research.append(item)
+        elif cat == "chrome-profile" and age > 14:
+            chrome.append(item)
+        elif item["size"] > 500 * 1024 * 1024:
+            large.append(item)
+
+    research.sort(key=lambda x: x["timestamp"], reverse=True)
+    old_research = research[10:]
+
+    freed, count = 0, 0
+    to_remove: List[Dict] = []
+
+    for group in (old_research, chrome, large):
+        for item in group:
+            if confirm(item):
+                try:
+                    p = Path(item["path"])
+                    if p.is_file():
+                        p.unlink()
+                    elif p.is_dir():
+                        shutil.rmtree(p)
+                    to_remove.append(item)
+                    freed += item["size"]
+                    count += 1
+                    _log(
+                        f"DELETED: {p} ({item['category']}, "
+                        f"{fmt_size(item['size'])})"
+                    )
+                except OSError as e:
+                    _log(f"ERROR deleting {item['path']}: {e}")
+
+    if to_remove:
+        remove_paths = {i["path"] for i in to_remove}
+        save_tracked([i for i in tracked if i["path"] not in remove_paths])
+
+    return {"quick": quick_result, "deep_deleted": count, "deep_freed": freed}
+
+
+# ---------------------------------------------------------------------------
+# Status
+# ---------------------------------------------------------------------------
+
+def status() -> Dict[str, Any]:
+    """Return per-category breakdown and top 10 largest tracked files."""
+    tracked = load_tracked()
+    cats: Dict[str, Dict] = {}
+    for item in tracked:
+        c = item["category"]
+        cats.setdefault(c, {"count": 0, "size": 0})
+        cats[c]["count"] += 1
+        cats[c]["size"] += item["size"]
+
+    existing = [
+        (i["path"], i["size"], i["category"])
+        for i in tracked if Path(i["path"]).exists()
+    ]
+    existing.sort(key=lambda x: x[1], reverse=True)
+
+    return {
+        "categories": cats,
+        "top10": existing[:10],
+        "total_tracked": len(tracked),
+    }
+
+
+def format_status(s: Dict[str, Any]) -> str:
+    """Human-readable status string (for slash command output)."""
+    lines = [f"{'Category':<20} {'Files':>6}  {'Size':>10}", "-" * 40]
+    cats = s["categories"]
+    for cat, d in sorted(cats.items(), key=lambda x: x[1]["size"], reverse=True):
+        lines.append(f"{cat:<20} {d['count']:>6}  {fmt_size(d['size']):>10}")
+
+    if not cats:
+        lines.append("(nothing tracked yet)")
+
+    lines.append("")
+    lines.append("Top 10 largest tracked files:")
+    if not s["top10"]:
+        lines.append("  (none)")
+    else:
+        for rank, (path, size, cat) in enumerate(s["top10"], 1):
+            lines.append(f"  {rank:>2}. {fmt_size(size):>8}  [{cat}]  {path}")
+    return "\n".join(lines)
+
+
+# ---------------------------------------------------------------------------
+# Auto-categorisation from tool-call inspection
+# ---------------------------------------------------------------------------
+
+_TEST_PATTERNS = ("test_", "tmp_")
+_TEST_SUFFIXES = (".test.py", ".test.js", ".test.ts", ".test.md")
+
+
+def guess_category(path: Path) -> Optional[str]:
+    """Return a category label for *path*, or None if we shouldn't track it.
+
+    Used by the ``post_tool_call`` hook to auto-track ephemeral files.
+    """
+    if not is_safe_path(path):
+        return None
+
+    # Skip the state dir itself, logs, memory files, sessions, config.
+    hermes_home = get_hermes_home()
+    try:
+        rel = path.resolve().relative_to(hermes_home)
+        top = rel.parts[0] if rel.parts else ""
+        if top in {
+            "disk-cleanup", "logs", "memories", "sessions", "config.yaml",
+            "skills", "plugins", ".env", "USER.md", "MEMORY.md", "SOUL.md",
+            "auth.json", "hermes-agent",
+        }:
+            return None
+        if top == "cron" or top == "cronjobs":
+            return "cron-output"
+        if top == "cache":
+            return "temp"
+    except ValueError:
+        # Path isn't under HERMES_HOME (e.g. /tmp/hermes-*) — fall through.
+        pass
+
+    name = path.name
+    if name.startswith(_TEST_PATTERNS):
+        return "test"
+    if any(name.endswith(sfx) for sfx in _TEST_SUFFIXES):
+        return "test"
+    return None
@@ -0,0 +1,7 @@
+name: disk-cleanup
+version: 2.0.0
+description: "Auto-track and clean up ephemeral files (test scripts, temp outputs, cron logs) created during Hermes sessions. Runs via plugin hooks — no agent action required."
+author: "@LVT382009 (original), NousResearch (plugin port)"
+hooks:
+  - post_tool_call
+  - on_session_end
@@ -0,0 +1,303 @@
+"""OpenAI image generation backend.
+
+Exposes OpenAI's ``gpt-image-2`` model at three quality tiers as an
+:class:`ImageGenProvider` implementation. The tiers are implemented as
+three virtual model IDs so the ``hermes tools`` model picker and the
+``image_gen.model`` config key behave like any other multi-model backend:
+
+    gpt-image-2-low     ~15s   fastest, good for iteration
+    gpt-image-2-medium  ~40s   default — balanced
+    gpt-image-2-high    ~2min  slowest, highest fidelity
+
+All three hit the same underlying API model (``gpt-image-2``) with a
+different ``quality`` parameter. Output is base64 JSON → saved under
+``$HERMES_HOME/cache/images/``.
+
+Selection precedence (first hit wins):
+
+1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
+2. ``image_gen.openai.model`` in ``config.yaml``
+3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs)
+4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium``
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Dict, List, Optional, Tuple
+
+from agent.image_gen_provider import (
+    DEFAULT_ASPECT_RATIO,
+    ImageGenProvider,
+    error_response,
+    resolve_aspect_ratio,
+    save_b64_image,
+    success_response,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Model catalog
+# ---------------------------------------------------------------------------
+#
+# All three IDs resolve to the same underlying API model with a different
+# ``quality`` setting. ``api_model`` is what gets sent to OpenAI;
+# ``quality`` is the knob that changes generation time and output fidelity.
+
+API_MODEL = "gpt-image-2"
+
+_MODELS: Dict[str, Dict[str, Any]] = {
+    "gpt-image-2-low": {
+        "display": "GPT Image 2 (Low)",
+        "speed": "~15s",
+        "strengths": "Fast iteration, lowest cost",
+        "quality": "low",
+    },
+    "gpt-image-2-medium": {
+        "display": "GPT Image 2 (Medium)",
+        "speed": "~40s",
+        "strengths": "Balanced — default",
+        "quality": "medium",
+    },
+    "gpt-image-2-high": {
+        "display": "GPT Image 2 (High)",
+        "speed": "~2min",
+        "strengths": "Highest fidelity, strongest prompt adherence",
+        "quality": "high",
+    },
+}
+
+DEFAULT_MODEL = "gpt-image-2-medium"
+
+_SIZES = {
+    "landscape": "1536x1024",
+    "square": "1024x1024",
+    "portrait": "1024x1536",
+}
+
+
+def _load_openai_config() -> Dict[str, Any]:
+    """Read ``image_gen`` from config.yaml (returns {} on any failure)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        return section if isinstance(section, dict) else {}
+    except Exception as exc:
+        logger.debug("Could not load image_gen config: %s", exc)
+        return {}
+
+
+def _resolve_model() -> Tuple[str, Dict[str, Any]]:
+    """Decide which tier to use and return ``(model_id, meta)``."""
+    env_override = os.environ.get("OPENAI_IMAGE_MODEL")
+    if env_override and env_override in _MODELS:
+        return env_override, _MODELS[env_override]
+
+    cfg = _load_openai_config()
+    openai_cfg = cfg.get("openai") if isinstance(cfg.get("openai"), dict) else {}
+    candidate: Optional[str] = None
+    if isinstance(openai_cfg, dict):
+        value = openai_cfg.get("model")
+        if isinstance(value, str) and value in _MODELS:
+            candidate = value
+    if candidate is None:
+        top = cfg.get("model")
+        if isinstance(top, str) and top in _MODELS:
+            candidate = top
+
+    if candidate is not None:
+        return candidate, _MODELS[candidate]
+
+    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
+
+
+# ---------------------------------------------------------------------------
+# Provider
+# ---------------------------------------------------------------------------
+
+
+class OpenAIImageGenProvider(ImageGenProvider):
+    """OpenAI ``images.generate`` backend — gpt-image-2 at low/medium/high."""
+
+    @property
+    def name(self) -> str:
+        return "openai"
+
+    @property
+    def display_name(self) -> str:
+        return "OpenAI"
+
+    def is_available(self) -> bool:
+        if not os.environ.get("OPENAI_API_KEY"):
+            return False
+        try:
+            import openai  # noqa: F401
+        except ImportError:
+            return False
+        return True
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        return [
+            {
+                "id": model_id,
+                "display": meta["display"],
+                "speed": meta["speed"],
+                "strengths": meta["strengths"],
+                "price": "varies",
+            }
+            for model_id, meta in _MODELS.items()
+        ]
+
+    def default_model(self) -> Optional[str]:
+        return DEFAULT_MODEL
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        return {
+            "name": "OpenAI",
+            "badge": "paid",
+            "tag": "gpt-image-2 at low/medium/high quality tiers",
+            "env_vars": [
+                {
+                    "key": "OPENAI_API_KEY",
+                    "prompt": "OpenAI API key",
+                    "url": "https://platform.openai.com/api-keys",
+                },
+            ],
+        }
+
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        prompt = (prompt or "").strip()
+        aspect = resolve_aspect_ratio(aspect_ratio)
+
+        if not prompt:
+            return error_response(
+                error="Prompt is required and must be a non-empty string",
+                error_type="invalid_argument",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        if not os.environ.get("OPENAI_API_KEY"):
+            return error_response(
+                error=(
+                    "OPENAI_API_KEY not set. Run `hermes tools` → Image "
+                    "Generation → OpenAI to configure, or `hermes setup` "
+                    "to add the key."
+                ),
+                error_type="auth_required",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        try:
+            import openai
+        except ImportError:
+            return error_response(
+                error="openai Python package not installed (pip install openai)",
+                error_type="missing_dependency",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        tier_id, meta = _resolve_model()
+        size = _SIZES.get(aspect, _SIZES["square"])
+
+        # gpt-image-2 returns b64_json unconditionally and REJECTS
+        # ``response_format`` as an unknown parameter. Don't send it.
+        payload: Dict[str, Any] = {
+            "model": API_MODEL,
+            "prompt": prompt,
+            "size": size,
+            "n": 1,
+            "quality": meta["quality"],
+        }
+
+        try:
+            client = openai.OpenAI()
+            response = client.images.generate(**payload)
+        except Exception as exc:
+            logger.debug("OpenAI image generation failed", exc_info=True)
+            return error_response(
+                error=f"OpenAI image generation failed: {exc}",
+                error_type="api_error",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        data = getattr(response, "data", None) or []
+        if not data:
+            return error_response(
+                error="OpenAI returned no image data",
+                error_type="empty_response",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        first = data[0]
+        b64 = getattr(first, "b64_json", None)
+        url = getattr(first, "url", None)
+        revised_prompt = getattr(first, "revised_prompt", None)
+
+        if b64:
+            try:
+                saved_path = save_b64_image(b64, prefix=f"openai_{tier_id}")
+            except Exception as exc:
+                return error_response(
+                    error=f"Could not save image to cache: {exc}",
+                    error_type="io_error",
+                    provider="openai",
+                    model=tier_id,
+                    prompt=prompt,
+                    aspect_ratio=aspect,
+                )
+            image_ref = str(saved_path)
+        elif url:
+            # Defensive — gpt-image-2 returns b64 today, but fall back
+            # gracefully if the API ever changes.
+            image_ref = url
+        else:
+            return error_response(
+                error="OpenAI response contained neither b64_json nor URL",
+                error_type="empty_response",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        extra: Dict[str, Any] = {"size": size, "quality": meta["quality"]}
+        if revised_prompt:
+            extra["revised_prompt"] = revised_prompt
+
+        return success_response(
+            image=image_ref,
+            model=tier_id,
+            prompt=prompt,
+            aspect_ratio=aspect,
+            provider="openai",
+            extra=extra,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+
+def register(ctx) -> None:
+    """Plugin entry point — wire ``OpenAIImageGenProvider`` into the registry."""
+    ctx.register_image_gen_provider(OpenAIImageGenProvider())
@@ -0,0 +1,7 @@
+name: openai
+version: 1.0.0
+description: "OpenAI image generation backend (gpt-image-2). Saves generated images to $HERMES_HOME/cache/images/."
+author: NousResearch
+kind: backend
+requires_env:
+  - OPENAI_API_KEY
@@ -1,36 +0,0 @@
-# NOTE: This file is maintained for convenience only.
-# The canonical dependency list is in pyproject.toml.
-# Preferred install: pip install -e ".[all]"
-
-# Core dependencies
-openai
-python-dotenv
-fire
-httpx
-rich
-tenacity
-prompt_toolkit
-pyyaml
-requests
-jinja2
-pydantic>=2.0
-PyJWT[crypto]
-debugpy
-
-# Web tools
-firecrawl-py
-parallel-web>=0.4.2
-
-# Image generation
-fal-client
-
-# Text-to-speech (Edge TTS is free, no API key needed)
-edge-tts
-
-# Optional: For cron expression parsing (cronjob scheduling)
-croniter
-
-# Optional: For messaging platform integrations (gateway)
-python-telegram-bot[webhooks]>=22.6
-discord.py>=2.0
-aiohttp>=3.9.0
@@ -630,7 +630,7 @@ function Copy-ConfigTemplates {
    New-Item -ItemType Directory -Force -Path "$HermesHome\audio_cache" | Out-Null
    New-Item -ItemType Directory -Force -Path "$HermesHome\memories" | Out-Null
    New-Item -ItemType Directory -Force -Path "$HermesHome\skills" | Out-Null
-    New-Item -ItemType Directory -Force -Path "$HermesHome\whatsapp\session" | Out-Null
+
    
    # Create .env
    $envPath = "$HermesHome\.env"
@@ -735,19 +735,7 @@ function Install-NodeDeps {
        Pop-Location
    }

-    # Install WhatsApp bridge dependencies
-    $bridgeDir = "$InstallDir\scripts\whatsapp-bridge"
-    if (Test-Path "$bridgeDir\package.json") {
-        Write-Info "Installing WhatsApp bridge dependencies..."
-        Push-Location $bridgeDir
-        try {
-            npm install --silent 2>&1 | Out-Null
-            Write-Success "WhatsApp bridge dependencies installed"
-        } catch {
-            Write-Warn "WhatsApp bridge npm install failed (WhatsApp may not work)"
-        }
-        Pop-Location
-    }
+
    
    Pop-Location
 }
@@ -297,7 +297,7 @@ check_python() {
        if command -v python >/dev/null 2>&1; then
            PYTHON_PATH="$(command -v python)"
            if "$PYTHON_PATH" -c 'import sys; raise SystemExit(0 if sys.version_info >= (3, 11) else 1)' 2>/dev/null; then
-                PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null)
+                PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
                log_success "Python found: $PYTHON_FOUND_VERSION"
                return 0
            fi
@@ -306,7 +306,7 @@ check_python() {
        log_info "Installing Python via pkg..."
        pkg install -y python >/dev/null
        PYTHON_PATH="$(command -v python)"
-        PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null)
+        PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
        log_success "Python installed: $PYTHON_FOUND_VERSION"
        return 0
    fi
@@ -315,18 +315,17 @@ check_python() {

    # Let uv handle Python — it can download and manage Python versions
    # First check if a suitable Python is already available
-    if $UV_CMD python find "$PYTHON_VERSION" &> /dev/null; then
-        PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION")
-        PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null)
+    if PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"; then
+        PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
        log_success "Python found: $PYTHON_FOUND_VERSION"
        return 0
    fi

    # Python not found — use uv to install it (no sudo needed!)
    log_info "Python $PYTHON_VERSION not found, installing via uv..."
-    if $UV_CMD python install "$PYTHON_VERSION"; then
-        PYTHON_PATH=$($UV_CMD python find "$PYTHON_VERSION")
-        PYTHON_FOUND_VERSION=$($PYTHON_PATH --version 2>/dev/null)
+    if "$UV_CMD" python install "$PYTHON_VERSION"; then
+        PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION")"
+        PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
        log_success "Python installed: $PYTHON_FOUND_VERSION"
    else
        log_error "Failed to install Python $PYTHON_VERSION"
@@ -1052,7 +1051,7 @@ copy_config_templates() {
    log_info "Setting up configuration files..."

    # Create ~/.hermes directory structure (config at top level, code in subdir)
-    mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills,whatsapp/session}
+    mkdir -p "$HERMES_HOME"/{cron,sessions,logs,pairing,hooks,image_cache,audio_cache,memories,skills}

    # Create .env at ~/.hermes/.env (top level, easy to find)
    if [ ! -f "$HERMES_HOME/.env" ]; then
@@ -1122,7 +1121,7 @@ install_node_deps() {

    if [ "$DISTRO" = "termux" ]; then
        log_info "Skipping automatic Node/browser dependency setup on Termux"
-        log_info "Browser automation and WhatsApp bridge are not part of the tested Termux install path yet."
+        log_info "Browser automation is not part of the tested Termux install path yet."
        log_info "If you want to experiment manually later, run: cd $INSTALL_DIR && npm install"
        return 0
    fi
@@ -1204,15 +1203,7 @@ install_node_deps() {
        log_success "TUI dependencies installed"
    fi

-    # Install WhatsApp bridge dependencies
-    if [ -f "$INSTALL_DIR/scripts/whatsapp-bridge/package.json" ]; then
-        log_info "Installing WhatsApp bridge dependencies..."
-        cd "$INSTALL_DIR/scripts/whatsapp-bridge"
-        npm install --silent 2>/dev/null || {
-            log_warn "WhatsApp bridge npm install failed (WhatsApp may not work)"
-        }
-        log_success "WhatsApp bridge dependencies installed"
-    fi
+
 }

 run_setup_wizard() {
@@ -44,8 +44,11 @@ AUTHOR_MAP = {
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
    # contributors (from noreply pattern)
+    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
    "snreynolds2506@gmail.com": "snreynolds",
    "35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
+    "71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo",
+    "massivemassimo@users.noreply.github.com": "MassiveMassimo",
    "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
    "kshitijk4poor@users.noreply.github.com": "kshitijk4poor",
    "kshitijk4poor@gmail.com": "kshitijk4poor",
@@ -53,6 +56,9 @@ AUTHOR_MAP = {
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
    "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
    "valdi.jorge@gmail.com": "jvcl",
+    "francip@gmail.com": "francip",
+    "omni@comelse.com": "omnissiah-comelse",
+    "oussama.redcode@gmail.com": "mavrickdeveloper",
    "126368201+vilkasdev@users.noreply.github.com": "vilkasdev",
    "137614867+cutepawss@users.noreply.github.com": "cutepawss",
    "96793918+memosr@users.noreply.github.com": "memosr",
@@ -66,6 +72,8 @@ AUTHOR_MAP = {
    "104278804+Sertug17@users.noreply.github.com": "Sertug17",
    "112503481+caentzminger@users.noreply.github.com": "caentzminger",
    "258577966+voidborne-d@users.noreply.github.com": "voidborne-d",
+    "sir_even@icloud.com": "sirEven",
+    "36056348+sirEven@users.noreply.github.com": "sirEven",
    "70424851+insecurejezza@users.noreply.github.com": "insecurejezza",
    "254021826+dodo-reach@users.noreply.github.com": "dodo-reach",
    "259807879+Bartok9@users.noreply.github.com": "Bartok9",
@@ -77,8 +85,10 @@ AUTHOR_MAP = {
    "39405770+yyq4193@users.noreply.github.com": "yyq4193",
    "Asunfly@users.noreply.github.com": "Asunfly",
    "2500400+honghua@users.noreply.github.com": "honghua",
+    "462836+jplew@users.noreply.github.com": "jplew",
    "nish3451@users.noreply.github.com": "nish3451",
    "Mibayy@users.noreply.github.com": "Mibayy",
+    "mibayy@users.noreply.github.com": "Mibayy",
    "135070653+sgaofen@users.noreply.github.com": "sgaofen",
    "nocoo@users.noreply.github.com": "nocoo",
    "30841158+n-WN@users.noreply.github.com": "n-WN",
@@ -87,19 +97,25 @@ AUTHOR_MAP = {
    "i@troy-y.org": "TroyMitchell911",
    "mygamez@163.com": "zhongyueming1121",
    "hansnow@users.noreply.github.com": "hansnow",
+    "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
+    "ben.burtenshaw@gmail.com": "burtenshaw",
    # contributors (manual mapping from git names)
    "ahmedsherif95@gmail.com": "asheriif",
    "liujinkun@bytedance.com": "liujinkun2025",
    "dmayhem93@gmail.com": "dmahan93",
+    "fr@tecompanytea.com": "ifrederico",
+    "cdanis@gmail.com": "cdanis",
    "samherring99@gmail.com": "samherring99",
    "desaiaum08@gmail.com": "Aum08Desai",
    "shannon.sands.1979@gmail.com": "shannonsands",
    "shannon@nousresearch.com": "shannonsands",
+    "abdi.moya@gmail.com": "AxDSan",
    "eri@plasticlabs.ai": "Erosika",
    "hjcpuro@gmail.com": "hjc-puro",
    "xaydinoktay@gmail.com": "aydnOktay",
    "abdullahfarukozden@gmail.com": "Farukest",
    "lovre.pesut@gmail.com": "rovle",
+    "xjtumj@gmail.com": "mengjian-github",
    "kevinskysunny@gmail.com": "kevinskysunny",
    "xiewenxuan462@gmail.com": "yule975",
    "yiweimeng.dlut@hotmail.com": "meng93",
@@ -107,11 +123,14 @@ AUTHOR_MAP = {
    "linux2010@users.noreply.github.com": "Linux2010",
    "elmatadorgh@users.noreply.github.com": "elmatadorgh",
    "alexazzjjtt@163.com": "alexzhu0",
+    "1180176+Swift42@users.noreply.github.com": "Swift42",
    "ruzzgarcn@gmail.com": "Ruzzgar",
+    "yukipukikedy@gmail.com": "Yukipukii1",
    "alireza78.crypto@gmail.com": "alireza78a",
    "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson",
    "withapurpose37@gmail.com": "StefanIsMe",
    "4317663+helix4u@users.noreply.github.com": "helix4u",
+    "ifkellx@users.noreply.github.com": "Ifkellx",
    "331214+counterposition@users.noreply.github.com": "counterposition",
    "blspear@gmail.com": "BrennerSpear",
    "akhater@gmail.com": "akhater",
@@ -168,11 +187,15 @@ AUTHOR_MAP = {
    "simon@simonmarcus.org": "simon-marcus",
    "xowiekk@gmail.com": "Xowiek",
    "1243352777@qq.com": "zons-zhaozhy",
+    "e.silacandmr@gmail.com": "Es1la",
    # ── bulk addition: 75 emails resolved via API, PR salvage bodies, noreply
    #    crossref, and GH contributor list matching (April 2026 audit) ──
    "1115117931@qq.com": "aaronagent",
    "1506751656@qq.com": "hqhq1025",
    "364939526@qq.com": "luyao618",
+    "hgk324@gmail.com": "houziershi",
+    "176644217+PStarH@users.noreply.github.com": "PStarH",
+    "51058514+Sanjays2402@users.noreply.github.com": "Sanjays2402",
    "906014227@qq.com": "bingo906",
    "aaronwong1999@icloud.com": "AaronWong1999",
    "agents@kylefrench.dev": "DeployFaith",
@@ -193,6 +216,8 @@ AUTHOR_MAP = {
    "don.rhm@gmail.com": "donrhmexe",
    "dorukardahan@hotmail.com": "dorukardahan",
    "dsocolobsky@gmail.com": "dsocolobsky",
+    "dylan.socolobsky@lambdaclass.com": "dsocolobsky",
+    "ignacio.avecilla@lambdaclass.com": "IAvecilla",
    "duerzy@gmail.com": "duerzy",
    "emozilla@nousresearch.com": "emozilla",
    "fancydirty@gmail.com": "fancydirty",
@@ -280,6 +305,7 @@ AUTHOR_MAP = {
    "ywt000818@gmail.com": "OwenYWT",
    "dhandhalyabhavik@gmail.com": "v1k22",
    "rucchizhao@zhaochenfeideMacBook-Pro.local": "RucchiZ",
+    "tannerfokkens@Mac.attlocal.net": "tannerfokkens-maker",
    "lehaolin98@outlook.com": "LehaoLin",
    "yuewang1@microsoft.com": "imink",
    "1736355688@qq.com": "hedgeho9X",
@@ -290,6 +316,7 @@ AUTHOR_MAP = {
    "anthhub@163.com": "anthhub",
    "shenuu@gmail.com": "shenuu",
    "xiayh17@gmail.com": "xiayh0107",
+    "zhujianxyz@gmail.com": "opriz",
    "asurla@nvidia.com": "anniesurla",
    "limkuan24@gmail.com": "WideLee",
    "aviralarora002@gmail.com": "AviArora02-commits",
@@ -302,6 +329,12 @@ AUTHOR_MAP = {
    "261797239+lumenradley@users.noreply.github.com": "lumenradley",
    "166376523+sjz-ks@users.noreply.github.com": "sjz-ks",
    "haileymarshall005@gmail.com": "haileymarshall",
+    "aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80",
+    "zheng.jerilyn@gmail.com": "jerilynzheng",
+    "asslaenn5@gmail.com": "Aslaaen",
+    "shalompmc0505@naver.com": "pinion05",
+    "105142614+VTRiot@users.noreply.github.com": "VTRiot",
+    "vivien000812@gmail.com": "iamagenius00",
 }


--- a/Show More
+++ b/Show More