fix: follow-up for salvaged PR #8952

- Rename provider_contracts.py -> volcengine_byteplus.py for explicitness - Consolidate duplicate host-to-provider mappings: provider_for_base_url() now uses the canonical _URL_TO_PROVIDER from model_metadata.py instead of maintaining a separate 20-entry dict - Add volcengine/byteplus to runtime_provider.py model-dependent base URL resolution (kimi-style special case) so manually-edited configs resolve the coding-plan base URL correctly - Remove volcengine/byteplus from _API_KEY_PROVIDER_AUX_MODELS — the main-model-first design in _resolve_auto() handles these providers already; entries were dead code in the normal flow - Add VOLCENGINE_API_KEY and BYTEPLUS_API_KEY to OPTIONAL_ENV_VARS in config.py so they appear in hermes setup - Update docs: environment-variables.md, fallback-providers.md, configuration.md
feat(providers): add Volcengine and BytePlus support
2026-04-22 22:42:39 +05:30 · 2026-04-22 22:33:06 +05:30 · 2026-04-22 08:49:24 -07:00 · 2026-04-22 08:49:24 -07:00 · 2026-04-22 08:49:24 -07:00 · 2026-04-22 08:21:23 -07:00
385 changed files with 34790 additions and 4461 deletions
@@ -0,0 +1,8 @@
+name: 'Setup Nix'
+description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
+
+runs:
+  using: composite
+  steps:
+    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
+    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
@@ -0,0 +1,68 @@
+name: Nix Lockfile Check
+
+on:
+  pull_request:
+  workflow_dispatch:
+
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-check-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Resolve head SHA
+        id: sha
+        shell: bash
+        run: |
+          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
+          echo "full=$FULL" >> "$GITHUB_OUTPUT"
+          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
+
+      - name: Check lockfile hashes
+        id: check
+        continue-on-error: true
+        env:
+          LINK_SHA: ${{ steps.sha.outputs.full }}
+        run: nix run .#fix-lockfiles -- --check
+
+      - name: Post sticky PR comment (stale)
+        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          message: |
+            ### ⚠️ npm lockfile hash out of date
+
+            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
+
+            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
+
+            ${{ steps.check.outputs.report }}
+
+            #### Apply the fix
+
+            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
+            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
+            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
+
+      - name: Clear sticky PR comment (resolved)
+        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          delete: true
+
+      - name: Fail if stale
+        if: steps.check.outputs.stale == 'true'
+        run: exit 1
@@ -0,0 +1,149 @@
+name: Nix Lockfile Fix
+
+on:
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: 'PR number to fix (leave empty to run on the selected branch)'
+        required: false
+        type: string
+  issue_comment:
+    types: [edited]
+
+permissions:
+  contents: write
+  pull-requests: write
+
+concurrency:
+  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
+  cancel-in-progress: false
+
+jobs:
+  fix:
+    # Run on manual dispatch OR when a task-list checkbox in the sticky
+    # lockfile-check comment flips from `[ ]` to `[x]`.
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      (github.event_name == 'issue_comment'
+       && github.event.issue.pull_request != null
+       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
+       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    steps:
+      - name: Authorize & resolve PR
+        id: resolve
+        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
+        with:
+          script: |
+            // 1. Verify the actor has write access — applies to both checkbox
+            //    clicks and manual dispatch.
+            const { data: perm } =
+              await github.rest.repos.getCollaboratorPermissionLevel({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                username: context.actor,
+              });
+            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
+              core.setFailed(
+                `${context.actor} lacks write access (has: ${perm.permission})`
+              );
+              return;
+            }
+
+            // 2. Resolve which ref to check out.
+            let prNumber = '';
+            if (context.eventName === 'issue_comment') {
+              prNumber = String(context.payload.issue.number);
+            } else if (context.eventName === 'workflow_dispatch') {
+              prNumber = context.payload.inputs.pr_number || '';
+            }
+
+            if (!prNumber) {
+              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
+              core.setOutput('repo', context.repo.repo);
+              core.setOutput('owner', context.repo.owner);
+              core.setOutput('pr', '');
+              return;
+            }
+
+            const { data: pr } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: Number(prNumber),
+            });
+            core.setOutput('ref', pr.head.ref);
+            core.setOutput('repo', pr.head.repo.name);
+            core.setOutput('owner', pr.head.repo.owner.login);
+            core.setOutput('pr', String(pr.number));
+
+      # Wipe the sticky lockfile-check comment to a "running" state as soon
+      # as the job is authorized, so the user sees their click was picked up
+      # before the ~minute of nix build work.
+      - name: Mark sticky as running
+        if: steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### 🔄 Applying lockfile fix…
+
+            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
+          ref: ${{ steps.resolve.outputs.ref }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          fetch-depth: 0
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): refresh npm lockfile hashes"
+          git push
+
+      - name: Update sticky (applied)
+        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile fix applied
+
+            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (already current)
+        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ✅ Lockfile hashes already current
+
+            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
+
+      - name: Update sticky (failed)
+        if: failure() && steps.resolve.outputs.pr != ''
+        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
+        with:
+          header: nix-lockfile-check
+          number: ${{ steps.resolve.outputs.pr }}
+          message: |
+            ### ❌ Lockfile fix failed
+
+            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
@@ -4,15 +4,6 @@ on:
  push:
    branches: [main]
  pull_request:
-    paths:
-      - 'flake.nix'
-      - 'flake.lock'
-      - 'nix/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'hermes_cli/**'
-      - 'run_agent.py'
-      - 'acp_adapter/**'

 permissions:
  contents: read
@@ -29,9 +20,8 @@ jobs:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25  # v22
-      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39  # v13
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
+      - uses: ./.github/actions/nix-setup
      - name: Check flake
        if: runner.os == 'Linux'
        run: nix flake check --print-build-logs
@@ -566,3 +566,52 @@ python -m pytest tests/ -q -n 4
 Worker count above 4 will surface test-ordering flakes that CI never sees.

 Always run the full suite before pushing changes.
+
+### Don't write change-detector tests
+
+A test is a **change-detector** if it fails whenever data that is **expected
+to change** gets updated — model catalogs, config version numbers,
+enumeration counts, hardcoded lists of provider models. These tests add no
+behavioral coverage; they just guarantee that routine source updates break
+CI and cost engineering time to "fix."
+
+**Do not write:**
+
+```python
+# catalog snapshot — breaks every model release
+assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
+assert "MiniMax-M2.7" in models
+
+# config version literal — breaks every schema bump
+assert DEFAULT_CONFIG["_config_version"] == 21
+
+# enumeration count — breaks every time a skill/provider is added
+assert len(_PROVIDER_MODELS["huggingface"]) == 8
+```
+
+**Do write:**
+
+```python
+# behavior: does the catalog plumbing work at all?
+assert "gemini" in _PROVIDER_MODELS
+assert len(_PROVIDER_MODELS["gemini"]) >= 1
+
+# behavior: does migration bump the user's version to current latest?
+assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
+
+# invariant: no plan-only model leaks into the legacy list
+assert not (set(moonshot_models) & coding_plan_only_models)
+
+# invariant: every model in the catalog has a context-length entry
+for m in _PROVIDER_MODELS["huggingface"]:
+    assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
+```
+
+The rule: if the test reads like a snapshot of current data, delete it. If
+it reads like a contract about how two pieces of data must relate, keep it.
+When a PR adds a new provider/model and you want a test, make the test
+assert the relationship (e.g. "catalog entries all have context lengths"),
+not the specific names.
+
+Reviewers should reject new change-detector tests; authors should convert
+them into invariants before re-requesting review.
@@ -13,7 +13,7 @@

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.

-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Volcengine](https://www.volcengine.com/product/ark), [BytePlus](https://www.byteplus.com/en/product/modelark), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -63,6 +63,9 @@ def make_approval_callback(
            logger.warning("Permission request timed out or failed: %s", exc)
            return "deny"

+        if response is None:
+            return "deny"
+
        outcome = response.outcome
        if isinstance(outcome, AllowedOutcome):
            option_id = outcome.option_id
@@ -4,6 +4,7 @@ from __future__ import annotations

 import asyncio
 import logging
+import os
 from collections import defaultdict, deque
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Deque, Optional
@@ -51,7 +52,7 @@ try:
 except ImportError:
    from acp.schema import AuthMethod as AuthMethodAgent  # type: ignore[attr-defined]

-from acp_adapter.auth import detect_provider, has_provider
+from acp_adapter.auth import detect_provider
 from acp_adapter.events import (
    make_message_cb,
    make_step_cb,
@@ -71,6 +72,11 @@ except Exception:
 # Thread pool for running AIAgent (synchronous) in parallel.
 _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")

+# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
+# does not expose a client-side limit, so this is a fixed cap that clients
+# paginate against using `cursor` / `next_cursor`.
+_LIST_SESSIONS_PAGE_SIZE = 50
+

 def _extract_text(
    prompt: list[
@@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent):
        )

    async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
-        if has_provider():
-            return AuthenticateResponse()
-        return None
+        # Only accept authenticate() calls whose method_id matches the
+        # provider we advertised in initialize(). Without this check,
+        # authenticate() would acknowledge any method_id as long as the
+        # server has provider credentials configured — harmless under
+        # Hermes' threat model (ACP is stdio-only, local-trust), but poor
+        # API hygiene and confusing if ACP ever grows multi-method auth.
+        provider = detect_provider()
+        if not provider:
+            return None
+        if not isinstance(method_id, str) or method_id.strip().lower() != provider:
+            return None
+        return AuthenticateResponse()

    # ---- Session management -------------------------------------------------

@@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent):
        cwd: str | None = None,
        **kwargs: Any,
    ) -> ListSessionsResponse:
+        """List ACP sessions with optional ``cwd`` filtering and cursor pagination.
+
+        ``cwd`` is passed through to ``SessionManager.list_sessions`` which already
+        normalizes and filters by working directory. ``cursor`` is a ``session_id``
+        previously returned as ``next_cursor``; results resume after that entry.
+        Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
+        results remain, ``next_cursor`` is set to the last returned ``session_id``.
+        """
        infos = self.session_manager.list_sessions(cwd=cwd)
+
+        if cursor:
+            for idx, s in enumerate(infos):
+                if s["session_id"] == cursor:
+                    infos = infos[idx + 1:]
+                    break
+            else:
+                # Unknown cursor -> empty page (do not fall back to full list).
+                infos = []
+
+        has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
+        infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
+
        sessions = []
        for s in infos:
            updated_at = s.get("updated_at")
@@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent):
                    updated_at=updated_at,
                )
            )
-        return ListSessionsResponse(sessions=sessions)
+
+        next_cursor = sessions[-1].session_id if has_more and sessions else None
+        return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)

    # ---- Prompt (core) ------------------------------------------------------

@@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent):
        agent.step_callback = step_cb
        agent.message_callback = message_cb

-        if approval_cb:
-            try:
-                from tools import terminal_tool as _terminal_tool
-                previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
-                _terminal_tool.set_approval_callback(approval_cb)
-            except Exception:
-                logger.debug("Could not set ACP approval callback", exc_info=True)
+        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
+        # Set it INSIDE _run_agent so the TLS write happens in the executor
+        # thread — setting it here would write to the event-loop thread's TLS,
+        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
+        # takes the CLI-interactive path (which calls the registered
+        # callback via prompt_dangerous_approval) instead of the
+        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
+        # ACP's conn.request_permission maps cleanly to the interactive
+        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
+        # which requires a notify_cb registered in _gateway_notify_cbs.
+        previous_approval_cb = None
+        previous_interactive = None

        def _run_agent() -> dict:
+            nonlocal previous_approval_cb, previous_interactive
+            if approval_cb:
+                try:
+                    from tools import terminal_tool as _terminal_tool
+                    previous_approval_cb = _terminal_tool._get_approval_callback()
+                    _terminal_tool.set_approval_callback(approval_cb)
+                except Exception:
+                    logger.debug("Could not set ACP approval callback", exc_info=True)
+            # Signal to tools.approval that we have an interactive callback
+            # and the non-interactive auto-approve path must not fire.
+            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
+            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
                    user_message=user_text,
@@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent):
                logger.exception("Agent error in session %s", session_id)
                return {"final_response": f"Error: {e}", "messages": state.history}
            finally:
+                # Restore HERMES_INTERACTIVE.
+                if previous_interactive is None:
+                    os.environ.pop("HERMES_INTERACTIVE", None)
+                else:
+                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
                if approval_cb:
                    try:
                        from tools import terminal_tool as _terminal_tool
@@ -613,8 +673,8 @@ class HermesACPAgent(acp.Agent):
            await self._conn.session_update(
                session_id=session_id,
                update=AvailableCommandsUpdate(
-                    sessionUpdate="available_commands_update",
-                    availableCommands=self._available_commands(),
+                    session_update="available_commands_update",
+                    available_commands=self._available_commands(),
                ),
            )
        except Exception:
@@ -0,0 +1,326 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from typing import Any, Optional
+
+import httpx
+
+from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
+from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
+from hermes_cli.runtime_provider import resolve_runtime_provider
+
+
+def _utc_now() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+@dataclass(frozen=True)
+class AccountUsageWindow:
+    label: str
+    used_percent: Optional[float] = None
+    reset_at: Optional[datetime] = None
+    detail: Optional[str] = None
+
+
+@dataclass(frozen=True)
+class AccountUsageSnapshot:
+    provider: str
+    source: str
+    fetched_at: datetime
+    title: str = "Account limits"
+    plan: Optional[str] = None
+    windows: tuple[AccountUsageWindow, ...] = ()
+    details: tuple[str, ...] = ()
+    unavailable_reason: Optional[str] = None
+
+    @property
+    def available(self) -> bool:
+        return bool(self.windows or self.details) and not self.unavailable_reason
+
+
+def _title_case_slug(value: Optional[str]) -> Optional[str]:
+    cleaned = str(value or "").strip()
+    if not cleaned:
+        return None
+    return cleaned.replace("_", " ").replace("-", " ").title()
+
+
+def _parse_dt(value: Any) -> Optional[datetime]:
+    if value in (None, ""):
+        return None
+    if isinstance(value, (int, float)):
+        return datetime.fromtimestamp(float(value), tz=timezone.utc)
+    if isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return None
+        if text.endswith("Z"):
+            text = text[:-1] + "+00:00"
+        try:
+            dt = datetime.fromisoformat(text)
+            return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
+        except ValueError:
+            return None
+    return None
+
+
+def _format_reset(dt: Optional[datetime]) -> str:
+    if not dt:
+        return "unknown"
+    local_dt = dt.astimezone()
+    delta = dt - _utc_now()
+    total_seconds = int(delta.total_seconds())
+    if total_seconds <= 0:
+        return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+    hours, rem = divmod(total_seconds, 3600)
+    minutes = rem // 60
+    if hours >= 24:
+        days, hours = divmod(hours, 24)
+        rel = f"in {days}d {hours}h"
+    elif hours > 0:
+        rel = f"in {hours}h {minutes}m"
+    else:
+        rel = f"in {minutes}m"
+    return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
+
+
+def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
+    if not snapshot:
+        return []
+    header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
+    lines = [header]
+    if snapshot.plan:
+        lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
+    else:
+        lines.append(f"Provider: {snapshot.provider}")
+    for window in snapshot.windows:
+        if window.used_percent is None:
+            base = f"{window.label}: unavailable"
+        else:
+            remaining = max(0, round(100 - float(window.used_percent)))
+            used = max(0, round(float(window.used_percent)))
+            base = f"{window.label}: {remaining}% remaining ({used}% used)"
+        if window.reset_at:
+            base += f" • resets {_format_reset(window.reset_at)}"
+        elif window.detail:
+            base += f" • {window.detail}"
+        lines.append(base)
+    for detail in snapshot.details:
+        lines.append(detail)
+    if snapshot.unavailable_reason:
+        lines.append(f"Unavailable: {snapshot.unavailable_reason}")
+    return lines
+
+
+def _resolve_codex_usage_url(base_url: str) -> str:
+    normalized = (base_url or "").strip().rstrip("/")
+    if not normalized:
+        normalized = "https://chatgpt.com/backend-api/codex"
+    if normalized.endswith("/codex"):
+        normalized = normalized[: -len("/codex")]
+    if "/backend-api" in normalized:
+        return normalized + "/wham/usage"
+    return normalized + "/api/codex/usage"
+
+
+def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
+    creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
+    token_data = _read_codex_tokens()
+    tokens = token_data.get("tokens") or {}
+    account_id = str(tokens.get("account_id", "") or "").strip() or None
+    headers = {
+        "Authorization": f"Bearer {creds['api_key']}",
+        "Accept": "application/json",
+        "User-Agent": "codex-cli",
+    }
+    if account_id:
+        headers["ChatGPT-Account-Id"] = account_id
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
+        response.raise_for_status()
+    payload = response.json() or {}
+    rate_limit = payload.get("rate_limit") or {}
+    windows: list[AccountUsageWindow] = []
+    for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
+        window = rate_limit.get(key) or {}
+        used = window.get("used_percent")
+        if used is None:
+            continue
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=float(used),
+                reset_at=_parse_dt(window.get("reset_at")),
+            )
+        )
+    details: list[str] = []
+    credits = payload.get("credits") or {}
+    if credits.get("has_credits"):
+        balance = credits.get("balance")
+        if isinstance(balance, (int, float)):
+            details.append(f"Credits balance: ${float(balance):.2f}")
+        elif credits.get("unlimited"):
+            details.append("Credits balance: unlimited")
+    return AccountUsageSnapshot(
+        provider="openai-codex",
+        source="usage_api",
+        fetched_at=_utc_now(),
+        plan=_title_case_slug(payload.get("plan_type")),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
+    token = (resolve_anthropic_token() or "").strip()
+    if not token:
+        return None
+    if not _is_oauth_token(token):
+        return AccountUsageSnapshot(
+            provider="anthropic",
+            source="oauth_usage_api",
+            fetched_at=_utc_now(),
+            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
+        )
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+        "anthropic-beta": "oauth-2025-04-20",
+        "User-Agent": "claude-code/2.1.0",
+    }
+    with httpx.Client(timeout=15.0) as client:
+        response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
+        response.raise_for_status()
+    payload = response.json() or {}
+    windows: list[AccountUsageWindow] = []
+    mapping = (
+        ("five_hour", "Current session"),
+        ("seven_day", "Current week"),
+        ("seven_day_opus", "Opus week"),
+        ("seven_day_sonnet", "Sonnet week"),
+    )
+    for key, label in mapping:
+        window = payload.get(key) or {}
+        util = window.get("utilization")
+        if util is None:
+            continue
+        used = float(util) * 100 if float(util) <= 1 else float(util)
+        windows.append(
+            AccountUsageWindow(
+                label=label,
+                used_percent=used,
+                reset_at=_parse_dt(window.get("resets_at")),
+            )
+        )
+    details: list[str] = []
+    extra = payload.get("extra_usage") or {}
+    if extra.get("is_enabled"):
+        used_credits = extra.get("used_credits")
+        monthly_limit = extra.get("monthly_limit")
+        currency = extra.get("currency") or "USD"
+        if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
+            details.append(
+                f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
+            )
+    return AccountUsageSnapshot(
+        provider="anthropic",
+        source="oauth_usage_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
+    runtime = resolve_runtime_provider(
+        requested="openrouter",
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )
+    token = str(runtime.get("api_key", "") or "").strip()
+    if not token:
+        return None
+    normalized = str(runtime.get("base_url", "") or "").rstrip("/")
+    credits_url = f"{normalized}/credits"
+    key_url = f"{normalized}/key"
+    headers = {
+        "Authorization": f"Bearer {token}",
+        "Accept": "application/json",
+    }
+    with httpx.Client(timeout=10.0) as client:
+        credits_resp = client.get(credits_url, headers=headers)
+        credits_resp.raise_for_status()
+        credits = (credits_resp.json() or {}).get("data") or {}
+        try:
+            key_resp = client.get(key_url, headers=headers)
+            key_resp.raise_for_status()
+            key_data = (key_resp.json() or {}).get("data") or {}
+        except Exception:
+            key_data = {}
+    total_credits = float(credits.get("total_credits") or 0.0)
+    total_usage = float(credits.get("total_usage") or 0.0)
+    details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
+    windows: list[AccountUsageWindow] = []
+    limit = key_data.get("limit")
+    limit_remaining = key_data.get("limit_remaining")
+    limit_reset = str(key_data.get("limit_reset") or "").strip()
+    usage = key_data.get("usage")
+    if (
+        isinstance(limit, (int, float))
+        and float(limit) > 0
+        and isinstance(limit_remaining, (int, float))
+        and 0 <= float(limit_remaining) <= float(limit)
+    ):
+        limit_value = float(limit)
+        remaining_value = float(limit_remaining)
+        used_percent = ((limit_value - remaining_value) / limit_value) * 100
+        detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
+        if limit_reset:
+            detail_parts.append(f"resets {limit_reset}")
+        windows.append(
+            AccountUsageWindow(
+                label="API key quota",
+                used_percent=used_percent,
+                detail=" • ".join(detail_parts),
+            )
+        )
+    if isinstance(usage, (int, float)):
+        usage_parts = [f"API key usage: ${float(usage):.2f} total"]
+        for value, label in (
+            (key_data.get("usage_daily"), "today"),
+            (key_data.get("usage_weekly"), "this week"),
+            (key_data.get("usage_monthly"), "this month"),
+        ):
+            if isinstance(value, (int, float)) and float(value) > 0:
+                usage_parts.append(f"${float(value):.2f} {label}")
+        details.append(" • ".join(usage_parts))
+    return AccountUsageSnapshot(
+        provider="openrouter",
+        source="credits_api",
+        fetched_at=_utc_now(),
+        windows=tuple(windows),
+        details=tuple(details),
+    )
+
+
+def fetch_account_usage(
+    provider: Optional[str],
+    *,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> Optional[AccountUsageSnapshot]:
+    normalized = str(provider or "").strip().lower()
+    if normalized in {"", "auto", "custom"}:
+        return None
+    try:
+        if normalized == "openai-codex":
+            return _fetch_codex_account_usage()
+        if normalized == "anthropic":
+            return _fetch_anthropic_account_usage()
+        if normalized == "openrouter":
+            return _fetch_openrouter_account_usage(base_url, api_key)
+    except Exception:
+        return None
+    return None
@@ -19,6 +19,7 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
+from utils import normalize_proxy_env_vars

 try:
    import anthropic as _anthropic_sdk
@@ -265,6 +266,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    return True  # Any other endpoint is a third-party proxy


+def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
+    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -308,6 +317,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
+
+    normalize_proxy_env_vars()
+
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
@@ -319,9 +331,18 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

-    if _requires_bearer_auth(normalized_base_url):
+    if _is_kimi_coding_endpoint(base_url):
+        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
+        # to be recognized as a valid Coding Agent. Without it, returns 403.
+        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
+        kwargs["api_key"] = api_key
+        kwargs["default_headers"] = {
+            "User-Agent": "claude-code/0.1.0",
+            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
+        }
+    elif _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer even for regular API keys. Route those endpoints
+        # Authorization: Bearer *** for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1062,6 +1083,31 @@ def convert_messages_to_anthropic(
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
+            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
+            # tool-call messages to carry reasoning_content when thinking is
+            # enabled server-side.  Preserve it as a thinking block so Kimi
+            # can validate the message history.  See hermes-agent#13848.
+            #
+            # Accept empty string "" — _copy_reasoning_content_for_api()
+            # injects "" as a tier-3 fallback for Kimi tool-call messages
+            # that had no reasoning.  Kimi requires the field to exist, even
+            # if empty.
+            #
+            # Prepend (not append): Anthropic protocol requires thinking
+            # blocks before text and tool_use blocks.
+            #
+            # Guard: only add when reasoning_details didn't already contribute
+            # thinking blocks.  On native Anthropic, reasoning_details produces
+            # signed thinking blocks — adding another unsigned one from
+            # reasoning_content would create a duplicate (same text) that gets
+            # downgraded to a spurious text block on the last assistant message.
+            reasoning_content = m.get("reasoning_content")
+            _already_has_thinking = any(
+                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
+                for b in blocks
+            )
+            if isinstance(reasoning_content, str) and not _already_has_thinking:
+                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
@@ -1217,6 +1263,7 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
+    _is_kimi = _is_kimi_coding_endpoint(base_url)

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1228,7 +1275,25 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_third_party or idx != last_assistant_idx:
+        if _is_kimi:
+            # Kimi's /coding endpoint enables thinking server-side and
+            # requires unsigned thinking blocks on replayed assistant
+            # tool-call messages.  Strip signed Anthropic blocks (Kimi
+            # can't validate signatures) but preserve the unsigned ones
+            # we synthesised from reasoning_content above.
+            new_content = []
+            for b in m["content"]:
+                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
+                    new_content.append(b)
+                    continue
+                if b.get("signature") or b.get("data"):
+                    # Anthropic-signed block — Kimi can't validate, strip
+                    continue
+                # Unsigned thinking (synthesised from reasoning_content) —
+                # keep it: Kimi needs it for message-history validation.
+                new_content.append(b)
+            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
+        elif _is_third_party or idx != last_assistant_idx:
            # Third-party endpoint: strip ALL thinking blocks from every
            # assistant message — signatures are Anthropic-proprietary.
            # Direct Anthropic: strip from non-latest assistant messages only.
@@ -1405,11 +1470,25 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
+    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
+    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
+    # validates the message history and requires every prior assistant
+    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
+    # Anthropic path never populates that field, and
+    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
+    # on third-party endpoints — so the request fails with HTTP 400
+    # "thinking is enabled but reasoning_content is missing in assistant
+    # tool call message at index N".  Kimi's reasoning is driven server-side
+    # on the /coding route, so skip Anthropic's thinking parameter entirely
+    # for that host.  (Kimi on chat_completions enables thinking via
+    # extra_body in the ChatCompletionsTransport — see #13503.)
+    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    if reasoning_config and isinstance(reasoning_config, dict):
+    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
+    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
@@ -1525,3 +1604,42 @@ def normalize_anthropic_response(
        ),
        finish_reason,
    )
+
+
+def normalize_anthropic_response_v2(
+    response,
+    strip_tool_prefix: bool = False,
+) -> "NormalizedResponse":
+    """Normalize Anthropic response to NormalizedResponse.
+
+    Wraps the existing normalize_anthropic_response() and maps its output
+    to the shared transport types.  This allows incremental migration —
+    one call site at a time — without changing the original function.
+    """
+    from agent.transports.types import NormalizedResponse, build_tool_call
+
+    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
+
+    tool_calls = None
+    if assistant_msg.tool_calls:
+        tool_calls = [
+            build_tool_call(
+                id=tc.id,
+                name=tc.function.name,
+                arguments=tc.function.arguments,
+            )
+            for tc in assistant_msg.tool_calls
+        ]
+
+    provider_data = {}
+    if getattr(assistant_msg, "reasoning_details", None):
+        provider_data["reasoning_details"] = assistant_msg.reasoning_details
+
+    return NormalizedResponse(
+        content=assistant_msg.content,
+        tool_calls=tool_calls,
+        finish_reason=finish_reason,
+        reasoning=getattr(assistant_msg, "reasoning", None),
+        usage=None,  # Anthropic usage is on the raw response, not the normaliser
+        provider_data=provider_data or None,
+    )
@@ -48,6 +48,7 @@ from openai import OpenAI
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars

 logger = logging.getLogger(__name__)

@@ -73,6 +74,10 @@ _PROVIDER_ALIASES = {
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
    "claude-code": "anthropic",
+    "volcengine-coding-plan": "volcengine",
+    "volcengine_coding_plan": "volcengine",
+    "byteplus-coding-plan": "byteplus",
+    "byteplus_coding_plan": "byteplus",
 }


@@ -95,84 +100,37 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
    return _PROVIDER_ALIASES.get(normalized, normalized)


-_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
-    "kimi-for-coding": 0.6,
-}
+# Sentinel: when returned by _fixed_temperature_for_model(), callers must
+# strip the ``temperature`` key from API kwargs entirely so the provider's
+# server-side default applies.  Kimi/Moonshot models manage temperature
+# internally — sending *any* value (even the "correct" one) can conflict
+# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
+OMIT_TEMPERATURE: object = object()

-# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
-# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
-# value 0.6.  Any other value will result in an error."  The same lock applies
-# to the other k2.* models served on that endpoint.  Enumerated explicitly so
-# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
-# the standard chat API and third parties) are NOT clamped.
-# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
-_KIMI_INSTANT_MODELS: frozenset = frozenset({
-    "kimi-k2.5",
-    "kimi-k2-turbo-preview",
-    "kimi-k2-0905-preview",
-})
-_KIMI_THINKING_MODELS: frozenset = frozenset({
-    "kimi-k2-thinking",
-    "kimi-k2-thinking-turbo",
-})

-# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different
-# temperature contract than the Coding Plan endpoint above.  Empirically,
-# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400
-# "invalid temperature: only 1 is allowed for this model" — the Coding Plan
-# lock (0.6 for non-thinking) does not apply.  `kimi-k2-turbo-preview` and the
-# thinking variants already match the Coding Plan contract on the public
-# endpoint, so we only override the models that diverge.
-# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the
-# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see
-# hermes_cli/auth.py:_kimi_base_url_for_key).
-_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = {
-    "kimi-k2.5": 1.0,
-}
+def _is_kimi_model(model: Optional[str]) -> bool:
+    """True for any Kimi / Moonshot model that manages temperature server-side."""
+    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
+    return bare.startswith("kimi-") or bare == "kimi"


 def _fixed_temperature_for_model(
    model: Optional[str],
    base_url: Optional[str] = None,
-) -> Optional[float]:
-    """Return a required temperature override for models with strict contracts.
+) -> "Optional[float] | object":
+    """Return a temperature directive for models with strict contracts.

-    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
-    the k2.5 family.  Non-thinking variants require exactly 0.6; thinking
-    variants require 1.0.  An optional ``vendor/`` prefix (e.g.
-    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
-
-    When ``base_url`` points to Moonshot's public chat endpoint
-    (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public
-    API only accepts ``temperature=1``, not 0.6.  That override takes precedence
-    over the Coding Plan defaults above.
-
-    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
-    which is the separate non-coding K2 family with variable temperature.
+    Returns:
+        ``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
+            provider chooses its own default.  Used for all Kimi / Moonshot
+            models whose gateway selects temperature server-side.
+        ``float`` — a specific value the caller must use (reserved for future
+            models with fixed-temperature contracts).
+        ``None`` — no override; caller should use its own default.
    """
-    normalized = (model or "").strip().lower()
-    bare = normalized.rsplit("/", 1)[-1]
-
-    # Public Moonshot API has a stricter contract for some models than the
-    # Coding Plan endpoint — check it first so it wins on conflict.
-    if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()):
-        public = _KIMI_PUBLIC_API_OVERRIDES.get(bare)
-        if public is not None:
-            logger.debug(
-                "Forcing temperature=%s for %r on public Moonshot API", public, model
-            )
-            return public
-
-    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
-    if fixed is not None:
-        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
-        return fixed
-    if bare in _KIMI_THINKING_MODELS:
-        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
-        return 1.0
-    if bare in _KIMI_INSTANT_MODELS:
-        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
-        return 0.6
+    if _is_kimi_model(model):
+        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
+        return OMIT_TEMPERATURE
    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
@@ -180,6 +138,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
+    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
@@ -207,6 +166,16 @@ _OR_HEADERS = {
    "X-OpenRouter-Categories": "productivity,cli-agent",
 }

+# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
+# referrerUrl and X-Title maps to appName in the gateway's analytics.
+from hermes_cli import __version__ as _HERMES_VERSION
+
+_AI_GATEWAY_HEADERS = {
+    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
+    "X-Title": "Hermes Agent",
+    "User-Agent": f"HermesAgent/{_HERMES_VERSION}",
+}
+
 # Nous Portal extra_body for product attribution.
 # Callers should pass this as extra_body in chat.completions.create()
 # when the auxiliary client is backed by Nous Portal.
@@ -218,8 +187,6 @@ auxiliary_is_nous: bool = False
 # Default auxiliary models per provider
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
-_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
-_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -764,6 +731,33 @@ def _nous_base_url() -> str:
    return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)


+def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
+    """Return fresh Nous runtime credentials when available.
+
+    This mirrors the main agent's 401 recovery path and keeps auxiliary
+    clients aligned with the singleton auth store + mint flow instead of
+    relying only on whatever raw tokens happen to be sitting in auth.json
+    or the credential pool.
+    """
+    try:
+        from hermes_cli.auth import resolve_nous_runtime_credentials
+
+        creds = resolve_nous_runtime_credentials(
+            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+            timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+            force_mint=force_refresh,
+        )
+    except Exception as exc:
+        logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
+        return None
+
+    api_key = str(creds.get("api_key") or "").strip()
+    base_url = str(creds.get("base_url") or "").strip().rstrip("/")
+    if not api_key or not base_url:
+        return None
+    return api_key, base_url
+
+
 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store.

@@ -853,9 +847,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                if is_native_gemini_base_url(base_url):
                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
            extra = {}
-            if "api.kimi.com" in base_url.lower():
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-            elif "api.githubcopilot.com" in base_url.lower():
+            if base_url_host_matches(base_url, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
@@ -879,9 +873,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
            if is_native_gemini_base_url(base_url):
                return GeminiNativeClient(api_key=api_key, base_url=base_url), model
        extra = {}
-        if "api.kimi.com" in base_url.lower():
-            extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-        elif "api.githubcopilot.com" in base_url.lower():
+        if base_url_host_matches(base_url, "api.kimi.com"):
+            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
@@ -930,29 +924,50 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
        pass

    nous = _read_nous_auth()
-    if not nous:
+    runtime = _resolve_nous_runtime_api(force_refresh=False)
+    if runtime is None and not nous:
        return None, None
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-    if nous.get("source") == "pool":
-        model = "gemini-3-flash"
-    else:
-        model = _NOUS_MODEL
-    # Free-tier users can't use paid auxiliary models — use the free
-    # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
+
+    # Ask the Portal which model it currently recommends for this task type.
+    # The /api/nous/recommended-models endpoint is the authoritative source:
+    # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
+    # auto-detects the caller's tier via check_nous_free_tier().  Fall back to
+    # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
+    # or returns a null recommendation for this task type.
+    model = _NOUS_MODEL
    try:
-        from hermes_cli.models import check_nous_free_tier
-        if check_nous_free_tier():
-            model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
-            logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
-                         model, "vision" if vision else "text")
-    except Exception:
-        pass
+        from hermes_cli.models import get_nous_recommended_aux_model
+        recommended = get_nous_recommended_aux_model(vision=vision)
+        if recommended:
+            model = recommended
+            logger.debug(
+                "Auxiliary/%s: using Portal-recommended model %s",
+                "vision" if vision else "text", model,
+            )
+        else:
+            logger.debug(
+                "Auxiliary/%s: no Portal recommendation, falling back to %s",
+                "vision" if vision else "text", model,
+            )
+    except Exception as exc:
+        logger.debug(
+            "Auxiliary/%s: recommended-models lookup failed (%s); "
+            "falling back to %s",
+            "vision" if vision else "text", exc, model,
+        )
+
+    if runtime is not None:
+        api_key, base_url = runtime
+    else:
+        api_key = _nous_api_key(nous or {})
+        base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
    return (
        OpenAI(
-            api_key=_nous_api_key(nous),
-            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
+            api_key=api_key,
+            base_url=base_url,
        ),
        model,
    )
@@ -1030,7 +1045,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
        return None, None, None

    custom_base = custom_base.strip().rstrip("/")
-    if "openrouter.ai" in custom_base.lower():
+    if base_url_host_matches(custom_base, "openrouter.ai"):
        # requested='custom' falls back to OpenRouter when no custom endpoint is
        # configured. Treat that as "no custom endpoint" for auxiliary routing.
        return None, None, None
@@ -1064,6 +1079,8 @@ def _validate_proxy_env_urls() -> None:
    """
    from urllib.parse import urlparse

+    normalize_proxy_env_vars()
+
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = str(os.environ.get(key) or "").strip()
@@ -1294,6 +1311,15 @@ def _is_connection_error(exc: Exception) -> bool:
    return False


+def _is_auth_error(exc: Exception) -> bool:
+    """Detect auth failures that should trigger provider-specific refresh."""
+    status = getattr(exc, "status_code", None)
+    if status == 401:
+        return True
+    err_lower = str(exc).lower()
+    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
+
+
 def _try_payment_fallback(
    failed_provider: str,
    task: str = None,
@@ -1469,15 +1495,15 @@ def _to_async_client(sync_client, model: str):
        "api_key": sync_client.api_key,
        "base_url": str(sync_client.base_url),
    }
-    base_lower = str(sync_client.base_url).lower()
-    if "openrouter" in base_lower:
+    sync_base_url = str(sync_client.base_url)
+    if base_url_host_matches(sync_base_url, "openrouter.ai"):
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
-    elif "api.githubcopilot.com" in base_lower:
+    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
        from hermes_cli.models import copilot_default_headers

        async_kwargs["default_headers"] = copilot_default_headers()
-    elif "api.kimi.com" in base_lower:
-        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
+        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
    return AsyncOpenAI(**async_kwargs), model


@@ -1553,8 +1579,7 @@ def resolve_provider_client(
        # Auto-detect: api.openai.com + codex model name pattern
        if api_mode and api_mode != "codex_responses":
            return False  # explicit non-codex mode
-        normalized_base = (base_url_str or "").strip().lower()
-        if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
+        if base_url_hostname(base_url_str) == "api.openai.com":
            model_lower = (model_str or "").lower()
            if "codex" in model_lower:
                return True
@@ -1602,7 +1627,13 @@ def resolve_provider_client(

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
    if provider == "nous":
-        client, default = _try_nous()
+        # Detect vision tasks: either explicit model override from
+        # _PROVIDER_VISION_MODELS, or caller passed a known vision model.
+        _is_vision = (
+            model in _PROVIDER_VISION_MODELS.values()
+            or (model or "").strip().lower() == "mimo-v2-omni"
+        )
+        client, default = _try_nous(vision=_is_vision)
        if client is None:
            logger.warning("resolve_provider_client: nous requested "
                           "but Nous Portal not configured (run: hermes auth)")
@@ -1658,9 +1689,9 @@ def resolve_provider_client(
                provider,
            )
            extra = {}
-            if "api.kimi.com" in custom_base.lower():
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
-            elif "api.githubcopilot.com" in custom_base.lower():
+            if base_url_host_matches(custom_base, "api.kimi.com"):
+                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
                from hermes_cli.models import copilot_default_headers
                extra["default_headers"] = copilot_default_headers()
            client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
@@ -1765,9 +1796,9 @@ def resolve_provider_client(

        # Provider-specific headers
        headers = {}
-        if "api.kimi.com" in base_url.lower():
-            headers["User-Agent"] = "KimiCLI/1.30.0"
-        elif "api.githubcopilot.com" in base_url.lower():
+        if base_url_host_matches(base_url, "api.kimi.com"):
+            headers["User-Agent"] = "claude-code/0.1.0"
+        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
            from hermes_cli.models import copilot_default_headers

            headers.update(copilot_default_headers())
@@ -1998,24 +2029,35 @@ def resolve_vision_provider_client(
        #      _PROVIDER_VISION_MODELS provides per-provider vision model
        #      overrides when the provider has a dedicated multimodal model
        #      that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
-        #      zai → glm-5v-turbo).
+        #      zai → glm-5v-turbo). Nous is the exception: it has a dedicated
+        #      strict vision backend with tier-aware defaults, so it must not
+        #      fall through to the user's text chat model here.
        #   2. OpenRouter  (vision-capable aggregator fallback)
        #   3. Nous Portal (vision-capable aggregator fallback)
        #   4. Stop
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
-            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
-            rpc_client, rpc_model = resolve_provider_client(
-                main_provider, vision_model,
-                api_mode=resolved_api_mode)
-            if rpc_client is not None:
-                logger.info(
-                    "Vision auto-detect: using main provider %s (%s)",
-                    main_provider, rpc_model or vision_model,
-                )
-                return _finalize(
-                    main_provider, rpc_client, rpc_model or vision_model)
+            if main_provider == "nous":
+                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
+                if sync_client is not None:
+                    logger.info(
+                        "Vision auto-detect: using main provider %s (%s)",
+                        main_provider, default_model or resolved_model or main_model,
+                    )
+                    return _finalize(main_provider, sync_client, default_model)
+            else:
+                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
+                rpc_client, rpc_model = resolve_provider_client(
+                    main_provider, vision_model,
+                    api_mode=resolved_api_mode)
+                if rpc_client is not None:
+                    logger.info(
+                        "Vision auto-detect: using main provider %s (%s)",
+                        main_provider, rpc_model or vision_model,
+                    )
+                    return _finalize(
+                        main_provider, rpc_client, rpc_model or vision_model)

        # Fall back through aggregators (uses their dedicated vision model,
        # not the user's main model) when main provider has no client.
@@ -2062,7 +2104,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
    # Only use max_completion_tokens for direct OpenAI custom endpoints
    if (not or_key
            and _read_nous_auth() is None
-            and "api.openai.com" in custom_base.lower()):
+            and base_url_hostname(custom_base) == "api.openai.com"):
        return {"max_completion_tokens": value}
    return {"max_tokens": value}

@@ -2090,6 +2132,76 @@ _client_cache_lock = threading.Lock()
 _CLIENT_CACHE_MAX_SIZE = 64  # safety belt — evict oldest when exceeded


+def _client_cache_key(
+    provider: str,
+    *,
+    async_mode: bool,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+    api_mode: Optional[str] = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> tuple:
+    runtime = _normalize_main_runtime(main_runtime)
+    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+
+
+def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
+    with _client_cache_lock:
+        old_entry = _client_cache.get(cache_key)
+        if old_entry is not None and old_entry[0] is not client:
+            _force_close_async_httpx(old_entry[0])
+            try:
+                close_fn = getattr(old_entry[0], "close", None)
+                if callable(close_fn):
+                    close_fn()
+            except Exception:
+                pass
+        _client_cache[cache_key] = (client, default_model, bound_loop)
+
+
+def _refresh_nous_auxiliary_client(
+    *,
+    cache_provider: str,
+    model: Optional[str],
+    async_mode: bool,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+    api_mode: Optional[str] = None,
+    main_runtime: Optional[Dict[str, Any]] = None,
+) -> Tuple[Optional[Any], Optional[str]]:
+    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
+    runtime = _resolve_nous_runtime_api(force_refresh=True)
+    if runtime is None:
+        return None, model
+
+    fresh_key, fresh_base_url = runtime
+    sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
+    final_model = model
+
+    current_loop = None
+    if async_mode:
+        try:
+            import asyncio as _aio
+            current_loop = _aio.get_event_loop()
+        except RuntimeError:
+            pass
+        client, final_model = _to_async_client(sync_client, final_model or "")
+    else:
+        client = sync_client
+
+    cache_key = _client_cache_key(
+        cache_provider,
+        async_mode=async_mode,
+        base_url=base_url,
+        api_key=api_key,
+        api_mode=api_mode,
+        main_runtime=main_runtime,
+    )
+    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
+    return client, final_model
+
+
 def neuter_async_httpx_del() -> None:
    """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.

@@ -2191,7 +2303,7 @@ def cleanup_stale_async_clients() -> None:

 def _is_openrouter_client(client: Any) -> bool:
    for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
-        if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
+        if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
            return True
    return False

@@ -2243,8 +2355,14 @@ def _get_cached_client(
        except RuntimeError:
            pass
    runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+    cache_key = _client_cache_key(
+        provider,
+        async_mode=async_mode,
+        base_url=base_url,
+        api_key=api_key,
+        api_mode=api_mode,
+        main_runtime=main_runtime,
+    )
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2475,7 +2593,9 @@ def _build_call_kwargs(
    }

    fixed_temperature = _fixed_temperature_for_model(model, base_url)
-    if fixed_temperature is not None:
+    if fixed_temperature is OMIT_TEMPERATURE:
+        temperature = None  # strip — let server choose
+    elif fixed_temperature is not None:
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
@@ -2495,7 +2615,7 @@ def _build_call_kwargs(
        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
        if provider == "custom":
            custom_base = base_url or _current_custom_base_url()
-            if "api.openai.com" in custom_base.lower():
+            if base_url_hostname(custom_base) == "api.openai.com":
                kwargs["max_completion_tokens"] = max_tokens
            else:
                kwargs["max_tokens"] = max_tokens
@@ -2690,6 +2810,29 @@ def call_llm(
                    raise
                first_err = retry_err

+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=False,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                main_runtime=main_runtime,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    refreshed_client.chat.completions.create(**kwargs), task)
+
        # ── Payment / credit exhaustion fallback ──────────────────────
        # When the resolved provider returns 402 or a credit-related error,
        # try alternative providers instead of giving up.  This handles the
@@ -2888,6 +3031,28 @@ async def async_call_llm(
                    raise
                first_err = retry_err

+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=True,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    await refreshed_client.chat.completions.create(**kwargs), task)
+
        # ── Payment / connection fallback (mirrors sync call_llm) ─────
        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
        is_auto = resolved_provider in ("auto", "", None)
@@ -0,0 +1,813 @@
+"""Codex Responses API adapter.
+
+Pure format-conversion and normalization logic for the OpenAI Responses API
+(used by OpenAI Codex, xAI, GitHub Models, and other Responses-compatible endpoints).
+
+Extracted from run_agent.py to isolate Responses API-specific logic from the
+core agent loop. All functions are stateless — they operate on the data passed
+in and return transformed results.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import logging
+import re
+import uuid
+from types import SimpleNamespace
+from typing import Any, Dict, List, Optional
+
+from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Multimodal content helpers
+# ---------------------------------------------------------------------------
+
+def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
+    """Convert chat-style multimodal content to Responses API input parts.
+
+    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
+    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
+
+    Returns an empty list when ``content`` is not a list or contains no
+    recognized parts — callers fall back to the string path.
+    """
+    if not isinstance(content, list):
+        return []
+    converted: List[Dict[str, Any]] = []
+    for part in content:
+        if isinstance(part, str):
+            if part:
+                converted.append({"type": "input_text", "text": part})
+            continue
+        if not isinstance(part, dict):
+            continue
+        ptype = str(part.get("type") or "").strip().lower()
+        if ptype in {"text", "input_text", "output_text"}:
+            text = part.get("text")
+            if isinstance(text, str) and text:
+                converted.append({"type": "input_text", "text": text})
+            continue
+        if ptype in {"image_url", "input_image"}:
+            image_ref = part.get("image_url")
+            detail = part.get("detail")
+            if isinstance(image_ref, dict):
+                url = image_ref.get("url")
+                detail = image_ref.get("detail", detail)
+            else:
+                url = image_ref
+            if not isinstance(url, str) or not url:
+                continue
+            image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
+            if isinstance(detail, str) and detail.strip():
+                image_part["detail"] = detail.strip()
+            converted.append(image_part)
+    return converted
+
+
+def _summarize_user_message_for_log(content: Any) -> str:
+    """Return a short text summary of a user message for logging/trajectory.
+
+    Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
+    parts from the API server.  Logging, spinner previews, and trajectory
+    files all want a plain string — this helper extracts the first chunk of
+    text and notes any attached images.  Returns an empty string for empty
+    lists and ``str(content)`` for unexpected scalar types.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        text_bits: List[str] = []
+        image_count = 0
+        for part in content:
+            if isinstance(part, str):
+                if part:
+                    text_bits.append(part)
+                continue
+            if not isinstance(part, dict):
+                continue
+            ptype = str(part.get("type") or "").strip().lower()
+            if ptype in {"text", "input_text", "output_text"}:
+                text = part.get("text")
+                if isinstance(text, str) and text:
+                    text_bits.append(text)
+            elif ptype in {"image_url", "input_image"}:
+                image_count += 1
+        summary = " ".join(text_bits).strip()
+        if image_count:
+            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
+            summary = f"{note} {summary}" if summary else note
+        return summary
+    try:
+        return str(content)
+    except Exception:
+        return ""
+
+
+# ---------------------------------------------------------------------------
+# ID helpers
+# ---------------------------------------------------------------------------
+
+def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
+    """Generate a deterministic call_id from tool call content.
+
+    Used as a fallback when the API doesn't provide a call_id.
+    Deterministic IDs prevent cache invalidation — random UUIDs would
+    make every API call's prefix unique, breaking OpenAI's prompt cache.
+    """
+    seed = f"{fn_name}:{arguments}:{index}"
+    digest = hashlib.sha256(seed.encode("utf-8", errors="replace")).hexdigest()[:12]
+    return f"call_{digest}"
+
+
+def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]:
+    """Split a stored tool id into (call_id, response_item_id)."""
+    if not isinstance(raw_id, str):
+        return None, None
+    value = raw_id.strip()
+    if not value:
+        return None, None
+    if "|" in value:
+        call_id, response_item_id = value.split("|", 1)
+        call_id = call_id.strip() or None
+        response_item_id = response_item_id.strip() or None
+        return call_id, response_item_id
+    if value.startswith("fc_"):
+        return None, value
+    return value, None
+
+
+def _derive_responses_function_call_id(
+    call_id: str,
+    response_item_id: Optional[str] = None,
+) -> str:
+    """Build a valid Responses `function_call.id` (must start with `fc_`)."""
+    if isinstance(response_item_id, str):
+        candidate = response_item_id.strip()
+        if candidate.startswith("fc_"):
+            return candidate
+
+    source = (call_id or "").strip()
+    if source.startswith("fc_"):
+        return source
+    if source.startswith("call_") and len(source) > len("call_"):
+        return f"fc_{source[len('call_'):]}"
+
+    sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source)
+    if sanitized.startswith("fc_"):
+        return sanitized
+    if sanitized.startswith("call_") and len(sanitized) > len("call_"):
+        return f"fc_{sanitized[len('call_'):]}"
+    if sanitized:
+        return f"fc_{sanitized[:48]}"
+
+    seed = source or str(response_item_id or "") or uuid.uuid4().hex
+    digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24]
+    return f"fc_{digest}"
+
+
+# ---------------------------------------------------------------------------
+# Schema conversion
+# ---------------------------------------------------------------------------
+
+def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
+    """Convert chat-completions tool schemas to Responses function-tool schemas."""
+    if not tools:
+        return None
+
+    converted: List[Dict[str, Any]] = []
+    for item in tools:
+        fn = item.get("function", {}) if isinstance(item, dict) else {}
+        name = fn.get("name")
+        if not isinstance(name, str) or not name.strip():
+            continue
+        converted.append({
+            "type": "function",
+            "name": name,
+            "description": fn.get("description", ""),
+            "strict": False,
+            "parameters": fn.get("parameters", {"type": "object", "properties": {}}),
+        })
+    return converted or None
+
+
+# ---------------------------------------------------------------------------
+# Message format conversion
+# ---------------------------------------------------------------------------
+
+def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Convert internal chat-style messages to Responses input items."""
+    items: List[Dict[str, Any]] = []
+    seen_item_ids: set = set()
+
+    for msg in messages:
+        if not isinstance(msg, dict):
+            continue
+        role = msg.get("role")
+        if role == "system":
+            continue
+
+        if role in {"user", "assistant"}:
+            content = msg.get("content", "")
+            if isinstance(content, list):
+                content_parts = _chat_content_to_responses_parts(content)
+                content_text = "".join(
+                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
+                )
+            else:
+                content_parts = []
+                content_text = str(content) if content is not None else ""
+
+            if role == "assistant":
+                # Replay encrypted reasoning items from previous turns
+                # so the API can maintain coherent reasoning chains.
+                codex_reasoning = msg.get("codex_reasoning_items")
+                has_codex_reasoning = False
+                if isinstance(codex_reasoning, list):
+                    for ri in codex_reasoning:
+                        if isinstance(ri, dict) and ri.get("encrypted_content"):
+                            item_id = ri.get("id")
+                            if item_id and item_id in seen_item_ids:
+                                continue
+                            # Strip the "id" field — with store=False the
+                            # Responses API cannot look up items by ID and
+                            # returns 404.  The encrypted_content blob is
+                            # self-contained for reasoning chain continuity.
+                            replay_item = {k: v for k, v in ri.items() if k != "id"}
+                            items.append(replay_item)
+                            if item_id:
+                                seen_item_ids.add(item_id)
+                            has_codex_reasoning = True
+
+                if content_parts:
+                    items.append({"role": "assistant", "content": content_parts})
+                elif content_text.strip():
+                    items.append({"role": "assistant", "content": content_text})
+                elif has_codex_reasoning:
+                    # The Responses API requires a following item after each
+                    # reasoning item (otherwise: missing_following_item error).
+                    # When the assistant produced only reasoning with no visible
+                    # content, emit an empty assistant message as the required
+                    # following item.
+                    items.append({"role": "assistant", "content": ""})
+
+                tool_calls = msg.get("tool_calls")
+                if isinstance(tool_calls, list):
+                    for tc in tool_calls:
+                        if not isinstance(tc, dict):
+                            continue
+                        fn = tc.get("function", {})
+                        fn_name = fn.get("name")
+                        if not isinstance(fn_name, str) or not fn_name.strip():
+                            continue
+
+                        embedded_call_id, embedded_response_item_id = _split_responses_tool_id(
+                            tc.get("id")
+                        )
+                        call_id = tc.get("call_id")
+                        if not isinstance(call_id, str) or not call_id.strip():
+                            call_id = embedded_call_id
+                        if not isinstance(call_id, str) or not call_id.strip():
+                            if (
+                                isinstance(embedded_response_item_id, str)
+                                and embedded_response_item_id.startswith("fc_")
+                                and len(embedded_response_item_id) > len("fc_")
+                            ):
+                                call_id = f"call_{embedded_response_item_id[len('fc_'):]}"
+                            else:
+                                _raw_args = str(fn.get("arguments", "{}"))
+                                call_id = _deterministic_call_id(fn_name, _raw_args, len(items))
+                        call_id = call_id.strip()
+
+                        arguments = fn.get("arguments", "{}")
+                        if isinstance(arguments, dict):
+                            arguments = json.dumps(arguments, ensure_ascii=False)
+                        elif not isinstance(arguments, str):
+                            arguments = str(arguments)
+                        arguments = arguments.strip() or "{}"
+
+                        items.append({
+                            "type": "function_call",
+                            "call_id": call_id,
+                            "name": fn_name,
+                            "arguments": arguments,
+                        })
+                continue
+
+            # Non-assistant (user) role: emit multimodal parts when present,
+            # otherwise fall back to the text payload.
+            if content_parts:
+                items.append({"role": role, "content": content_parts})
+            else:
+                items.append({"role": role, "content": content_text})
+            continue
+
+        if role == "tool":
+            raw_tool_call_id = msg.get("tool_call_id")
+            call_id, _ = _split_responses_tool_id(raw_tool_call_id)
+            if not isinstance(call_id, str) or not call_id.strip():
+                if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip():
+                    call_id = raw_tool_call_id.strip()
+            if not isinstance(call_id, str) or not call_id.strip():
+                continue
+            items.append({
+                "type": "function_call_output",
+                "call_id": call_id,
+                "output": str(msg.get("content", "") or ""),
+            })
+
+    return items
+
+
+# ---------------------------------------------------------------------------
+# Input preflight / validation
+# ---------------------------------------------------------------------------
+
+def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
+    if not isinstance(raw_items, list):
+        raise ValueError("Codex Responses input must be a list of input items.")
+
+    normalized: List[Dict[str, Any]] = []
+    seen_ids: set = set()
+    for idx, item in enumerate(raw_items):
+        if not isinstance(item, dict):
+            raise ValueError(f"Codex Responses input[{idx}] must be an object.")
+
+        item_type = item.get("type")
+        if item_type == "function_call":
+            call_id = item.get("call_id")
+            name = item.get("name")
+            if not isinstance(call_id, str) or not call_id.strip():
+                raise ValueError(f"Codex Responses input[{idx}] function_call is missing call_id.")
+            if not isinstance(name, str) or not name.strip():
+                raise ValueError(f"Codex Responses input[{idx}] function_call is missing name.")
+
+            arguments = item.get("arguments", "{}")
+            if isinstance(arguments, dict):
+                arguments = json.dumps(arguments, ensure_ascii=False)
+            elif not isinstance(arguments, str):
+                arguments = str(arguments)
+            arguments = arguments.strip() or "{}"
+
+            normalized.append(
+                {
+                    "type": "function_call",
+                    "call_id": call_id.strip(),
+                    "name": name.strip(),
+                    "arguments": arguments,
+                }
+            )
+            continue
+
+        if item_type == "function_call_output":
+            call_id = item.get("call_id")
+            if not isinstance(call_id, str) or not call_id.strip():
+                raise ValueError(f"Codex Responses input[{idx}] function_call_output is missing call_id.")
+            output = item.get("output", "")
+            if output is None:
+                output = ""
+            if not isinstance(output, str):
+                output = str(output)
+
+            normalized.append(
+                {
+                    "type": "function_call_output",
+                    "call_id": call_id.strip(),
+                    "output": output,
+                }
+            )
+            continue
+
+        if item_type == "reasoning":
+            encrypted = item.get("encrypted_content")
+            if isinstance(encrypted, str) and encrypted:
+                item_id = item.get("id")
+                if isinstance(item_id, str) and item_id:
+                    if item_id in seen_ids:
+                        continue
+                    seen_ids.add(item_id)
+                reasoning_item = {"type": "reasoning", "encrypted_content": encrypted}
+                # Do NOT include the "id" in the outgoing item — with
+                # store=False (our default) the API tries to resolve the
+                # id server-side and returns 404.  The id is still used
+                # above for local deduplication via seen_ids.
+                summary = item.get("summary")
+                if isinstance(summary, list):
+                    reasoning_item["summary"] = summary
+                else:
+                    reasoning_item["summary"] = []
+                normalized.append(reasoning_item)
+            continue
+
+        role = item.get("role")
+        if role in {"user", "assistant"}:
+            content = item.get("content", "")
+            if content is None:
+                content = ""
+            if isinstance(content, list):
+                # Multimodal content from ``_chat_messages_to_responses_input``
+                # is already in Responses format (``input_text`` / ``input_image``).
+                # Validate each part and pass through.
+                validated: List[Dict[str, Any]] = []
+                for part_idx, part in enumerate(content):
+                    if isinstance(part, str):
+                        if part:
+                            validated.append({"type": "input_text", "text": part})
+                        continue
+                    if not isinstance(part, dict):
+                        raise ValueError(
+                            f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
+                        )
+                    ptype = str(part.get("type") or "").strip().lower()
+                    if ptype in {"input_text", "text", "output_text"}:
+                        text = part.get("text", "")
+                        if not isinstance(text, str):
+                            text = str(text or "")
+                        validated.append({"type": "input_text", "text": text})
+                    elif ptype in {"input_image", "image_url"}:
+                        image_ref = part.get("image_url", "")
+                        detail = part.get("detail")
+                        if isinstance(image_ref, dict):
+                            url = image_ref.get("url", "")
+                            detail = image_ref.get("detail", detail)
+                        else:
+                            url = image_ref
+                        if not isinstance(url, str):
+                            url = str(url or "")
+                        image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
+                        if isinstance(detail, str) and detail.strip():
+                            image_part["detail"] = detail.strip()
+                        validated.append(image_part)
+                    else:
+                        raise ValueError(
+                            f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
+                        )
+                normalized.append({"role": role, "content": validated})
+                continue
+            if not isinstance(content, str):
+                content = str(content)
+
+            normalized.append({"role": role, "content": content})
+            continue
+
+        raise ValueError(
+            f"Codex Responses input[{idx}] has unsupported item shape (type={item_type!r}, role={role!r})."
+        )
+
+    return normalized
+
+
+def _preflight_codex_api_kwargs(
+    api_kwargs: Any,
+    *,
+    allow_stream: bool = False,
+) -> Dict[str, Any]:
+    if not isinstance(api_kwargs, dict):
+        raise ValueError("Codex Responses request must be a dict.")
+
+    required = {"model", "instructions", "input"}
+    missing = [key for key in required if key not in api_kwargs]
+    if missing:
+        raise ValueError(f"Codex Responses request missing required field(s): {', '.join(sorted(missing))}.")
+
+    model = api_kwargs.get("model")
+    if not isinstance(model, str) or not model.strip():
+        raise ValueError("Codex Responses request 'model' must be a non-empty string.")
+    model = model.strip()
+
+    instructions = api_kwargs.get("instructions")
+    if instructions is None:
+        instructions = ""
+    if not isinstance(instructions, str):
+        instructions = str(instructions)
+    instructions = instructions.strip() or DEFAULT_AGENT_IDENTITY
+
+    normalized_input = _preflight_codex_input_items(api_kwargs.get("input"))
+
+    tools = api_kwargs.get("tools")
+    normalized_tools = None
+    if tools is not None:
+        if not isinstance(tools, list):
+            raise ValueError("Codex Responses request 'tools' must be a list when provided.")
+        normalized_tools = []
+        for idx, tool in enumerate(tools):
+            if not isinstance(tool, dict):
+                raise ValueError(f"Codex Responses tools[{idx}] must be an object.")
+            if tool.get("type") != "function":
+                raise ValueError(f"Codex Responses tools[{idx}] has unsupported type {tool.get('type')!r}.")
+
+            name = tool.get("name")
+            parameters = tool.get("parameters")
+            if not isinstance(name, str) or not name.strip():
+                raise ValueError(f"Codex Responses tools[{idx}] is missing a valid name.")
+            if not isinstance(parameters, dict):
+                raise ValueError(f"Codex Responses tools[{idx}] is missing valid parameters.")
+
+            description = tool.get("description", "")
+            if description is None:
+                description = ""
+            if not isinstance(description, str):
+                description = str(description)
+
+            strict = tool.get("strict", False)
+            if not isinstance(strict, bool):
+                strict = bool(strict)
+
+            normalized_tools.append(
+                {
+                    "type": "function",
+                    "name": name.strip(),
+                    "description": description,
+                    "strict": strict,
+                    "parameters": parameters,
+                }
+            )
+
+    store = api_kwargs.get("store", False)
+    if store is not False:
+        raise ValueError("Codex Responses contract requires 'store' to be false.")
+
+    allowed_keys = {
+        "model", "instructions", "input", "tools", "store",
+        "reasoning", "include", "max_output_tokens", "temperature",
+        "tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
+        "extra_headers",
+    }
+    normalized: Dict[str, Any] = {
+        "model": model,
+        "instructions": instructions,
+        "input": normalized_input,
+        "store": False,
+    }
+    if normalized_tools is not None:
+        normalized["tools"] = normalized_tools
+
+    # Pass through reasoning config
+    reasoning = api_kwargs.get("reasoning")
+    if isinstance(reasoning, dict):
+        normalized["reasoning"] = reasoning
+    include = api_kwargs.get("include")
+    if isinstance(include, list):
+        normalized["include"] = include
+    service_tier = api_kwargs.get("service_tier")
+    if isinstance(service_tier, str) and service_tier.strip():
+        normalized["service_tier"] = service_tier.strip()
+
+    # Pass through max_output_tokens and temperature
+    max_output_tokens = api_kwargs.get("max_output_tokens")
+    if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
+        normalized["max_output_tokens"] = int(max_output_tokens)
+    temperature = api_kwargs.get("temperature")
+    if isinstance(temperature, (int, float)):
+        normalized["temperature"] = float(temperature)
+
+    # Pass through tool_choice, parallel_tool_calls, prompt_cache_key
+    for passthrough_key in ("tool_choice", "parallel_tool_calls", "prompt_cache_key"):
+        val = api_kwargs.get(passthrough_key)
+        if val is not None:
+            normalized[passthrough_key] = val
+
+    extra_headers = api_kwargs.get("extra_headers")
+    if extra_headers is not None:
+        if not isinstance(extra_headers, dict):
+            raise ValueError("Codex Responses request 'extra_headers' must be an object.")
+        normalized_headers: Dict[str, str] = {}
+        for key, value in extra_headers.items():
+            if not isinstance(key, str) or not key.strip():
+                raise ValueError("Codex Responses request 'extra_headers' keys must be non-empty strings.")
+            if value is None:
+                continue
+            normalized_headers[key.strip()] = str(value)
+        if normalized_headers:
+            normalized["extra_headers"] = normalized_headers
+
+    if allow_stream:
+        stream = api_kwargs.get("stream")
+        if stream is not None and stream is not True:
+            raise ValueError("Codex Responses 'stream' must be true when set.")
+        if stream is True:
+            normalized["stream"] = True
+        allowed_keys.add("stream")
+    elif "stream" in api_kwargs:
+        raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.")
+
+    unexpected = sorted(key for key in api_kwargs if key not in allowed_keys)
+    if unexpected:
+        raise ValueError(
+            f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}."
+        )
+
+    return normalized
+
+
+# ---------------------------------------------------------------------------
+# Response extraction helpers
+# ---------------------------------------------------------------------------
+
+def _extract_responses_message_text(item: Any) -> str:
+    """Extract assistant text from a Responses message output item."""
+    content = getattr(item, "content", None)
+    if not isinstance(content, list):
+        return ""
+
+    chunks: List[str] = []
+    for part in content:
+        ptype = getattr(part, "type", None)
+        if ptype not in {"output_text", "text"}:
+            continue
+        text = getattr(part, "text", None)
+        if isinstance(text, str) and text:
+            chunks.append(text)
+    return "".join(chunks).strip()
+
+
+def _extract_responses_reasoning_text(item: Any) -> str:
+    """Extract a compact reasoning text from a Responses reasoning item."""
+    summary = getattr(item, "summary", None)
+    if isinstance(summary, list):
+        chunks: List[str] = []
+        for part in summary:
+            text = getattr(part, "text", None)
+            if isinstance(text, str) and text:
+                chunks.append(text)
+        if chunks:
+            return "\n".join(chunks).strip()
+    text = getattr(item, "text", None)
+    if isinstance(text, str) and text:
+        return text.strip()
+    return ""
+
+
+# ---------------------------------------------------------------------------
+# Full response normalization
+# ---------------------------------------------------------------------------
+
+def _normalize_codex_response(response: Any) -> tuple[Any, str]:
+    """Normalize a Responses API object to an assistant_message-like object."""
+    output = getattr(response, "output", None)
+    if not isinstance(output, list) or not output:
+        # The Codex backend can return empty output when the answer was
+        # delivered entirely via stream events. Check output_text as a
+        # last-resort fallback before raising.
+        out_text = getattr(response, "output_text", None)
+        if isinstance(out_text, str) and out_text.strip():
+            logger.debug(
+                "Codex response has empty output but output_text is present (%d chars); "
+                "synthesizing output item.", len(out_text.strip()),
+            )
+            output = [SimpleNamespace(
+                type="message", role="assistant", status="completed",
+                content=[SimpleNamespace(type="output_text", text=out_text.strip())],
+            )]
+            response.output = output
+        else:
+            raise RuntimeError("Responses API returned no output items")
+
+    response_status = getattr(response, "status", None)
+    if isinstance(response_status, str):
+        response_status = response_status.strip().lower()
+    else:
+        response_status = None
+
+    if response_status in {"failed", "cancelled"}:
+        error_obj = getattr(response, "error", None)
+        if isinstance(error_obj, dict):
+            error_msg = error_obj.get("message") or str(error_obj)
+        else:
+            error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
+        raise RuntimeError(error_msg)
+
+    content_parts: List[str] = []
+    reasoning_parts: List[str] = []
+    reasoning_items_raw: List[Dict[str, Any]] = []
+    tool_calls: List[Any] = []
+    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
+    saw_commentary_phase = False
+    saw_final_answer_phase = False
+
+    for item in output:
+        item_type = getattr(item, "type", None)
+        item_status = getattr(item, "status", None)
+        if isinstance(item_status, str):
+            item_status = item_status.strip().lower()
+        else:
+            item_status = None
+
+        if item_status in {"queued", "in_progress", "incomplete"}:
+            has_incomplete_items = True
+
+        if item_type == "message":
+            item_phase = getattr(item, "phase", None)
+            if isinstance(item_phase, str):
+                normalized_phase = item_phase.strip().lower()
+                if normalized_phase in {"commentary", "analysis"}:
+                    saw_commentary_phase = True
+                elif normalized_phase in {"final_answer", "final"}:
+                    saw_final_answer_phase = True
+            message_text = _extract_responses_message_text(item)
+            if message_text:
+                content_parts.append(message_text)
+        elif item_type == "reasoning":
+            reasoning_text = _extract_responses_reasoning_text(item)
+            if reasoning_text:
+                reasoning_parts.append(reasoning_text)
+            # Capture the full reasoning item for multi-turn continuity.
+            # encrypted_content is an opaque blob the API needs back on
+            # subsequent turns to maintain coherent reasoning chains.
+            encrypted = getattr(item, "encrypted_content", None)
+            if isinstance(encrypted, str) and encrypted:
+                raw_item = {"type": "reasoning", "encrypted_content": encrypted}
+                item_id = getattr(item, "id", None)
+                if isinstance(item_id, str) and item_id:
+                    raw_item["id"] = item_id
+                # Capture summary — required by the API when replaying reasoning items
+                summary = getattr(item, "summary", None)
+                if isinstance(summary, list):
+                    raw_summary = []
+                    for part in summary:
+                        text = getattr(part, "text", None)
+                        if isinstance(text, str):
+                            raw_summary.append({"type": "summary_text", "text": text})
+                    raw_item["summary"] = raw_summary
+                reasoning_items_raw.append(raw_item)
+        elif item_type == "function_call":
+            if item_status in {"queued", "in_progress", "incomplete"}:
+                continue
+            fn_name = getattr(item, "name", "") or ""
+            arguments = getattr(item, "arguments", "{}")
+            if not isinstance(arguments, str):
+                arguments = json.dumps(arguments, ensure_ascii=False)
+            raw_call_id = getattr(item, "call_id", None)
+            raw_item_id = getattr(item, "id", None)
+            embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
+            call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
+            if not isinstance(call_id, str) or not call_id.strip():
+                call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
+            call_id = call_id.strip()
+            response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
+            response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
+            tool_calls.append(SimpleNamespace(
+                id=call_id,
+                call_id=call_id,
+                response_item_id=response_item_id,
+                type="function",
+                function=SimpleNamespace(name=fn_name, arguments=arguments),
+            ))
+        elif item_type == "custom_tool_call":
+            fn_name = getattr(item, "name", "") or ""
+            arguments = getattr(item, "input", "{}")
+            if not isinstance(arguments, str):
+                arguments = json.dumps(arguments, ensure_ascii=False)
+            raw_call_id = getattr(item, "call_id", None)
+            raw_item_id = getattr(item, "id", None)
+            embedded_call_id, _ = _split_responses_tool_id(raw_item_id)
+            call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id
+            if not isinstance(call_id, str) or not call_id.strip():
+                call_id = _deterministic_call_id(fn_name, arguments, len(tool_calls))
+            call_id = call_id.strip()
+            response_item_id = raw_item_id if isinstance(raw_item_id, str) else None
+            response_item_id = _derive_responses_function_call_id(call_id, response_item_id)
+            tool_calls.append(SimpleNamespace(
+                id=call_id,
+                call_id=call_id,
+                response_item_id=response_item_id,
+                type="function",
+                function=SimpleNamespace(name=fn_name, arguments=arguments),
+            ))
+
+    final_text = "\n".join([p for p in content_parts if p]).strip()
+    if not final_text and hasattr(response, "output_text"):
+        out_text = getattr(response, "output_text", "")
+        if isinstance(out_text, str):
+            final_text = out_text.strip()
+
+    assistant_message = SimpleNamespace(
+        content=final_text,
+        tool_calls=tool_calls,
+        reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
+        reasoning_content=None,
+        reasoning_details=None,
+        codex_reasoning_items=reasoning_items_raw or None,
+    )
+
+    if tool_calls:
+        finish_reason = "tool_calls"
+    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
+        finish_reason = "incomplete"
+    elif reasoning_items_raw and not final_text:
+        # Response contains only reasoning (encrypted thinking state) with
+        # no visible content or tool calls.  The model is still thinking and
+        # needs another turn to produce the actual answer.  Marking this as
+        # "stop" would send it into the empty-content retry loop which burns
+        # 3 retries then fails — treat it as incomplete instead so the Codex
+        # continuation path handles it correctly.
+        finish_reason = "incomplete"
+    else:
+        finish_reason = "stop"
+    return assistant_message, finish_reason
@@ -31,6 +31,7 @@ from agent.model_metadata import (
    get_model_context_length,
    estimate_messages_tokens_rough,
 )
+from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

@@ -550,11 +551,15 @@ class ContextCompressor(ContextEngine):
        Includes tool call arguments and result content (up to
        ``_CONTENT_MAX`` chars per message) so the summarizer can preserve
        specific details like file paths, commands, and outputs.
+
+        All content is redacted before serialization to prevent secrets
+        (API keys, tokens, passwords) from leaking into the summary that
+        gets sent to the auxiliary model and persisted across compactions.
        """
        parts = []
        for msg in turns:
            role = msg.get("role", "unknown")
-            content = msg.get("content") or ""
+            content = redact_sensitive_text(msg.get("content") or "")

            # Tool results: keep enough content for the summarizer
            if role == "tool":
@@ -575,7 +580,7 @@ class ContextCompressor(ContextEngine):
                        if isinstance(tc, dict):
                            fn = tc.get("function", {})
                            name = fn.get("name", "?")
-                            args = fn.get("arguments", "")
+                            args = redact_sensitive_text(fn.get("arguments", ""))
                            # Truncate long arguments but keep enough for context
                            if len(args) > self._TOOL_ARGS_MAX:
                                args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -635,7 +640,11 @@ class ContextCompressor(ContextEngine):
            "only output the structured summary. "
            "Do NOT include any preamble, greeting, or prefix. "
            "Write the summary in the same language the user was using in the "
-            "conversation — do not translate or switch to English."
+            "conversation — do not translate or switch to English. "
+            "NEVER include API keys, tokens, passwords, secrets, credentials, "
+            "or connection strings in the summary — replace any that appear "
+            "with [REDACTED]. Note that the user had credentials present, but "
+            "do not preserve their values."
        )

        # Shared structured template (used by both paths).
@@ -692,7 +701,7 @@ Be specific with file paths, commands, line numbers, and results.]
 [What remains to be done — framed as context, not instructions]

 ## Critical Context
-[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
+[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]

 Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

@@ -732,7 +741,7 @@ Use this exact structure:
            prompt += f"""

 FOCUS TOPIC: "{focus_topic}"
-The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""

        try:
            call_kwargs = {
@@ -755,7 +764,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Handle cases where content is not a string (e.g., dict from llama.cpp)
            if not isinstance(content, str):
                content = str(content) if content else ""
-            summary = content.strip()
+            # Redact the summary output as well — the summarizer LLM may
+            # ignore prompt instructions and echo back secrets verbatim.
+            summary = redact_sensitive_text(content.strip())
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
@@ -796,7 +807,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(messages, summary_budget)  # retry immediately
+                return self._generate_summary(turns_to_summarize)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
@@ -21,6 +21,9 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Any

+from agent.file_safety import get_read_block_error, is_write_denied
+from agent.redact import redact_sensitive_text
+
 ACP_MARKER_BASE_URL = "acp://copilot"
 _DEFAULT_TIMEOUT_SECONDS = 900.0

@@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    }


+def _permission_denied(message_id: Any) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "result": {
+            "outcome": {
+                "outcome": "cancelled",
+            }
+        },
+    }
+
+
 def _format_messages_as_prompt(
    messages: list[dict[str, Any]],
    model: str | None = None,
@@ -386,6 +401,8 @@ class CopilotACPClient:
        stderr_tail: deque[str] = deque(maxlen=40)

        def _stdout_reader() -> None:
+            if proc.stdout is None:
+                return
            for line in proc.stdout:
                try:
                    inbox.put(json.loads(line))
@@ -533,18 +550,13 @@ class CopilotACPClient:
        params = msg.get("params") or {}

        if method == "session/request_permission":
-            response = {
-                "jsonrpc": "2.0",
-                "id": message_id,
-                "result": {
-                    "outcome": {
-                        "outcome": "allow_once",
-                    }
-                },
-            }
+            response = _permission_denied(message_id)
        elif method == "fs/read_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                block_error = get_read_block_error(str(path))
+                if block_error:
+                    raise PermissionError(block_error)
                content = path.read_text() if path.exists() else ""
                line = params.get("line")
                limit = params.get("limit")
@@ -553,6 +565,8 @@ class CopilotACPClient:
                    start = line - 1
                    end = start + limit if isinstance(limit, int) and limit > 0 else None
                    content = "".join(lines[start:end])
+                if content:
+                    content = redact_sensitive_text(content)
                response = {
                    "jsonrpc": "2.0",
                    "id": message_id,
@@ -565,6 +579,10 @@ class CopilotACPClient:
        elif method == "fs/write_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+                if is_write_denied(str(path)):
+                    raise PermissionError(
+                        f"Write denied: '{path}' is a protected system/credential file."
+                    )
                path.parent.mkdir(parents=True, exist_ok=True)
                path.write_text(str(params.get("content") or ""))
                response = {
@@ -983,6 +983,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
    active_sources: Set[str] = set()
    auth_store = _load_auth_store()

+    # Shared suppression gate — used at every upsert site so
+    # `hermes auth remove <provider> <N>` is stable across all source types.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_suppressed
+    except ImportError:
+        def _is_suppressed(_p, _s):  # type: ignore[misc]
+            return False
+
    if provider == "anthropic":
        # Only auto-discover external credentials (Claude Code, Hermes PKCE)
        # when the user has explicitly configured anthropic as their provider.
@@ -1002,13 +1010,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            ("claude_code", read_claude_code_credentials()),
        ):
            if creds and creds.get("accessToken"):
-                # Check if user explicitly removed this source
-                try:
-                    from hermes_cli.auth import is_source_suppressed
-                    if is_source_suppressed(provider, source_name):
-                        continue
-                except ImportError:
-                    pass
+                if _is_suppressed(provider, source_name):
+                    continue
                active_sources.add(source_name)
                changed |= _upsert_entry(
                    entries,
@@ -1026,7 +1029,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup

    elif provider == "nous":
        state = _load_provider_state(auth_store, "nous")
-        if state:
+        if state and not _is_suppressed(provider, "device_code"):
            active_sources.add("device_code")
            # Prefer a user-supplied label embedded in the singleton state
            # (set by persist_nous_credentials(label=...) when the user ran
@@ -1067,20 +1070,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token, source = resolve_copilot_token()
            if token:
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
-                active_sources.add(source_name)
-                pconfig = PROVIDER_REGISTRY.get(provider)
-                changed |= _upsert_entry(
-                    entries,
-                    provider,
-                    source_name,
-                    {
-                        "source": source_name,
-                        "auth_type": AUTH_TYPE_API_KEY,
-                        "access_token": token,
-                        "base_url": pconfig.inference_base_url if pconfig else "",
-                        "label": source,
-                    },
-                )
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    pconfig = PROVIDER_REGISTRY.get(provider)
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_API_KEY,
+                            "access_token": token,
+                            "base_url": pconfig.inference_base_url if pconfig else "",
+                            "label": source,
+                        },
+                    )
        except Exception as exc:
            logger.debug("Copilot token seed failed: %s", exc)

@@ -1096,20 +1100,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token = creds.get("api_key", "")
            if token:
                source_name = creds.get("source", "qwen-cli")
-                active_sources.add(source_name)
-                changed |= _upsert_entry(
-                    entries,
-                    provider,
-                    source_name,
-                    {
-                        "source": source_name,
-                        "auth_type": AUTH_TYPE_OAUTH,
-                        "access_token": token,
-                        "expires_at_ms": creds.get("expires_at_ms"),
-                        "base_url": creds.get("base_url", ""),
-                        "label": creds.get("auth_file", source_name),
-                    },
-                )
+                if not _is_suppressed(provider, source_name):
+                    active_sources.add(source_name)
+                    changed |= _upsert_entry(
+                        entries,
+                        provider,
+                        source_name,
+                        {
+                            "source": source_name,
+                            "auth_type": AUTH_TYPE_OAUTH,
+                            "access_token": token,
+                            "expires_at_ms": creds.get("expires_at_ms"),
+                            "base_url": creds.get("base_url", ""),
+                            "label": creds.get("auth_file", source_name),
+                        },
+                    )
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

@@ -1118,13 +1123,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # the device_code source as suppressed so it won't be re-seeded from
        # the Hermes auth store.  Without this gate the removal is instantly
        # undone on the next load_pool() call.
-        codex_suppressed = False
-        try:
-            from hermes_cli.auth import is_source_suppressed
-            codex_suppressed = is_source_suppressed(provider, "device_code")
-        except ImportError:
-            pass
-        if codex_suppressed:
+        if _is_suppressed(provider, "device_code"):
            return changed, active_sources

        state = _load_provider_state(auth_store, "openai-codex")
@@ -1158,10 +1157,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
    changed = False
    active_sources: Set[str] = set()
+    # Honour user suppression — `hermes auth remove <provider> <N>` for an
+    # env-seeded credential marks the env:<VAR> source as suppressed so it
+    # won't be re-seeded from the user's shell environment or ~/.hermes/.env.
+    # Without this gate the removal is silently undone on the next
+    # load_pool() call whenever the var is still exported by the shell.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
+    except ImportError:
+        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
+            return False
    if provider == "openrouter":
        token = os.getenv("OPENROUTER_API_KEY", "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
+            if _is_source_suppressed(provider, source):
+                return changed, active_sources
            active_sources.add(source)
            changed |= _upsert_entry(
                entries,
@@ -1198,6 +1209,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        if not token:
            continue
        source = f"env:{env_var}"
+        if _is_source_suppressed(provider, source):
+            continue
        active_sources.add(source)
        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
@@ -1242,6 +1255,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
    changed = False
    active_sources: Set[str] = set()

+    # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
+    try:
+        from hermes_cli.auth import is_source_suppressed as _is_suppressed
+    except ImportError:
+        def _is_suppressed(_p, _s):  # type: ignore[misc]
+            return False
+
    # Seed from the custom_providers config entry's api_key field
    cp_config = _get_custom_provider_config(pool_key)
    if cp_config:
@@ -1250,19 +1270,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
        name = str(cp_config.get("name") or "").strip()
        if api_key:
            source = f"config:{name}"
-            active_sources.add(source)
-            changed |= _upsert_entry(
-                entries,
-                pool_key,
-                source,
-                {
-                    "source": source,
-                    "auth_type": AUTH_TYPE_API_KEY,
-                    "access_token": api_key,
-                    "base_url": base_url,
-                    "label": name or source,
-                },
-            )
+            if not _is_suppressed(pool_key, source):
+                active_sources.add(source)
+                changed |= _upsert_entry(
+                    entries,
+                    pool_key,
+                    source,
+                    {
+                        "source": source,
+                        "auth_type": AUTH_TYPE_API_KEY,
+                        "access_token": api_key,
+                        "base_url": base_url,
+                        "label": name or source,
+                    },
+                )

    # Seed from model.api_key if model.provider=='custom' and model.base_url matches
    try:
@@ -1282,19 +1303,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
                matched_key = get_custom_provider_pool_key(model_base_url)
                if matched_key == pool_key:
                    source = "model_config"
-                    active_sources.add(source)
-                    changed |= _upsert_entry(
-                        entries,
-                        pool_key,
-                        source,
-                        {
-                            "source": source,
-                            "auth_type": AUTH_TYPE_API_KEY,
-                            "access_token": model_api_key,
-                            "base_url": model_base_url,
-                            "label": "model_config",
-                        },
-                    )
+                    if not _is_suppressed(pool_key, source):
+                        active_sources.add(source)
+                        changed |= _upsert_entry(
+                            entries,
+                            pool_key,
+                            source,
+                            {
+                                "source": source,
+                                "auth_type": AUTH_TYPE_API_KEY,
+                                "access_token": model_api_key,
+                                "base_url": model_base_url,
+                                "label": "model_config",
+                            },
+                        )
    except Exception:
        pass

@@ -0,0 +1,401 @@
+"""Unified removal contract for every credential source Hermes reads from.
+
+Hermes seeds its credential pool from many places:
+
+    env:<VAR>     — os.environ / ~/.hermes/.env
+    claude_code   — ~/.claude/.credentials.json
+    hermes_pkce   — ~/.hermes/.anthropic_oauth.json
+    device_code   — auth.json providers.<provider> (nous, openai-codex, ...)
+    qwen-cli      — ~/.qwen/oauth_creds.json
+    gh_cli        — gh auth token
+    config:<name> — custom_providers config entry
+    model_config  — model.api_key when model.provider == "custom"
+    manual        — user ran `hermes auth add`
+
+Each source has its own reader inside ``agent.credential_pool._seed_from_*``
+(which keep their existing shape — we haven't restructured them).  What we
+unify here is **removal**:
+
+    ``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
+
+Before this module, every source had an ad-hoc removal branch in
+``auth_remove_command``, and several sources had no branch at all — so
+``auth remove`` silently reverted on the next ``load_pool()`` call for
+qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
+custom-config sources.
+
+Now every source registers a ``RemovalStep`` that does exactly three things
+in the same shape:
+
+    1. Clean up whatever externally-readable state the source reads from
+       (.env line, auth.json block, OAuth file, etc.)
+    2. Suppress the ``(provider, source_id)`` in auth.json so the
+       corresponding ``_seed_from_*`` branch skips the upsert on re-load
+    3. Return ``RemovalResult`` describing what was cleaned and any
+       diagnostic hints the user should see (shell-exported env vars,
+       external credential files we deliberately don't delete, etc.)
+
+Adding a new credential source is:
+    - wire up a reader branch in ``_seed_from_*`` (existing pattern)
+    - gate that reader behind ``is_source_suppressed(provider, source_id)``
+    - register a ``RemovalStep`` here
+
+No more per-source if/elif chain in ``auth_remove_command``.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Callable, List, Optional
+
+
+@dataclass
+class RemovalResult:
+    """Outcome of removing a credential source.
+
+    Attributes:
+        cleaned: Short strings describing external state that was actually
+            mutated (``"Cleared XAI_API_KEY from .env"``,
+            ``"Cleared openai-codex OAuth tokens from auth store"``).
+            Printed as plain lines to the user.
+        hints: Diagnostic lines ABOUT state the user may need to clean up
+            themselves or is deliberately left intact (shell-exported env
+            var, Claude Code credential file we don't delete, etc.).
+            Printed as plain lines to the user.  Always non-destructive.
+        suppress: Whether to call ``suppress_credential_source`` after
+            cleanup so future ``load_pool`` calls skip this source.
+            Default True — almost every source needs this to stay sticky.
+            The only legitimate False is ``manual`` entries, which aren't
+            seeded from anywhere external.
+    """
+
+    cleaned: List[str] = field(default_factory=list)
+    hints: List[str] = field(default_factory=list)
+    suppress: bool = True
+
+
+@dataclass
+class RemovalStep:
+    """How to remove one specific credential source cleanly.
+
+    Attributes:
+        provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
+            Special value ``"*"`` means "matches any provider" — used for
+            sources like ``manual`` that aren't provider-specific.
+        source_id: Source identifier as it appears in
+            ``PooledCredential.source``.  May be a literal (``"claude_code"``)
+            or a prefix pattern matched via ``match_fn``.
+        match_fn: Optional predicate overriding literal ``source_id``
+            matching.  Gets the removed entry's source string.  Used for
+            ``env:*`` (any env-seeded key), ``config:*`` (any custom
+            pool), and ``manual:*`` (any manual-source variant).
+        remove_fn: ``(provider, removed_entry) -> RemovalResult``.  Does the
+            actual cleanup and returns what happened for the user.
+        description: One-line human-readable description for docs / tests.
+    """
+
+    provider: str
+    source_id: str
+    remove_fn: Callable[..., RemovalResult]
+    match_fn: Optional[Callable[[str], bool]] = None
+    description: str = ""
+
+    def matches(self, provider: str, source: str) -> bool:
+        if self.provider != "*" and self.provider != provider:
+            return False
+        if self.match_fn is not None:
+            return self.match_fn(source)
+        return source == self.source_id
+
+
+_REGISTRY: List[RemovalStep] = []
+
+
+def register(step: RemovalStep) -> RemovalStep:
+    _REGISTRY.append(step)
+    return step
+
+
+def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
+    """Return the first matching RemovalStep, or None if unregistered.
+
+    Unregistered sources fall through to the default remove path in
+    ``auth_remove_command``: the pool entry is already gone (that happens
+    before dispatch), no external cleanup, no suppression.  This is the
+    correct behaviour for ``manual`` entries — they were only ever stored
+    in the pool, nothing external to clean up.
+    """
+    for step in _REGISTRY:
+        if step.matches(provider, source):
+            return step
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Individual RemovalStep implementations — one per source.
+# ---------------------------------------------------------------------------
+# Each remove_fn is intentionally small and single-purpose.  Adding a new
+# credential source means adding ONE entry here — no other changes to
+# auth_remove_command.
+
+
+def _remove_env_source(provider: str, removed) -> RemovalResult:
+    """env:<VAR> — the most common case.
+
+    Handles three user situations:
+      1. Var lives only in ~/.hermes/.env  → clear it
+      2. Var lives only in the user's shell (shell profile, systemd
+         EnvironmentFile, launchd plist) → hint them where to unset it
+      3. Var lives in both → clear from .env, hint about shell
+    """
+    from hermes_cli.config import get_env_path, remove_env_value
+
+    result = RemovalResult()
+    env_var = removed.source[len("env:"):]
+    if not env_var:
+        return result
+
+    # Detect shell vs .env BEFORE remove_env_value pops os.environ.
+    env_in_process = bool(os.getenv(env_var))
+    env_in_dotenv = False
+    try:
+        env_path = get_env_path()
+        if env_path.exists():
+            env_in_dotenv = any(
+                line.strip().startswith(f"{env_var}=")
+                for line in env_path.read_text(errors="replace").splitlines()
+            )
+    except OSError:
+        pass
+    shell_exported = env_in_process and not env_in_dotenv
+
+    cleared = remove_env_value(env_var)
+    if cleared:
+        result.cleaned.append(f"Cleared {env_var} from .env")
+
+    if shell_exported:
+        result.hints.extend([
+            f"Note: {env_var} is still set in your shell environment "
+            f"(not in ~/.hermes/.env).",
+            "  Unset it there (shell profile, systemd EnvironmentFile, "
+            "launchd plist, etc.) or it will keep being visible to Hermes.",
+            f"  The pool entry is now suppressed — Hermes will ignore "
+            f"{env_var} until you run `hermes auth add {provider}`.",
+        ])
+    else:
+        result.hints.append(
+            f"Suppressed env:{env_var} — it will not be re-seeded even "
+            f"if the variable is re-exported later."
+        )
+    return result
+
+
+def _remove_claude_code(provider: str, removed) -> RemovalResult:
+    """~/.claude/.credentials.json is owned by Claude Code itself.
+
+    We don't delete it — the user's Claude Code install still needs to
+    work.  We just suppress it so Hermes stops reading it.
+    """
+    return RemovalResult(hints=[
+        "Suppressed claude_code credential — it will not be re-seeded.",
+        "Note: Claude Code credentials still live in ~/.claude/.credentials.json",
+        "Run `hermes auth add anthropic` to re-enable if needed.",
+    ])
+
+
+def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
+    """~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
+    from hermes_constants import get_hermes_home
+
+    result = RemovalResult()
+    oauth_file = get_hermes_home() / ".anthropic_oauth.json"
+    if oauth_file.exists():
+        try:
+            oauth_file.unlink()
+            result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
+        except OSError as exc:
+            result.hints.append(f"Could not delete {oauth_file}: {exc}")
+    return result
+
+
+def _clear_auth_store_provider(provider: str) -> bool:
+    """Delete auth_store.providers[provider].  Returns True if deleted."""
+    from hermes_cli.auth import (
+        _auth_store_lock,
+        _load_auth_store,
+        _save_auth_store,
+    )
+
+    with _auth_store_lock():
+        auth_store = _load_auth_store()
+        providers_dict = auth_store.get("providers")
+        if isinstance(providers_dict, dict) and provider in providers_dict:
+            del providers_dict[provider]
+            _save_auth_store(auth_store)
+            return True
+    return False
+
+
+def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
+    """Nous OAuth lives in auth.json providers.nous — clear it and suppress.
+
+    We suppress in addition to clearing because nothing else stops the
+    user's next `hermes login` run from writing providers.nous again
+    before they decide to.  Suppression forces them to go through
+    `hermes auth add nous` to re-engage, which is the documented re-add
+    path and clears the suppression atomically.
+    """
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    return result
+
+
+def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
+    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
+
+    refresh_codex_oauth_pure() writes both every time, so clearing only
+    the Hermes auth store is not enough — _seed_from_singletons() would
+    re-import from ~/.codex/auth.json on the next load_pool() call and
+    the removal would be instantly undone.  We suppress instead of
+    deleting Codex CLI's file, so the Codex CLI itself keeps working.
+
+    The canonical source name in ``_seed_from_singletons`` is
+    ``"device_code"`` (no prefix).  Entries may show up in the pool as
+    either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
+    via ``hermes auth add openai-codex``), but in both cases the re-seed
+    gate lives at the ``"device_code"`` suppression key.  We suppress
+    that canonical key here; the central dispatcher also suppresses
+    ``removed.source`` which is fine — belt-and-suspenders, idempotent.
+    """
+    from hermes_cli.auth import suppress_credential_source
+
+    result = RemovalResult()
+    if _clear_auth_store_provider(provider):
+        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
+    # Suppress the canonical re-seed source, not just whatever source the
+    # removed entry had.  Otherwise `manual:device_code` removals wouldn't
+    # block the `device_code` re-seed path.
+    suppress_credential_source(provider, "device_code")
+    result.hints.extend([
+        "Suppressed openai-codex device_code source — it will not be re-seeded.",
+        "Note: Codex CLI credentials still live in ~/.codex/auth.json",
+        "Run `hermes auth add openai-codex` to re-enable if needed.",
+    ])
+    return result
+
+
+def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
+    """~/.qwen/oauth_creds.json is owned by the Qwen CLI.
+
+    Same pattern as claude_code — suppress, don't delete.  The user's
+    Qwen CLI install still reads from that file.
+    """
+    return RemovalResult(hints=[
+        "Suppressed qwen-cli credential — it will not be re-seeded.",
+        "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
+        "Run `hermes auth add qwen-oauth` to re-enable if needed.",
+    ])
+
+
+def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
+    """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
+
+    Copilot is special: the same token can be seeded as multiple source
+    entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
+    ``_seed_from_env``), so removing one entry without suppressing the
+    others lets the duplicates resurrect.  We suppress ALL known copilot
+    sources here so removal is stable regardless of which entry the
+    user clicked.
+
+    We don't touch the user's gh CLI or shell state — just suppress so
+    Hermes stops picking the token up.
+    """
+    # Suppress ALL copilot source variants up-front so no path resurrects
+    # the pool entry.  The central dispatcher in auth_remove_command will
+    # ALSO suppress removed.source, but it's idempotent so double-calling
+    # is harmless.
+    from hermes_cli.auth import suppress_credential_source
+    suppress_credential_source(provider, "gh_cli")
+    for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
+        suppress_credential_source(provider, f"env:{env_var}")
+
+    return RemovalResult(hints=[
+        "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
+        "Note: Your gh CLI / shell environment is unchanged.",
+        "Run `hermes auth add copilot` to re-enable if needed.",
+    ])
+
+
+def _remove_custom_config(provider: str, removed) -> RemovalResult:
+    """Custom provider pools are seeded from custom_providers config or
+    model.api_key.  Both are in config.yaml — modifying that from here
+    is more invasive than suppression.  We suppress; the user can edit
+    config.yaml if they want to remove the key from disk entirely.
+    """
+    source_label = removed.source
+    return RemovalResult(hints=[
+        f"Suppressed {source_label} — it will not be re-seeded.",
+        "Note: The underlying value in config.yaml is unchanged.  Edit it "
+        "directly if you want to remove the credential from disk.",
+    ])
+
+
+def _register_all_sources() -> None:
+    """Called once on module import.
+
+    ORDER MATTERS — ``find_removal_step`` returns the first match.  Put
+    provider-specific steps before the generic ``env:*`` step so that e.g.
+    copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
+    doesn't touch the user's shell), not the generic env-var removal
+    (which would try to clear .env).
+    """
+    register(RemovalStep(
+        provider="copilot", source_id="gh_cli",
+        match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
+        remove_fn=_remove_copilot_gh,
+        description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
+    ))
+    register(RemovalStep(
+        provider="*", source_id="env:",
+        match_fn=lambda src: src.startswith("env:"),
+        remove_fn=_remove_env_source,
+        description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
+    ))
+    register(RemovalStep(
+        provider="anthropic", source_id="claude_code",
+        remove_fn=_remove_claude_code,
+        description="~/.claude/.credentials.json",
+    ))
+    register(RemovalStep(
+        provider="anthropic", source_id="hermes_pkce",
+        remove_fn=_remove_hermes_pkce,
+        description="~/.hermes/.anthropic_oauth.json",
+    ))
+    register(RemovalStep(
+        provider="nous", source_id="device_code",
+        remove_fn=_remove_nous_device_code,
+        description="auth.json providers.nous",
+    ))
+    register(RemovalStep(
+        provider="openai-codex", source_id="device_code",
+        match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
+        remove_fn=_remove_codex_device_code,
+        description="auth.json providers.openai-codex + ~/.codex/auth.json",
+    ))
+    register(RemovalStep(
+        provider="qwen-oauth", source_id="qwen-cli",
+        remove_fn=_remove_qwen_cli,
+        description="~/.qwen/oauth_creds.json",
+    ))
+    register(RemovalStep(
+        provider="*", source_id="config:",
+        match_fn=lambda src: src.startswith("config:") or src == "model_config",
+        remove_fn=_remove_custom_config,
+        description="Custom provider config.yaml api_key field",
+    ))
+
+
+_register_all_sources()
@@ -470,11 +470,16 @@ def _classify_by_status(
                retryable=False,
                should_fallback=True,
            )
-        # Generic 404 — could be model or endpoint
+        # Generic 404 with no "model not found" signal — could be a wrong
+        # endpoint path (common with local llama.cpp / Ollama / vLLM when
+        # the URL is slightly misconfigured), a proxy routing glitch, or
+        # a transient backend issue.  Classifying these as model_not_found
+        # silently falls back to a different provider and tells the model
+        # the model is missing, which is wrong and wastes a turn.  Treat
+        # as unknown so the retry loop surfaces the real error instead.
        return result_fn(
-            FailoverReason.model_not_found,
-            retryable=False,
-            should_fallback=True,
+            FailoverReason.unknown,
+            retryable=True,
        )

    if status_code == 413:
@@ -0,0 +1,111 @@
+"""Shared file safety rules used by both tools and ACP shims."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Optional
+
+
+def _hermes_home_path() -> Path:
+    """Resolve the active HERMES_HOME (profile-aware) without circular imports."""
+    try:
+        from hermes_constants import get_hermes_home  # local import to avoid cycles
+        return get_hermes_home()
+    except Exception:
+        return Path(os.path.expanduser("~/.hermes"))
+
+
+def build_write_denied_paths(home: str) -> set[str]:
+    """Return exact sensitive paths that must never be written."""
+    hermes_home = _hermes_home_path()
+    return {
+        os.path.realpath(p)
+        for p in [
+            os.path.join(home, ".ssh", "authorized_keys"),
+            os.path.join(home, ".ssh", "id_rsa"),
+            os.path.join(home, ".ssh", "id_ed25519"),
+            os.path.join(home, ".ssh", "config"),
+            str(hermes_home / ".env"),
+            os.path.join(home, ".bashrc"),
+            os.path.join(home, ".zshrc"),
+            os.path.join(home, ".profile"),
+            os.path.join(home, ".bash_profile"),
+            os.path.join(home, ".zprofile"),
+            os.path.join(home, ".netrc"),
+            os.path.join(home, ".pgpass"),
+            os.path.join(home, ".npmrc"),
+            os.path.join(home, ".pypirc"),
+            "/etc/sudoers",
+            "/etc/passwd",
+            "/etc/shadow",
+        ]
+    }
+
+
+def build_write_denied_prefixes(home: str) -> list[str]:
+    """Return sensitive directory prefixes that must never be written."""
+    return [
+        os.path.realpath(p) + os.sep
+        for p in [
+            os.path.join(home, ".ssh"),
+            os.path.join(home, ".aws"),
+            os.path.join(home, ".gnupg"),
+            os.path.join(home, ".kube"),
+            "/etc/sudoers.d",
+            "/etc/systemd",
+            os.path.join(home, ".docker"),
+            os.path.join(home, ".azure"),
+            os.path.join(home, ".config", "gh"),
+        ]
+    ]
+
+
+def get_safe_write_root() -> Optional[str]:
+    """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
+    root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
+    if not root:
+        return None
+    try:
+        return os.path.realpath(os.path.expanduser(root))
+    except Exception:
+        return None
+
+
+def is_write_denied(path: str) -> bool:
+    """Return True if path is blocked by the write denylist or safe root."""
+    home = os.path.realpath(os.path.expanduser("~"))
+    resolved = os.path.realpath(os.path.expanduser(str(path)))
+
+    if resolved in build_write_denied_paths(home):
+        return True
+    for prefix in build_write_denied_prefixes(home):
+        if resolved.startswith(prefix):
+            return True
+
+    safe_root = get_safe_write_root()
+    if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
+        return True
+
+    return False
+
+
+def get_read_block_error(path: str) -> Optional[str]:
+    """Return an error message when a read targets internal Hermes cache files."""
+    resolved = Path(path).expanduser().resolve()
+    hermes_home = _hermes_home_path().resolve()
+    blocked_dirs = [
+        hermes_home / "skills" / ".hub" / "index-cache",
+        hermes_home / "skills" / ".hub",
+    ]
+    for blocked in blocked_dirs:
+        try:
+            resolved.relative_to(blocked)
+        except ValueError:
+            continue
+        return (
+            f"Access denied: {path} is an internal Hermes cache file "
+            "and cannot be read directly to prevent prompt injection. "
+            "Use the skills_list or skill_view tools instead."
+        )
+    return None
@@ -799,7 +799,8 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
+    _raw_details = err_obj.get("details")
+    err_details_list = _raw_details if isinstance(_raw_details, list) else []

    # Extract google.rpc.ErrorInfo reason + metadata.  There may be more
    # than one ErrorInfo (rare), so we pick the first one with a reason.
@@ -613,7 +613,8 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []
+    _raw_details = err_obj.get("details")
+    details_list = _raw_details if isinstance(_raw_details, list) else []

    reason = ""
    retry_after: Optional[float] = None
@@ -0,0 +1,242 @@
+"""
+Image Generation Provider ABC
+=============================
+
+Defines the pluggable-backend interface for image generation. Providers register
+instances via ``PluginContext.register_image_gen_provider()``; the active one
+(selected via ``image_gen.provider`` in ``config.yaml``) services every
+``image_generate`` tool call.
+
+Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
+as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
+via ``plugins.enabled``).
+
+Response shape
+--------------
+All providers return a dict that :func:`success_response` / :func:`error_response`
+produce. The tool wrapper JSON-serializes it. Keys:
+
+    success        bool
+    image          str | None       URL or absolute file path
+    model          str              provider-specific model identifier
+    prompt         str              echoed prompt
+    aspect_ratio   str              "landscape" | "square" | "portrait"
+    provider       str              provider name (for diagnostics)
+    error          str              only when success=False
+    error_type     str              only when success=False
+"""
+
+from __future__ import annotations
+
+import abc
+import base64
+import datetime
+import logging
+import uuid
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
+DEFAULT_ASPECT_RATIO = "landscape"
+
+
+# ---------------------------------------------------------------------------
+# ABC
+# ---------------------------------------------------------------------------
+
+
+class ImageGenProvider(abc.ABC):
+    """Abstract base class for an image generation backend.
+
+    Subclasses must implement :meth:`generate`. Everything else has sane
+    defaults — override only what your provider needs.
+    """
+
+    @property
+    @abc.abstractmethod
+    def name(self) -> str:
+        """Stable short identifier used in ``image_gen.provider`` config.
+
+        Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
+        """
+
+    @property
+    def display_name(self) -> str:
+        """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
+        return self.name.title()
+
+    def is_available(self) -> bool:
+        """Return True when this provider can service calls.
+
+        Typically checks for a required API key. Default: True
+        (providers with no external dependencies are always available).
+        """
+        return True
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        """Return catalog entries for ``hermes tools`` model picker.
+
+        Each entry::
+
+            {
+                "id": "gpt-image-1.5",               # required
+                "display": "GPT Image 1.5",          # optional; defaults to id
+                "speed": "~10s",                     # optional
+                "strengths": "...",                  # optional
+                "price": "$...",                     # optional
+            }
+
+        Default: empty list (provider has no user-selectable models).
+        """
+        return []
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        """Return provider metadata for the ``hermes tools`` picker.
+
+        Used by ``tools_config.py`` to inject this provider as a row in
+        the Image Generation provider list. Shape::
+
+            {
+                "name": "OpenAI",                     # picker label
+                "badge": "paid",                      # optional short tag
+                "tag": "One-line description...",     # optional subtitle
+                "env_vars": [                         # keys to prompt for
+                    {"key": "OPENAI_API_KEY",
+                     "prompt": "OpenAI API key",
+                     "url": "https://platform.openai.com/api-keys"},
+                ],
+            }
+
+        Default: minimal entry derived from ``display_name``. Override to
+        expose API key prompts and custom badges.
+        """
+        return {
+            "name": self.display_name,
+            "badge": "",
+            "tag": "",
+            "env_vars": [],
+        }
+
+    def default_model(self) -> Optional[str]:
+        """Return the default model id, or None if not applicable."""
+        models = self.list_models()
+        if models:
+            return models[0].get("id")
+        return None
+
+    @abc.abstractmethod
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Generate an image.
+
+        Implementations should return the dict from :func:`success_response`
+        or :func:`error_response`. ``kwargs`` may contain forward-compat
+        parameters future versions of the schema will expose — implementations
+        should ignore unknown keys.
+        """
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def resolve_aspect_ratio(value: Optional[str]) -> str:
+    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
+
+    Invalid values are coerced rather than rejected so the tool surface is
+    forgiving of agent mistakes.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_ASPECT_RATIO
+    v = value.strip().lower()
+    if v in VALID_ASPECT_RATIOS:
+        return v
+    return DEFAULT_ASPECT_RATIO
+
+
+def _images_cache_dir() -> Path:
+    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
+    from hermes_constants import get_hermes_home
+
+    path = get_hermes_home() / "cache" / "images"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def save_b64_image(
+    b64_data: str,
+    *,
+    prefix: str = "image",
+    extension: str = "png",
+) -> Path:
+    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
+
+    Returns the absolute :class:`Path` to the saved file.
+
+    Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
+    """
+    raw = base64.b64decode(b64_data)
+    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    short = uuid.uuid4().hex[:8]
+    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
+    path.write_bytes(raw)
+    return path
+
+
+def success_response(
+    *,
+    image: str,
+    model: str,
+    prompt: str,
+    aspect_ratio: str,
+    provider: str,
+    extra: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Build a uniform success response dict.
+
+    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
+    providers like OpenAI). Callers that need to pass through additional
+    backend-specific fields can supply ``extra``.
+    """
+    payload: Dict[str, Any] = {
+        "success": True,
+        "image": image,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
+    if extra:
+        for k, v in extra.items():
+            payload.setdefault(k, v)
+    return payload
+
+
+def error_response(
+    *,
+    error: str,
+    error_type: str = "provider_error",
+    provider: str = "",
+    model: str = "",
+    prompt: str = "",
+    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+) -> Dict[str, Any]:
+    """Build a uniform error response dict."""
+    return {
+        "success": False,
+        "image": None,
+        "error": error,
+        "error_type": error_type,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
@@ -0,0 +1,120 @@
+"""
+Image Generation Provider Registry
+==================================
+
+Central map of registered providers. Populated by plugins at import-time via
+``PluginContext.register_image_gen_provider()``; consumed by the
+``image_generate`` tool to dispatch each call to the active backend.
+
+Active selection
+----------------
+The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
+If unset, :func:`get_active_provider` applies fallback logic:
+
+1. If exactly one provider is registered, use it.
+2. Otherwise if a provider named ``fal`` is registered, use it (legacy
+   default — matches pre-plugin behavior).
+3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
+   the user at ``hermes tools``).
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from typing import Dict, List, Optional
+
+from agent.image_gen_provider import ImageGenProvider
+
+logger = logging.getLogger(__name__)
+
+
+_providers: Dict[str, ImageGenProvider] = {}
+_lock = threading.Lock()
+
+
+def register_provider(provider: ImageGenProvider) -> None:
+    """Register an image generation provider.
+
+    Re-registration (same ``name``) overwrites the previous entry and logs
+    a debug message — this makes hot-reload scenarios (tests, dev loops)
+    behave predictably.
+    """
+    if not isinstance(provider, ImageGenProvider):
+        raise TypeError(
+            f"register_provider() expects an ImageGenProvider instance, "
+            f"got {type(provider).__name__}"
+        )
+    name = provider.name
+    if not isinstance(name, str) or not name.strip():
+        raise ValueError("Image gen provider .name must be a non-empty string")
+    with _lock:
+        existing = _providers.get(name)
+        _providers[name] = provider
+    if existing is not None:
+        logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
+    else:
+        logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
+
+
+def list_providers() -> List[ImageGenProvider]:
+    """Return all registered providers, sorted by name."""
+    with _lock:
+        items = list(_providers.values())
+    return sorted(items, key=lambda p: p.name)
+
+
+def get_provider(name: str) -> Optional[ImageGenProvider]:
+    """Return the provider registered under *name*, or None."""
+    if not isinstance(name, str):
+        return None
+    with _lock:
+        return _providers.get(name.strip())
+
+
+def get_active_provider() -> Optional[ImageGenProvider]:
+    """Resolve the currently-active provider.
+
+    Reads ``image_gen.provider`` from config.yaml; falls back per the
+    module docstring.
+    """
+    configured: Optional[str] = None
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        if isinstance(section, dict):
+            raw = section.get("provider")
+            if isinstance(raw, str) and raw.strip():
+                configured = raw.strip()
+    except Exception as exc:
+        logger.debug("Could not read image_gen.provider from config: %s", exc)
+
+    with _lock:
+        snapshot = dict(_providers)
+
+    if configured:
+        provider = snapshot.get(configured)
+        if provider is not None:
+            return provider
+        logger.debug(
+            "image_gen.provider='%s' configured but not registered; falling back",
+            configured,
+        )
+
+    # Fallback: single-provider case
+    if len(snapshot) == 1:
+        return next(iter(snapshot.values()))
+
+    # Fallback: prefer legacy FAL for backward compat
+    if "fal" in snapshot:
+        return snapshot["fal"]
+
+    return None
+
+
+def _reset_for_tests() -> None:
+    """Clear the registry. **Test-only.**"""
+    with _lock:
+        _providers.clear()
@@ -14,6 +14,8 @@ from urllib.parse import urlparse
 import requests
 import yaml

+from hermes_cli.volcengine_byteplus import model_context_window
+from utils import base_url_host_matches, base_url_hostname
 from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)
@@ -23,18 +25,22 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
    "arcee",
+    "volcengine",
+    "volcengine-coding-plan",
+    "byteplus",
+    "byteplus-coding-plan",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
    "ollama",
-    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
+    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
    "xai", "x-ai", "x.ai", "grok",
@@ -168,6 +174,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    "Qwen/Qwen3.5-35B-A3B": 131072,
    "deepseek-ai/DeepSeek-V3.2": 65536,
    "moonshotai/Kimi-K2.5": 262144,
+    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
    "XiaomiMiMo/MiMo-V2-Flash": 256000,
@@ -210,8 +217,15 @@ def _normalize_base_url(base_url: str) -> str:
    return (base_url or "").strip().rstrip("/")


+def _auth_headers(api_key: str = "") -> Dict[str, str]:
+    token = str(api_key or "").strip()
+    if not token:
+        return {}
+    return {"Authorization": f"Bearer {token}"}
+
+
 def _is_openrouter_base_url(base_url: str) -> bool:
-    return "openrouter.ai" in _normalize_base_url(base_url).lower()
+    return base_url_host_matches(base_url, "openrouter.ai")


 def _is_custom_endpoint(base_url: str) -> bool:
@@ -227,6 +241,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
+    "api.stepfun.ai": "stepfun",
+    "api.stepfun.com": "stepfun",
    "api.arcee.ai": "arcee",
    "api.minimax": "minimax",
    "dashscope.aliyuncs.com": "alibaba",
@@ -245,6 +261,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    "ollama.com": "ollama-cloud",
+    "ark.cn-beijing.volces.com": "volcengine",
+    "ark.ap-southeast.bytepluses.com": "byteplus",
 }


@@ -309,7 +327,7 @@ def is_local_endpoint(base_url: str) -> bool:
    return False


-def detect_local_server_type(base_url: str) -> Optional[str]:
+def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
    """Detect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -321,8 +339,10 @@ def detect_local_server_type(base_url: str) -> Optional[str]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

+    headers = _auth_headers(api_key)
+
    try:
-        with httpx.Client(timeout=2.0) as client:
+        with httpx.Client(timeout=2.0, headers=headers) as client:
            # LM Studio exposes /api/v1/models — check first (most specific)
            try:
                r = client.get(f"{server_url}/api/v1/models")
@@ -509,6 +529,59 @@ def fetch_endpoint_model_metadata(
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    last_error: Optional[Exception] = None

+    if is_local_endpoint(normalized):
+        try:
+            if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
+                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
+                response = requests.get(
+                    server_url.rstrip("/") + "/api/v1/models",
+                    headers=headers,
+                    timeout=10,
+                )
+                response.raise_for_status()
+                payload = response.json()
+                cache: Dict[str, Dict[str, Any]] = {}
+                for model in payload.get("models", []):
+                    if not isinstance(model, dict):
+                        continue
+                    model_id = model.get("key") or model.get("id")
+                    if not model_id:
+                        continue
+                    entry: Dict[str, Any] = {"name": model.get("name", model_id)}
+
+                    context_length = None
+                    for inst in model.get("loaded_instances", []) or []:
+                        if not isinstance(inst, dict):
+                            continue
+                        cfg = inst.get("config", {})
+                        ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
+                        if isinstance(ctx, int) and ctx > 0:
+                            context_length = ctx
+                            break
+                    if context_length is None:
+                        context_length = _extract_context_length(model)
+                    if context_length is not None:
+                        entry["context_length"] = context_length
+
+                    max_completion_tokens = _extract_max_completion_tokens(model)
+                    if max_completion_tokens is not None:
+                        entry["max_completion_tokens"] = max_completion_tokens
+
+                    pricing = _extract_pricing(model)
+                    if pricing:
+                        entry["pricing"] = pricing
+
+                    _add_model_aliases(cache, model_id, entry)
+                    alt_id = model.get("id")
+                    if isinstance(alt_id, str) and alt_id and alt_id != model_id:
+                        _add_model_aliases(cache, alt_id, entry)
+
+                _endpoint_model_metadata_cache[normalized] = cache
+                _endpoint_model_metadata_cache_time[normalized] = time.time()
+                return cache
+        except Exception as exc:
+            last_error = exc
+
    for candidate in candidates:
        url = candidate.rstrip("/") + "/models"
        try:
@@ -715,7 +788,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
    return False


-def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
+def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
    """Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -733,14 +806,16 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
        server_url = server_url[:-3]

    try:
-        server_type = detect_local_server_type(base_url)
+        server_type = detect_local_server_type(base_url, api_key=api_key)
    except Exception:
        return None
    if server_type != "ollama":
        return None

+    headers = _auth_headers(api_key)
+
    try:
-        with httpx.Client(timeout=3.0) as client:
+        with httpx.Client(timeout=3.0, headers=headers) as client:
            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
            if resp.status_code != 200:
                return None
@@ -768,7 +843,7 @@ def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
    return None


-def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
+def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
    """Query a local server for the model's context length."""
    import httpx

@@ -781,13 +856,15 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

+    headers = _auth_headers(api_key)
+
    try:
-        server_type = detect_local_server_type(base_url)
+        server_type = detect_local_server_type(base_url, api_key=api_key)
    except Exception:
        server_type = None

    try:
-        with httpx.Client(timeout=3.0) as client:
+        with httpx.Client(timeout=3.0, headers=headers) as client:
            # Ollama: /api/show returns model details with context info
            if server_type == "ollama":
                resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -998,7 +1075,7 @@ def get_model_context_length(
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
-                local_ctx = _query_local_context_length(model, base_url)
+                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
                    save_context_length(model, base_url, local_ctx)
                    return local_ctx
@@ -1012,7 +1089,7 @@ def get_model_context_length(

    # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
    if provider == "anthropic" or (
-        base_url and "api.anthropic.com" in base_url
+        base_url and base_url_hostname(base_url) == "api.anthropic.com"
    ):
        ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
        if ctx:
@@ -1021,7 +1098,11 @@ def get_model_context_length(
    # 4b. AWS Bedrock — use static context length table.
    # Bedrock's ListFoundationModels doesn't expose context window sizes,
    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
+    if provider == "bedrock" or (
+        base_url
+        and base_url_hostname(base_url).startswith("bedrock-runtime.")
+        and base_url_host_matches(base_url, "amazonaws.com")
+    ):
        try:
            from agent.bedrock_adapter import get_bedrock_context_length
            return get_bedrock_context_length(model)
@@ -1044,12 +1125,20 @@ def get_model_context_length(
        ctx = _resolve_nous_context_length(model)
        if ctx:
            return ctx
+    if effective_provider in {"volcengine", "byteplus"}:
+        ctx = model_context_window(model)
+        if ctx:
+            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
        if ctx:
            return ctx

+    ctx = model_context_window(model)
+    if ctx:
+        return ctx
+
    # 6. OpenRouter live API metadata (provider-unaware fallback)
    metadata = fetch_model_metadata()
    if model in metadata:
@@ -1068,7 +1157,7 @@ def get_model_context_length(

    # 9. Query local server as last resort
    if base_url and is_local_endpoint(base_url):
-        local_ctx = _query_local_context_length(model, base_url)
+        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
            save_context_length(model, base_url, local_ctx)
            return local_ctx
@@ -146,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
+    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
    "minimax-cn": "minimax-cn",
@@ -350,7 +350,13 @@ PLATFORM_HINTS = {
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
-        "renderable inside a terminal."
+        "renderable inside a terminal. "
+        "File delivery: there is no attachment channel — the user reads your "
+        "response directly in their terminal. Do NOT emit MEDIA:/path tags "
+        "(those are only intercepted on messaging platforms like Telegram, "
+        "Discord, Slack, etc.; on the CLI they render as literal text). "
+        "When referring to a file you created or changed, just state its "
+        "absolute path in plain text; the user can open it from there."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
@@ -13,6 +13,48 @@ import re

 logger = logging.getLogger(__name__)

+# Sensitive query-string parameter names (case-insensitive exact match).
+# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
+# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
+_SENSITIVE_QUERY_PARAMS = frozenset({
+    "access_token",
+    "refresh_token",
+    "id_token",
+    "token",
+    "api_key",
+    "apikey",
+    "client_secret",
+    "password",
+    "auth",
+    "jwt",
+    "session",
+    "secret",
+    "key",
+    "code",           # OAuth authorization codes
+    "signature",      # pre-signed URL signatures
+    "x-amz-signature",
+})
+
+# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
+# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
+# Ported from nearai/ironclaw#2529.
+_SENSITIVE_BODY_KEYS = frozenset({
+    "access_token",
+    "refresh_token",
+    "id_token",
+    "token",
+    "api_key",
+    "apikey",
+    "client_secret",
+    "password",
+    "auth",
+    "jwt",
+    "secret",
+    "private_key",
+    "authorization",
+    "key",
+})
+
 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
 # `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
 _REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -108,6 +150,30 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")

+# URLs containing query strings — matches `scheme://...?...[# or end]`.
+# Used to scan text for URLs whose query params may contain secrets.
+# Ported from nearai/ironclaw#2529.
+_URL_WITH_QUERY_RE = re.compile(
+    r"(https?|wss?|ftp)://"          # scheme
+    r"([^\s/?#]+)"                    # authority (may include userinfo)
+    r"([^\s?#]*)"                     # path
+    r"\?([^\s#]+)"                    # query (required)
+    r"(#\S*)?",                       # optional fragment
+)
+
+# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
+# (not just DB protocols already covered by _DB_CONNSTR_RE above).
+# Catches things like `https://user:token@api.example.com/v1/foo`.
+_URL_USERINFO_RE = re.compile(
+    r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
+)
+
+# Form-urlencoded body detection: conservative — only applies when the entire
+# text looks like a query string (k=v&k=v pattern with no newlines).
+_FORM_BODY_RE = re.compile(
+    r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
+)
+
 # Compile known prefix patterns into one alternation
 _PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -121,6 +187,72 @@ def _mask_token(token: str) -> str:
    return f"{token[:6]}...{token[-4:]}"


+def _redact_query_string(query: str) -> str:
+    """Redact sensitive parameter values in a URL query string.
+
+    Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
+    replaced with `***`. Non-sensitive keys pass through unchanged.
+    Empty or malformed pairs are preserved as-is.
+    """
+    if not query:
+        return query
+    parts = []
+    for pair in query.split("&"):
+        if "=" not in pair:
+            parts.append(pair)
+            continue
+        key, _, value = pair.partition("=")
+        if key.lower() in _SENSITIVE_QUERY_PARAMS:
+            parts.append(f"{key}=***")
+        else:
+            parts.append(pair)
+    return "&".join(parts)
+
+
+def _redact_url_query_params(text: str) -> str:
+    """Scan text for URLs with query strings and redact sensitive params.
+
+    Catches opaque tokens that don't match vendor prefix regexes, e.g.
+    `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`.
+    """
+    def _sub(m: re.Match) -> str:
+        scheme = m.group(1)
+        authority = m.group(2)
+        path = m.group(3)
+        query = _redact_query_string(m.group(4))
+        fragment = m.group(5) or ""
+        return f"{scheme}://{authority}{path}?{query}{fragment}"
+    return _URL_WITH_QUERY_RE.sub(_sub, text)
+
+
+def _redact_url_userinfo(text: str) -> str:
+    """Strip `user:password@` from HTTP/WS/FTP URLs.
+
+    DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
+    separately by `_DB_CONNSTR_RE`.
+    """
+    return _URL_USERINFO_RE.sub(
+        lambda m: f"{m.group(1)}://{m.group(2)}:***@",
+        text,
+    )
+
+
+def _redact_form_body(text: str) -> str:
+    """Redact sensitive values in a form-urlencoded body.
+
+    Only applies when the entire input looks like a pure form body
+    (k=v&k=v with no newlines, no other text). Single-line non-form
+    text passes through unchanged. This is a conservative pass — the
+    `_redact_url_query_params` function handles embedded query strings.
+    """
+    if not text or "\n" in text or "&" not in text:
+        return text
+    # The body-body form check is strict: only trigger on clean k=v&k=v.
+    if not _FORM_BODY_RE.match(text.strip()):
+        return text
+    return _redact_query_string(text.strip())
+
+
 def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

@@ -173,6 +305,16 @@ def redact_sensitive_text(text: str) -> str:
    # JWT tokens (eyJ... — base64-encoded JSON headers)
    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)

+    # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
+    # DB schemes are handled above by _DB_CONNSTR_RE.
+    text = _redact_url_userinfo(text)
+
+    # URL query params containing opaque tokens (?access_token=…&code=…)
+    text = _redact_url_query_params(text)
+
+    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
+    text = _redact_form_body(text)
+
    # Discord user/role mentions (<@snowflake_id>)
    text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)

@@ -0,0 +1,831 @@
+"""
+Shell-script hooks bridge.
+
+Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
+consent on first use of each ``(event, command)`` pair, and registers
+callbacks on the existing plugin hook manager so every existing
+``invoke_hook()`` site dispatches to the configured shell scripts — with
+zero changes to call sites.
+
+Design notes
+------------
+* Python plugins and shell hooks compose naturally: both flow through
+  :func:`hermes_cli.plugins.invoke_hook` and its aggregators.  Python
+  plugins are registered first (via ``discover_and_load()``) so their
+  block decisions win ties over shell-hook blocks.
+* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
+  with ``shell=False`` — no shell injection footguns.  Users that need
+  pipes/redirection wrap their logic in a script.
+* First-use consent is gated by the allowlist under
+  ``~/.hermes/shell-hooks-allowlist.json``.  Non-TTY callers must pass
+  ``accept_hooks=True`` (resolved from ``--accept-hooks``,
+  ``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
+  for registration to succeed without a prompt.
+* Registration is idempotent — safe to invoke from both the CLI entry
+  point (``hermes_cli/main.py``) and the gateway entry point
+  (``gateway/run.py``).
+
+Wire protocol
+-------------
+**stdin** (JSON, piped to the script)::
+
+    {
+        "hook_event_name": "pre_tool_call",
+        "tool_name":       "terminal",
+        "tool_input":      {"command": "rm -rf /"},
+        "session_id":      "sess_abc123",
+        "cwd":             "/home/user/project",
+        "extra":           {...}   # event-specific kwargs
+    }
+
+**stdout** (JSON, optional — anything else is ignored)::
+
+    # Block a pre_tool_call (either shape accepted; normalised internally):
+    {"decision": "block", "reason":  "Forbidden command"}   # Claude-Code-style
+    {"action":   "block", "message": "Forbidden command"}   # Hermes-canonical
+
+    # Inject context for pre_llm_call:
+    {"context": "Today is Friday"}
+
+    # Silent no-op:
+    <empty or any non-matching JSON object>
+"""
+
+from __future__ import annotations
+
+import difflib
+import json
+import logging
+import os
+import re
+import shlex
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+from contextlib import contextmanager
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
+
+try:
+    import fcntl  # POSIX only; Windows falls back to best-effort without flock.
+except ImportError:  # pragma: no cover
+    fcntl = None  # type: ignore[assignment]
+
+from hermes_constants import get_hermes_home
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_TIMEOUT_SECONDS = 60
+MAX_TIMEOUT_SECONDS = 300
+ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
+
+# (event, matcher, command) triples that have been wired to the plugin
+# manager in the current process.  Matcher is part of the key because
+# the same script can legitimately register for different matchers under
+# the same event (e.g. one entry per tool the user wants to gate).
+# Second registration attempts for the exact same triple become no-ops
+# so the CLI and gateway can both call register_from_config() safely.
+_registered: Set[Tuple[str, Optional[str], str]] = set()
+_registered_lock = threading.Lock()
+
+# Intra-process lock for allowlist read-modify-write on platforms that
+# lack ``fcntl`` (non-POSIX).  Kept separate from ``_registered_lock``
+# because ``register_from_config`` already holds ``_registered_lock`` when
+# it triggers ``_record_approval`` — reusing it here would self-deadlock
+# (``threading.Lock`` is non-reentrant).  POSIX callers use the sibling
+# ``.lock`` file via ``fcntl.flock`` and bypass this.
+_allowlist_write_lock = threading.Lock()
+
+
+@dataclass
+class ShellHookSpec:
+    """Parsed and validated representation of a single ``hooks:`` entry."""
+
+    event: str
+    command: str
+    matcher: Optional[str] = None
+    timeout: int = DEFAULT_TIMEOUT_SECONDS
+    compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
+
+    def __post_init__(self) -> None:
+        # Strip whitespace introduced by YAML quirks (e.g. multi-line string
+        # folding) — a matcher of " terminal" would otherwise silently fail
+        # to match "terminal" without any diagnostic.
+        if isinstance(self.matcher, str):
+            stripped = self.matcher.strip()
+            self.matcher = stripped if stripped else None
+        if self.matcher:
+            try:
+                self.compiled_matcher = re.compile(self.matcher)
+            except re.error as exc:
+                logger.warning(
+                    "shell hook matcher %r is invalid (%s) — treating as "
+                    "literal equality", self.matcher, exc,
+                )
+                self.compiled_matcher = None
+
+    def matches_tool(self, tool_name: Optional[str]) -> bool:
+        if not self.matcher:
+            return True
+        if tool_name is None:
+            return False
+        if self.compiled_matcher is not None:
+            return self.compiled_matcher.fullmatch(tool_name) is not None
+        # compiled_matcher is None only when the regex failed to compile,
+        # in which case we already warned and fall back to literal equality.
+        return tool_name == self.matcher
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def register_from_config(
+    cfg: Optional[Dict[str, Any]],
+    *,
+    accept_hooks: bool = False,
+) -> List[ShellHookSpec]:
+    """Register every configured shell hook on the plugin manager.
+
+    ``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
+    output).  The ``hooks:`` key is read out of it.  Missing, empty, or
+    non-dict ``hooks`` is treated as zero configured hooks.
+
+    ``accept_hooks=True`` skips the TTY consent prompt — the caller is
+    promising that the user has opted in via a flag, env var, or config
+    setting.  ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
+    also honored inside this function so either CLI or gateway call sites
+    pick them up.
+
+    Returns the list of :class:`ShellHookSpec` entries that ended up wired
+    up on the plugin manager.  Skipped entries (unknown events, malformed,
+    not allowlisted, already registered) are logged but not returned.
+    """
+    if not isinstance(cfg, dict):
+        return []
+
+    effective_accept = _resolve_effective_accept(cfg, accept_hooks)
+
+    specs = _parse_hooks_block(cfg.get("hooks"))
+    if not specs:
+        return []
+
+    registered: List[ShellHookSpec] = []
+
+    # Import lazily — avoids circular imports at module-load time.
+    from hermes_cli.plugins import get_plugin_manager
+
+    manager = get_plugin_manager()
+
+    # Idempotence + allowlist read happen under the lock; the TTY
+    # prompt runs outside so other threads aren't parked on a blocking
+    # input().  Mutation re-takes the lock with a defensive idempotence
+    # re-check in case two callers ever race through the prompt.
+    for spec in specs:
+        key = (spec.event, spec.matcher, spec.command)
+        with _registered_lock:
+            if key in _registered:
+                continue
+            already_allowlisted = _is_allowlisted(spec.event, spec.command)
+
+        if not already_allowlisted:
+            if not _prompt_and_record(
+                spec.event, spec.command, accept_hooks=effective_accept,
+            ):
+                logger.warning(
+                    "shell hook for %s (%s) not allowlisted — skipped. "
+                    "Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
+                    "hooks_auto_accept: true, or approve at the TTY "
+                    "prompt next run.",
+                    spec.event, spec.command,
+                )
+                continue
+
+        with _registered_lock:
+            if key in _registered:
+                continue
+            manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
+            _registered.add(key)
+            registered.append(spec)
+            logger.info(
+                "shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
+                spec.event, spec.command, spec.matcher, spec.timeout,
+            )
+
+    return registered
+
+
+def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
+    """Return the parsed ``ShellHookSpec`` entries from config without
+    registering anything.  Used by ``hermes hooks list`` and ``doctor``."""
+    if not isinstance(cfg, dict):
+        return []
+    return _parse_hooks_block(cfg.get("hooks"))
+
+
+def reset_for_tests() -> None:
+    """Clear the idempotence set.  Test-only helper."""
+    with _registered_lock:
+        _registered.clear()
+
+
+# ---------------------------------------------------------------------------
+# Config parsing
+# ---------------------------------------------------------------------------
+
+def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
+    """Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
+
+    Malformed entries warn-and-skip — we never raise from config parsing
+    because a broken hook must not crash the agent.
+    """
+    from hermes_cli.plugins import VALID_HOOKS
+
+    if not isinstance(hooks_cfg, dict):
+        return []
+
+    specs: List[ShellHookSpec] = []
+
+    for event_name, entries in hooks_cfg.items():
+        if event_name not in VALID_HOOKS:
+            suggestion = difflib.get_close_matches(
+                str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
+            )
+            if suggestion:
+                logger.warning(
+                    "unknown hook event %r in hooks: config — did you mean %r?",
+                    event_name, suggestion[0],
+                )
+            else:
+                logger.warning(
+                    "unknown hook event %r in hooks: config (valid: %s)",
+                    event_name, ", ".join(sorted(VALID_HOOKS)),
+                )
+            continue
+
+        if entries is None:
+            continue
+
+        if not isinstance(entries, list):
+            logger.warning(
+                "hooks.%s must be a list of hook definitions; got %s",
+                event_name, type(entries).__name__,
+            )
+            continue
+
+        for i, raw in enumerate(entries):
+            spec = _parse_single_entry(event_name, i, raw)
+            if spec is not None:
+                specs.append(spec)
+
+    return specs
+
+
+def _parse_single_entry(
+    event: str, index: int, raw: Any,
+) -> Optional[ShellHookSpec]:
+    if not isinstance(raw, dict):
+        logger.warning(
+            "hooks.%s[%d] must be a mapping with a 'command' key; got %s",
+            event, index, type(raw).__name__,
+        )
+        return None
+
+    command = raw.get("command")
+    if not isinstance(command, str) or not command.strip():
+        logger.warning(
+            "hooks.%s[%d] is missing a non-empty 'command' field",
+            event, index,
+        )
+        return None
+
+    matcher = raw.get("matcher")
+    if matcher is not None and not isinstance(matcher, str):
+        logger.warning(
+            "hooks.%s[%d].matcher must be a string regex; ignoring",
+            event, index,
+        )
+        matcher = None
+
+    if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
+        logger.warning(
+            "hooks.%s[%d].matcher=%r will be ignored at runtime — the "
+            "matcher field is only honored for pre_tool_call / "
+            "post_tool_call.  The hook will fire on every %s event.",
+            event, index, matcher, event,
+        )
+        matcher = None
+
+    timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
+    try:
+        timeout = int(timeout_raw)
+    except (TypeError, ValueError):
+        logger.warning(
+            "hooks.%s[%d].timeout must be an int (got %r); using default %ds",
+            event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
+        )
+        timeout = DEFAULT_TIMEOUT_SECONDS
+
+    if timeout < 1:
+        logger.warning(
+            "hooks.%s[%d].timeout must be >=1; using default %ds",
+            event, index, DEFAULT_TIMEOUT_SECONDS,
+        )
+        timeout = DEFAULT_TIMEOUT_SECONDS
+
+    if timeout > MAX_TIMEOUT_SECONDS:
+        logger.warning(
+            "hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
+            event, index, timeout, MAX_TIMEOUT_SECONDS,
+        )
+        timeout = MAX_TIMEOUT_SECONDS
+
+    return ShellHookSpec(
+        event=event,
+        command=command.strip(),
+        matcher=matcher,
+        timeout=timeout,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Subprocess callback
+# ---------------------------------------------------------------------------
+
+_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
+
+
+def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
+    """Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
+
+    Returns a diagnostic dict with the same keys for every outcome
+    (``returncode``, ``stdout``, ``stderr``, ``timed_out``,
+    ``elapsed_seconds``, ``error``).  This is the single place the
+    subprocess is actually invoked — both the live callback path
+    (:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
+    go through it.
+    """
+    result: Dict[str, Any] = {
+        "returncode": None,
+        "stdout": "",
+        "stderr": "",
+        "timed_out": False,
+        "elapsed_seconds": 0.0,
+        "error": None,
+    }
+    try:
+        argv = shlex.split(os.path.expanduser(spec.command))
+    except ValueError as exc:
+        result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
+        return result
+    if not argv:
+        result["error"] = "empty command"
+        return result
+
+    t0 = time.monotonic()
+    try:
+        proc = subprocess.run(
+            argv,
+            input=stdin_json,
+            capture_output=True,
+            timeout=spec.timeout,
+            text=True,
+            shell=False,
+        )
+    except subprocess.TimeoutExpired:
+        result["timed_out"] = True
+        result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
+        return result
+    except FileNotFoundError:
+        result["error"] = "command not found"
+        return result
+    except PermissionError:
+        result["error"] = "command not executable"
+        return result
+    except Exception as exc:  # pragma: no cover — defensive
+        result["error"] = str(exc)
+        return result
+
+    result["returncode"] = proc.returncode
+    result["stdout"] = proc.stdout or ""
+    result["stderr"] = proc.stderr or ""
+    result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
+    return result
+
+
+def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
+    """Build the closure that ``invoke_hook()`` will call per firing."""
+
+    def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
+        # Matcher gate — only meaningful for tool-scoped events.
+        if spec.event in ("pre_tool_call", "post_tool_call"):
+            if not spec.matches_tool(kwargs.get("tool_name")):
+                return None
+
+        r = _spawn(spec, _serialize_payload(spec.event, kwargs))
+
+        if r["error"]:
+            logger.warning(
+                "shell hook failed (event=%s command=%s): %s",
+                spec.event, spec.command, r["error"],
+            )
+            return None
+        if r["timed_out"]:
+            logger.warning(
+                "shell hook timed out after %.2fs (event=%s command=%s)",
+                r["elapsed_seconds"], spec.event, spec.command,
+            )
+            return None
+
+        stderr = r["stderr"].strip()
+        if stderr:
+            logger.debug(
+                "shell hook stderr (event=%s command=%s): %s",
+                spec.event, spec.command, stderr[:400],
+            )
+        # Non-zero exits: log but still parse stdout so scripts that
+        # signal failure via exit code can also return a block directive.
+        if r["returncode"] != 0:
+            logger.warning(
+                "shell hook exited %d (event=%s command=%s); stderr=%s",
+                r["returncode"], spec.event, spec.command, stderr[:400],
+            )
+        return _parse_response(spec.event, r["stdout"])
+
+    _callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
+    _callback.__qualname__ = _callback.__name__
+    return _callback
+
+
+def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
+    """Render the stdin JSON payload.  Unserialisable values are
+    stringified via ``default=str`` rather than dropped."""
+    extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
+    try:
+        cwd = str(Path.cwd())
+    except OSError:
+        cwd = ""
+    payload = {
+        "hook_event_name": event,
+        "tool_name": kwargs.get("tool_name"),
+        "tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
+        "session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
+        "cwd": cwd,
+        "extra": extras,
+    }
+    return json.dumps(payload, ensure_ascii=False, default=str)
+
+
+def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
+    """Translate stdout JSON into a Hermes wire-shape dict.
+
+    For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
+    "reason": "..."}`` payload is translated into the canonical Hermes
+    ``{"action": "block", "message": "..."}`` shape expected by
+    :func:`hermes_cli.plugins.get_pre_tool_call_block_message`.  This is
+    the single most important correctness invariant in this module —
+    skipping the translation silently breaks every ``pre_tool_call``
+    block directive.
+
+    For ``pre_llm_call``, ``{"context": "..."}`` is passed through
+    unchanged to match the existing plugin-hook contract.
+
+    Anything else returns ``None``.
+    """
+    stdout = (stdout or "").strip()
+    if not stdout:
+        return None
+
+    try:
+        data = json.loads(stdout)
+    except json.JSONDecodeError:
+        logger.warning(
+            "shell hook stdout was not valid JSON (event=%s): %s",
+            event, stdout[:200],
+        )
+        return None
+
+    if not isinstance(data, dict):
+        return None
+
+    if event == "pre_tool_call":
+        if data.get("action") == "block":
+            message = data.get("message") or data.get("reason") or ""
+            if isinstance(message, str) and message:
+                return {"action": "block", "message": message}
+        if data.get("decision") == "block":
+            message = data.get("reason") or data.get("message") or ""
+            if isinstance(message, str) and message:
+                return {"action": "block", "message": message}
+        return None
+
+    context = data.get("context")
+    if isinstance(context, str) and context.strip():
+        return {"context": context}
+
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Allowlist / consent
+# ---------------------------------------------------------------------------
+
+def allowlist_path() -> Path:
+    """Path to the per-user shell-hook allowlist file."""
+    return get_hermes_home() / ALLOWLIST_FILENAME
+
+
+def load_allowlist() -> Dict[str, Any]:
+    """Return the parsed allowlist, or an empty skeleton if absent."""
+    try:
+        raw = json.loads(allowlist_path().read_text())
+    except (FileNotFoundError, json.JSONDecodeError, OSError):
+        return {"approvals": []}
+    if not isinstance(raw, dict):
+        return {"approvals": []}
+    approvals = raw.get("approvals")
+    if not isinstance(approvals, list):
+        raw["approvals"] = []
+    return raw
+
+
+def save_allowlist(data: Dict[str, Any]) -> None:
+    """Atomically persist the allowlist via per-process ``mkstemp`` +
+    ``os.replace``.  Cross-process read-modify-write races are handled
+    by :func:`_locked_update_approvals` (``fcntl.flock``).  On OSError
+    the failure is logged; the in-process hook still registers but
+    the approval won't survive across runs."""
+    p = allowlist_path()
+    try:
+        p.parent.mkdir(parents=True, exist_ok=True)
+        fd, tmp_path = tempfile.mkstemp(
+            prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
+        )
+        try:
+            with os.fdopen(fd, "w") as fh:
+                fh.write(json.dumps(data, indent=2, sort_keys=True))
+            os.replace(tmp_path, p)
+        except Exception:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
+    except OSError as exc:
+        logger.warning(
+            "Failed to persist shell hook allowlist to %s: %s. "
+            "The approval is in-memory for this run, but the next "
+            "startup will re-prompt (or skip registration on non-TTY "
+            "runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
+            p, exc,
+        )
+
+
+def _is_allowlisted(event: str, command: str) -> bool:
+    data = load_allowlist()
+    return any(
+        isinstance(e, dict)
+        and e.get("event") == event
+        and e.get("command") == command
+        for e in data.get("approvals", [])
+    )
+
+
+@contextmanager
+def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
+    """Serialise read-modify-write on the allowlist across processes.
+
+    Holds an exclusive ``flock`` on a sibling lock file for the duration
+    of the update so concurrent ``_record_approval``/``revoke`` callers
+    cannot clobber each other's changes (the race Codex reproduced with
+    20–50 simultaneous writers).  Falls back to an in-process lock on
+    platforms without ``fcntl``.
+    """
+    p = allowlist_path()
+    p.parent.mkdir(parents=True, exist_ok=True)
+    lock_path = p.with_suffix(p.suffix + ".lock")
+
+    if fcntl is None:  # pragma: no cover — non-POSIX fallback
+        with _allowlist_write_lock:
+            data = load_allowlist()
+            yield data
+            save_allowlist(data)
+        return
+
+    with open(lock_path, "a+") as lock_fh:
+        fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
+        try:
+            data = load_allowlist()
+            yield data
+            save_allowlist(data)
+        finally:
+            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
+
+
+def _prompt_and_record(
+    event: str, command: str, *, accept_hooks: bool,
+) -> bool:
+    """Decide whether to approve an unseen ``(event, command)`` pair.
+    Returns ``True`` iff the approval was granted and recorded.
+    """
+    if accept_hooks:
+        _record_approval(event, command)
+        logger.info(
+            "shell hook auto-approved via --accept-hooks / env / config: "
+            "%s -> %s", event, command,
+        )
+        return True
+
+    if not sys.stdin.isatty():
+        return False
+
+    print(
+        f"\n⚠ Hermes is about to register a shell hook that will run a\n"
+        f"  command on your behalf.\n\n"
+        f"    Event:   {event}\n"
+        f"    Command: {command}\n\n"
+        f"  Commands run with your full user credentials.  Only approve\n"
+        f"  commands you trust."
+    )
+    try:
+        answer = input("Allow this hook to run? [y/N]: ").strip().lower()
+    except (EOFError, KeyboardInterrupt):
+        print()  # keep the terminal tidy after ^C
+        return False
+
+    if answer in ("y", "yes"):
+        _record_approval(event, command)
+        return True
+
+    return False
+
+
+def _record_approval(event: str, command: str) -> None:
+    entry = {
+        "event": event,
+        "command": command,
+        "approved_at": _utc_now_iso(),
+        "script_mtime_at_approval": script_mtime_iso(command),
+    }
+    with _locked_update_approvals() as data:
+        data["approvals"] = [
+            e for e in data.get("approvals", [])
+            if not (
+                isinstance(e, dict)
+                and e.get("event") == event
+                and e.get("command") == command
+            )
+        ] + [entry]
+
+
+def _utc_now_iso() -> str:
+    return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
+
+
+def revoke(command: str) -> int:
+    """Remove every allowlist entry matching ``command``.
+
+    Returns the number of entries removed.  Does not unregister any
+    callbacks that are already live on the plugin manager in the current
+    process — restart the CLI / gateway to drop them.
+    """
+    with _locked_update_approvals() as data:
+        before = len(data.get("approvals", []))
+        data["approvals"] = [
+            e for e in data.get("approvals", [])
+            if not (isinstance(e, dict) and e.get("command") == command)
+        ]
+        after = len(data["approvals"])
+    return before - after
+
+
+_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
+    ".sh", ".bash", ".zsh", ".fish",
+    ".py", ".pyw",
+    ".rb", ".pl", ".lua",
+    ".js", ".mjs", ".cjs", ".ts",
+)
+
+
+def _command_script_path(command: str) -> str:
+    """Return the script path from ``command`` for doctor / drift checks.
+
+    Prefers a token ending in a known script extension, then a token
+    containing ``/`` or leading ``~``, then the first token.  Handles
+    ``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
+    common bare-path form.
+    """
+    try:
+        parts = shlex.split(command)
+    except ValueError:
+        return command
+    if not parts:
+        return command
+    for part in parts:
+        if part.lower().endswith(_SCRIPT_EXTENSIONS):
+            return part
+    for part in parts:
+        if "/" in part or part.startswith("~"):
+            return part
+    return parts[0]
+
+
+# ---------------------------------------------------------------------------
+# Helpers for accept-hooks resolution
+# ---------------------------------------------------------------------------
+
+def _resolve_effective_accept(
+    cfg: Dict[str, Any], accept_hooks_arg: bool,
+) -> bool:
+    """Combine all three opt-in channels into a single boolean.
+
+    Precedence (any truthy source flips us on):
+      1. ``--accept-hooks`` flag (CLI) / explicit argument
+      2. ``HERMES_ACCEPT_HOOKS`` env var
+      3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
+    """
+    if accept_hooks_arg:
+        return True
+    env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
+    if env in ("1", "true", "yes", "on"):
+        return True
+    cfg_val = cfg.get("hooks_auto_accept", False)
+    return bool(cfg_val)
+
+
+# ---------------------------------------------------------------------------
+# Introspection (used by `hermes hooks` CLI)
+# ---------------------------------------------------------------------------
+
+def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
+    """Return the allowlist record for this pair, if any."""
+    for e in load_allowlist().get("approvals", []):
+        if (
+            isinstance(e, dict)
+            and e.get("event") == event
+            and e.get("command") == command
+        ):
+            return e
+    return None
+
+
+def script_mtime_iso(command: str) -> Optional[str]:
+    """ISO-8601 mtime of the resolved script path, or ``None`` if the
+    script is missing."""
+    path = _command_script_path(command)
+    if not path:
+        return None
+    try:
+        expanded = os.path.expanduser(path)
+        return datetime.fromtimestamp(
+            os.path.getmtime(expanded), tz=timezone.utc,
+        ).isoformat().replace("+00:00", "Z")
+    except OSError:
+        return None
+
+
+def script_is_executable(command: str) -> bool:
+    """Return ``True`` iff ``command`` is runnable as configured.
+
+    For a bare invocation (``/path/hook.sh``) the script itself must be
+    executable.  For interpreter-prefixed commands (``python3
+    /path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
+    to be readable — the interpreter doesn't care about the ``X_OK``
+    bit.  Mirrors what ``_spawn`` would actually do at runtime."""
+    path = _command_script_path(command)
+    if not path:
+        return False
+    expanded = os.path.expanduser(path)
+    if not os.path.isfile(expanded):
+        return False
+    try:
+        argv = shlex.split(command)
+    except ValueError:
+        return False
+    is_bare_invocation = bool(argv) and argv[0] == path
+    required = os.X_OK if is_bare_invocation else os.R_OK
+    return os.access(expanded, required)
+
+
+def run_once(
+    spec: ShellHookSpec, kwargs: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Fire a single shell-hook invocation with a synthetic payload.
+    Used by ``hermes hooks test`` and ``hermes hooks doctor``.
+
+    ``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
+    would pass at runtime.  It is routed through :func:`_serialize_payload`
+    so the synthetic stdin exactly matches what a real hook firing would
+    produce — otherwise scripts tested via ``hermes hooks test`` could
+    diverge silently from production behaviour.
+
+    Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
+    holding the canonical Hermes-wire-shape response."""
+    stdin_json = _serialize_payload(spec.event, kwargs)
+    result = _spawn(spec, stdin_json)
+    result["parsed"] = _parse_response(spec.event, result["stdout"])
+    return result
@@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
 import json
 import logging
 import re
+import subprocess
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

+# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
+# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
+# left as-is so the user can debug them.
+_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
+
+# Matches inline shell snippets like:  !`date +%Y-%m-%d`
+# Non-greedy, single-line only — no newlines inside the backticks.
+_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
+
+# Cap inline-shell output so a runaway command can't blow out the context.
+_INLINE_SHELL_MAX_OUTPUT = 4000
+
+
+def _load_skills_config() -> dict:
+    """Load the ``skills`` section of config.yaml (best-effort)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config() or {}
+        skills_cfg = cfg.get("skills")
+        if isinstance(skills_cfg, dict):
+            return skills_cfg
+    except Exception:
+        logger.debug("Could not read skills config", exc_info=True)
+    return {}
+
+
+def _substitute_template_vars(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None,
+) -> str:
+    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
+
+    Only substitutes tokens for which a concrete value is available —
+    unresolved tokens are left in place so the author can spot them.
+    """
+    if not content:
+        return content
+
+    skill_dir_str = str(skill_dir) if skill_dir else None
+
+    def _replace(match: re.Match) -> str:
+        token = match.group(1)
+        if token == "HERMES_SKILL_DIR" and skill_dir_str:
+            return skill_dir_str
+        if token == "HERMES_SESSION_ID" and session_id:
+            return str(session_id)
+        return match.group(0)
+
+    return _SKILL_TEMPLATE_RE.sub(_replace, content)
+
+
+def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
+    """Execute a single inline-shell snippet and return its stdout (trimmed).
+
+    Failures return a short ``[inline-shell error: ...]`` marker instead of
+    raising, so one bad snippet can't wreck the whole skill message.
+    """
+    try:
+        completed = subprocess.run(
+            ["bash", "-c", command],
+            cwd=str(cwd) if cwd else None,
+            capture_output=True,
+            text=True,
+            timeout=max(1, int(timeout)),
+            check=False,
+        )
+    except subprocess.TimeoutExpired:
+        return f"[inline-shell timeout after {timeout}s: {command}]"
+    except FileNotFoundError:
+        return f"[inline-shell error: bash not found]"
+    except Exception as exc:
+        return f"[inline-shell error: {exc}]"
+
+    output = (completed.stdout or "").rstrip("\n")
+    if not output and completed.stderr:
+        output = completed.stderr.rstrip("\n")
+    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
+        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
+    return output
+
+
+def _expand_inline_shell(
+    content: str,
+    skill_dir: Path | None,
+    timeout: int,
+) -> str:
+    """Replace every !`cmd` snippet in ``content`` with its stdout.
+
+    Runs each snippet with the skill directory as CWD so relative paths in
+    the snippet work the way the author expects.
+    """
+    if "!`" not in content:
+        return content
+
+    def _replace(match: re.Match) -> str:
+        cmd = match.group(1).strip()
+        if not cmd:
+            return ""
+        return _run_inline_shell(cmd, skill_dir, timeout)
+
+    return _INLINE_SHELL_RE.sub(_replace, content)
+

 def build_plan_path(
    user_instruction: str = "",
@@ -133,14 +238,36 @@ def _build_skill_message(
    activation_note: str,
    user_instruction: str = "",
    runtime_note: str = "",
+    session_id: str | None = None,
 ) -> str:
    """Format a loaded skill into a user/system message payload."""
    from tools.skills_tool import SKILLS_DIR

    content = str(loaded_skill.get("content") or "")

+    # ── Template substitution and inline-shell expansion ──
+    # Done before anything else so downstream blocks (setup notes,
+    # supporting-file hints) see the expanded content.
+    skills_cfg = _load_skills_config()
+    if skills_cfg.get("template_vars", True):
+        content = _substitute_template_vars(content, skill_dir, session_id)
+    if skills_cfg.get("inline_shell", False):
+        timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
+        content = _expand_inline_shell(content, skill_dir, timeout)
+
    parts = [activation_note, "", content.strip()]

+    # ── Inject the absolute skill directory so the agent can reference
+    #    bundled scripts without an extra skill_view() round-trip. ──
+    if skill_dir:
+        parts.append("")
+        parts.append(f"[Skill directory: {skill_dir}]")
+        parts.append(
+            "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
+            "`templates/config.yaml`) against that directory, then run them "
+            "with the terminal tool using the absolute path."
+        )
+
    # ── Inject resolved skill config values ──
    _inject_skill_config(loaded_skill, parts)

@@ -188,11 +315,13 @@ def _build_skill_message(
            # Skill is from an external dir — use the skill name instead
            skill_view_target = skill_dir.name
        parts.append("")
-        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
+        parts.append("[This skill has supporting files:]")
        for sf in supporting:
-            parts.append(f"- {sf}")
+            parts.append(f"- {sf}  ->  {skill_dir / sf}")
        parts.append(
-            f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
+            f'\nLoad any of these with skill_view(name="{skill_view_target}", '
+            f'file_path="<path>"), or run scripts directly by absolute path '
+            f"(e.g. `node {skill_dir}/scripts/foo.js`)."
        )

    if user_instruction:
@@ -332,6 +461,7 @@ def build_skill_invocation_message(
        activation_note,
        user_instruction=user_instruction,
        runtime_note=runtime_note,
+        session_id=task_id,
    )


@@ -370,6 +500,7 @@ def build_preloaded_skills_prompt(
                loaded_skill,
                skill_dir,
                activation_note,
+                session_id=task_id,
            )
        )
        loaded_names.append(skill_name)
@@ -0,0 +1,51 @@
+"""Transport layer types and registry for provider response normalization.
+
+Usage:
+    from agent.transports import get_transport
+    transport = get_transport("anthropic_messages")
+    result = transport.normalize_response(raw_response)
+"""
+
+from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401
+
+_REGISTRY: dict = {}
+
+
+def register_transport(api_mode: str, transport_cls: type) -> None:
+    """Register a transport class for an api_mode string."""
+    _REGISTRY[api_mode] = transport_cls
+
+
+def get_transport(api_mode: str):
+    """Get a transport instance for the given api_mode.
+
+    Returns None if no transport is registered for this api_mode.
+    This allows gradual migration — call sites can check for None
+    and fall back to the legacy code path.
+    """
+    if not _REGISTRY:
+        _discover_transports()
+    cls = _REGISTRY.get(api_mode)
+    if cls is None:
+        return None
+    return cls()
+
+
+def _discover_transports() -> None:
+    """Import all transport modules to trigger auto-registration."""
+    try:
+        import agent.transports.anthropic  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.codex  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.chat_completions  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.bedrock  # noqa: F401
+    except ImportError:
+        pass
@@ -0,0 +1,129 @@
+"""Anthropic Messages API transport.
+
+Delegates to the existing adapter functions in agent/anthropic_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse
+
+
+class AnthropicTransport(ProviderTransport):
+    """Transport for api_mode='anthropic_messages'.
+
+    Wraps the existing functions in anthropic_adapter.py behind the
+    ProviderTransport ABC.  Each method delegates — no logic is duplicated.
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "anthropic_messages"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI messages to Anthropic (system, messages) tuple.
+
+        kwargs:
+            base_url: Optional[str] — affects thinking signature handling.
+        """
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        base_url = kwargs.get("base_url")
+        return convert_messages_to_anthropic(messages, base_url=base_url)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Anthropic input_schema format."""
+        from agent.anthropic_adapter import convert_tools_to_anthropic
+
+        return convert_tools_to_anthropic(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Anthropic messages.create() kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params (all optional):
+            max_tokens: int
+            reasoning_config: dict | None
+            tool_choice: str | None
+            is_oauth: bool
+            preserve_dots: bool
+            context_length: int | None
+            base_url: str | None
+            fast_mode: bool
+        """
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        return build_anthropic_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=params.get("max_tokens", 16384),
+            reasoning_config=params.get("reasoning_config"),
+            tool_choice=params.get("tool_choice"),
+            is_oauth=params.get("is_oauth", False),
+            preserve_dots=params.get("preserve_dots", False),
+            context_length=params.get("context_length"),
+            base_url=params.get("base_url"),
+            fast_mode=params.get("fast_mode", False),
+        )
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Anthropic response to NormalizedResponse.
+
+        kwargs:
+            strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names.
+        """
+        from agent.anthropic_adapter import normalize_anthropic_response_v2
+
+        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
+        return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Anthropic response structure is valid."""
+        if response is None:
+            return False
+        content_blocks = getattr(response, "content", None)
+        if not isinstance(content_blocks, list):
+            return False
+        if not content_blocks:
+            return False
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Extract Anthropic cache_read and cache_creation token counts."""
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        cached = getattr(usage, "cache_read_input_tokens", 0) or 0
+        written = getattr(usage, "cache_creation_input_tokens", 0) or 0
+        if cached or written:
+            return {"cached_tokens": cached, "creation_tokens": written}
+        return None
+
+    # Promote the adapter's canonical mapping to module level so it's shared
+    _STOP_REASON_MAP = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Anthropic stop_reason to OpenAI finish_reason."""
+        return self._STOP_REASON_MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("anthropic_messages", AnthropicTransport)
@@ -0,0 +1,89 @@
+"""Abstract base for provider transports.
+
+A transport owns the data path for one api_mode:
+  convert_messages → convert_tools → build_kwargs → normalize_response
+
+It does NOT own: client construction, streaming, credential refresh,
+prompt caching, interrupt handling, or retry logic.  Those stay on AIAgent.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+from agent.transports.types import NormalizedResponse
+
+
+class ProviderTransport(ABC):
+    """Base class for provider-specific format conversion and normalization."""
+
+    @property
+    @abstractmethod
+    def api_mode(self) -> str:
+        """The api_mode string this transport handles (e.g. 'anthropic_messages')."""
+        ...
+
+    @abstractmethod
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI-format messages to provider-native format.
+
+        Returns provider-specific structure (e.g. (system, messages) for Anthropic,
+        or the messages list unchanged for chat_completions).
+        """
+        ...
+
+    @abstractmethod
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI-format tool definitions to provider-native format.
+
+        Returns provider-specific tool list (e.g. Anthropic input_schema format).
+        """
+        ...
+
+    @abstractmethod
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build the complete API call kwargs dict.
+
+        This is the primary entry point — it typically calls convert_messages()
+        and convert_tools() internally, then adds model-specific config.
+
+        Returns a dict ready to be passed to the provider's SDK client.
+        """
+        ...
+
+    @abstractmethod
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize a raw provider response to the shared NormalizedResponse type.
+
+        This is the only method that returns a transport-layer type.
+        """
+        ...
+
+    def validate_response(self, response: Any) -> bool:
+        """Optional: check if the raw response is structurally valid.
+
+        Returns True if valid, False if the response should be treated as invalid.
+        Default implementation always returns True.
+        """
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Optional: extract provider-specific cache hit/creation stats.
+
+        Returns dict with 'cached_tokens' and 'creation_tokens', or None.
+        Default returns None.
+        """
+        return None
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Optional: map provider-specific stop reason to OpenAI equivalent.
+
+        Default returns the raw reason unchanged.  Override for providers
+        with different stop reason vocabularies.
+        """
+        return raw_reason
@@ -0,0 +1,154 @@
+"""AWS Bedrock Converse API transport.
+
+Delegates to the existing adapter functions in agent/bedrock_adapter.py.
+Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
+owns format conversion and normalization, while client construction and
+boto3 calls stay on AIAgent.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class BedrockTransport(ProviderTransport):
+    """Transport for api_mode='bedrock_converse'."""
+
+    @property
+    def api_mode(self) -> str:
+        return "bedrock_converse"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI messages to Bedrock Converse format."""
+        from agent.bedrock_adapter import convert_messages_to_converse
+        return convert_messages_to_converse(messages)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
+        from agent.bedrock_adapter import convert_tools_to_converse
+        return convert_tools_to_converse(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Bedrock converse() kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params:
+            max_tokens: int — output token limit (default 4096)
+            temperature: float | None
+            guardrail_config: dict | None — Bedrock guardrails
+            region: str — AWS region (default 'us-east-1')
+        """
+        from agent.bedrock_adapter import build_converse_kwargs
+
+        region = params.get("region", "us-east-1")
+        guardrail = params.get("guardrail_config")
+
+        kwargs = build_converse_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=params.get("max_tokens", 4096),
+            temperature=params.get("temperature"),
+            guardrail_config=guardrail,
+        )
+        # Sentinel keys for dispatch — agent pops these before the boto3 call
+        kwargs["__bedrock_converse__"] = True
+        kwargs["__bedrock_region__"] = region
+        return kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Bedrock response to NormalizedResponse.
+
+        Handles two shapes:
+        1. Raw boto3 dict (from direct converse() calls)
+        2. Already-normalized SimpleNamespace with .choices (from dispatch site)
+        """
+        from agent.bedrock_adapter import normalize_converse_response
+
+        # Normalize to OpenAI-compatible SimpleNamespace
+        if hasattr(response, "choices") and response.choices:
+            # Already normalized at dispatch site
+            ns = response
+        else:
+            # Raw boto3 dict
+            ns = normalize_converse_response(response)
+
+        choice = ns.choices[0]
+        msg = choice.message
+        finish_reason = choice.finish_reason or "stop"
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = [
+                ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                )
+                for tc in msg.tool_calls
+            ]
+
+        usage = None
+        if hasattr(ns, "usage") and ns.usage:
+            u = ns.usage
+            usage = Usage(
+                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
+                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
+                total_tokens=getattr(u, "total_tokens", 0) or 0,
+            )
+
+        reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
+
+        return NormalizedResponse(
+            content=msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=reasoning,
+            usage=usage,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Bedrock response structure.
+
+        After normalize_converse_response, the response has OpenAI-compatible
+        .choices — same check as chat_completions.
+        """
+        if response is None:
+            return False
+        # Raw Bedrock dict response — check for 'output' key
+        if isinstance(response, dict):
+            return "output" in response
+        # Already-normalized SimpleNamespace
+        if hasattr(response, "choices"):
+            return bool(response.choices)
+        return False
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Bedrock stop reason to OpenAI finish_reason.
+
+        The adapter already does this mapping inside normalize_converse_response,
+        so this is only used for direct access to raw responses.
+        """
+        _MAP = {
+            "end_turn": "stop",
+            "tool_use": "tool_calls",
+            "max_tokens": "length",
+            "stop_sequence": "stop",
+            "guardrail_intervened": "content_filter",
+            "content_filtered": "content_filter",
+        }
+        return _MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("bedrock_converse", BedrockTransport)
@@ -0,0 +1,387 @@
+"""OpenAI Chat Completions transport.
+
+Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
+providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
+
+Messages and tools are already in OpenAI format — convert_messages and
+convert_tools are near-identity.  The complexity lives in build_kwargs
+which has provider-specific conditionals for max_tokens defaults,
+reasoning configuration, temperature handling, and extra_body assembly.
+"""
+
+import copy
+from typing import Any, Dict, List, Optional
+
+from agent.prompt_builder import DEVELOPER_ROLE_MODELS
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class ChatCompletionsTransport(ProviderTransport):
+    """Transport for api_mode='chat_completions'.
+
+    The default path for OpenAI-compatible providers.
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "chat_completions"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
+        """Messages are already in OpenAI format — sanitize Codex leaks only.
+
+        Strips Codex Responses API fields (``codex_reasoning_items`` on the
+        message, ``call_id``/``response_item_id`` on tool_calls) that strict
+        chat-completions providers reject with 400/422.
+        """
+        needs_sanitize = False
+        for msg in messages:
+            if not isinstance(msg, dict):
+                continue
+            if "codex_reasoning_items" in msg:
+                needs_sanitize = True
+                break
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tc in tool_calls:
+                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
+                        needs_sanitize = True
+                        break
+                if needs_sanitize:
+                    break
+
+        if not needs_sanitize:
+            return messages
+
+        sanitized = copy.deepcopy(messages)
+        for msg in sanitized:
+            if not isinstance(msg, dict):
+                continue
+            msg.pop("codex_reasoning_items", None)
+            tool_calls = msg.get("tool_calls")
+            if isinstance(tool_calls, list):
+                for tc in tool_calls:
+                    if isinstance(tc, dict):
+                        tc.pop("call_id", None)
+                        tc.pop("response_item_id", None)
+        return sanitized
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Tools are already in OpenAI format — identity."""
+        return tools
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build chat.completions.create() kwargs.
+
+        This is the most complex transport method — it handles ~16 providers
+        via params rather than subclasses.
+
+        params:
+            timeout: float — API call timeout
+            max_tokens: int | None — user-configured max tokens
+            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
+            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
+            reasoning_config: dict | None
+            request_overrides: dict | None
+            session_id: str | None
+            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
+            model_lower: str — lowercase model name for pattern matching
+            # Provider detection flags (all optional, default False)
+            is_openrouter: bool
+            is_nous: bool
+            is_qwen_portal: bool
+            is_github_models: bool
+            is_nvidia_nim: bool
+            is_kimi: bool
+            is_custom_provider: bool
+            ollama_num_ctx: int | None
+            # Provider routing
+            provider_preferences: dict | None
+            # Qwen-specific
+            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
+            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
+            # Temperature
+            fixed_temperature: Any — from _fixed_temperature_for_model()
+            omit_temperature: bool
+            # Reasoning
+            supports_reasoning: bool
+            github_reasoning_extra: dict | None
+            # Claude on OpenRouter/Nous max output
+            anthropic_max_output: int | None
+            # Extra
+            extra_body_additions: dict | None — pre-built extra_body entries
+        """
+        # Codex sanitization: drop reasoning_items / call_id / response_item_id
+        sanitized = self.convert_messages(messages)
+
+        # Qwen portal prep AFTER codex sanitization.  If sanitize already
+        # deepcopied, reuse that copy via the in-place variant to avoid a
+        # second deepcopy.
+        is_qwen = params.get("is_qwen_portal", False)
+        if is_qwen:
+            qwen_prep = params.get("qwen_prepare_fn")
+            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
+            if sanitized is messages:
+                if qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+            else:
+                # Already deepcopied — transform in place
+                if qwen_prep_inplace is not None:
+                    qwen_prep_inplace(sanitized)
+                elif qwen_prep is not None:
+                    sanitized = qwen_prep(sanitized)
+
+        # Developer role swap for GPT-5/Codex models
+        model_lower = params.get("model_lower", (model or "").lower())
+        if (
+            sanitized
+            and isinstance(sanitized[0], dict)
+            and sanitized[0].get("role") == "system"
+            and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
+        ):
+            sanitized = list(sanitized)
+            sanitized[0] = {**sanitized[0], "role": "developer"}
+
+        api_kwargs: Dict[str, Any] = {
+            "model": model,
+            "messages": sanitized,
+        }
+
+        timeout = params.get("timeout")
+        if timeout is not None:
+            api_kwargs["timeout"] = timeout
+
+        # Temperature
+        fixed_temp = params.get("fixed_temperature")
+        omit_temp = params.get("omit_temperature", False)
+        if omit_temp:
+            api_kwargs.pop("temperature", None)
+        elif fixed_temp is not None:
+            api_kwargs["temperature"] = fixed_temp
+
+        # Qwen metadata (caller precomputes {sessionId, promptId})
+        qwen_meta = params.get("qwen_session_metadata")
+        if qwen_meta and is_qwen:
+            api_kwargs["metadata"] = qwen_meta
+
+        # Tools
+        if tools:
+            api_kwargs["tools"] = tools
+
+        # max_tokens resolution — priority: ephemeral > user > provider default
+        max_tokens_fn = params.get("max_tokens_param_fn")
+        ephemeral = params.get("ephemeral_max_output_tokens")
+        max_tokens = params.get("max_tokens")
+        anthropic_max_out = params.get("anthropic_max_output")
+        is_nvidia_nim = params.get("is_nvidia_nim", False)
+        is_kimi = params.get("is_kimi", False)
+        reasoning_config = params.get("reasoning_config")
+
+        if ephemeral is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(ephemeral))
+        elif max_tokens is not None and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(max_tokens))
+        elif is_nvidia_nim and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(16384))
+        elif is_qwen and max_tokens_fn:
+            api_kwargs.update(max_tokens_fn(65536))
+        elif is_kimi and max_tokens_fn:
+            # Kimi/Moonshot: 32000 matches Kimi CLI's default
+            api_kwargs.update(max_tokens_fn(32000))
+        elif anthropic_max_out is not None:
+            api_kwargs["max_tokens"] = anthropic_max_out
+
+        # Kimi: top-level reasoning_effort (unless thinking disabled)
+        if is_kimi:
+            _kimi_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _kimi_thinking_off:
+                _kimi_effort = "medium"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _kimi_effort = _e
+                api_kwargs["reasoning_effort"] = _kimi_effort
+
+        # extra_body assembly
+        extra_body: Dict[str, Any] = {}
+
+        is_openrouter = params.get("is_openrouter", False)
+        is_nous = params.get("is_nous", False)
+        is_github_models = params.get("is_github_models", False)
+
+        provider_prefs = params.get("provider_preferences")
+        if provider_prefs and is_openrouter:
+            extra_body["provider"] = provider_prefs
+
+        # Kimi extra_body.thinking
+        if is_kimi:
+            _kimi_thinking_enabled = True
+            if reasoning_config and isinstance(reasoning_config, dict):
+                if reasoning_config.get("enabled") is False:
+                    _kimi_thinking_enabled = False
+            extra_body["thinking"] = {
+                "type": "enabled" if _kimi_thinking_enabled else "disabled",
+            }
+
+        # Reasoning
+        if params.get("supports_reasoning", False):
+            if is_github_models:
+                gh_reasoning = params.get("github_reasoning_extra")
+                if gh_reasoning is not None:
+                    extra_body["reasoning"] = gh_reasoning
+            else:
+                if reasoning_config is not None:
+                    rc = dict(reasoning_config)
+                    if is_nous and rc.get("enabled") is False:
+                        pass  # omit for Nous when disabled
+                    else:
+                        extra_body["reasoning"] = rc
+                else:
+                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+
+        if is_nous:
+            extra_body["tags"] = ["product=hermes-agent"]
+
+        # Ollama num_ctx
+        ollama_ctx = params.get("ollama_num_ctx")
+        if ollama_ctx:
+            options = extra_body.get("options", {})
+            options["num_ctx"] = ollama_ctx
+            extra_body["options"] = options
+
+        # Ollama/custom think=false
+        if params.get("is_custom_provider", False):
+            if reasoning_config and isinstance(reasoning_config, dict):
+                _effort = (reasoning_config.get("effort") or "").strip().lower()
+                _enabled = reasoning_config.get("enabled", True)
+                if _effort == "none" or _enabled is False:
+                    extra_body["think"] = False
+
+        if is_qwen:
+            extra_body["vl_high_resolution_images"] = True
+
+        # Merge any pre-built extra_body additions
+        additions = params.get("extra_body_additions")
+        if additions:
+            extra_body.update(additions)
+
+        if extra_body:
+            api_kwargs["extra_body"] = extra_body
+
+        # Request overrides last (service_tier etc.)
+        overrides = params.get("request_overrides")
+        if overrides:
+            api_kwargs.update(overrides)
+
+        return api_kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize OpenAI ChatCompletion to NormalizedResponse.
+
+        For chat_completions, this is near-identity — the response is already
+        in OpenAI format.  extra_content on tool_calls (Gemini thought_signature)
+        is preserved via ToolCall.provider_data.  reasoning_details (OpenRouter
+        unified format) and reasoning_content (DeepSeek/Moonshot) are also
+        preserved for downstream replay.
+        """
+        choice = response.choices[0]
+        msg = choice.message
+        finish_reason = choice.finish_reason or "stop"
+
+        tool_calls = None
+        if msg.tool_calls:
+            tool_calls = []
+            for tc in msg.tool_calls:
+                # Preserve provider-specific extras on the tool call.
+                # Gemini 3 thinking models attach extra_content with
+                # thought_signature — without replay on the next turn the API
+                # rejects the request with 400.
+                tc_provider_data: Dict[str, Any] = {}
+                extra = getattr(tc, "extra_content", None)
+                if extra is None and hasattr(tc, "model_extra"):
+                    extra = (tc.model_extra or {}).get("extra_content")
+                if extra is not None:
+                    if hasattr(extra, "model_dump"):
+                        try:
+                            extra = extra.model_dump()
+                        except Exception:
+                            pass
+                    tc_provider_data["extra_content"] = extra
+                tool_calls.append(ToolCall(
+                    id=tc.id,
+                    name=tc.function.name,
+                    arguments=tc.function.arguments,
+                    provider_data=tc_provider_data or None,
+                ))
+
+        usage = None
+        if hasattr(response, "usage") and response.usage:
+            u = response.usage
+            usage = Usage(
+                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
+                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
+                total_tokens=getattr(u, "total_tokens", 0) or 0,
+            )
+
+        # Preserve reasoning fields separately.  DeepSeek/Moonshot use
+        # ``reasoning_content``; others use ``reasoning``.  Downstream code
+        # (_extract_reasoning, thinking-prefill retry) reads both distinctly,
+        # so keep them apart in provider_data rather than merging.
+        reasoning = getattr(msg, "reasoning", None)
+        reasoning_content = getattr(msg, "reasoning_content", None)
+
+        provider_data: Dict[str, Any] = {}
+        if reasoning_content:
+            provider_data["reasoning_content"] = reasoning_content
+        rd = getattr(msg, "reasoning_details", None)
+        if rd:
+            provider_data["reasoning_details"] = rd
+
+        return NormalizedResponse(
+            content=msg.content,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason,
+            reasoning=reasoning,
+            usage=usage,
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check that response has valid choices."""
+        if response is None:
+            return False
+        if not hasattr(response, "choices") or response.choices is None:
+            return False
+        if not response.choices:
+            return False
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        details = getattr(usage, "prompt_tokens_details", None)
+        if details is None:
+            return None
+        cached = getattr(details, "cached_tokens", 0) or 0
+        written = getattr(details, "cache_write_tokens", 0) or 0
+        if cached or written:
+            return {"cached_tokens": cached, "creation_tokens": written}
+        return None
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("chat_completions", ChatCompletionsTransport)
@@ -0,0 +1,217 @@
+"""OpenAI Responses API (Codex) transport.
+
+Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle,
+streaming, or the _run_codex_stream() call path.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse, ToolCall, Usage
+
+
+class ResponsesApiTransport(ProviderTransport):
+    """Transport for api_mode='codex_responses'.
+
+    Wraps the functions extracted into codex_responses_adapter.py (PR 1).
+    """
+
+    @property
+    def api_mode(self) -> str:
+        return "codex_responses"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI chat messages to Responses API input items."""
+        from agent.codex_responses_adapter import _chat_messages_to_responses_input
+        return _chat_messages_to_responses_input(messages)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI tool schemas to Responses API function definitions."""
+        from agent.codex_responses_adapter import _responses_tools
+        return _responses_tools(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build Responses API kwargs.
+
+        Calls convert_messages and convert_tools internally.
+
+        params:
+            instructions: str — system prompt (extracted from messages[0] if not given)
+            reasoning_config: dict | None — {effort, enabled}
+            session_id: str | None — used for prompt_cache_key + xAI conv header
+            max_tokens: int | None — max_output_tokens
+            request_overrides: dict | None — extra kwargs merged in
+            provider: str | None — provider name for backend-specific logic
+            base_url: str | None — endpoint URL
+            base_url_hostname: str | None — hostname for backend detection
+            is_github_responses: bool — Copilot/GitHub models backend
+            is_codex_backend: bool — chatgpt.com/backend-api/codex
+            is_xai_responses: bool — xAI/Grok backend
+            github_reasoning_extra: dict | None — Copilot reasoning params
+        """
+        from agent.codex_responses_adapter import (
+            _chat_messages_to_responses_input,
+            _responses_tools,
+        )
+
+        from run_agent import DEFAULT_AGENT_IDENTITY
+
+        instructions = params.get("instructions", "")
+        payload_messages = messages
+        if not instructions:
+            if messages and messages[0].get("role") == "system":
+                instructions = str(messages[0].get("content") or "").strip()
+                payload_messages = messages[1:]
+        if not instructions:
+            instructions = DEFAULT_AGENT_IDENTITY
+
+        is_github_responses = params.get("is_github_responses", False)
+        is_codex_backend = params.get("is_codex_backend", False)
+        is_xai_responses = params.get("is_xai_responses", False)
+
+        # Resolve reasoning effort
+        reasoning_effort = "medium"
+        reasoning_enabled = True
+        reasoning_config = params.get("reasoning_config")
+        if reasoning_config and isinstance(reasoning_config, dict):
+            if reasoning_config.get("enabled") is False:
+                reasoning_enabled = False
+            elif reasoning_config.get("effort"):
+                reasoning_effort = reasoning_config["effort"]
+
+        _effort_clamp = {"minimal": "low"}
+        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
+
+        kwargs = {
+            "model": model,
+            "instructions": instructions,
+            "input": _chat_messages_to_responses_input(payload_messages),
+            "tools": _responses_tools(tools),
+            "tool_choice": "auto",
+            "parallel_tool_calls": True,
+            "store": False,
+        }
+
+        session_id = params.get("session_id")
+        if not is_github_responses and session_id:
+            kwargs["prompt_cache_key"] = session_id
+
+        if reasoning_enabled and is_xai_responses:
+            kwargs["include"] = ["reasoning.encrypted_content"]
+        elif reasoning_enabled:
+            if is_github_responses:
+                github_reasoning = params.get("github_reasoning_extra")
+                if github_reasoning is not None:
+                    kwargs["reasoning"] = github_reasoning
+            else:
+                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
+                kwargs["include"] = ["reasoning.encrypted_content"]
+        elif not is_github_responses and not is_xai_responses:
+            kwargs["include"] = []
+
+        request_overrides = params.get("request_overrides")
+        if request_overrides:
+            kwargs.update(request_overrides)
+
+        max_tokens = params.get("max_tokens")
+        if max_tokens is not None and not is_codex_backend:
+            kwargs["max_output_tokens"] = max_tokens
+
+        if is_xai_responses and session_id:
+            kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
+
+        return kwargs
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize Codex Responses API response to NormalizedResponse."""
+        from agent.codex_responses_adapter import (
+            _normalize_codex_response,
+            _extract_responses_message_text,
+            _extract_responses_reasoning_text,
+        )
+
+        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
+        msg, finish_reason = _normalize_codex_response(response)
+
+        tool_calls = None
+        if msg and msg.tool_calls:
+            tool_calls = []
+            for tc in msg.tool_calls:
+                provider_data = {}
+                if hasattr(tc, "call_id") and tc.call_id:
+                    provider_data["call_id"] = tc.call_id
+                if hasattr(tc, "response_item_id") and tc.response_item_id:
+                    provider_data["response_item_id"] = tc.response_item_id
+                tool_calls.append(ToolCall(
+                    id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
+                    name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
+                    arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
+                    provider_data=provider_data or None,
+                ))
+
+        # Extract reasoning items for provider_data
+        provider_data = {}
+        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
+            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
+        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
+            provider_data["reasoning_details"] = msg.reasoning_details
+
+        return NormalizedResponse(
+            content=msg.content if msg else None,
+            tool_calls=tool_calls,
+            finish_reason=finish_reason or "stop",
+            reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
+            usage=None,  # Codex usage is extracted separately in normalize_usage()
+            provider_data=provider_data or None,
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Check Codex Responses API response has valid output structure.
+
+        Returns True only if response.output is a non-empty list.
+        Does NOT check output_text fallback — the caller handles that
+        with diagnostic logging for stream backfill recovery.
+        """
+        if response is None:
+            return False
+        output = getattr(response, "output", None)
+        if not isinstance(output, list) or not output:
+            return False
+        return True
+
+    def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
+        """Validate and sanitize Codex API kwargs before the call.
+
+        Normalizes input items, strips unsupported fields, validates structure.
+        """
+        from agent.codex_responses_adapter import _preflight_codex_api_kwargs
+        return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map Codex response.status to OpenAI finish_reason.
+
+        Codex uses response.status ('completed', 'incomplete') +
+        response.incomplete_details.reason for granular mapping.
+        This method handles the simple status string; the caller
+        should check incomplete_details separately for 'max_output_tokens'.
+        """
+        _MAP = {
+            "completed": "stop",
+            "incomplete": "length",
+            "failed": "stop",
+            "cancelled": "stop",
+        }
+        return _MAP.get(raw_reason, "stop")
+
+
+# Auto-register on import
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("codex_responses", ResponsesApiTransport)
@@ -0,0 +1,100 @@
+"""Shared types for normalized provider responses.
+
+These dataclasses define the canonical shape that all provider adapters
+normalize responses to.  The shared surface is intentionally minimal —
+only fields that every downstream consumer reads are top-level.
+Protocol-specific state goes in ``provider_data`` dicts (response-level
+and per-tool-call) so that protocol-aware code paths can access it
+without polluting the shared type.
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+
+@dataclass
+class ToolCall:
+    """A normalized tool call from any provider.
+
+    ``id`` is the protocol's canonical identifier — what gets used in
+    ``tool_call_id`` / ``tool_use_id`` when constructing tool result
+    messages.  May be ``None`` when the provider omits it; the agent
+    fills it via ``_deterministic_call_id()`` before storing in history.
+
+    ``provider_data`` carries per-tool-call protocol metadata that only
+    protocol-aware code reads:
+
+    * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
+    * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
+    * Others: ``None``
+    """
+
+    id: Optional[str]
+    name: str
+    arguments: str  # JSON string
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+
+@dataclass
+class Usage:
+    """Token usage from an API response."""
+
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    cached_tokens: int = 0
+
+
+@dataclass
+class NormalizedResponse:
+    """Normalized API response from any provider.
+
+    Shared fields are truly cross-provider — every caller can rely on
+    them without branching on api_mode.  Protocol-specific state goes in
+    ``provider_data`` so that only protocol-aware code paths read it.
+
+    Response-level ``provider_data`` examples:
+
+    * Anthropic: ``{"reasoning_details": [...]}``
+    * Codex: ``{"codex_reasoning_items": [...]}``
+    * Others: ``None``
+    """
+
+    content: Optional[str]
+    tool_calls: Optional[List[ToolCall]]
+    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
+    reasoning: Optional[str] = None
+    usage: Optional[Usage] = None
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+
+# ---------------------------------------------------------------------------
+# Factory helpers
+# ---------------------------------------------------------------------------
+
+def build_tool_call(
+    id: Optional[str],
+    name: str,
+    arguments: Any,
+    **provider_fields: Any,
+) -> ToolCall:
+    """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
+
+    Any extra keyword arguments are collected into ``provider_data``.
+    """
+    args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
+    pd = dict(provider_fields) if provider_fields else None
+    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
+
+
+def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
+    """Translate a provider-specific stop reason to the normalised set.
+
+    Falls back to ``"stop"`` for unknown or ``None`` reasons.
+    """
+    if reason is None:
+        return "stop"
+    return mapping.get(reason, "stop")
@@ -6,6 +6,7 @@ from decimal import Decimal
 from typing import Any, Dict, Literal, Optional

 from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
+from utils import base_url_host_matches

 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

@@ -393,7 +394,7 @@ def resolve_billing_route(

    if provider_name == "openai-codex":
        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
-    if provider_name == "openrouter" or "openrouter.ai" in base:
+    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
    if provider_name == "anthropic":
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
@@ -1190,12 +1190,12 @@ def main(
    """
    # Handle list distributions
    if list_distributions:
-        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
-        
+        from toolset_distributions import print_distribution_info
+
        print("📊 Available Toolset Distributions")
        print("=" * 70)
-        
-        all_dists = get_all_dists()
+
+        all_dists = list_distributions()
        for dist_name in sorted(all_dists.keys()):
            print_distribution_info(dist_name)
        
@@ -770,10 +770,12 @@ code_execution:
 # Subagent Delegation
 # =============================================================================
 # The delegate_task tool spawns child agents with isolated context.
-# Supports single tasks and batch mode (up to 3 parallel).
+# Supports single tasks and batch mode (default 3 parallel, configurable).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
+  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
+  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -917,3 +919,39 @@ display:
 #   # Names and usernames are NOT affected (user-chosen, publicly visible).
 #   # Routing/delivery still uses the original values internally.
 #   redact_pii: false
+
+# =============================================================================
+# Shell-script hooks
+# =============================================================================
+# Register shell scripts as plugin-hook callbacks.  Each entry is executed as
+# a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
+# stdout the script may return JSON that either blocks the tool call or
+# injects context into the next LLM call.
+#
+# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
+#   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
+#   pre_api_request, post_api_request, on_session_start, on_session_end,
+#   on_session_finalize, on_session_reset, subagent_stop
+#
+# First-use consent: each (event, command) pair prompts once on a TTY, then
+# is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
+# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
+# hooks_auto_accept key below.
+#
+# See website/docs/user-guide/features/hooks.md for the full JSON wire
+# protocol and worked examples.
+#
+# hooks:
+#   pre_tool_call:
+#     - matcher: "terminal"
+#       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
+#       timeout: 10
+#   post_tool_call:
+#     - matcher: "write_file|patch"
+#       command: "~/.hermes/agent-hooks/auto-format.sh"
+#   pre_llm_call:
+#     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
+#   subagent_stop:
+#     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
+#
+# hooks_auto_accept: false
@@ -19,12 +19,14 @@ import shutil
 import sys
 import json
 import re
+import concurrent.futures
 import base64
 import atexit
 import tempfile
 import time
 import uuid
 import textwrap
+from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
@@ -65,6 +67,7 @@ from agent.usage_pricing import (
    format_duration_compact,
    format_token_count_compact,
 )
+from agent.account_usage import fetch_account_usage, render_account_usage_lines
 from hermes_cli.banner import _format_context_length, format_banner_version_label

 _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
@@ -74,6 +77,7 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧
 # User-managed env files should override stale shell exports on restart.
 from hermes_constants import get_hermes_home, display_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv
+from utils import base_url_host_matches

 _hermes_home = get_hermes_home()
 _project_env = Path(__file__).parent / '.env'
@@ -367,7 +371,6 @@ def load_cli_config() -> Dict[str, Any]:
        },
        "delegation": {
            "max_iterations": 45,  # Max tool-calling turns per child agent
-            "default_toolsets": ["terminal", "file", "web"],  # Default toolsets for subagents
            "model": "",       # Subagent model override (empty = inherit parent model)
            "provider": "",    # Subagent provider override (empty = inherit parent provider)
            "base_url": "",    # Direct OpenAI-compatible endpoint for subagents
@@ -528,7 +531,6 @@ def load_cli_config() -> Dict[str, Any]:
            if _file_has_terminal_config or env_var not in os.environ:
                val = terminal_config[config_key]
                if isinstance(val, list):
-                    import json
                    os.environ[env_var] = json.dumps(val)
                else:
                    os.environ[env_var] = str(val)
@@ -912,6 +914,32 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None:
    print(f"\033[32m✓ Worktree cleaned up: {wt_path}\033[0m")


+def _run_state_db_auto_maintenance(session_db) -> None:
+    """Call ``SessionDB.maybe_auto_prune_and_vacuum`` using current config.
+
+    Reads the ``sessions:`` section from config.yaml via
+    :func:`hermes_cli.config.load_config` (the authoritative loader that
+    deep-merges DEFAULT_CONFIG, so unmigrated configs still get default
+    values). Honours ``auto_prune`` / ``retention_days`` /
+    ``vacuum_after_prune`` / ``min_interval_hours``, and delegates to the
+    DB. Never raises — maintenance must never block interactive startup.
+    """
+    if session_db is None:
+        return
+    try:
+        from hermes_cli.config import load_config as _load_full_config
+        cfg = (_load_full_config().get("sessions") or {})
+        if not cfg.get("auto_prune", False):
+            return
+        session_db.maybe_auto_prune_and_vacuum(
+            retention_days=int(cfg.get("retention_days", 90)),
+            min_interval_hours=int(cfg.get("min_interval_hours", 24)),
+            vacuum=bool(cfg.get("vacuum_after_prune", True)),
+        )
+    except Exception as exc:
+        logger.debug("state.db auto-maintenance skipped: %s", exc)
+
+
 def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None:
    """Remove stale worktrees and orphaned branches on startup.

@@ -1143,8 +1171,6 @@ def _rich_text_from_ansi(text: str) -> _RichText:

 def _strip_markdown_syntax(text: str) -> str:
    """Best-effort markdown marker removal for plain-text display."""
-    import re
-
    plain = _rich_text_from_ansi(text or "").plain
    plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE)
    plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE)
@@ -1154,11 +1180,11 @@ def _strip_markdown_syntax(text: str) -> str:
    plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain)
    plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain)
    plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain)
-    plain = re.sub(r"___([^_]+)___", r"\1", plain)
+    plain = re.sub(r"(?<!\w)___([^_]+)___(?!\w)", r"\1", plain)
    plain = re.sub(r"\*\*([^*]+)\*\*", r"\1", plain)
-    plain = re.sub(r"__([^_]+)__", r"\1", plain)
+    plain = re.sub(r"(?<!\w)__([^_]+)__(?!\w)", r"\1", plain)
    plain = re.sub(r"\*([^*]+)\*", r"\1", plain)
-    plain = re.sub(r"_([^_]+)_", r"\1", plain)
+    plain = re.sub(r"(?<!\w)_([^_]+)_(?!\w)", r"\1", plain)
    plain = re.sub(r"~~([^~]+)~~", r"\1", plain)
    plain = re.sub(r"\n{3,}", "\n\n", plain)
    return plain.strip("\n")
@@ -1271,10 +1297,21 @@ def _resolve_attachment_path(raw_path: str) -> Path | None:

    if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")):
        token = token[1:-1].strip()
+    token = token.replace('\\ ', ' ')
    if not token:
        return None

-    expanded = os.path.expandvars(os.path.expanduser(token))
+    expanded = token
+    if token.startswith("file://"):
+        try:
+            parsed = urlparse(token)
+            if parsed.scheme == "file":
+                expanded = unquote(parsed.path or "")
+                if parsed.netloc and os.name == "nt":
+                    expanded = f"//{parsed.netloc}{expanded}"
+        except Exception:
+            expanded = token
+    expanded = os.path.expandvars(os.path.expanduser(expanded))
    if os.name != "nt":
        normalized = expanded.replace("\\", "/")
        if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha():
@@ -1361,6 +1398,7 @@ def _detect_file_drop(user_input: str) -> "dict | None":
        or stripped.startswith("~")
        or stripped.startswith("./")
        or stripped.startswith("../")
+        or stripped.startswith("file://")
        or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha())
        or stripped.startswith('"/')
        or stripped.startswith('"~')
@@ -1371,8 +1409,25 @@ def _detect_file_drop(user_input: str) -> "dict | None":
    if not starts_like_path:
        return None

+    direct_path = _resolve_attachment_path(stripped)
+    if direct_path is not None:
+        return {
+            "path": direct_path,
+            "is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS,
+            "remainder": "",
+        }
+
    first_token, remainder = _split_path_input(stripped)
    drop_path = _resolve_attachment_path(first_token)
+    if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}:
+        space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "]
+        for pos in reversed(space_positions):
+            candidate = stripped[:pos].rstrip()
+            resolved = _resolve_attachment_path(candidate)
+            if resolved is not None:
+                drop_path = resolved
+                remainder = stripped[pos + 1 :].strip()
+                break
    if drop_path is None:
        return None

@@ -1836,7 +1891,7 @@ class HermesCLI:
        # Match key to resolved base_url: OpenRouter URL → prefer OPENROUTER_API_KEY,
        # custom endpoint → prefer OPENAI_API_KEY (issue #560).
        # Note: _ensure_runtime_credentials() re-resolves this before first use.
-        if self.base_url and "openrouter.ai" in self.base_url:
+        if self.base_url and base_url_host_matches(self.base_url, "openrouter.ai"):
            self.api_key = api_key or os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY")
        else:
            self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
@@ -1932,7 +1987,13 @@ class HermesCLI:
            self._session_db = SessionDB()
        except Exception as e:
            logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e)
-        
+
+        # Opportunistic state.db maintenance — runs at most once per
+        # min_interval_hours, tracked via state_meta in state.db itself so
+        # it's shared across all Hermes processes for this HERMES_HOME.
+        # Never blocks startup on failure.
+        _run_state_db_auto_maintenance(self._session_db)
+
        # Deferred title: stored in memory until the session is created in the DB
        self._pending_title: Optional[str] = None
        
@@ -2001,8 +2062,7 @@ class HermesCLI:

    def _invalidate(self, min_interval: float = 0.25) -> None:
        """Throttled UI repaint — prevents terminal blinking on slow/SSH connections."""
-        import time as _time
-        now = _time.monotonic()
+        now = time.monotonic()
        if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval:
            self._last_invalidate = now
            self._app.invalidate()
@@ -2220,8 +2280,7 @@ class HermesCLI:
            return ""
        t0 = getattr(self, "_tool_start_time", 0) or 0
        if t0 > 0:
-            import time as _time
-            elapsed = _time.monotonic() - t0
+            elapsed = time.monotonic() - t0
            if elapsed >= 60:
                _m, _s = int(elapsed // 60), int(elapsed % 60)
                elapsed_str = f"{_m}m {_s}s"
@@ -2476,9 +2535,6 @@ class HermesCLI:

    def _emit_reasoning_preview(self, reasoning_text: str) -> None:
        """Render a buffered reasoning preview as a single [thinking] block."""
-        import re
-        import textwrap
-
        preview_text = reasoning_text.strip()
        if not preview_text:
            return
@@ -2597,9 +2653,7 @@ class HermesCLI:
        """Expand [Pasted text #N -> file] placeholders into file contents."""
        if not isinstance(text, str) or "[Pasted text #" not in text:
            return text or ""
-        import re as _re
-
-        paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
+        paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')

        def _expand_ref(match):
            path = Path(match.group(1))
@@ -2922,9 +2976,7 @@ class HermesCLI:

    def _command_spinner_frame(self) -> str:
        """Return the current spinner frame for slow slash commands."""
-        import time as _time
-
-        frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
+        frame_idx = int(time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES)
        return _COMMAND_SPINNER_FRAMES[frame_idx]

    @contextmanager
@@ -3935,7 +3987,6 @@ class HermesCLI:
        image later with ``vision_analyze`` if needed.
        """
        import asyncio as _asyncio
-        import json as _json
        from tools.vision_tools import vision_analyze_tool

        analysis_prompt = (
@@ -3955,7 +4006,7 @@ class HermesCLI:
                result_json = _asyncio.run(
                    vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt)
                )
-                result = _json.loads(result_json)
+                result = json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
                    enriched_parts.append(
@@ -4210,8 +4261,37 @@ class HermesCLI:
        """
        import shlex
        from argparse import Namespace
+        from contextlib import redirect_stdout
+        from io import StringIO
        from hermes_cli.tools_config import tools_disable_enable_command

+        def _run_capture(ns: Namespace) -> None:
+            """Run tools_disable_enable_command, routing its ANSI-colored
+            print() output through _cprint when inside the interactive TUI
+            so escapes aren't mangled by patch_stdout's StdoutProxy into
+            garbled '?[32m...?[0m' text.
+
+            Outside the TUI (standalone mode, tests), call straight through
+            so real stdout / pytest capture works as expected.
+            """
+            # Standalone/tests, run as usual
+            if getattr(self, "_app", None) is None:
+                tools_disable_enable_command(ns)
+                return
+
+            # Buffer reports isatty()=True so color() in hermes_cli/colors.py
+            # still emits ANSI escapes. StringIO.isatty() is False, which
+            # would otherwise strip all colors before we re-render them.
+            class _TTYBuf(StringIO):
+                def isatty(self) -> bool:
+                    return True
+
+            buf = _TTYBuf()
+            with redirect_stdout(buf):
+                tools_disable_enable_command(ns)
+            for line in buf.getvalue().splitlines():
+                _cprint(line)
+
        try:
            parts = shlex.split(cmd)
        except ValueError:
@@ -4223,8 +4303,7 @@ class HermesCLI:
            return

        if subcommand == "list":
-            tools_disable_enable_command(
-                Namespace(tools_action="list", platform="cli"))
+            _run_capture(Namespace(tools_action="list", platform="cli"))
            return

        names = parts[2:]
@@ -4241,8 +4320,7 @@ class HermesCLI:
        label = ", ".join(names)
        _cprint(f"{_ACCENT}{verb} {label}...{_RST}")

-        tools_disable_enable_command(
-            Namespace(tools_action=subcommand, names=names, platform="cli"))
+        _run_capture(Namespace(tools_action=subcommand, names=names, platform="cli"))

        # Reset session so the new tool config is picked up from a clean state
        from hermes_cli.tools_config import _get_platform_tools
@@ -4969,7 +5047,7 @@ class HermesCLI:
                pass

        cache_enabled = (
-            ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
+            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
            or result.api_mode == "anthropic_messages"
        )
        if cache_enabled:
@@ -5197,7 +5275,7 @@ class HermesCLI:

        # Cache notice
        cache_enabled = (
-            ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
+            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
            or result.api_mode == "anthropic_messages"
        )
        if cache_enabled:
@@ -5228,6 +5306,30 @@ class HermesCLI:
        except Exception:
            return False

+    def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool:
+        """Return True when /steer should be dispatched immediately while the agent is running.
+
+        /steer MUST bypass the normal _pending_input → process_loop path when
+        the agent is active, because process_loop is blocked inside
+        self.chat() for the duration of the run.  By the time the queued
+        command is pulled from _pending_input, _agent_running has already
+        flipped back to False, and process_command() takes the idle
+        fallback — delivering the steer as a next-turn message instead of
+        injecting it mid-run.  Dispatching inline on the UI thread calls
+        agent.steer() directly, which is thread-safe (uses _pending_steer_lock).
+        """
+        if not text or has_images or not _looks_like_slash_command(text):
+            return False
+        if not getattr(self, "_agent_running", False):
+            return False
+        try:
+            from hermes_cli.commands import resolve_command
+            base = text.split(None, 1)[0].lower().lstrip('/')
+            cmd = resolve_command(base)
+            return bool(cmd and cmd.name == "steer")
+        except Exception:
+            return False
+
    def _show_model_and_providers(self):
        """Show current model + provider and list all authenticated providers.

@@ -6230,8 +6332,7 @@ class HermesCLI:
                # with the output (fixes #2718).
                if self._app:
                    self._app.invalidate()
-                    import time as _tmod
-                    _tmod.sleep(0.05)  # brief pause for refresh
+                    time.sleep(0.05)  # brief pause for refresh
                print()
                ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]")
                _cprint(f"  ✅ Background task #{task_num} complete")
@@ -6271,8 +6372,7 @@ class HermesCLI:
                # Same TUI refresh pattern as success path (#2718)
                if self._app:
                    self._app.invalidate()
-                    import time as _tmod
-                    _tmod.sleep(0.05)
+                    time.sleep(0.05)
                print()
                _cprint(f"  ❌ Background task #{task_num} failed: {e}")
            finally:
@@ -6492,7 +6592,6 @@ class HermesCLI:
                _launched = self._try_launch_chrome_debug(_port, _plat.system())
                if _launched:
                    # Wait for the port to come up
-                    import time as _time
                    for _wait in range(10):
                        try:
                            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -6502,7 +6601,7 @@ class HermesCLI:
                            _already_open = True
                            break
                        except (OSError, socket.timeout):
-                            _time.sleep(0.5)
+                            time.sleep(0.5)
                    if _already_open:
                        print(f"   ✓ Chrome launched and listening on port {_port}")
                    else:
@@ -6982,6 +7081,27 @@ class HermesCLI:
        if cost_result.status == "unknown":
            print(f"  Note:             Pricing unknown for {agent.model}")

+        # Account limits -- fetched off-thread with a hard timeout so slow
+        # provider APIs don't hang the prompt.
+        provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
+        base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
+        api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
+        account_snapshot = None
+        if provider:
+            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
+                try:
+                    account_snapshot = _pool.submit(
+                        fetch_account_usage, provider,
+                        base_url=base_url, api_key=api_key,
+                    ).result(timeout=10.0)
+                except (concurrent.futures.TimeoutError, Exception):
+                    account_snapshot = None
+        account_lines = [f"  {line}" for line in render_account_usage_lines(account_snapshot)]
+        if account_lines:
+            print()
+            for line in account_lines:
+                print(line)
+
        if self.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@@ -7032,7 +7152,6 @@ class HermesCLI:
        known state.  When a change is detected, triggers _reload_mcp() and
        informs the user so they know the tool list has been refreshed.
        """
-        import time
        import yaml as _yaml

        CONFIG_WATCH_INTERVAL = 5.0  # seconds between config.yaml stat() calls
@@ -7124,7 +7243,6 @@ class HermesCLI:

            # Refresh the agent's tool list so the model can call new tools
            if self.agent is not None:
-                from model_tools import get_tool_definitions
                self.agent.tools = get_tool_definitions(
                    enabled_toolsets=self.agent.enabled_toolsets
                    if hasattr(self.agent, "enabled_toolsets") else None,
@@ -7207,7 +7325,6 @@ class HermesCLI:
        full history of tool calls (not just the current one in the spinner).
        """
        if event_type == "tool.completed":
-            import time as _time
            self._tool_start_time = 0.0
            # Print stacked scrollback line for "all" / "new" modes
            if function_name and self.tool_progress_mode in ("all", "new"):
@@ -7236,7 +7353,6 @@ class HermesCLI:
        if event_type != "tool.started":
            return
        if function_name and not function_name.startswith("_"):
-            import time as _time
            from agent.display import get_tool_emoji
            emoji = get_tool_emoji(function_name)
            label = preview or function_name
@@ -7245,7 +7361,7 @@ class HermesCLI:
            if _pl > 0 and len(label) > _pl:
                label = label[:_pl - 3] + "..."
            self._spinner_text = f"{emoji} {label}"
-            self._tool_start_time = _time.monotonic()
+            self._tool_start_time = time.monotonic()
            # Store args for stacked scrollback line on completion
            self._pending_tool_info.setdefault(function_name, []).append(
                function_args if function_args is not None else {}
@@ -7362,11 +7478,12 @@ class HermesCLI:
            self._voice_stop_and_transcribe()

        # Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict)
-        try:
-            from tools.voice_mode import play_beep
-            play_beep(frequency=880, count=1)
-        except Exception:
-            pass
+        if self._voice_beeps_enabled():
+            try:
+                from tools.voice_mode import play_beep
+                play_beep(frequency=880, count=1)
+            except Exception:
+                pass

        try:
            self._voice_recorder.start(on_silence_stop=_on_silence)
@@ -7414,11 +7531,12 @@ class HermesCLI:
            wav_path = self._voice_recorder.stop()

            # Audio cue: double beep after stream stopped (no CoreAudio conflict)
-            try:
-                from tools.voice_mode import play_beep
-                play_beep(frequency=660, count=2)
-            except Exception:
-                pass
+            if self._voice_beeps_enabled():
+                try:
+                    from tools.voice_mode import play_beep
+                    play_beep(frequency=660, count=2)
+                except Exception:
+                    pass

            if wav_path is None:
                _cprint(f"{_DIM}No speech detected.{_RST}")
@@ -7501,7 +7619,6 @@ class HermesCLI:
        try:
            from tools.tts_tool import text_to_speech_tool
            from tools.voice_mode import play_audio_file
-            import re

            # Strip markdown and non-speech content for cleaner TTS
            tts_text = text[:4000] if len(text) > 4000 else text
@@ -7569,6 +7686,17 @@ class HermesCLI:
            _cprint(f"Unknown voice subcommand: {subcommand}")
            _cprint("Usage: /voice [on|off|tts|status]")

+    def _voice_beeps_enabled(self) -> bool:
+        """Return whether CLI voice mode should play record start/stop beeps."""
+        try:
+            from hermes_cli.config import load_config
+            voice_cfg = load_config().get("voice", {})
+            if isinstance(voice_cfg, dict):
+                return bool(voice_cfg.get("beep_enabled", True))
+        except Exception:
+            pass
+        return True
+
    def _enable_voice_mode(self):
        """Enable voice mode after checking requirements."""
        if self._voice_mode:
@@ -7878,7 +8006,9 @@ class HermesCLI:
            return

        selected = state.get("selected", 0)
-        choices = state.get("choices") or []
+        choices = state.get("choices")
+        if not isinstance(choices, list):
+            choices = []
        if not (0 <= selected < len(choices)):
            return

@@ -7970,8 +8100,18 @@ class HermesCLI:
        choice_wrapped: list[tuple[int, str]] = []
        for i, choice in enumerate(choices):
            label = choice_labels.get(choice, choice)
-            prefix = '❯ ' if i == selected else '  '
-            for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent="  "):
+            # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
+            if i < 9:
+                num_prefix = str(i + 1)
+            elif i == 9:
+                num_prefix = '0'
+            else:
+                num_prefix = ' '  # No number for items beyond 10th
+            if i == selected:
+                prefix = f'❯ {num_prefix}. '
+            else:
+                prefix = f'  {num_prefix}. '
+            for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent="    "):
                choice_wrapped.append((i, wrapped))

        # Budget vertical space so HSplit never clips the command or choices.
@@ -8262,6 +8402,17 @@ class HermesCLI:

            def run_agent():
                nonlocal result
+                # Set callbacks inside the agent thread so thread-local storage
+                # in terminal_tool is populated for this thread.  The main thread
+                # registration (run() line ~9046) is invisible here because
+                # _callback_tls is threading.local().  Matches the pattern used
+                # by acp_adapter/server.py for ACP sessions.
+                set_sudo_password_callback(self._sudo_password_callback)
+                set_approval_callback(self._approval_callback)
+                try:
+                    set_secret_capture_callback(self._secret_capture_callback)
+                except Exception:
+                    pass
                agent_message = _voice_prefix + message if _voice_prefix else message
                # Prepend pending model switch note so the model knows about the switch
                _msn = getattr(self, '_pending_model_switch_note', None)
@@ -8287,6 +8438,15 @@ class HermesCLI:
                        "failed": True,
                        "error": _summary,
                    }
+                finally:
+                    # Clear thread-local callbacks so a reused thread doesn't
+                    # hold stale references to a disposed CLI instance.
+                    try:
+                        set_sudo_password_callback(None)
+                        set_approval_callback(None)
+                        set_secret_capture_callback(None)
+                    except Exception:
+                        pass

            # Start agent in background thread (daemon so it cannot keep the
            # process alive when the user closes the terminal tab — SIGHUP
@@ -8324,8 +8484,7 @@ class HermesCLI:
                            try:
                                _dbg = _hermes_home / "interrupt_debug.log"
                                with open(_dbg, "a") as _f:
-                                    import time as _t
-                                    _f.write(f"{_t.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, "
+                                    _f.write(f"{time.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, "
                                             f"children={len(self.agent._active_children)}, "
                                             f"parent._interrupt={self.agent._interrupt_requested}\n")
                                    for _ci, _ch in enumerate(self.agent._active_children):
@@ -8401,9 +8560,8 @@ class HermesCLI:
            # buffer so tool/status lines render ABOVE our response box.
            # The flush pushes data into the renderer queue; the short
            # sleep lets the renderer actually paint it before we draw.
-            import time as _time
            sys.stdout.flush()
-            _time.sleep(0.15)
+            time.sleep(0.15)

            # Update history with full conversation
            self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history
@@ -9040,6 +9198,17 @@ class HermesCLI:
                    event.app.current_buffer.reset(append_to_history=True)
                    return

+                # Handle /steer while the agent is running immediately on the
+                # UI thread.  Queuing through _pending_input would deadlock the
+                # steer until after the agent loop finishes (process_loop is
+                # blocked inside self.chat()), which turns /steer into a
+                # post-run next-turn message — defeating mid-run injection.
+                # agent.steer() is thread-safe (holds _pending_steer_lock).
+                if self._should_handle_steer_command_inline(text, has_images=has_images):
+                    self.process_command(text)
+                    event.app.current_buffer.reset(append_to_history=True)
+                    return
+
                # Snapshot and clear attached images
                images = list(self._attached_images)
                self._attached_images.clear()
@@ -9058,8 +9227,7 @@ class HermesCLI:
                        try:
                            _dbg = _hermes_home / "interrupt_debug.log"
                            with open(_dbg, "a") as _f:
-                                import time as _t
-                                _f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, "
+                                _f.write(f"{time.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, "
                                         f"agent_running={self._agent_running}\n")
                        except Exception:
                            pass
@@ -9138,6 +9306,29 @@ class HermesCLI:
                self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1)
                event.app.invalidate()

+        # Number keys for quick clarify selection (1-9, 0 for 10th item)
+        def _make_clarify_number_handler(idx):
+            def handler(event):
+                if self._clarify_state and not self._clarify_freetext:
+                    choices = self._clarify_state.get("choices") or []
+                    # Map index to choice (treating "Other" as the last option)
+                    if idx < len(choices):
+                        # Select a numbered choice
+                        self._clarify_state["response_queue"].put(choices[idx])
+                        self._clarify_state = None
+                        self._clarify_freetext = False
+                        event.app.invalidate()
+                    elif idx == len(choices):
+                        # Select "Other" option
+                        self._clarify_freetext = True
+                        event.app.invalidate()
+            return handler
+
+        for _num in range(10):
+            # 1-9 select items 0-8, 0 selects item 9 (10thitem)
+            _idx = 9 if _num == 0 else _num - 1
+            kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx))
+
        # --- Dangerous command approval: arrow-key navigation ---

        @kb.add('up', filter=Condition(lambda: bool(self._approval_state)))
@@ -9179,6 +9370,20 @@ class HermesCLI:
            event.app.current_buffer.reset()
            event.app.invalidate()

+        # Number keys for quick approval selection (1-9, 0 for 10th item)
+        def _make_approval_number_handler(idx):
+            def handler(event):
+                if self._approval_state and idx < len(self._approval_state["choices"]):
+                    self._approval_state["selected"] = idx
+                    self._handle_approval_selection()
+                    event.app.invalidate()
+            return handler
+
+        for _num in range(10):
+            # 1-9 select items 0-8, 0 selects item 9 (10th item)
+            _idx = 9 if _num == 0 else _num - 1
+            kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx))
+
        # --- History navigation: up/down browse history in normal input mode ---
        # The TextArea is multiline, so by default up/down only move the cursor.
        # Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@@ -9207,8 +9412,7 @@ class HermesCLI:
            2. Interrupt the running agent (first press)
            3. Force exit (second press within 2s, or when idle)
            """
-            import time as _time
-            now = _time.time()
+            now = time.time()

            # Cancel active voice recording.
            # Run cancel() in a background thread to prevent blocking the
@@ -9316,12 +9520,11 @@ class HermesCLI:
        @kb.add('c-z')
        def handle_ctrl_z(event):
            """Handle Ctrl+Z - suspend process to background (Unix only)."""
-            import sys
            if sys.platform == 'win32':
                _cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}")
                event.app.invalidate()
                return
-            import os, signal as _sig
+            import signal as _sig
            from prompt_toolkit.application import run_in_terminal
            from hermes_cli.skin_engine import get_active_skin
            agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent")
@@ -9635,31 +9838,29 @@ class HermesCLI:
        # extra instructions (sudo countdown, approval navigation, clarify).
        # The agent-running interrupt hint is now an inline placeholder above.
        def get_hint_text():
-            import time as _time
-
            if cli_ref._sudo_state:
-                remaining = max(0, int(cli_ref._sudo_deadline - _time.monotonic()))
+                remaining = max(0, int(cli_ref._sudo_deadline - time.monotonic()))
                return [
                    ('class:hint', '  password hidden · Enter to skip'),
                    ('class:clarify-countdown', f'  ({remaining}s)'),
                ]

            if cli_ref._secret_state:
-                remaining = max(0, int(cli_ref._secret_deadline - _time.monotonic()))
+                remaining = max(0, int(cli_ref._secret_deadline - time.monotonic()))
                return [
                    ('class:hint', '  secret hidden · Enter to skip'),
                    ('class:clarify-countdown', f'  ({remaining}s)'),
                ]

            if cli_ref._approval_state:
-                remaining = max(0, int(cli_ref._approval_deadline - _time.monotonic()))
+                remaining = max(0, int(cli_ref._approval_deadline - time.monotonic()))
                return [
                    ('class:hint', '  ↑/↓ to select, Enter to confirm'),
                    ('class:clarify-countdown', f'  ({remaining}s)'),
                ]

            if cli_ref._clarify_state:
-                remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic()))
+                remaining = max(0, int(cli_ref._clarify_deadline - time.monotonic()))
                countdown = f'  ({remaining}s)' if cli_ref._clarify_deadline else ''
                if cli_ref._clarify_freetext:
                    return [
@@ -9751,14 +9952,32 @@ class HermesCLI:
            selected = state.get("selected", 0)
            preview_lines = _wrap_panel_text(question, 60)
            for i, choice in enumerate(choices):
-                prefix = "❯ " if i == selected and not cli_ref._clarify_freetext else "  "
-                preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent="  "))
+                # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
+                if i < 9:
+                    num_prefix = str(i + 1)
+                elif i == 9:
+                    num_prefix = '0'
+                else:
+                    num_prefix = ' '
+                if i == selected and not cli_ref._clarify_freetext:
+                    prefix = f"❯ {num_prefix}. "
+                else:
+                    prefix = f"  {num_prefix}. "
+                preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent="    "))
+            # "Other" option in preview
+            other_num = len(choices) + 1
+            if other_num < 10:
+                other_num_prefix = str(other_num)
+            elif other_num == 10:
+                other_num_prefix = '0'
+            else:
+                other_num_prefix = ' '
            other_label = (
-                "❯ Other (type below)" if cli_ref._clarify_freetext
-                else "❯ Other (type your answer)" if selected == len(choices)
-                else "  Other (type your answer)"
+                f"❯ {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext
+                else f"❯ {other_num_prefix}. Other (type your answer)" if selected == len(choices)
+                else f"  {other_num_prefix}. Other (type your answer)"
            )
-            preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent="  "))
+            preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent="    "))
            box_width = _panel_box_width("Hermes needs your input", preview_lines)
            inner_text_width = max(8, box_width - 2)

@@ -9766,18 +9985,35 @@ class HermesCLI:
            choice_wrapped: list[tuple[int, str]] = []
            if choices:
                for i, choice in enumerate(choices):
-                    prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else '  '
-                    for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent="  "):
+                    # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
+                    if i < 9:
+                        num_prefix = str(i + 1)
+                    elif i == 9:
+                        num_prefix = '0'
+                    else:
+                        num_prefix = ' '
+                    if i == selected and not cli_ref._clarify_freetext:
+                        prefix = f'❯ {num_prefix}. '
+                    else:
+                        prefix = f'  {num_prefix}. '
+                    for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent="    "):
                        choice_wrapped.append((i, wrapped))
                # Trailing Other row(s)
                other_idx = len(choices)
-                if selected == other_idx and not cli_ref._clarify_freetext:
-                    other_label_mand = '❯ Other (type your answer)'
-                elif cli_ref._clarify_freetext:
-                    other_label_mand = '❯ Other (type below)'
+                other_num = other_idx + 1
+                if other_num < 10:
+                    other_num_prefix = str(other_num)
+                elif other_num == 10:
+                    other_num_prefix = '0'
                else:
-                    other_label_mand = '  Other (type your answer)'
-                other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent="  ")
+                    other_num_prefix = ' '
+                if selected == other_idx and not cli_ref._clarify_freetext:
+                    other_label_mand = f'❯ {other_num_prefix}. Other (type your answer)'
+                elif cli_ref._clarify_freetext:
+                    other_label_mand = f'❯ {other_num_prefix}. Other (type below)'
+                else:
+                    other_label_mand = f'  {other_num_prefix}. Other (type your answer)'
+                other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent="    ")
            elif cli_ref._clarify_freetext:
                # Freetext-only mode: the guidance line takes the place of choices.
                other_wrapped = _wrap_panel_text(
@@ -9842,6 +10078,15 @@ class HermesCLI:

                # "Other" option (trailing row(s), only shown when choices exist)
                other_idx = len(choices)
+                # Calculate number prefix for "Other" option
+                other_num = other_idx + 1
+                if other_num < 10:
+                    other_num_prefix = str(other_num)
+                elif other_num == 10:
+                    other_num_prefix = '0'
+                else:
+                    other_num_prefix = ' '
+                
                if selected == other_idx and not cli_ref._clarify_freetext:
                    other_style = 'class:clarify-selected'
                elif cli_ref._clarify_freetext:
@@ -9949,7 +10194,8 @@ class HermesCLI:
            if stage == "provider":
                title = "⚙ Model Picker — Select Provider"
                choices = []
-                for p in state.get("providers") or []:
+                _providers = state.get("providers")
+                for p in _providers if isinstance(_providers, list) else []:
                    count = p.get("total_models", len(p.get("models", [])))
                    label = f"{p['name']} ({count} model{'s' if count != 1 else ''})"
                    if p.get("is_current"):
@@ -10206,22 +10452,20 @@ class HermesCLI:
        app._on_resize = _resize_clear_ghosts

        def spinner_loop():
-            import time as _time
-
            last_idle_refresh = 0.0
            while not self._should_exit:
                if not self._app:
-                    _time.sleep(0.1)
+                    time.sleep(0.1)
                    continue
                if self._command_running:
                    self._invalidate(min_interval=0.1)
-                    _time.sleep(0.1)
+                    time.sleep(0.1)
                else:
-                    now = _time.monotonic()
+                    now = time.monotonic()
                    if now - last_idle_refresh >= 1.0:
                        last_idle_refresh = now
                        self._invalidate(min_interval=1.0)
-                    _time.sleep(0.2)
+                    time.sleep(0.2)

        spinner_thread = threading.Thread(target=spinner_loop, daemon=True)
        spinner_thread.start()
@@ -10290,8 +10534,7 @@ class HermesCLI:
                        continue
                    
                    # Expand paste references back to full content
-                    import re as _re
-                    _paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
+                    _paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]')
                    paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else []
                    if paste_refs:
                        user_input = self._expand_paste_references(user_input)
@@ -10383,13 +10626,12 @@ class HermesCLI:
            try:
                if getattr(self, "agent", None) and getattr(self, "_agent_running", False):
                    self.agent.interrupt(f"received signal {signum}")
-                    import time as _t
                    try:
                        _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
                    except (TypeError, ValueError):
                        _grace = 1.5
                    if _grace > 0:
-                        _t.sleep(_grace)
+                        time.sleep(_grace)
            except Exception:
                pass  # never block signal handling
            raise KeyboardInterrupt()
@@ -10422,8 +10664,7 @@ class HermesCLI:
        # uv-managed Python, fd 0 can be invalid or unregisterable with the
        # asyncio selector, causing "KeyError: '0 is not registered'" (#6393).
        try:
-            import os as _os
-            _os.fstat(0)
+            os.fstat(0)
        except OSError:
            print(
                "Error: stdin (fd 0) is not available.\n"
@@ -10716,13 +10957,12 @@ def main(
            _agent = getattr(cli, "agent", None)
            if _agent is not None:
                _agent.interrupt(f"received signal {signum}")
-                import time as _t
                try:
                    _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5"))
                except (TypeError, ValueError):
                    _grace = 1.5
                if _grace > 0:
-                    _t.sleep(_grace)
+                    time.sleep(_grace)
        except Exception:
            pass  # never block signal handling
        raise KeyboardInterrupt()
@@ -9,6 +9,7 @@ import copy
 import json
 import logging
 import tempfile
+import threading
 import os
 import re
 import uuid
@@ -34,6 +35,11 @@ except ImportError:
 HERMES_DIR = get_hermes_home().resolve()
 CRON_DIR = HERMES_DIR / "cron"
 JOBS_FILE = CRON_DIR / "jobs.json"
+
+# In-process lock protecting load_jobs→modify→save_jobs cycles.
+# Required when tick() runs jobs in parallel threads — without this,
+# concurrent mark_job_run / advance_next_run calls can clobber each other.
+_jobs_file_lock = threading.Lock()
 OUTPUT_DIR = CRON_DIR / "output"
 ONESHOT_GRACE_SECONDS = 120

@@ -594,43 +600,44 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
    ``delivery_error`` is tracked separately from the agent error — a job
    can succeed (agent produced output) but fail delivery (platform down).
    """
-    jobs = load_jobs()
-    for i, job in enumerate(jobs):
-        if job["id"] == job_id:
-            now = _hermes_now().isoformat()
-            job["last_run_at"] = now
-            job["last_status"] = "ok" if success else "error"
-            job["last_error"] = error if not success else None
-            # Track delivery failures separately — cleared on successful delivery
-            job["last_delivery_error"] = delivery_error
-            
-            # Increment completed count
-            if job.get("repeat"):
-                job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
+    with _jobs_file_lock:
+        jobs = load_jobs()
+        for i, job in enumerate(jobs):
+            if job["id"] == job_id:
+                now = _hermes_now().isoformat()
+                job["last_run_at"] = now
+                job["last_status"] = "ok" if success else "error"
+                job["last_error"] = error if not success else None
+                # Track delivery failures separately — cleared on successful delivery
+                job["last_delivery_error"] = delivery_error
                
-                # Check if we've hit the repeat limit
-                times = job["repeat"].get("times")
-                completed = job["repeat"]["completed"]
-                if times is not None and times > 0 and completed >= times:
-                    # Remove the job (limit reached)
-                    jobs.pop(i)
-                    save_jobs(jobs)
-                    return
-            
-            # Compute next run
-            job["next_run_at"] = compute_next_run(job["schedule"], now)
+                # Increment completed count
+                if job.get("repeat"):
+                    job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
+                    
+                    # Check if we've hit the repeat limit
+                    times = job["repeat"].get("times")
+                    completed = job["repeat"]["completed"]
+                    if times is not None and times > 0 and completed >= times:
+                        # Remove the job (limit reached)
+                        jobs.pop(i)
+                        save_jobs(jobs)
+                        return
+                
+                # Compute next run
+                job["next_run_at"] = compute_next_run(job["schedule"], now)

-            # If no next run (one-shot completed), disable
-            if job["next_run_at"] is None:
-                job["enabled"] = False
-                job["state"] = "completed"
-            elif job.get("state") != "paused":
-                job["state"] = "scheduled"
+                # If no next run (one-shot completed), disable
+                if job["next_run_at"] is None:
+                    job["enabled"] = False
+                    job["state"] = "completed"
+                elif job.get("state") != "paused":
+                    job["state"] = "scheduled"

-            save_jobs(jobs)
-            return
+                save_jobs(jobs)
+                return

-    logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
+        logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)


 def advance_next_run(job_id: str) -> bool:
@@ -645,20 +652,21 @@ def advance_next_run(job_id: str) -> bool:

    Returns True if next_run_at was advanced, False otherwise.
    """
-    jobs = load_jobs()
-    for job in jobs:
-        if job["id"] == job_id:
-            kind = job.get("schedule", {}).get("kind")
-            if kind not in ("cron", "interval"):
+    with _jobs_file_lock:
+        jobs = load_jobs()
+        for job in jobs:
+            if job["id"] == job_id:
+                kind = job.get("schedule", {}).get("kind")
+                if kind not in ("cron", "interval"):
+                    return False
+                now = _hermes_now().isoformat()
+                new_next = compute_next_run(job["schedule"], now)
+                if new_next and new_next != job.get("next_run_at"):
+                    job["next_run_at"] = new_next
+                    save_jobs(jobs)
+                    return True
                return False
-            now = _hermes_now().isoformat()
-            new_next = compute_next_run(job["schedule"], now)
-            if new_next and new_next != job.get("next_run_at"):
-                job["next_run_at"] = new_next
-                save_jobs(jobs)
-                return True
-            return False
-    return False
+        return False


 def get_due_jobs() -> List[Dict[str, Any]]:
@@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
                coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)

            future = asyncio.run_coroutine_threadsafe(coro, loop)
-            result = future.result(timeout=30)
+            try:
+                result = future.result(timeout=30)
+            except TimeoutError:
+                future.cancel()
+                raise
            if result and not getattr(result, "success", True):
                logger.warning(
                    "Job '%s': media send failed for %s: %s",
@@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                        runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
                        loop,
                    )
-                    send_result = future.result(timeout=60)
+                    try:
+                        send_result = future.result(timeout=60)
+                    except TimeoutError:
+                        future.cancel()
+                        raise
                    if send_result and not getattr(send_result, "success", True):
                        err = getattr(send_result, "error", "unknown")
                        logger.warning(
@@ -422,7 +430,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
                # fresh thread that has no running loop.
                coro.close()
-                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                    future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
                    result = future.result(timeout=30)
@@ -747,14 +754,17 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    # scheduler process — every job this process runs is a cron job.
    os.environ["HERMES_CRON_SESSION"] = "1"

+    # Use ContextVars for per-job session/delivery state so parallel jobs
+    # don't clobber each other's targets (os.environ is process-global).
+    from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
+
+    _ctx_tokens = set_session_vars(
+        platform=origin["platform"] if origin else "",
+        chat_id=str(origin["chat_id"]) if origin else "",
+        chat_name=origin.get("chat_name", "") if origin else "",
+    )
+
    try:
-        # Inject origin context so the agent's send_message tool knows the chat.
-        # Must be INSIDE the try block so the finally cleanup always runs.
-        if origin:
-            os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
-            os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
-            if origin.get("chat_name"):
-                os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
        from dotenv import load_dotenv
@@ -765,10 +775,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        delivery_target = _resolve_delivery_target(job)
        if delivery_target:
-            os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
-            os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
+            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
+            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
            if delivery_target.get("thread_id") is not None:
-                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])
+                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -807,14 +817,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        prefill_messages = None
        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
        if prefill_file:
-            import json as _json
            pfpath = Path(prefill_file).expanduser()
            if not pfpath.is_absolute():
                pfpath = _hermes_home / pfpath
            if pfpath.exists():
                try:
                    with open(pfpath, "r", encoding="utf-8") as _pf:
-                        prefill_messages = _json.load(_pf)
+                        prefill_messages = json.load(_pf)
                    if not isinstance(prefill_messages, list):
                        prefill_messages = None
                except Exception as e:
@@ -1012,16 +1021,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        return False, output, "", error_msg

    finally:
-        # Clean up injected env vars so they don't leak to other jobs
-        for key in (
-            "HERMES_SESSION_PLATFORM",
-            "HERMES_SESSION_CHAT_ID",
-            "HERMES_SESSION_CHAT_NAME",
-            "HERMES_CRON_AUTO_DELIVER_PLATFORM",
-            "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
-            "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
-        ):
-            os.environ.pop(key, None)
+        # Clean up ContextVar session/delivery state for this job.
+        clear_session_vars(_ctx_tokens)
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -1074,15 +1075,41 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
        if verbose:
            logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))

-        executed = 0
+        # Advance next_run_at for all recurring jobs FIRST, under the file lock,
+        # before any execution begins.  This preserves at-most-once semantics.
        for job in due_jobs:
-            try:
-                # For recurring jobs (cron/interval), advance next_run_at to the
-                # next future occurrence BEFORE execution.  This way, if the
-                # process crashes mid-run, the job won't re-fire on restart.
-                # One-shot jobs are left alone so they can retry on restart.
-                advance_next_run(job["id"])
+            advance_next_run(job["id"])

+        # Resolve max parallel workers: env var > config.yaml > unbounded.
+        # Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
+        _max_workers: Optional[int] = None
+        try:
+            _env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
+            if _env_par:
+                _max_workers = int(_env_par) or None
+        except (ValueError, TypeError):
+            logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
+        if _max_workers is None:
+            try:
+                _ucfg = load_config() or {}
+                _cfg_par = (
+                    _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
+                ).get("max_parallel_jobs")
+                if _cfg_par is not None:
+                    _max_workers = int(_cfg_par) or None
+            except Exception:
+                pass
+
+        if verbose:
+            logger.info(
+                "Running %d job(s) in parallel (max_workers=%s)",
+                len(due_jobs),
+                _max_workers if _max_workers else "unbounded",
+            )
+
+        def _process_job(job: dict) -> bool:
+            """Run one due job end-to-end: execute, save, deliver, mark."""
+            try:
                success, output, final_response, error = run_job(job)

                output_file = save_job_output(job["id"], output)
@@ -1114,13 +1141,23 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                    error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"

                mark_job_run(job["id"], success, error, delivery_error=delivery_error)
-                executed += 1
+                return True

            except Exception as e:
                logger.error("Error processing job %s: %s", job['id'], e)
                mark_job_run(job["id"], False, str(e))
+                return False

-        return executed
+        # Run all due jobs concurrently, each in its own ContextVar copy
+        # so session/delivery state stays isolated per-thread.
+        with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
+            _futures = []
+            for job in due_jobs:
+                _ctx = contextvars.copy_context()
+                _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
+            _results = [f.result() for f in _futures]
+
+        return sum(_results)
    finally:
        if fcntl:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
@@ -53,7 +53,6 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
    try:
        loop = asyncio.get_running_loop()
        # We're in an async context -- need to run in thread
-        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(
                handle_function_call, tool_name, arguments, task_id
@@ -576,6 +576,14 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["free_response_channels"] = platform_cfg["free_response_channels"]
                if "mention_patterns" in platform_cfg:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
+                if "dm_policy" in platform_cfg:
+                    bridged["dm_policy"] = platform_cfg["dm_policy"]
+                if "allow_from" in platform_cfg:
+                    bridged["allow_from"] = platform_cfg["allow_from"]
+                if "group_policy" in platform_cfg:
+                    bridged["group_policy"] = platform_cfg["group_policy"]
+                if "group_allow_from" in platform_cfg:
+                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
@@ -608,6 +616,8 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
+                if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
+                    os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()

            # Discord settings → env vars (env vars take precedence)
            discord_cfg = yaml_cfg.get("discord", {})
@@ -662,8 +672,7 @@ def load_gateway_config() -> GatewayConfig:
                if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
                    os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
                if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
-                    import json as _json
-                    os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
+                    os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
                frc = telegram_cfg.get("free_response_chats")
                if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
                    if isinstance(frc, list):
@@ -700,6 +709,20 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
+                if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
+                    os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
+                af = whatsapp_cfg.get("allow_from")
+                if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
+                    if isinstance(af, list):
+                        af = ",".join(str(v) for v in af)
+                    os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
+                if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
+                    os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
+                gaf = whatsapp_cfg.get("group_allow_from")
+                if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
+                    if isinstance(gaf, list):
+                        gaf = ",".join(str(v) for v in gaf)
+                    os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)

            # DingTalk settings → env vars (env vars take precedence)
            dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -1237,7 +1260,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            if legacy_home:
                qq_home = legacy_home
                qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
-                import logging
                logging.getLogger(__name__).warning(
                    "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
                    "in your .env for consistency with the platform key."
@@ -323,7 +323,6 @@ class ResponseStore:
        ).fetchone()
        if row is None:
            return None
-        import time
        self._conn.execute(
            "UPDATE responses SET accessed_at = ? WHERE response_id = ?",
            (time.time(), response_id),
@@ -333,7 +332,6 @@ class ResponseStore:

    def put(self, response_id: str, data: Dict[str, Any]) -> None:
        """Store a response, evicting the oldest if at capacity."""
-        import time
        self._conn.execute(
            "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
            (response_id, json.dumps(data, default=str), time.time()),
@@ -469,12 +467,12 @@ class _IdempotencyCache:
    def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
        from collections import OrderedDict
        self._store = OrderedDict()
+        self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
        self._ttl = ttl_seconds
        self._max = max_items

    def _purge(self):
-        import time as _t
-        now = _t.time()
+        now = time.time()
        expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
        for k in expired:
            self._store.pop(k, None)
@@ -486,11 +484,27 @@ class _IdempotencyCache:
        item = self._store.get(key)
        if item and item["fp"] == fingerprint:
            return item["resp"]
-        resp = await compute_coro()
-        import time as _t
-        self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
-        self._purge()
-        return resp
+
+        inflight_key = (key, fingerprint)
+        task = self._inflight.get(inflight_key)
+        if task is None:
+            async def _compute_and_store():
+                resp = await compute_coro()
+                import time as _t
+                self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
+                self._purge()
+                return resp
+
+            task = asyncio.create_task(_compute_and_store())
+            self._inflight[inflight_key] = task
+
+            def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
+                if self._inflight.get(inflight_key) is done_task:
+                    self._inflight.pop(inflight_key, None)
+
+            task.add_done_callback(_clear_inflight)
+
+        return await asyncio.shield(task)


 _idem_cache = _IdempotencyCache()
@@ -520,6 +534,30 @@ def _derive_chat_session_id(
    return f"api-{digest}"


+_CRON_AVAILABLE = False
+try:
+    from cron.jobs import (
+        list_jobs as _cron_list,
+        get_job as _cron_get,
+        create_job as _cron_create,
+        update_job as _cron_update,
+        remove_job as _cron_remove,
+        pause_job as _cron_pause,
+        resume_job as _cron_resume,
+        trigger_job as _cron_trigger,
+    )
+    _CRON_AVAILABLE = True
+except ImportError:
+    _cron_list = None
+    _cron_get = None
+    _cron_create = None
+    _cron_update = None
+    _cron_remove = None
+    _cron_pause = None
+    _cron_resume = None
+    _cron_trigger = None
+
+
 class APIServerAdapter(BasePlatformAdapter):
    """
    OpenAI-compatible HTTP API server adapter.
@@ -1849,44 +1887,16 @@ class APIServerAdapter(BasePlatformAdapter):
    # Cron jobs API
    # ------------------------------------------------------------------

-    # Check cron module availability once (not per-request)
-    _CRON_AVAILABLE = False
-    try:
-        from cron.jobs import (
-            list_jobs as _cron_list,
-            get_job as _cron_get,
-            create_job as _cron_create,
-            update_job as _cron_update,
-            remove_job as _cron_remove,
-            pause_job as _cron_pause,
-            resume_job as _cron_resume,
-            trigger_job as _cron_trigger,
-        )
-        # Wrap as staticmethod to prevent descriptor binding — these are plain
-        # module functions, not instance methods.  Without this, self._cron_*()
-        # injects ``self`` as the first positional argument and every call
-        # raises TypeError.
-        _cron_list = staticmethod(_cron_list)
-        _cron_get = staticmethod(_cron_get)
-        _cron_create = staticmethod(_cron_create)
-        _cron_update = staticmethod(_cron_update)
-        _cron_remove = staticmethod(_cron_remove)
-        _cron_pause = staticmethod(_cron_pause)
-        _cron_resume = staticmethod(_cron_resume)
-        _cron_trigger = staticmethod(_cron_trigger)
-        _CRON_AVAILABLE = True
-    except ImportError:
-        pass
-
    _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
    # Allowed fields for update — prevents clients injecting arbitrary keys
    _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
    _MAX_NAME_LENGTH = 200
    _MAX_PROMPT_LENGTH = 5000

-    def _check_jobs_available(self) -> Optional["web.Response"]:
+    @staticmethod
+    def _check_jobs_available() -> Optional["web.Response"]:
        """Return error response if cron module isn't available."""
-        if not self._CRON_AVAILABLE:
+        if not _CRON_AVAILABLE:
            return web.json_response(
                {"error": "Cron module not available"}, status=501,
            )
@@ -1911,7 +1921,7 @@ class APIServerAdapter(BasePlatformAdapter):
            return cron_err
        try:
            include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
-            jobs = self._cron_list(include_disabled=include_disabled)
+            jobs = _cron_list(include_disabled=include_disabled)
            return web.json_response({"jobs": jobs})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1959,7 +1969,7 @@ class APIServerAdapter(BasePlatformAdapter):
            if repeat is not None:
                kwargs["repeat"] = repeat

-            job = self._cron_create(**kwargs)
+            job = _cron_create(**kwargs)
            return web.json_response({"job": job})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1976,7 +1986,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_get(job_id)
+            job = _cron_get(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2009,7 +2019,7 @@ class APIServerAdapter(BasePlatformAdapter):
                return web.json_response(
                    {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
                )
-            job = self._cron_update(job_id, sanitized)
+            job = _cron_update(job_id, sanitized)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2028,7 +2038,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            success = self._cron_remove(job_id)
+            success = _cron_remove(job_id)
            if not success:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"ok": True})
@@ -2047,7 +2057,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_pause(job_id)
+            job = _cron_pause(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2066,7 +2076,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_resume(job_id)
+            job = _cron_resume(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2085,7 +2095,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = self._cron_trigger(job_id)
+            job = _cron_trigger(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -19,6 +19,8 @@ import uuid
 from abc import ABC, abstractmethod
 from urllib.parse import urlsplit

+from utils import normalize_proxy_url
+
 logger = logging.getLogger(__name__)


@@ -159,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
-            return value
+            return normalize_proxy_url(value)
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
-            return value
-    return _detect_macos_system_proxy()
+            return normalize_proxy_url(value)
+    return normalize_proxy_url(_detect_macos_system_proxy())


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -391,12 +393,9 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

-    import asyncio
    import httpx
-    import logging as _logging
-    _log = _logging.getLogger(__name__)
+    _log = logging.getLogger(__name__)

-    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -414,7 +413,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                response.raise_for_status()
                return cache_image_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -430,7 +428,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise last_exc


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -510,12 +507,9 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

-    import asyncio
    import httpx
-    import logging as _logging
-    _log = _logging.getLogger(__name__)
+    _log = logging.getLogger(__name__)

-    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -533,7 +527,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                response.raise_for_status()
                return cache_audio_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -549,7 +542,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise last_exc


 # ---------------------------------------------------------------------------
@@ -1351,7 +1343,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1787,8 +1779,6 @@ class BasePlatformAdapter(ABC):
          HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
          HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
        """
-        import random
-
        mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
        if mode == "off":
            return 0.0
@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
 def check_bluebubbles_requirements() -> bool:
    try:
        import aiohttp  # noqa: F401
-        import httpx as _httpx  # noqa: F401
+        import httpx  # noqa: F401
    except ImportError:
        return False
    return True
@@ -541,7 +541,6 @@ class DiscordAdapter(BasePlatformAdapter):
            # ctypes.util.find_library fails on macOS with Homebrew-installed libs,
            # so fall back to known Homebrew paths if needed.
            if not opus_path:
-                import sys
                _homebrew_paths = (
                    "/opt/homebrew/lib/libopus.dylib",  # Apple Silicon
                    "/usr/local/lib/libopus.dylib",     # Intel Mac
@@ -1422,8 +1421,7 @@ class DiscordAdapter(BasePlatformAdapter):
        speaking_user_ids: set = set()
        receiver = self._voice_receivers.get(guild_id)
        if receiver:
-            import time as _time
-            now = _time.monotonic()
+            now = time.monotonic()
            with receiver._lock:
                for ssrc, last_t in receiver._last_packet_time.items():
                    # Consider "speaking" if audio received within last 2 seconds
@@ -2962,6 +2960,17 @@ class DiscordAdapter(BasePlatformAdapter):
            parent_channel_id = self._get_parent_channel_id(message.channel)

        is_voice_linked_channel = False
+
+        # Save mention-stripped text before auto-threading since create_thread()
+        # can clobber message.content, breaking /command detection in channels.
+        raw_content = message.content.strip()
+        normalized_content = raw_content
+        mention_prefix = False
+        if self._client.user and self._client.user in message.mentions:
+            mention_prefix = True
+            normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
+            normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
+            message.content = normalized_content
        if not isinstance(message.channel, discord.DMChannel):
            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
@@ -2999,13 +3008,8 @@ class DiscordAdapter(BasePlatformAdapter):
            in_bot_thread = is_thread and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
-                if self._client.user not in message.mentions:
+                if self._client.user not in message.mentions and not mention_prefix:
                    return
-
-            if self._client.user and self._client.user in message.mentions:
-                message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
-                message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
-
        # Auto-thread: when enabled, automatically create a thread for every
        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
@@ -3027,7 +3031,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Determine message type
        msg_type = MessageType.TEXT
-        if message.content.startswith("/"):
+        if normalized_content.startswith("/"):
            msg_type = MessageType.COMMAND
        elif message.attachments:
            # Check attachment types
@@ -3167,7 +3171,9 @@ class DiscordAdapter(BasePlatformAdapter):
                                att.filename, e, exc_info=True,
                            )

-        event_text = message.content
+        # Use normalized_content (saved before auto-threading) instead of message.content,
+        # to detect /slash commands in channel messages.
+        event_text = normalized_content
        if pending_text_injection:
            event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection

@@ -410,7 +410,6 @@ class MattermostAdapter(BasePlatformAdapter):
            logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)

-        import asyncio
        import aiohttp

        last_exc = None
@@ -26,9 +26,8 @@ from .adapter import (  # noqa: F401
 # -- Onboard (QR-code scan-to-configure) -----------------------------------
 from .onboard import (  # noqa: F401
    BindStatus,
-    create_bind_task,
-    poll_bind_result,
    build_connect_url,
+    qr_register,
 )
 from .crypto import decrypt_secret, generate_bind_key  # noqa: F401

@@ -44,9 +43,8 @@ __all__ = [
    "_ssrf_redirect_guard",
    # onboard
    "BindStatus",
-    "create_bind_task",
-    "poll_bind_result",
    "build_connect_url",
+    "qr_register",
    # crypto
    "decrypt_secret",
    "generate_bind_key",
@@ -1086,11 +1086,8 @@ class QQAdapter(BasePlatformAdapter):
            return MessageType.VIDEO
        if "image" in first_type or "photo" in first_type:
            return MessageType.PHOTO
-        # Unknown content type with an attachment — don't assume PHOTO
-        # to prevent non-image files from being sent to vision analysis.
        logger.debug(
-            "[%s] Unknown media content_type '%s', defaulting to TEXT",
-            self._log_tag,
+            "Unknown media content_type '%s', defaulting to TEXT",
            first_type,
        )
        return MessageType.TEXT
@@ -1826,14 +1823,12 @@ class QQAdapter(BasePlatformAdapter):
            body["file_name"] = file_name

        # Retry transient upload failures
-        last_exc = None
        for attempt in range(3):
            try:
                return await self._api_request(
                    "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
                )
            except RuntimeError as exc:
-                last_exc = exc
                err_msg = str(exc)
                if any(
                        kw in err_msg
@@ -1842,8 +1837,8 @@ class QQAdapter(BasePlatformAdapter):
                    raise
                if attempt < 2:
                    await asyncio.sleep(1.5 * (attempt + 1))
-
-        raise last_exc  # type: ignore[misc]
+                else:
+                    raise

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
@@ -1,6 +1,10 @@
 """
 QQBot scan-to-configure (QR code onboard) module.

+Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
+entry-point ``qr_register()`` that handles the full flow (create task →
+display QR code → poll → decrypt credentials).
+
 Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
 generate a QR-code URL and poll for scan completion.  On success the caller
 receives the bot's *app_id*, *client_secret* (decrypted locally), and the
@@ -12,18 +16,20 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
 from __future__ import annotations

 import logging
+import time
 from enum import IntEnum
-from typing import Tuple
+from typing import Optional, Tuple
 from urllib.parse import quote

 from .constants import (
    ONBOARD_API_TIMEOUT,
    ONBOARD_CREATE_PATH,
+    ONBOARD_POLL_INTERVAL,
    ONBOARD_POLL_PATH,
    PORTAL_HOST,
    QR_URL_TEMPLATE,
 )
-from .crypto import generate_bind_key
+from .crypto import decrypt_secret, generate_bind_key
 from .utils import get_api_headers

 logger = logging.getLogger(__name__)
@@ -35,7 +41,7 @@ logger = logging.getLogger(__name__)


 class BindStatus(IntEnum):
-    """Status codes returned by ``poll_bind_result``."""
+    """Status codes returned by ``_poll_bind_result``."""

    NONE = 0
    PENDING = 1
@@ -44,18 +50,40 @@ class BindStatus(IntEnum):


 # ---------------------------------------------------------------------------
-# Public API
+# QR rendering
+# ---------------------------------------------------------------------------
+
+try:
+    import qrcode as _qrcode_mod
+except (ImportError, TypeError):
+    _qrcode_mod = None  # type: ignore[assignment]
+
+
+def _render_qr(url: str) -> bool:
+    """Try to render a QR code in the terminal. Returns True if successful."""
+    if _qrcode_mod is None:
+        return False
+    try:
+        qr = _qrcode_mod.QRCode(
+            error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
+            border=2,
+        )
+        qr.add_data(url)
+        qr.make(fit=True)
+        qr.print_ascii(invert=True)
+        return True
+    except Exception:
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
 # ---------------------------------------------------------------------------


-async def create_bind_task(
-    timeout: float = ONBOARD_API_TIMEOUT,
-) -> Tuple[str, str]:
+def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
    """Create a bind task and return *(task_id, aes_key_base64)*.

-    The AES key is generated locally and sent to the server so it can
-    encrypt the bot credentials before returning them.
-
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -64,8 +92,8 @@ async def create_bind_task(
    url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
    key = generate_bind_key()

-    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-        resp = await client.post(url, json={"key": key}, headers=get_api_headers())
+    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+        resp = client.post(url, json={"key": key}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -80,7 +108,7 @@ async def create_bind_task(
    return task_id, key


-async def poll_bind_result(
+def _poll_bind_result(
    task_id: str,
    timeout: float = ONBOARD_API_TIMEOUT,
 ) -> Tuple[BindStatus, str, str, str]:
@@ -89,12 +117,6 @@ async def poll_bind_result(
    Returns:
        A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.

-        * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
-          :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
-          key from :func:`create_bind_task`.
-        * ``user_openid`` is the OpenID of the person who scanned the code
-          (available when ``status == COMPLETED``).
-
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -102,8 +124,8 @@ async def poll_bind_result(

    url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"

-    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
-        resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
+    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
+        resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -122,3 +144,77 @@ async def poll_bind_result(
 def build_connect_url(task_id: str) -> str:
    """Build the QR-code target URL for a given *task_id*."""
    return QR_URL_TEMPLATE.format(task_id=quote(task_id))
+
+
+# ---------------------------------------------------------------------------
+# Public entry-point
+# ---------------------------------------------------------------------------
+
+_MAX_REFRESHES = 3
+
+
+def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
+    """Run the QQBot scan-to-configure QR registration flow.
+
+    Mirrors ``feishu.qr_register()``: handles create → display → poll →
+    decrypt in one call.  Unexpected errors propagate to the caller.
+
+    :returns:
+        ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
+        success, or ``None`` on failure / expiry / cancellation.
+    """
+    deadline = time.monotonic() + timeout_seconds
+
+    for refresh_count in range(_MAX_REFRESHES + 1):
+        # ── Create bind task ──
+        try:
+            task_id, aes_key = _create_bind_task()
+        except Exception as exc:
+            logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
+            return None
+
+        url = build_connect_url(task_id)
+
+        # ── Display QR code + URL ──
+        print()
+        if _render_qr(url):
+            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
+        else:
+            print(f"  Open this URL in QQ on your phone:\n  {url}")
+            print("  Tip: pip install qrcode  to display a scannable QR code here")
+        print()
+
+        # ── Poll loop ──
+        while time.monotonic() < deadline:
+            try:
+                status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
+            except Exception:
+                time.sleep(ONBOARD_POLL_INTERVAL)
+                continue
+
+            if status == BindStatus.COMPLETED:
+                client_secret = decrypt_secret(encrypted_secret, aes_key)
+                print()
+                print(f"  QR scan complete! (App ID: {app_id})")
+                if user_openid:
+                    print(f"  Scanner's OpenID: {user_openid}")
+                return {
+                    "app_id": app_id,
+                    "client_secret": client_secret,
+                    "user_openid": user_openid,
+                }
+
+            if status == BindStatus.EXPIRED:
+                if refresh_count >= _MAX_REFRESHES:
+                    logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
+                    return None
+                print(f"\n  QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
+                break  # next for-loop iteration creates a new task
+
+            time.sleep(ONBOARD_POLL_INTERVAL)
+        else:
+            # deadline reached without completing
+            logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
+            return None
+
+    return None
@@ -38,6 +38,7 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
+    ProcessingOutcome,
    SendResult,
    SUPPORTED_DOCUMENT_TYPES,
    safe_url_for_log,
@@ -113,6 +114,11 @@ class SlackAdapter(BasePlatformAdapter):
        # Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
        self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
        self._THREAD_CACHE_TTL = 60.0
+        # Track message IDs that should get reaction lifecycle (DMs / @mentions).
+        self._reacting_message_ids: set = set()
+        # Track active assistant thread status indicators so stop_typing can
+        # clear them (chat_id → thread_ts).
+        self._active_status_threads: Dict[str, str] = {}

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -362,6 +368,7 @@ class SlackAdapter(BasePlatformAdapter):
        if not thread_ts:
            return  # Can only set status in a thread context

+        self._active_status_threads[chat_id] = thread_ts
        try:
            await self._get_client(chat_id).assistant_threads_setStatus(
                channel_id=chat_id,
@@ -373,6 +380,22 @@ class SlackAdapter(BasePlatformAdapter):
            # in an assistant-enabled context. Falls back to reactions.
            logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)

+    async def stop_typing(self, chat_id: str) -> None:
+        """Clear the assistant thread status indicator."""
+        if not self._app:
+            return
+        thread_ts = self._active_status_threads.pop(chat_id, None)
+        if not thread_ts:
+            return
+        try:
+            await self._get_client(chat_id).assistant_threads_setStatus(
+                channel_id=chat_id,
+                thread_ts=thread_ts,
+                status="",
+            )
+        except Exception as e:
+            logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e)
+
    def _dm_top_level_threads_as_sessions(self) -> bool:
        """Whether top-level Slack DMs get per-message session threads.

@@ -584,6 +607,38 @@ class SlackAdapter(BasePlatformAdapter):
            logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e)
            return False

+    def _reactions_enabled(self) -> bool:
+        """Check if message reactions are enabled via config/env."""
+        return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
+
+    async def on_processing_start(self, event: MessageEvent) -> None:
+        """Add an in-progress reaction when message processing begins."""
+        if not self._reactions_enabled():
+            return
+        ts = getattr(event, "message_id", None)
+        if not ts or ts not in self._reacting_message_ids:
+            return
+        channel_id = getattr(event.source, "chat_id", None)
+        if channel_id:
+            await self._add_reaction(channel_id, ts, "eyes")
+
+    async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
+        """Swap the in-progress reaction for a final success/failure reaction."""
+        if not self._reactions_enabled():
+            return
+        ts = getattr(event, "message_id", None)
+        if not ts or ts not in self._reacting_message_ids:
+            return
+        self._reacting_message_ids.discard(ts)
+        channel_id = getattr(event.source, "chat_id", None)
+        if not channel_id:
+            return
+        await self._remove_reaction(channel_id, ts, "eyes")
+        if outcome == ProcessingOutcome.SUCCESS:
+            await self._add_reaction(channel_id, ts, "white_check_mark")
+        elif outcome == ProcessingOutcome.FAILURE:
+            await self._add_reaction(channel_id, ts, "x")
+
    # ----- User identity resolution -----

    async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str:
@@ -1213,17 +1268,12 @@ class SlackAdapter(BasePlatformAdapter):
        # Only react when bot is directly addressed (DM or @mention).
        # In listen-all channels (require_mention=false), reacting to every
        # casual message would be noisy.
-        _should_react = is_dm or is_mentioned
-
+        _should_react = (is_dm or is_mentioned) and self._reactions_enabled()
        if _should_react:
-            await self._add_reaction(channel_id, ts, "eyes")
+            self._reacting_message_ids.add(ts)

        await self.handle_message(msg_event)

-        if _should_react:
-            await self._remove_reaction(channel_id, ts, "eyes")
-            await self._add_reaction(channel_id, ts, "white_check_mark")
-
    # ----- Approval button support (Block Kit) -----

    async def send_exec_approval(
@@ -1600,11 +1650,9 @@ class SlackAdapter(BasePlatformAdapter):

    async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
        """Download a Slack file using the bot token for auth, with retry."""
-        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
-        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1634,7 +1682,6 @@ class SlackAdapter(BasePlatformAdapter):
                        from gateway.platforms.base import cache_image_from_bytes
                        return cache_image_from_bytes(response.content, ext)
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1643,15 +1690,12 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise last_exc

    async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
        """Download a Slack file and return raw bytes, with retry."""
-        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
-        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1663,7 +1707,6 @@ class SlackAdapter(BasePlatformAdapter):
                    response.raise_for_status()
                    return response.content
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
-                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1672,7 +1715,6 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise last_exc

    # ── Channel mention gating ─────────────────────────────────────────────

@@ -496,6 +496,13 @@ class TelegramAdapter(BasePlatformAdapter):
                    "[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
                    self.name, name, chat_id,
                )
+            elif "not a forum" in error_text or "forums_disabled" in error_text:
+                logger.warning(
+                    "[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
+                    "The user must open the DM with this bot in Telegram, tap the bot name "
+                    "at the top, and enable 'Topics' in chat settings before topics can be created.",
+                    self.name, name, chat_id,
+                )
            else:
                logger.warning(
                    "[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -787,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter):
                # Telegram pushes updates to our HTTP endpoint.  This
                # enables cloud platforms (Fly.io, Railway) to auto-wake
                # suspended machines on inbound HTTP traffic.
+                #
+                # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
+                # python-telegram-bot passes secret_token=None and the
+                # webhook endpoint accepts any HTTP POST — attackers can
+                # inject forged updates as if from Telegram. Refuse to
+                # start rather than silently run in fail-open mode.
+                # See GHSA-3vpc-7q5r-276h.
                webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
-                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
+                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
+                if not webhook_secret:
+                    raise RuntimeError(
+                        "TELEGRAM_WEBHOOK_SECRET is required when "
+                        "TELEGRAM_WEBHOOK_URL is set. Without it, the "
+                        "webhook endpoint accepts forged updates from "
+                        "anyone who can reach it — see "
+                        "https://github.com/NousResearch/hermes-agent/"
+                        "security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
+                        "Generate a secret and set it in your .env:\n"
+                        "  export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
+                        "Then register it with Telegram when setting the "
+                        "webhook via setWebhook's secret_token parameter."
+                    )
                from urllib.parse import urlparse
                webhook_path = urlparse(webhook_url).path or "/telegram"

@@ -1706,7 +1733,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")
        
        try:
-            import os
            if not os.path.exists(audio_path):
                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
            
@@ -1755,7 +1781,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")

        try:
-            import os
            if not os.path.exists(image_path):
                return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))

@@ -2068,7 +2093,7 @@ class TelegramAdapter(BasePlatformAdapter):
            url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
            return _ph(f'[{display}]({url})')

-        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
+        text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)

        # 4) Convert markdown headers (## Title) → bold *Title*
        def _convert_header(m):
@@ -2328,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter):
        DMs remain unrestricted. Group/supergroup messages are accepted when:
        - the chat is explicitly allowlisted in ``free_response_chats``
        - ``require_mention`` is disabled
-        - the message is a command
        - the message replies to the bot
        - the bot is @mentioned
        - the text/caption matches a configured regex wake-word pattern
+
+        When ``require_mention`` is enabled, slash commands are not given
+        special treatment — they must pass the same mention/reply checks
+        as any other group message.  Users can still trigger commands via
+        the Telegram bot menu (``/command@botname``) or by explicitly
+        mentioning the bot (``@botname /command``), both of which are
+        recognised as mentions by :meth:`_message_mentions_bot`.
        """
        if not self._is_group_chat(message):
            return True
@@ -2346,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter):
            return True
        if not self._telegram_require_mention():
            return True
-        if is_command:
-            return True
        if self._is_reply_to_bot(message):
            return True
        if self._message_mentions_bot(message):
@@ -2816,13 +2845,11 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.info("[Telegram] Analyzing sticker at %s", cached_path)

            from tools.vision_tools import vision_analyze_tool
-            import json as _json
-
            result_json = await vision_analyze_tool(
                image_url=cached_path,
                user_prompt=STICKER_VISION_PROMPT,
            )
-            result = _json.loads(result_json)
+            result = json.loads(result_json)

            if result.get("success"):
                description = result.get("analysis", "a sticker")
@@ -624,13 +624,16 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
-            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
+            _raw_mixed = body.get("mixed")
+            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
+            _raw_items = mixed.get("msg_item")
+            items = _raw_items if isinstance(_raw_items, list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
                if str(item.get("msgtype") or "").lower() == "text":
-                    text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
+                    _raw_text = item.get("text")
+                    text_block = _raw_text if isinstance(_raw_text, dict) else {}
                    content = str(text_block.get("content") or "").strip()
                    if content:
                        text_parts.append(content)
@@ -672,8 +675,10 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
-            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
+            _raw_mixed = body.get("mixed")
+            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
+            _raw_items = mixed.get("msg_item")
+            items = _raw_items if isinstance(_raw_items, list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
@@ -1459,3 +1464,134 @@ class WeComAdapter(BasePlatformAdapter):
            "name": chat_id,
            "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
        }
+
+
+# ------------------------------------------------------------------
+# QR code scan flow for obtaining bot credentials
+# ------------------------------------------------------------------
+
+_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
+_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
+_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
+_QR_POLL_INTERVAL = 3  # seconds
+_QR_POLL_TIMEOUT = 300  # 5 minutes
+
+
+def qr_scan_for_bot_info(
+    *,
+    timeout_seconds: int = _QR_POLL_TIMEOUT,
+) -> Optional[Dict[str, str]]:
+    """Run the WeCom QR scan flow to obtain bot_id and secret.
+
+    Fetches a QR code from WeCom, renders it in the terminal, and polls
+    until the user scans it or the timeout expires.
+
+    Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
+    failure or timeout.
+
+    Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
+    used here are not part of WeCom's public developer API — they back the
+    admin-console web UI's bot-creation flow and may change without notice.
+    The same pattern is used by the feishu/dingtalk QR setup wizards.
+    """
+    try:
+        import urllib.request
+        import urllib.parse
+    except ImportError:  # pragma: no cover
+        logger.error("urllib is required for WeCom QR scan")
+        return None
+
+    generate_url = f"{_QR_GENERATE_URL}?source=hermes"
+
+    # ── Step 1: Fetch QR code ──
+    print("  Connecting to WeCom...", end="", flush=True)
+    try:
+        req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            raw = json.loads(resp.read().decode("utf-8"))
+    except Exception as exc:
+        logger.error("WeCom QR: failed to fetch QR code: %s", exc)
+        print(f" failed: {exc}")
+        return None
+
+    data = raw.get("data") or {}
+    scode = str(data.get("scode") or "").strip()
+    auth_url = str(data.get("auth_url") or "").strip()
+
+    if not scode or not auth_url:
+        logger.error("WeCom QR: unexpected response format: %s", raw)
+        print(" failed: unexpected response format")
+        return None
+
+    print(" done.")
+
+    # ── Step 2: Render QR code in terminal ──
+    print()
+    qr_rendered = False
+    try:
+        import qrcode as _qrcode
+        qr = _qrcode.QRCode()
+        qr.add_data(auth_url)
+        qr.make(fit=True)
+        qr.print_ascii(invert=True)
+        qr_rendered = True
+    except ImportError:
+        pass
+    except Exception:
+        pass
+
+    page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
+    if qr_rendered:
+        print(f"\n  Scan the QR code above, or open this URL directly:\n  {page_url}")
+    else:
+        print(f"  Open this URL in WeCom on your phone:\n\n  {page_url}\n")
+        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
+    print()
+    print("  Fetching configuration results...", end="", flush=True)
+
+    # ── Step 3: Poll for result ──
+    import time
+    deadline = time.time() + timeout_seconds
+    query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
+    poll_count = 0
+
+    while time.time() < deadline:
+        try:
+            req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                result = json.loads(resp.read().decode("utf-8"))
+        except Exception as exc:
+            logger.debug("WeCom QR poll error: %s", exc)
+            time.sleep(_QR_POLL_INTERVAL)
+            continue
+
+        poll_count += 1
+        # Print a dot on every poll so progress is visible within 3s.
+        print(".", end="", flush=True)
+
+        result_data = result.get("data") or {}
+        status = str(result_data.get("status") or "").lower()
+
+        if status == "success":
+            print()  # newline after "Fetching configuration results..." dots
+            bot_info = result_data.get("bot_info") or {}
+            bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
+            secret = str(bot_info.get("secret") or "").strip()
+            if bot_id and secret:
+                return {"bot_id": bot_id, "secret": secret}
+            logger.warning(
+                "WeCom QR: scan reported success but bot_info missing or incomplete: %s",
+                result_data,
+            )
+            print(
+                "  QR scan reported success but no bot credentials were returned.\n"
+                "  This usually means the bot was not actually created on the WeCom side.\n"
+                "  Falling back to manual credential entry."
+            )
+            return None
+
+        time.sleep(_QR_POLL_INTERVAL)
+
+    print()  # newline after dots
+    print(f"  QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
+    return None
@@ -66,6 +66,37 @@ def _kill_port_process(port: int) -> None:
    except Exception:
        pass

+
+def _terminate_bridge_process(proc, *, force: bool = False) -> None:
+    """Terminate the bridge process using process-tree semantics where possible."""
+    if _IS_WINDOWS:
+        cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
+        if force:
+            cmd.append("/F")
+        try:
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=10,
+            )
+        except FileNotFoundError:
+            if force:
+                proc.kill()
+            else:
+                proc.terminate()
+            return
+
+        if result.returncode != 0:
+            details = (result.stderr or result.stdout or "").strip()
+            raise OSError(details or f"taskkill failed for PID {proc.pid}")
+        return
+
+    import signal
+
+    sig = signal.SIGTERM if not force else signal.SIGKILL
+    os.killpg(os.getpgid(proc.pid), sig)
+
 import sys
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

@@ -118,6 +149,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
    - bridge_script: Path to the Node.js bridge script
    - bridge_port: Port for HTTP communication (default: 3000)
    - session_path: Path to store WhatsApp session data
+    - dm_policy: "open" | "allowlist" | "disabled" — how DMs are handled (default: "open")
+    - allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
+    - group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open")
+    - group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
    """
    
    # WhatsApp message limits — practical UX limit, not protocol max.
@@ -140,6 +175,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
            get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
        ))
        self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
+        self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
+        self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
+        self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
+        self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
        self._mention_patterns = self._compile_mention_patterns()
        self._message_queue: asyncio.Queue = asyncio.Queue()
        self._bridge_log_fh = None
@@ -163,6 +202,33 @@ class WhatsAppAdapter(BasePlatformAdapter):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

+    @staticmethod
+    def _coerce_allow_list(raw) -> set[str]:
+        """Parse allow_from / group_allow_from from config or env var."""
+        if raw is None:
+            return set()
+        if isinstance(raw, list):
+            return {str(part).strip() for part in raw if str(part).strip()}
+        return {part.strip() for part in str(raw).split(",") if part.strip()}
+
+    def _is_dm_allowed(self, sender_id: str) -> bool:
+        """Check whether a DM from the given sender should be processed."""
+        if self._dm_policy == "disabled":
+            return False
+        if self._dm_policy == "allowlist":
+            return sender_id in self._allow_from
+        # "open" — all DMs allowed
+        return True
+
+    def _is_group_allowed(self, chat_id: str) -> bool:
+        """Check whether a group chat should be processed."""
+        if self._group_policy == "disabled":
+            return False
+        if self._group_policy == "allowlist":
+            return chat_id in self._group_allow_from
+        # "open" — all groups allowed
+        return True
+
    def _compile_mention_patterns(self):
        patterns = self.config.extra.get("mention_patterns")
        if patterns is None:
@@ -255,8 +321,18 @@ class WhatsAppAdapter(BasePlatformAdapter):
        return cleaned.strip() or text

    def _should_process_message(self, data: Dict[str, Any]) -> bool:
-        if not data.get("isGroup"):
+        is_group = data.get("isGroup", False)
+        if is_group:
+            chat_id = str(data.get("chatId") or "")
+            if not self._is_group_allowed(chat_id):
+                return False
+        else:
+            sender_id = str(data.get("senderId") or data.get("from") or "")
+            if not self._is_dm_allowed(sender_id):
+                return False
+            # DMs that pass the policy gate are always processed
            return True
+        # Group messages: check mention / free-response settings
        chat_id = str(data.get("chatId") or "")
        if chat_id in self._whatsapp_free_response_chats():
            return True
@@ -323,7 +399,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
            
            # Check if bridge is already running and connected
            import aiohttp
-            import asyncio
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(
@@ -492,22 +567,14 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Stop the WhatsApp bridge and clean up any orphaned processes."""
        if self._bridge_process:
            try:
-                # Kill the entire process group so child node processes die too
-                import signal
                try:
-                    if _IS_WINDOWS:
-                        self._bridge_process.terminate()
-                    else:
-                        os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
+                    _terminate_bridge_process(self._bridge_process, force=False)
                except (ProcessLookupError, PermissionError):
                    self._bridge_process.terminate()
                await asyncio.sleep(1)
                if self._bridge_process.poll() is None:
                    try:
-                        if _IS_WINDOWS:
-                            self._bridge_process.kill()
-                        else:
-                            os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
+                        _terminate_bridge_process(self._bridge_process, force=True)
                    except (ProcessLookupError, PermissionError):
                        self._bridge_process.kill()
            except Exception as e:
@@ -30,6 +30,8 @@ from pathlib import Path
 from datetime import datetime
 from typing import Dict, Optional, Any, List

+from agent.account_usage import fetch_account_usage, render_account_usage_lines
+
 # --- Agent cache tuning ---------------------------------------------------
 # Bounds the per-session AIAgent cache to prevent unbounded growth in
 # long-lived gateways (each AIAgent holds LLM clients, tool schemas,
@@ -86,7 +88,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))

 # Resolve Hermes home directory (respects HERMES_HOME override)
 from hermes_constants import get_hermes_home
-from utils import atomic_yaml_write, is_truthy_value
+from utils import atomic_yaml_write, base_url_host_matches, is_truthy_value
 _hermes_home = get_hermes_home()

 # Load environment variables from ~/.hermes/.env first.
@@ -279,6 +281,7 @@ from gateway.session import (
    build_session_context,
    build_session_context_prompt,
    build_session_key,
+    is_shared_multi_user_session,
 )
 from gateway.delivery import DeliveryRouter
 from gateway.platforms.base import (
@@ -707,7 +710,26 @@ class GatewayRunner:
            self._session_db = SessionDB()
        except Exception as e:
            logger.debug("SQLite session store not available: %s", e)
-        
+
+        # Opportunistic state.db maintenance: prune ended sessions older
+        # than sessions.retention_days + optional VACUUM. Tracks last-run
+        # in state_meta so it only actually executes once per
+        # sessions.min_interval_hours.  Gateway is long-lived so blocking
+        # a few seconds once per day is acceptable; failures are logged
+        # but never raised.
+        if self._session_db is not None:
+            try:
+                from hermes_cli.config import load_config as _load_full_config
+                _sess_cfg = (_load_full_config().get("sessions") or {})
+                if _sess_cfg.get("auto_prune", False):
+                    self._session_db.maybe_auto_prune_and_vacuum(
+                        retention_days=int(_sess_cfg.get("retention_days", 90)),
+                        min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)),
+                        vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)),
+                    )
+            except Exception as exc:
+                logger.debug("state.db auto-maintenance skipped: %s", exc)
+
        # DM pairing store for code-based user authorization
        from gateway.pairing import PairingStore
        self.pairing_store = PairingStore()
@@ -1266,7 +1288,6 @@ class GatewayRunner:
        the prefill_messages_file key in ~/.hermes/config.yaml.
        Relative paths are resolved from ~/.hermes/.
        """
-        import json as _json
        file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
        if not file_path:
            try:
@@ -1288,7 +1309,7 @@ class GatewayRunner:
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
-                data = _json.load(f)
+                data = json.load(f)
            if not isinstance(data, list):
                logger.warning("Prefill messages file must contain a JSON array: %s", path)
                return []
@@ -1960,6 +1981,39 @@ class GatewayRunner:
                "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
            )
        
+        # Discover Python plugins before shell hooks so plugin block
+        # decisions take precedence in tie cases.  The CLI startup path
+        # does this via an explicit call in hermes_cli/main.py; the
+        # gateway lazily imports run_agent inside per-request handlers,
+        # so the discover_plugins() side-effect in model_tools.py is NOT
+        # guaranteed to have run by the time we reach this point.
+        try:
+            from hermes_cli.plugins import discover_plugins
+            discover_plugins()
+        except Exception:
+            logger.debug(
+                "plugin discovery failed at gateway startup", exc_info=True,
+            )
+
+        # Register declarative shell hooks from cli-config.yaml.  Gateway
+        # has no TTY, so consent has to come from one of the three opt-in
+        # channels (--accept-hooks on launch, HERMES_ACCEPT_HOOKS env var,
+        # or hooks_auto_accept: true in config.yaml).  We pass
+        # accept_hooks=False here and let register_from_config resolve
+        # the effective value from env + config itself — the CLI-side
+        # registration already honored --accept-hooks, and re-reading
+        # hooks_auto_accept here would just duplicate that lookup.
+        # Failures are logged but must never block gateway startup.
+        try:
+            from hermes_cli.config import load_config
+            from agent.shell_hooks import register_from_config
+            register_from_config(load_config(), accept_hooks=False)
+        except Exception:
+            logger.debug(
+                "shell-hook registration failed at gateway startup",
+                exc_info=True,
+            )
+
        # Discover and load event hooks
        self.hooks.discover_and_load()
        
@@ -3242,10 +3296,9 @@ class GatewayRunner:
                    return "Usage: /queue <prompt>"
                adapter = self.adapters.get(source.platform)
                if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                        text=queued_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                        source=event.source,
                        message_id=event.message_id,
                        channel_prompt=event.channel_prompt,
@@ -3267,10 +3320,9 @@ class GatewayRunner:
                    # Agent hasn't started yet — queue as turn-boundary fallback.
                    adapter = self.adapters.get(source.platform)
                    if adapter:
-                        from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                        queued_event = _ME(
+                        queued_event = MessageEvent(
                            text=steer_text,
-                            message_type=_MT.TEXT,
+                            message_type=MessageType.TEXT,
                            source=event.source,
                            message_id=event.message_id,
                            channel_prompt=event.channel_prompt,
@@ -3290,10 +3342,9 @@ class GatewayRunner:
                # Running agent is missing or lacks steer() — fall back to queue.
                adapter = self.adapters.get(source.platform)
                if adapter:
-                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
-                    queued_event = _ME(
+                    queued_event = MessageEvent(
                        text=steer_text,
-                        message_type=_MT.TEXT,
+                        message_type=MessageType.TEXT,
                        source=event.source,
                        message_id=event.message_id,
                        channel_prompt=event.channel_prompt,
@@ -3642,9 +3693,8 @@ class GatewayRunner:
                plugin_handler = get_plugin_command_handler(command.replace("_", "-"))
                if plugin_handler:
                    user_args = event.get_command_args().strip()
-                    import asyncio as _aio
                    result = plugin_handler(user_args)
-                    if _aio.iscoroutine(result):
+                    if asyncio.iscoroutine(result):
                        result = await result
                    return str(result) if result else None
            except Exception as e:
@@ -3761,12 +3811,12 @@ class GatewayRunner:
        history = history or []
        message_text = event.text or ""

-        _is_shared_thread = (
-            source.chat_type != "dm"
-            and source.thread_id
-            and not getattr(self.config, "thread_sessions_per_user", False)
+        _is_shared_multi_user = is_shared_multi_user_session(
+            source,
+            group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
+            thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
        )
-        if _is_shared_thread and source.user_name:
+        if _is_shared_multi_user and source.user_name:
            message_text = f"[{source.user_name}] {message_text}"

        if event.media_urls:
@@ -3826,9 +3876,7 @@ class GatewayRunner:
            for i, path in enumerate(event.media_urls):
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                if mtype in ("", "application/octet-stream"):
-                    import os as _os2
-
-                    _ext = _os2.path.splitext(path)[1].lower()
+                    _ext = os.path.splitext(path)[1].lower()
                    if _ext in _TEXT_EXTENSIONS:
                        mtype = "text/plain"
                    else:
@@ -3838,13 +3886,10 @@ class GatewayRunner:
                if not mtype.startswith(("application/", "text/")):
                    continue

-                import os as _os
-                import re as _re
-
-                basename = _os.path.basename(path)
+                basename = os.path.basename(path)
                parts = basename.split("_", 2)
                display_name = parts[2] if len(parts) >= 3 else basename
-                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)
+                display_name = re.sub(r'[^\w.\- ]', '_', display_name)

                if mtype.startswith("text/"):
                    context_note = (
@@ -3861,14 +3906,14 @@ class GatewayRunner:
                message_text = f"{context_note}\n\n{message_text}"

        if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
+            # Always inject the reply-to pointer — even when the quoted text
+            # already appears in history. The prefix isn't deduplication, it's
+            # disambiguation: it tells the agent *which* prior message the user
+            # is referencing. History can contain the same or similar text
+            # multiple times, and without an explicit pointer the agent has to
+            # guess (or answer for both subjects). Token overhead is minimal.
            reply_snippet = event.reply_to_text[:500]
-            found_in_history = any(
-                reply_snippet[:200] in (msg.get("content") or "")
-                for msg in history
-                if msg.get("role") in ("assistant", "user", "tool")
-            )
-            if not found_in_history:
-                message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
+            message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'

        if "@" in message_text:
            try:
@@ -3876,9 +3921,11 @@ class GatewayRunner:
                from agent.model_metadata import get_model_context_length

                _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~"))
+                _msg_runtime = _resolve_runtime_agent_kwargs()
                _msg_ctx_len = get_model_context_length(
                    self._model,
-                    base_url=self._base_url or "",
+                    base_url=self._base_url or _msg_runtime.get("base_url") or "",
+                    api_key=_msg_runtime.get("api_key") or "",
                )
                _ctx_result = await preprocess_context_references_async(
                    message_text,
@@ -5140,7 +5187,6 @@ class GatewayRunner:
        # Save the requester's routing info so the new gateway process can
        # notify them once it comes back online.
        try:
-            import json as _json
            notify_data = {
                "platform": event.source.platform.value if event.source.platform else None,
                "chat_id": event.source.chat_id,
@@ -5148,7 +5194,7 @@ class GatewayRunner:
            if event.source.thread_id:
                notify_data["thread_id"] = event.source.thread_id
            (_hermes_home / ".restart_notify.json").write_text(
-                _json.dumps(notify_data)
+                json.dumps(notify_data)
            )
        except Exception as e:
            logger.debug("Failed to write restart notify file: %s", e)
@@ -5159,16 +5205,14 @@ class GatewayRunner:
        # marker persists so the new gateway can still detect a delayed
        # /restart redelivery from Telegram.  Overwritten on every /restart.
        try:
-            import json as _json
-            import time as _time
            dedup_data = {
                "platform": event.source.platform.value if event.source.platform else None,
-                "requested_at": _time.time(),
+                "requested_at": time.time(),
            }
            if event.platform_update_id is not None:
                dedup_data["update_id"] = event.platform_update_id
            (_hermes_home / ".restart_last_processed.json").write_text(
-                _json.dumps(dedup_data)
+                json.dumps(dedup_data)
            )
        except Exception as e:
            logger.debug("Failed to write restart dedup marker: %s", e)
@@ -5216,12 +5260,10 @@ class GatewayRunner:
            return False

        try:
-            import json as _json
-            import time as _time
            marker_path = _hermes_home / ".restart_last_processed.json"
            if not marker_path.exists():
                return False
-            data = _json.loads(marker_path.read_text())
+            data = json.loads(marker_path.read_text())
        except Exception:
            return False

@@ -5235,7 +5277,7 @@ class GatewayRunner:
        # swallow a fresh /restart from the user.
        requested_at = data.get("requested_at")
        if isinstance(requested_at, (int, float)):
-            if _time.time() - requested_at > 300:
+            if time.time() - requested_at > 300:
                return False
        return event.platform_update_id <= recorded_uid

@@ -5626,7 +5668,7 @@ class GatewayRunner:

        # Cache notice
        cache_enabled = (
-            ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
+            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
            or result.api_mode == "anthropic_messages"
        )
        if cache_enabled:
@@ -5648,6 +5690,7 @@ class GatewayRunner:
        from hermes_cli.models import (
            list_available_providers,
            normalize_provider,
+            provider_for_base_url,
            _PROVIDER_LABELS,
        )

@@ -5676,7 +5719,10 @@ class GatewayRunner:
        # Detect custom endpoint from config base_url
        if current_provider == "openrouter":
            _cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) else ""
-            if _cfg_base and "openrouter.ai" not in _cfg_base:
+            inferred_provider = provider_for_base_url(_cfg_base)
+            if inferred_provider:
+                current_provider = inferred_provider
+            elif _cfg_base and "openrouter.ai" not in _cfg_base:
                current_provider = "custom"

        current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
@@ -6433,6 +6479,11 @@ class GatewayRunner:
                    session_id=task_id,
                    platform=platform_key,
                    user_id=source.user_id,
+                    user_name=source.user_name,
+                    chat_id=source.chat_id,
+                    chat_name=source.chat_name,
+                    chat_type=source.chat_type,
+                    thread_id=source.thread_id,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )
@@ -7193,6 +7244,7 @@ class GatewayRunner:
                    tool_calls=msg.get("tool_calls"),
                    tool_call_id=msg.get("tool_call_id"),
                    reasoning=msg.get("reasoning"),
+                    reasoning_content=msg.get("reasoning_content"),
                )
            except Exception:
                pass  # Best-effort copy
@@ -7241,6 +7293,38 @@ class GatewayRunner:
                    if cached:
                        agent = cached[0]

+        # Resolve provider/base_url/api_key for the account-usage fetch.
+        # Prefer the live agent; fall back to persisted billing data on the
+        # SessionDB row so `/usage` still returns account info between turns
+        # when no agent is resident.
+        provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
+        base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
+        api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
+        if not provider and getattr(self, "_session_db", None) is not None:
+            try:
+                _entry_for_billing = self.session_store.get_or_create_session(source)
+                persisted = self._session_db.get_session(_entry_for_billing.session_id) or {}
+            except Exception:
+                persisted = {}
+            provider = provider or persisted.get("billing_provider")
+            base_url = base_url or persisted.get("billing_base_url")
+
+        # Fetch account usage off the event loop so slow provider APIs don't
+        # block the gateway. Failures are non-fatal -- account_lines stays [].
+        account_lines: list[str] = []
+        if provider:
+            try:
+                account_snapshot = await asyncio.to_thread(
+                    fetch_account_usage,
+                    provider,
+                    base_url=base_url,
+                    api_key=api_key,
+                )
+            except Exception:
+                account_snapshot = None
+            if account_snapshot:
+                account_lines = render_account_usage_lines(account_snapshot, markdown=True)
+
        if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
            lines = []

@@ -7298,6 +7382,10 @@ class GatewayRunner:
            if ctx.compression_count:
                lines.append(f"Compressions: {ctx.compression_count}")

+            if account_lines:
+                lines.append("")
+                lines.extend(account_lines)
+
            return "\n".join(lines)

        # No agent at all -- check session history for a rough count
@@ -7307,23 +7395,26 @@ class GatewayRunner:
            from agent.model_metadata import estimate_messages_tokens_rough
            msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
            approx = estimate_messages_tokens_rough(msgs)
-            return (
-                f"📊 **Session Info**\n"
-                f"Messages: {len(msgs)}\n"
-                f"Estimated context: ~{approx:,} tokens\n"
-                f"_(Detailed usage available after the first agent response)_"
-            )
+            lines = [
+                "📊 **Session Info**",
+                f"Messages: {len(msgs)}",
+                f"Estimated context: ~{approx:,} tokens",
+                "_(Detailed usage available after the first agent response)_",
+            ]
+            if account_lines:
+                lines.append("")
+                lines.extend(account_lines)
+            return "\n".join(lines)
+        if account_lines:
+            return "\n".join(account_lines)
        return "No usage data available for this session."

    async def _handle_insights_command(self, event: MessageEvent) -> str:
        """Handle /insights command -- show usage insights and analytics."""
-        import asyncio as _asyncio
-
        args = event.get_command_args().strip()

        # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
-        import re as _re
-        args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)
+        args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)

        days = 30
        source = None
@@ -7352,7 +7443,7 @@ class GatewayRunner:
            from hermes_state import SessionDB
            from agent.insights import InsightsEngine

-            loop = _asyncio.get_running_loop()
+            loop = asyncio.get_running_loop()

            def _run_insights():
                db = SessionDB()
@@ -7710,9 +7801,6 @@ class GatewayRunner:
        the messenger.  The user's next message is intercepted by
        ``_handle_message`` and written to ``.update_response``.
        """
-        import json
-        import re as _re
-
        pending_path = _hermes_home / ".update_pending.json"
        claimed_path = _hermes_home / ".update_pending.claimed.json"
        output_path = _hermes_home / ".update_output.txt"
@@ -7757,7 +7845,7 @@ class GatewayRunner:
            return

        def _strip_ansi(text: str) -> str:
-            return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)
+            return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)

        bytes_sent = 0
        last_stream_time = loop.time()
@@ -7905,9 +7993,6 @@ class GatewayRunner:
        cannot resolve the adapter (e.g. after a gateway restart where the
        platform hasn't reconnected yet).
        """
-        import json
-        import re as _re
-
        pending_path = _hermes_home / ".update_pending.json"
        claimed_path = _hermes_home / ".update_pending.claimed.json"
        output_path = _hermes_home / ".update_output.txt"
@@ -7953,7 +8038,7 @@ class GatewayRunner:

            if adapter and chat_id:
                # Strip ANSI escape codes for clean display
-                output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
+                output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
                if output:
                    if len(output) > 3500:
                        output = "…" + output[-3500:]
@@ -7986,14 +8071,12 @@ class GatewayRunner:

    async def _send_restart_notification(self) -> None:
        """Notify the chat that initiated /restart that the gateway is back."""
-        import json as _json
-
        notify_path = _hermes_home / ".restart_notify.json"
        if not notify_path.exists():
            return

        try:
-            data = _json.loads(notify_path.read_text())
+            data = json.loads(notify_path.read_text())
            platform_str = data.get("platform")
            chat_id = data.get("chat_id")
            thread_id = data.get("thread_id")
@@ -8079,7 +8162,6 @@ class GatewayRunner:
            The enriched message string with vision descriptions prepended.
        """
        from tools.vision_tools import vision_analyze_tool
-        import json as _json

        analysis_prompt = (
            "Describe everything visible in this image in thorough detail. "
@@ -8095,7 +8177,7 @@ class GatewayRunner:
                    image_url=path,
                    user_prompt=analysis_prompt,
                )
-                result = _json.loads(result_json)
+                result = json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
                    enriched_parts.append(
@@ -8154,7 +8236,6 @@ class GatewayRunner:
            return disabled_note

        from tools.transcription_tools import transcribe_audio
-        import asyncio

        enriched_parts = []
        for path in audio_paths:
@@ -8290,7 +8371,6 @@ class GatewayRunner:
        if not adapter:
            return
        try:
-            from gateway.platforms.base import MessageEvent, MessageType
            synth_event = MessageEvent(
                text=synth_text,
                message_type=MessageType.TEXT,
@@ -8395,7 +8475,6 @@ class GatewayRunner:
                            break
                    if adapter and source.chat_id:
                        try:
-                            from gateway.platforms.base import MessageEvent, MessageType
                            synth_event = MessageEvent(
                                text=synth_text,
                                message_type=MessageType.TEXT,
@@ -8917,7 +8996,6 @@ class GatewayRunner:
        if _streaming_enabled:
            try:
                from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
-                from gateway.config import Platform
                _adapter = self.adapters.get(source.platform)
                if _adapter:
                    _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
@@ -9201,8 +9279,7 @@ class GatewayRunner:
                if args:
                    from agent.display import get_tool_preview_max_len
                    _pl = get_tool_preview_max_len()
-                    import json as _json
-                    args_str = _json.dumps(args, ensure_ascii=False, default=str)
+                    args_str = json.dumps(args, ensure_ascii=False, default=str)
                    # When tool_preview_length is 0 (default), don't truncate
                    # in verbose mode — the user explicitly asked for full
                    # detail.  Platform message-length limits handle the rest.
@@ -9268,8 +9345,7 @@ class GatewayRunner:
            # Skip tool progress for platforms that don't support message
            # editing (e.g. iMessage/BlueBubbles) — each progress update
            # would become a separate message bubble, which is noisy.
-            from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter
-            if type(adapter).edit_message is _BaseAdapter.edit_message:
+            if type(adapter).edit_message is BasePlatformAdapter.edit_message:
                while not progress_queue.empty():
                    try:
                        progress_queue.get_nowait()
@@ -9651,6 +9727,11 @@ class GatewayRunner:
                    session_id=session_id,
                    platform=platform_key,
                    user_id=source.user_id,
+                    user_name=source.user_name,
+                    chat_id=source.chat_id,
+                    chat_name=source.chat_name,
+                    chat_type=source.chat_type,
+                    thread_id=source.thread_id,
                    gateway_session_key=session_key,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
@@ -10717,7 +10798,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    import time as _time
    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
@@ -10757,7 +10837,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            for _ in range(20):
                try:
                    os.kill(existing_pid, 0)
-                    _time.sleep(0.5)
+                    time.sleep(0.5)
                except (ProcessLookupError, PermissionError):
                    break  # Process is gone
            else:
@@ -10768,10 +10848,16 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
                )
                try:
                    terminate_pid(existing_pid, force=True)
-                    _time.sleep(0.5)
+                    time.sleep(0.5)
                except (ProcessLookupError, PermissionError, OSError):
                    pass
            remove_pid_file()
+            # remove_pid_file() is a no-op when the PID doesn't match.
+            # Force-unlink to cover the old-process-crashed case.
+            try:
+                (get_hermes_home() / "gateway.pid").unlink(missing_ok=True)
+            except Exception:
+                pass
            # Clean up any takeover marker the old process didn't consume
            # (e.g. SIGKILL'd before its shutdown handler could read it).
            try:
@@ -10910,6 +10996,30 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    else:
        logger.info("Skipping signal handlers (not running in main thread).")
    
+    # Claim the PID file BEFORE bringing up any platform adapters.
+    # This closes the --replace race window: two concurrent `gateway run
+    # --replace` invocations both pass the termination-wait above, but
+    # only the winner of the O_CREAT|O_EXCL race below will ever open
+    # Telegram polling, Discord gateway sockets, etc. The loser exits
+    # cleanly before touching any external service.
+    import atexit
+    from gateway.status import write_pid_file, remove_pid_file, get_running_pid
+    _current_pid = get_running_pid()
+    if _current_pid is not None and _current_pid != os.getpid():
+        logger.error(
+            "Another gateway instance (PID %d) started during our startup. "
+            "Exiting to avoid double-running.", _current_pid
+        )
+        return False
+    try:
+        write_pid_file()
+    except FileExistsError:
+        logger.error(
+            "PID file race lost to another gateway instance. Exiting."
+        )
+        return False
+    atexit.register(remove_pid_file)
+
    # Start the gateway
    success = await runner.start()
    if not success:
@@ -10919,12 +11029,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
        return True
    
-    # Write PID file so CLI can detect gateway is running
-    import atexit
-    from gateway.status import write_pid_file, remove_pid_file
-    write_pid_file()
-    atexit.register(remove_pid_file)
-    
    # Start background cron ticker so scheduled jobs fire automatically.
    # Pass the event loop so cron delivery can use live adapters (E2EE support).
    cron_stop = threading.Event()
@@ -152,6 +152,7 @@ class SessionContext:
    source: SessionSource
    connected_platforms: List[Platform]
    home_channels: Dict[Platform, HomeChannel]
+    shared_multi_user_session: bool = False
    
    # Session metadata
    session_key: str = ""
@@ -166,6 +167,7 @@ class SessionContext:
            "home_channels": {
                p.value: hc.to_dict() for p, hc in self.home_channels.items()
            },
+            "shared_multi_user_session": self.shared_multi_user_session,
            "session_key": self.session_key,
            "session_id": self.session_id,
            "created_at": self.created_at.isoformat() if self.created_at else None,
@@ -240,18 +242,16 @@ def build_session_context_prompt(
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")

    # User identity.
-    # In shared thread sessions (non-DM with thread_id), multiple users
-    # contribute to the same conversation.  Don't pin a single user name
-    # in the system prompt — it changes per-turn and would bust the prompt
-    # cache.  Instead, note that this is a multi-user thread; individual
-    # sender names are prefixed on each user message by the gateway.
-    _is_shared_thread = (
-        context.source.chat_type != "dm"
-        and context.source.thread_id
-    )
-    if _is_shared_thread:
+    # In shared multi-user sessions (shared threads OR shared non-thread groups
+    # when group_sessions_per_user=False), multiple users contribute to the same
+    # conversation.  Don't pin a single user name in the system prompt — it
+    # changes per-turn and would bust the prompt cache.  Instead, note that
+    # this is a multi-user session; individual sender names are prefixed on
+    # each user message by the gateway.
+    if context.shared_multi_user_session:
+        session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
        lines.append(
-            "**Session type:** Multi-user thread — messages are prefixed "
+            f"**Session type:** {session_label} — messages are prefixed "
            "with [sender name]. Multiple users may participate."
        )
    elif context.source.user_name:
@@ -467,6 +467,27 @@ class SessionEntry:
        )


+def is_shared_multi_user_session(
+    source: SessionSource,
+    *,
+    group_sessions_per_user: bool = True,
+    thread_sessions_per_user: bool = False,
+) -> bool:
+    """Return True when a non-DM session is shared across participants.
+
+    Mirrors the isolation rules in :func:`build_session_key`:
+      - DMs are never shared.
+      - Threads are shared unless ``thread_sessions_per_user`` is True.
+      - Non-thread group/channel sessions are shared unless
+        ``group_sessions_per_user`` is True (default: True = isolated).
+    """
+    if source.chat_type == "dm":
+        return False
+    if source.thread_id:
+        return not thread_sessions_per_user
+    return not group_sessions_per_user
+
+
 def build_session_key(
    source: SessionSource,
    group_sessions_per_user: bool = True,
@@ -1126,6 +1147,10 @@ class SessionStore:
                    tool_name=message.get("tool_name"),
                    tool_calls=message.get("tool_calls"),
                    tool_call_id=message.get("tool_call_id"),
+                    reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
+                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
+                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
+                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1155,6 +1180,7 @@ class SessionStore:
                        tool_calls=msg.get("tool_calls"),
                        tool_call_id=msg.get("tool_call_id"),
                        reasoning=msg.get("reasoning") if role == "assistant" else None,
+                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                    )
@@ -1238,6 +1264,11 @@ def build_session_context(
        source=source,
        connected_platforms=connected,
        home_channels=home_channels,
+        shared_multi_user_session=is_shared_multi_user_session(
+            source,
+            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
+            thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
+        ),
    )
    
    if session_entry:
@@ -56,6 +56,12 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS
 _SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
 _SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)

+# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
+# don't clobber each other's delivery targets.
+_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET)
+_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
+_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)
+
 _VAR_MAP = {
    "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
    "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
@@ -64,6 +70,9 @@ _VAR_MAP = {
    "HERMES_SESSION_USER_ID": _SESSION_USER_ID,
    "HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
    "HERMES_SESSION_KEY": _SESSION_KEY,
+    "HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
+    "HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
+    "HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
 }


@@ -225,8 +225,28 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:


 def write_pid_file() -> None:
-    """Write the current process PID and metadata to the gateway PID file."""
-    _write_json_file(_get_pid_path(), _build_pid_record())
+    """Write the current process PID and metadata to the gateway PID file.
+
+    Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
+    invocations race: exactly one process wins and the rest get
+    FileExistsError.
+    """
+    path = _get_pid_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    record = json.dumps(_build_pid_record())
+    try:
+        fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
+    except FileExistsError:
+        raise  # Let caller decide: another gateway is racing us
+    try:
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            f.write(record)
+    except Exception:
+        try:
+            path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise


 def write_runtime_status(
@@ -39,6 +39,13 @@ import httpx
 import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
+from hermes_cli.volcengine_byteplus import (
+    VOLCENGINE_PROVIDER,
+    BYTEPLUS_PROVIDER,
+    VOLCENGINE_STANDARD_BASE_URL,
+    BYTEPLUS_STANDARD_BASE_URL,
+    base_url_for_provider_model,
+)
 from hermes_constants import OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)
@@ -72,6 +79,8 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
 DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
+STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1"
+STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
 CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
 CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -168,8 +177,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="kimi-coding",
        name="Kimi / Moonshot",
        auth_type="api_key",
+        # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat).
+        # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding
+        # by _resolve_kimi_base_url() below.
        inference_base_url="https://api.moonshot.ai/v1",
-        api_key_env_vars=("KIMI_API_KEY",),
+        api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
        base_url_env_var="KIMI_BASE_URL",
    ),
    "kimi-coding-cn": ProviderConfig(
@@ -179,6 +191,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        inference_base_url="https://api.moonshot.cn/v1",
        api_key_env_vars=("KIMI_CN_API_KEY",),
    ),
+    "stepfun": ProviderConfig(
+        id="stepfun",
+        name="StepFun Step Plan",
+        auth_type="api_key",
+        inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL,
+        api_key_env_vars=("STEPFUN_API_KEY",),
+        base_url_env_var="STEPFUN_BASE_URL",
+    ),
    "arcee": ProviderConfig(
        id="arcee",
        name="Arcee AI",
@@ -294,6 +314,20 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XIAOMI_API_KEY",),
        base_url_env_var="XIAOMI_BASE_URL",
    ),
+    "volcengine": ProviderConfig(
+        id=VOLCENGINE_PROVIDER,
+        name="Volcengine",
+        auth_type="api_key",
+        inference_base_url=VOLCENGINE_STANDARD_BASE_URL,
+        api_key_env_vars=("VOLCENGINE_API_KEY",),
+    ),
+    "byteplus": ProviderConfig(
+        id=BYTEPLUS_PROVIDER,
+        name="BytePlus",
+        auth_type="api_key",
+        inference_base_url=BYTEPLUS_STANDARD_BASE_URL,
+        api_key_env_vars=("BYTEPLUS_API_KEY",),
+    ),
    "ollama-cloud": ProviderConfig(
        id="ollama-cloud",
        name="Ollama Cloud",
@@ -340,10 +374,16 @@ def get_anthropic_key() -> str:
 # =============================================================================

 # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
-# on api.kimi.com/coding/v1.  Legacy keys from platform.moonshot.ai work on
-# api.moonshot.ai/v1 (the default).  Auto-detect when user hasn't set
+# on api.kimi.com/coding.  Legacy keys from platform.moonshot.ai work on
+# api.moonshot.ai/v1 (the old default).  Auto-detect when user hasn't set
 # KIMI_BASE_URL explicitly.
-KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"
+#
+# Note: the base URL intentionally has NO /v1 suffix.  The /coding endpoint
+# speaks the Anthropic Messages protocol, and the anthropic SDK appends
+# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages"
+# (the correct target). Using "/coding/v1" here would produce
+# "/coding/v1/v1/messages" (a 404).
+KIMI_CODE_BASE_URL = "https://api.kimi.com/coding"


 def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
@@ -983,6 +1023,7 @@ def resolve_provider(
        "x-ai": "xai", "x.ai": "xai", "grok": "xai",
        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
+        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -995,6 +1036,10 @@ def resolve_provider(
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
        "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
+        "volcengine-coding-plan": "volcengine",
+        "volcengine_coding_plan": "volcengine",
+        "byteplus-coding-plan": "byteplus",
+        "byteplus_coding_plan": "byteplus",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
        # Local server aliases — route through the generic custom provider
@@ -1137,6 +1182,21 @@ def _qwen_cli_auth_path() -> Path:
    return Path.home() / ".qwen" / "oauth_creds.json"


+def _current_model_for_provider(provider_id: str) -> str:
+    """Return the currently configured model when it belongs to the provider."""
+    try:
+        config = read_raw_config()
+    except Exception:
+        return ""
+
+    model_cfg = config.get("model")
+    if isinstance(model_cfg, dict):
+        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
+        if configured_provider == provider_id:
+            return str(model_cfg.get("default") or model_cfg.get("model") or "").strip()
+    return ""
+
+
 def _read_qwen_cli_tokens() -> Dict[str, Any]:
    auth_path = _qwen_cli_auth_path()
    if not auth_path.exists():
@@ -2535,7 +2595,11 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()

-    if provider_id in ("kimi-coding", "kimi-coding-cn"):
+    active_model = _current_model_for_provider(provider_id)
+
+    if provider_id in {VOLCENGINE_PROVIDER, BYTEPLUS_PROVIDER}:
+        base_url = base_url_for_provider_model(provider_id, active_model) or pconfig.inference_base_url
+    elif provider_id in ("kimi-coding", "kimi-coding-cn"):
        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
    elif env_url:
        base_url = env_url
@@ -2630,7 +2694,11 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()

-    if provider_id in ("kimi-coding", "kimi-coding-cn"):
+    active_model = _current_model_for_provider(provider_id)
+
+    if provider_id in {VOLCENGINE_PROVIDER, BYTEPLUS_PROVIDER}:
+        base_url = base_url_for_provider_model(provider_id, active_model) or pconfig.inference_base_url
+    elif provider_id in ("kimi-coding", "kimi-coding-cn"):
        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
    elif provider_id == "zai":
        base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
@@ -3375,7 +3443,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
+                _PROVIDER_MODELS, get_pricing_for_provider,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
            model_ids = _PROVIDER_MODELS.get("nous", [])
@@ -3384,7 +3452,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            unavailable_models: list = []
            if model_ids:
                pricing = get_pricing_for_provider("nous")
-                model_ids = filter_nous_free_models(model_ids, pricing)
                free_tier = check_nous_free_tier()
                if free_tier:
                    model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -152,6 +152,23 @@ def auth_add_command(args) -> None:

    pool = load_pool(provider)

+    # Clear ALL suppressions for this provider — re-adding a credential is
+    # a strong signal the user wants auth re-enabled.  This covers env:*
+    # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
+    # device_code (codex), etc.  One consistent re-engagement pattern.
+    # Matches the Codex device_code re-link pattern that predates this.
+    if not provider.startswith(CUSTOM_POOL_PREFIX):
+        try:
+            from hermes_cli.auth import (
+                _load_auth_store,
+                unsuppress_credential_source,
+            )
+            suppressed = _load_auth_store().get("suppressed_sources", {})
+            for src in list(suppressed.get(provider, []) or []):
+                unsuppress_credential_source(provider, src)
+        except Exception:
+            pass
+
    if requested_type == AUTH_TYPE_API_KEY:
        token = (getattr(args, "api_key", None) or "").strip()
        if not token:
@@ -338,71 +355,28 @@ def auth_remove_command(args) -> None:
        raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
    print(f"Removed {provider} credential #{index} ({removed.label})")

-    # If this was an env-seeded credential, also clear the env var from .env
-    # so it doesn't get re-seeded on the next load_pool() call.
-    if removed.source.startswith("env:"):
-        env_var = removed.source[len("env:"):]
-        if env_var:
-            from hermes_cli.config import remove_env_value
-            cleared = remove_env_value(env_var)
-            if cleared:
-                print(f"Cleared {env_var} from .env")
+    # Unified removal dispatch.  Every credential source Hermes reads from
+    # (env vars, external OAuth files, auth.json blocks, custom config)
+    # has a RemovalStep registered in agent.credential_sources.  The step
+    # handles its source-specific cleanup and we centralise suppression +
+    # user-facing output here so every source behaves identically from
+    # the user's perspective.
+    from agent.credential_sources import find_removal_step
+    from hermes_cli.auth import suppress_credential_source

-    # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
-    # clear the underlying auth store / credential file so it doesn't get
-    # re-seeded on the next load_pool() call.
-    elif provider == "openai-codex" and (
-        removed.source == "device_code" or removed.source.endswith(":device_code")
-    ):
-        # Codex tokens live in TWO places: the Hermes auth store and
-        # ~/.codex/auth.json (the Codex CLI shared file).  On every refresh,
-        # refresh_codex_oauth_pure() writes to both.  So clearing only the
-        # Hermes auth store is not enough — _seed_from_singletons() will
-        # auto-import from ~/.codex/auth.json on the next load_pool() and
-        # the removal is instantly undone.  Mark the source as suppressed
-        # so auto-import is skipped; leave ~/.codex/auth.json untouched so
-        # the Codex CLI itself keeps working.
-        from hermes_cli.auth import (
-            _load_auth_store, _save_auth_store, _auth_store_lock,
-            suppress_credential_source,
-        )
-        with _auth_store_lock():
-            auth_store = _load_auth_store()
-            providers_dict = auth_store.get("providers")
-            if isinstance(providers_dict, dict) and provider in providers_dict:
-                del providers_dict[provider]
-                _save_auth_store(auth_store)
-                print(f"Cleared {provider} OAuth tokens from auth store")
-        suppress_credential_source(provider, "device_code")
-        print("Suppressed openai-codex device_code source — it will not be re-seeded.")
-        print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
-        print("Run `hermes auth add openai-codex` to re-enable if needed.")
+    step = find_removal_step(provider, removed.source)
+    if step is None:
+        # Unregistered source — e.g. "manual", which has nothing external
+        # to clean up.  The pool entry is already gone; we're done.
+        return

-    elif removed.source == "device_code" and provider == "nous":
-        from hermes_cli.auth import (
-            _load_auth_store, _save_auth_store, _auth_store_lock,
-        )
-        with _auth_store_lock():
-            auth_store = _load_auth_store()
-            providers_dict = auth_store.get("providers")
-            if isinstance(providers_dict, dict) and provider in providers_dict:
-                del providers_dict[provider]
-                _save_auth_store(auth_store)
-                print(f"Cleared {provider} OAuth tokens from auth store")
-
-    elif removed.source == "hermes_pkce" and provider == "anthropic":
-        from hermes_constants import get_hermes_home
-        oauth_file = get_hermes_home() / ".anthropic_oauth.json"
-        if oauth_file.exists():
-            oauth_file.unlink()
-            print("Cleared Hermes Anthropic OAuth credentials")
-
-    elif removed.source == "claude_code" and provider == "anthropic":
-        from hermes_cli.auth import suppress_credential_source
-        suppress_credential_source(provider, "claude_code")
-        print("Suppressed claude_code credential — it will not be re-seeded.")
-        print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
-        print("Run `hermes auth add anthropic` to re-enable if needed.")
+    result = step.remove_fn(provider, removed)
+    for line in result.cleaned:
+        print(line)
+    if result.suppress:
+        suppress_credential_source(provider, removed.source)
+    for line in result.hints:
+        print(line)


 def auth_reset_command(args) -> None:
@@ -924,12 +924,22 @@ class SlashCommandCompleter(Completer):
                    display_meta=meta,
                )

-        # If the user typed @file: or @folder:, delegate to path completions
+        # If the user typed @file: / @folder: (or just @file / @folder with
+        # no colon yet), delegate to path completions.  Accepting the bare
+        # form lets the picker surface directories as soon as the user has
+        # typed `@folder`, without requiring them to first accept the static
+        # `@folder:` hint and re-trigger completion.
        for prefix in ("@file:", "@folder:"):
-            if word.startswith(prefix):
-                path_part = word[len(prefix):] or "."
+            bare = prefix[:-1]
+
+            if word == bare or word.startswith(prefix):
+                want_dir = prefix == "@folder:"
+                path_part = '' if word == bare else word[len(prefix):]
                expanded = os.path.expanduser(path_part)
-                if expanded.endswith("/"):
+
+                if not expanded or expanded == ".":
+                    search_dir, match_prefix = ".", ""
+                elif expanded.endswith("/"):
                    search_dir, match_prefix = expanded, ""
                else:
                    search_dir = os.path.dirname(expanded) or "."
@@ -945,15 +955,21 @@ class SlashCommandCompleter(Completer):
                for entry in sorted(entries):
                    if match_prefix and not entry.lower().startswith(prefix_lower):
                        continue
-                    if count >= limit:
-                        break
                    full_path = os.path.join(search_dir, entry)
                    is_dir = os.path.isdir(full_path)
+                    # `@folder:` must only surface directories; `@file:` only
+                    # regular files.  Without this filter `@folder:` listed
+                    # every .env / .gitignore in the cwd, defeating the
+                    # explicit prefix and confusing users expecting a
+                    # directory picker.
+                    if want_dir != is_dir:
+                        continue
+                    if count >= limit:
+                        break
                    display_path = os.path.relpath(full_path)
                    suffix = "/" if is_dir else ""
-                    kind = "folder" if is_dir else "file"
                    meta = "dir" if is_dir else _file_size_label(full_path)
-                    completion = f"@{kind}:{display_path}{suffix}"
+                    completion = f"{prefix}{display_path}{suffix}"
                    yield Completion(
                        completion,
                        start_position=-len(word),
@@ -387,6 +387,26 @@ DEFAULT_CONFIG = {
        # (terminal and execute_code).  Skill-declared required_environment_variables
        # are passed through automatically; this list is for non-skill use cases.
        "env_passthrough": [],
+        # Extra files to source in the login shell when building the
+        # per-session environment snapshot.  Use this when tools like nvm,
+        # pyenv, asdf, or custom PATH entries are registered by files that
+        # a bash login shell would skip — most commonly ``~/.bashrc``
+        # (bash doesn't source bashrc in non-interactive login mode) or
+        # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
+        # Paths support ``~`` / ``${VAR}``. Missing files are silently
+        # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
+        # snapshot shell is bash (this is the ``auto_source_bashrc``
+        # behaviour — disable with that key if you want strict login-only
+        # semantics).
+        "shell_init_files": [],
+        # When true (default), Hermes sources ``~/.bashrc`` in the login
+        # shell used to build the environment snapshot.  This captures
+        # PATH additions, shell functions, and aliases defined in the
+        # user's bashrc — which a plain ``bash -l -c`` would otherwise
+        # miss because bash skips bashrc in non-interactive login mode.
+        # Turn this off if you have a bashrc that misbehaves when sourced
+        # non-interactively (e.g. one that hard-exits on TTY checks).
+        "auto_source_bashrc": True,
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "docker_forward_env": [],
        # Explicit environment variables to set inside Docker containers.
@@ -593,6 +613,10 @@ DEFAULT_CONFIG = {
    },
    
    # Text-to-speech configuration
+    # Each provider supports an optional `max_text_length:` override for the
+    # per-request input-character cap. Omit it to use the provider's documented
+    # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
+    # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
    "tts": {
        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
        "edge": {
@@ -645,6 +669,7 @@ DEFAULT_CONFIG = {
        "record_key": "ctrl+b",
        "max_recording_seconds": 120,
        "auto_tts": False,
+        "beep_enabled": True,         # Play record start/stop beeps in CLI voice mode
        "silence_threshold": 200,     # RMS below this = silence (0-32767)
        "silence_duration": 3.0,      # Seconds of silence before auto-stop
    },
@@ -691,6 +716,12 @@ DEFAULT_CONFIG = {
                               # independent of the parent's max_iterations)
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
                                 # "low", "minimal", "none" (empty = inherit parent's level)
+        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
+        # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
+        # and _get_orchestrator_enabled).  Values are clamped to [1, 3] with a
+        # warning log if out of range.
+        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
+        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -703,6 +734,20 @@ DEFAULT_CONFIG = {
    # always goes to ~/.hermes/skills/.
    "skills": {
        "external_dirs": [],   # e.g. ["~/.agents/skills", "/shared/team-skills"]
+        # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
+        # content with the absolute skill directory and the active session id
+        # before the agent sees it.  Lets skill authors reference bundled
+        # scripts without the agent having to join paths.
+        "template_vars": True,
+        # Pre-execute inline shell snippets written as !`cmd` in SKILL.md
+        # body.  Their stdout is inlined into the skill message before the
+        # agent reads it, so skills can inject dynamic context (dates, git
+        # state, detected tool versions, …).  Off by default because any
+        # content from the skill author runs on the host without approval;
+        # only enable for skill sources you trust.
+        "inline_shell": False,
+        # Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
+        "inline_shell_timeout": 10,
    },

    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -773,6 +818,21 @@ DEFAULT_CONFIG = {
    "command_allowlist": [],
    # User-defined quick commands that bypass the agent loop (type: exec only)
    "quick_commands": {},
+
+    # Shell-script hooks — declarative bridge that invokes shell scripts
+    # on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call,
+    # subagent_stop, etc.).  Each entry maps an event name to a list of
+    # {matcher, command, timeout} dicts.  First registration of a new
+    # command prompts the user for consent; subsequent runs reuse the
+    # stored approval from ~/.hermes/shell-hooks-allowlist.json.
+    # See `website/docs/user-guide/features/hooks.md` for schema + examples.
+    "hooks": {},
+
+    # Auto-accept shell-hook registrations without a TTY prompt.  Also
+    # toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1.
+    # Gateway / cron / non-interactive runs need this (or one of the other
+    # channels) to pick up newly-added hooks.
+    "hooks_auto_accept": False,
    # Custom personalities — add your own entries here
    # Supports string format: {"name": "system prompt"}
    # Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}}
@@ -796,6 +856,11 @@ DEFAULT_CONFIG = {
        # Wrap delivered cron responses with a header (task name) and footer
        # ("The agent cannot see this message").  Set to false for clean output.
        "wrap_response": True,
+        # Maximum number of due jobs to run in parallel per tick.
+        # null/0 = unbounded (limited only by thread count).
+        # 1 = serial (pre-v0.9 behaviour).
+        # Also overridable via HERMES_CRON_MAX_PARALLEL env var.
+        "max_parallel_jobs": None,
    },

    # execute_code settings — controls the tool used for programmatic tool calls.
@@ -828,8 +893,36 @@ DEFAULT_CONFIG = {
        "force_ipv4": False,
    },

+    # Session storage — controls automatic cleanup of ~/.hermes/state.db.
+    # state.db accumulates every session, message, tool call, and FTS5 index
+    # entry forever.  Without auto-pruning, a heavy user (gateway + cron)
+    # reports 384MB+ databases with 68K+ messages, which slows down FTS5
+    # inserts, /resume listing, and insights queries.
+    "sessions": {
+        # When true, prune ended sessions older than retention_days once
+        # per (roughly) min_interval_hours at CLI/gateway/cron startup.
+        # Only touches ended sessions — active sessions are always preserved.
+        # Default false: session history is valuable for search recall, and
+        # silently deleting it could surprise users.  Opt in explicitly.
+        "auto_prune": False,
+        # How many days of ended-session history to keep.  Matches the
+        # default of ``hermes sessions prune``.
+        "retention_days": 90,
+        # VACUUM after a prune that actually deleted rows.  SQLite does not
+        # reclaim disk space on DELETE — freed pages are just reused on
+        # subsequent INSERTs — so without VACUUM the file stays bloated
+        # even after pruning.  VACUUM blocks writes for a few seconds per
+        # 100MB, so it only runs at startup, and only when prune deleted
+        # ≥1 session.
+        "vacuum_after_prune": True,
+        # Minimum hours between auto-maintenance runs (avoids repeating
+        # the sweep on every CLI invocation).  Tracked via state_meta in
+        # state.db itself, so it's shared across all processes.
+        "min_interval_hours": 24,
+    },
+
    # Config schema version - bump this when adding new required fields
-    "_config_version": 21,
+    "_config_version": 22,
 }

 # =============================================================================
@@ -985,6 +1078,22 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "STEPFUN_API_KEY": {
+        "description": "StepFun Step Plan API key",
+        "prompt": "StepFun Step Plan API key",
+        "url": "https://platform.stepfun.com/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "STEPFUN_BASE_URL": {
+        "description": "StepFun Step Plan base URL override",
+        "prompt": "StepFun Step Plan base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "ARCEEAI_API_KEY": {
        "description": "Arcee AI API key",
        "prompt": "Arcee AI API key",
@@ -1172,6 +1281,20 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "VOLCENGINE_API_KEY": {
+        "description": "Volcengine API key for Doubao / Seed models (standard + Coding Plan catalogs)",
+        "prompt": "Volcengine API Key",
+        "url": "https://www.volcengine.com/product/ark",
+        "password": True,
+        "category": "provider",
+    },
+    "BYTEPLUS_API_KEY": {
+        "description": "BytePlus API key for Seed / Dola models (standard + Coding Plan catalogs)",
+        "prompt": "BytePlus API Key",
+        "url": "https://www.byteplus.com/en/product/modelark",
+        "password": True,
+        "category": "provider",
+    },
    "AWS_REGION": {
        "description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
        "prompt": "AWS Region",
@@ -2037,6 +2160,7 @@ _KNOWN_ROOT_KEYS = {
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
    "auxiliary", "custom_providers", "context", "memory", "gateway",
+    "sessions",
 }

 # Valid fields inside a custom_providers list entry
@@ -2194,7 +2318,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
    if not issues:
        return

-    import sys
    lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
    for ci in issues:
        marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
@@ -2209,7 +2332,6 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
    These env vars are deprecated — the canonical setting is terminal.cwd
    in config.yaml.  Prints a migration hint to stderr.
    """
-    import os, sys
    messaging_cwd = os.environ.get("MESSAGING_CWD")
    terminal_cwd_env = os.environ.get("TERMINAL_CWD")

@@ -2552,8 +2674,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
            # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins.
            grandfathered: List[str] = []
            try:
-                from hermes_constants import get_hermes_home as _ghome
-                user_plugins_dir = _ghome() / "plugins"
+                user_plugins_dir = get_hermes_home() / "plugins"
                if user_plugins_dir.is_dir():
                    for child in sorted(user_plugins_dir.iterdir()):
                        if not child.is_dir():
@@ -3218,7 +3339,6 @@ def _check_non_ascii_credential(key: str, value: str) -> str:
            bad_chars.append(f"  position {i}: {ch!r} (U+{ord(ch):04X})")
    sanitized = value.encode("ascii", errors="ignore").decode("ascii")

-    import sys
    print(
        f"\n  Warning: {key} contains non-ASCII characters that will break API requests.\n"
        f"  This usually happens when copy-pasting from a PDF, rich-text editor,\n"
@@ -30,6 +30,7 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

 from hermes_cli.colors import Colors, color
 from hermes_constants import OPENROUTER_MODELS_URL
+from utils import base_url_host_matches


 _PROVIDER_ENV_HINTS = (
@@ -911,6 +912,7 @@ def run_doctor(args):
    _apikey_providers = [
        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
@@ -942,18 +944,22 @@ def run_doctor(args):
            try:
                import httpx
                _base = os.getenv(_base_env, "") if _base_env else ""
-                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
+                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
+                # (OpenAI-compat surface, which exposes /models for health check).
                if not _base and _key.startswith("sk-kimi-"):
                    _base = "https://api.kimi.com/coding/v1"
-                # Anthropic-compat endpoints (/anthropic) don't support /models.
-                # Rewrite to the OpenAI-compat /v1 surface for health checks.
+                # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
+                # with no /v1) don't support /models.  Rewrite to the OpenAI-compat
+                # /v1 surface for health checks.
                if _base and _base.rstrip("/").endswith("/anthropic"):
                    from agent.auxiliary_client import _to_openai_base_url
                    _base = _to_openai_base_url(_base)
+                if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
+                    _base = _base.rstrip("/") + "/v1"
                _url = (_base.rstrip("/") + "/models") if _base else _default_url
                _headers = {"Authorization": f"Bearer {_key}"}
-                if "api.kimi.com" in _url.lower():
-                    _headers["User-Agent"] = "KimiCLI/1.30.0"
+                if base_url_host_matches(_base, "api.kimi.com"):
+                    _headers["User-Agent"] = "claude-code/0.1.0"
                _resp = httpx.get(
                    _url,
                    headers=_headers,
@@ -3,6 +3,7 @@
 from __future__ import annotations

 import os
+import sys
 from pathlib import Path

 from dotenv import load_dotenv
@@ -14,6 +15,26 @@ from dotenv import load_dotenv
 # pure ASCII (they become HTTP header values).
 _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")

+# Names we've already warned about during this process, so repeated
+# load_hermes_dotenv() calls (user env + project env, gateway hot-reload,
+# tests) don't spam the same warning multiple times.
+_WARNED_KEYS: set[str] = set()
+
+
+def _format_offending_chars(value: str, limit: int = 3) -> str:
+    """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
+    seen: list[str] = []
+    for ch in value:
+        if ord(ch) > 127:
+            label = f"U+{ord(ch):04X}"
+            if ch.isprintable():
+                label += f" ({ch!r})"
+            if label not in seen:
+                seen.append(label)
+            if len(seen) >= limit:
+                break
+    return ", ".join(seen)
+

 def _sanitize_loaded_credentials() -> None:
    """Strip non-ASCII characters from credential env vars in os.environ.
@@ -21,14 +42,42 @@ def _sanitize_loaded_credentials() -> None:
    Called after dotenv loads so the rest of the codebase never sees
    non-ASCII API keys.  Only touches env vars whose names end with
    known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
+
+    Emits a one-line warning to stderr when characters are stripped.
+    Silent stripping would mask copy-paste corruption (Unicode lookalike
+    glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque
+    provider-side "invalid API key" errors (see #6843).
    """
    for key, value in list(os.environ.items()):
        if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
            continue
        try:
            value.encode("ascii")
+            continue
        except UnicodeEncodeError:
-            os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")
+            pass
+        cleaned = value.encode("ascii", errors="ignore").decode("ascii")
+        os.environ[key] = cleaned
+        if key in _WARNED_KEYS:
+            continue
+        _WARNED_KEYS.add(key)
+        stripped = len(value) - len(cleaned)
+        detail = _format_offending_chars(value) or "non-printable"
+        print(
+            f"  Warning: {key} contained {stripped} non-ASCII character"
+            f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the "
+            f"key can be sent as an HTTP header.",
+            file=sys.stderr,
+        )
+        print(
+            "  This usually means the key was copy-pasted from a PDF, "
+            "rich-text editor, or web page that substituted lookalike\n"
+            "  Unicode glyphs for ASCII letters. If authentication fails "
+            "(e.g. \"API key not valid\"), re-copy the key from the\n"
+            "  provider's dashboard and run `hermes setup` (or edit the "
+            ".env file in a plain-text editor).",
+            file=sys.stderr,
+        )


 def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
@@ -111,6 +160,8 @@ def load_hermes_dotenv(
    # Fix corrupted .env files before python-dotenv parses them (#8908).
    if user_env.exists():
        _sanitize_env_file_if_needed(user_env)
+    if project_env_path and project_env_path.exists():
+        _sanitize_env_file_if_needed(project_env_path)

    if user_env.exists():
        _load_dotenv_with_fallback(user_env, override=True)
@@ -994,8 +994,6 @@ def get_systemd_linger_status() -> tuple[bool | None, str]:
    if not is_linux():
        return None, "not supported on this platform"

-    import shutil
-
    if not shutil.which("loginctl"):
        return None, "loginctl not found"

@@ -1347,7 +1345,6 @@ def _ensure_linger_enabled() -> None:
        return

    import getpass
-    import shutil

    username = getpass.getuser()
    linger_file = Path(f"/var/lib/systemd/linger/{username}")
@@ -1656,7 +1653,6 @@ def get_launchd_label() -> str:


 def _launchd_domain() -> str:
-    import os
    return f"gui/{os.getuid()}"


@@ -2643,9 +2639,120 @@ def _setup_dingtalk():


 def _setup_wecom():
-    """Configure WeCom (Enterprise WeChat) via the standard platform setup."""
-    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
-    _setup_standard_platform(wecom_platform)
+    """Interactive setup for WeCom — scan QR code or manual credential input."""
+    print()
+    print(color("  ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN))
+
+    existing_bot_id = get_env_value("WECOM_BOT_ID")
+    existing_secret = get_env_value("WECOM_SECRET")
+    if existing_bot_id and existing_secret:
+        print()
+        print_success("WeCom is already configured.")
+        if not prompt_yes_no("  Reconfigure WeCom?", False):
+            return
+
+    # ── Choose setup method ──
+    print()
+    method_choices = [
+        "Scan QR code to obtain Bot ID and Secret automatically (recommended)",
+        "Enter existing Bot ID and Secret manually",
+    ]
+    method_idx = prompt_choice("  How would you like to set up WeCom?", method_choices, 0)
+
+    bot_id = None
+    secret = None
+
+    if method_idx == 0:
+        # ── QR scan flow ──
+        try:
+            from gateway.platforms.wecom import qr_scan_for_bot_info
+        except Exception as exc:
+            print_error(f"  WeCom QR scan import failed: {exc}")
+            qr_scan_for_bot_info = None
+
+        if qr_scan_for_bot_info is not None:
+            try:
+                credentials = qr_scan_for_bot_info()
+            except KeyboardInterrupt:
+                print()
+                print_warning("  WeCom setup cancelled.")
+                return
+            except Exception as exc:
+                print_warning(f"  QR scan failed: {exc}")
+                credentials = None
+            if credentials:
+                bot_id = credentials.get("bot_id", "")
+                secret = credentials.get("secret", "")
+                print_success("  ✔ QR scan successful! Bot ID and Secret obtained.")
+
+        if not bot_id or not secret:
+            print_info("  QR scan did not complete. Continuing with manual input.")
+            bot_id = None
+            secret = None
+
+    # ── Manual credential input ──
+    if not bot_id or not secret:
+        print()
+        print_info("  1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots")
+        print_info("  2. Select API Mode")
+        print_info("  3. Copy the Bot ID and Secret from the bot's credentials info")
+        print_info("  4. The bot connects via WebSocket — no public endpoint needed")
+        print()
+        bot_id = prompt("  Bot ID", password=False)
+        if not bot_id:
+            print_warning("  Skipped — WeCom won't work without a Bot ID.")
+            return
+        secret = prompt("  Secret", password=True)
+        if not secret:
+            print_warning("  Skipped — WeCom won't work without a Secret.")
+            return
+
+    # ── Save core credentials ──
+    save_env_value("WECOM_BOT_ID", bot_id)
+    save_env_value("WECOM_SECRET", secret)
+
+    # ── Allowed users (deny-by-default security) ──
+    print()
+    print_info("  The gateway DENIES all users by default for security.")
+    print_info("  Enter user IDs to create an allowlist, or leave empty.")
+    allowed = prompt("  Allowed user IDs (comma-separated, or empty)", password=False)
+    if allowed:
+        cleaned = allowed.replace(" ", "")
+        save_env_value("WECOM_ALLOWED_USERS", cleaned)
+        print_success("  Saved — only these users can interact with the bot.")
+    else:
+        print()
+        access_choices = [
+            "Enable open access (anyone can message the bot)",
+            "Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')",
+            "Disable direct messages",
+            "Skip for now (bot will deny all users until configured)",
+        ]
+        access_idx = prompt_choice("  How should unauthorized users be handled?", access_choices, 1)
+        if access_idx == 0:
+            save_env_value("WECOM_DM_POLICY", "open")
+            save_env_value("GATEWAY_ALLOW_ALL_USERS", "true")
+            print_warning("  Open access enabled — anyone can use your bot!")
+        elif access_idx == 1:
+            save_env_value("WECOM_DM_POLICY", "pairing")
+            print_success("  DM pairing mode — users will receive a code to request access.")
+            print_info("  Approve with: hermes pairing approve <platform> <code>")
+        elif access_idx == 2:
+            save_env_value("WECOM_DM_POLICY", "disabled")
+            print_warning("  Direct messages disabled.")
+        else:
+            print_info("  Skipped — configure later with 'hermes gateway setup'")
+
+    # ── Home channel (optional) ──
+    print()
+    print_info("  Chat ID for scheduled results and notifications.")
+    home = prompt("  Home chat ID (optional, for cron/notifications)", password=False)
+    if home:
+        save_env_value("WECOM_HOME_CHANNEL", home)
+        print_success(f"  Home channel set to {home}")
+
+    print()
+    print_success("💬 WeCom configured!")


 def _is_service_installed() -> bool:
@@ -3025,7 +3132,8 @@ def _setup_qqbot():
    if method_idx == 0:
        # ── QR scan-to-configure ──
        try:
-            credentials = _qqbot_qr_flow()
+            from gateway.platforms.qqbot import qr_register
+            credentials = qr_register()
        except KeyboardInterrupt:
            print()
            print_warning("  QQ Bot setup cancelled.")
@@ -3107,106 +3215,6 @@ def _setup_qqbot():
    print_info(f"  App ID: {credentials['app_id']}")


-def _qqbot_render_qr(url: str) -> bool:
-    """Try to render a QR code in the terminal. Returns True if successful."""
-    try:
-        import qrcode as _qr
-        qr = _qr.QRCode(border=1,error_correction=_qr.constants.ERROR_CORRECT_L)
-        qr.add_data(url)
-        qr.make(fit=True)
-        qr.print_ascii(invert=True)
-        return True
-    except Exception:
-        return False
-
-
-def _qqbot_qr_flow():
-    """Run the QR-code scan-to-configure flow.
-
-    Returns a dict with app_id, client_secret, user_openid on success,
-    or None on failure/cancel.
-    """
-    try:
-        from gateway.platforms.qqbot import (
-            create_bind_task, poll_bind_result, build_connect_url,
-            decrypt_secret, BindStatus,
-        )
-        from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
-    except Exception as exc:
-        print_error(f"  QQBot onboard import failed: {exc}")
-        return None
-
-    import asyncio
-    import time
-
-    MAX_REFRESHES = 3
-    refresh_count = 0
-
-    while refresh_count <= MAX_REFRESHES:
-        loop = asyncio.new_event_loop()
-
-        # ── Create bind task ──
-        try:
-            task_id, aes_key = loop.run_until_complete(create_bind_task())
-        except Exception as e:
-            print_warning(f"  Failed to create bind task: {e}")
-            loop.close()
-            return None
-
-        url = build_connect_url(task_id)
-
-        # ── Display QR code + URL ──
-        print()
-        if _qqbot_render_qr(url):
-            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
-        else:
-            print(f"  Open this URL in QQ on your phone:\n  {url}")
-            print_info("  Tip: pip install qrcode  to show a scannable QR code here")
-
-        # ── Poll loop (silent — keep QR visible at bottom) ──
-        try:
-            while True:
-                try:
-                    status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
-                        poll_bind_result(task_id)
-                    )
-                except Exception:
-                    time.sleep(ONBOARD_POLL_INTERVAL)
-                    continue
-
-                if status == BindStatus.COMPLETED:
-                    client_secret = decrypt_secret(encrypted_secret, aes_key)
-                    print()
-                    print_success(f"  QR scan complete! (App ID: {app_id})")
-                    if user_openid:
-                        print_info(f"  Scanner's OpenID: {user_openid}")
-                    return {
-                        "app_id": app_id,
-                        "client_secret": client_secret,
-                        "user_openid": user_openid,
-                    }
-
-                if status == BindStatus.EXPIRED:
-                    refresh_count += 1
-                    if refresh_count > MAX_REFRESHES:
-                        print()
-                        print_warning(f"  QR code expired {MAX_REFRESHES} times — giving up.")
-                        return None
-                    print()
-                    print_warning(f"  QR code expired, refreshing... ({refresh_count}/{MAX_REFRESHES})")
-                    loop.close()
-                    break  # outer while creates a new task
-
-                time.sleep(ONBOARD_POLL_INTERVAL)
-        except KeyboardInterrupt:
-            loop.close()
-            raise
-        finally:
-            loop.close()
-
-    return None
-
-
 def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -3394,6 +3402,8 @@ def gateway_setup():
            _setup_feishu()
        elif platform["key"] == "qqbot":
            _setup_qqbot()
+        elif platform["key"] == "wecom":
+            _setup_wecom()
        else:
            _setup_standard_platform(platform)

@@ -0,0 +1,385 @@
+"""hermes hooks — inspect and manage shell-script hooks.
+
+Usage::
+
+    hermes hooks list
+    hermes hooks test <event> [--for-tool X] [--payload-file F]
+    hermes hooks revoke <command>
+    hermes hooks doctor
+
+Consent records live under ``~/.hermes/shell-hooks-allowlist.json`` and
+hook definitions come from the ``hooks:`` block in ``~/.hermes/config.yaml``
+(the same config read by the CLI / gateway at startup).
+
+This module is a thin CLI shell over :mod:`agent.shell_hooks`; every
+shared concern (payload serialisation, response parsing, allowlist
+format) lives there.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+
+def hooks_command(args) -> None:
+    """Entry point for ``hermes hooks`` — dispatches to the requested action."""
+    sub = getattr(args, "hooks_action", None)
+
+    if not sub:
+        print("Usage: hermes hooks {list|test|revoke|doctor}")
+        print("Run 'hermes hooks --help' for details.")
+        return
+
+    if sub in ("list", "ls"):
+        _cmd_list(args)
+    elif sub == "test":
+        _cmd_test(args)
+    elif sub in ("revoke", "remove", "rm"):
+        _cmd_revoke(args)
+    elif sub == "doctor":
+        _cmd_doctor(args)
+    else:
+        print(f"Unknown hooks subcommand: {sub}")
+
+
+# ---------------------------------------------------------------------------
+# list
+# ---------------------------------------------------------------------------
+
+def _cmd_list(_args) -> None:
+    from hermes_cli.config import load_config
+    from agent import shell_hooks
+
+    specs = shell_hooks.iter_configured_hooks(load_config())
+
+    if not specs:
+        print("No shell hooks configured in ~/.hermes/config.yaml.")
+        print("See `hermes hooks --help` or")
+        print("    website/docs/user-guide/features/hooks.md")
+        print("for the config schema and worked examples.")
+        return
+
+    by_event: Dict[str, List] = {}
+    for spec in specs:
+        by_event.setdefault(spec.event, []).append(spec)
+
+    allowlist = shell_hooks.load_allowlist()
+    approved = {
+        (e.get("event"), e.get("command"))
+        for e in allowlist.get("approvals", [])
+        if isinstance(e, dict)
+    }
+
+    print(f"Configured shell hooks ({len(specs)} total):\n")
+
+    for event in sorted(by_event.keys()):
+        print(f"  [{event}]")
+        for spec in by_event[event]:
+            is_approved = (spec.event, spec.command) in approved
+            status = "✓ allowed" if is_approved else "✗ not allowlisted"
+            matcher_part = f" matcher={spec.matcher!r}" if spec.matcher else ""
+            print(
+                f"    - {spec.command}{matcher_part} "
+                f"(timeout={spec.timeout}s, {status})"
+            )
+
+            if is_approved:
+                entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
+                if entry and entry.get("approved_at"):
+                    print(f"      approved_at: {entry['approved_at']}")
+                    mtime_now = shell_hooks.script_mtime_iso(spec.command)
+                    mtime_at = entry.get("script_mtime_at_approval")
+                    if mtime_now and mtime_at and mtime_now > mtime_at:
+                        print(
+                            f"      ⚠ script modified since approval "
+                            f"(was {mtime_at}, now {mtime_now}) — "
+                            f"run `hermes hooks doctor` to re-validate"
+                        )
+        print()
+
+
+# ---------------------------------------------------------------------------
+# test
+# ---------------------------------------------------------------------------
+
+# Synthetic kwargs matching the real invoke_hook() call sites — these are
+# passed verbatim to agent.shell_hooks.run_once(), which routes them through
+# the same _serialize_payload() that production firings use.  That way the
+# stdin a script sees under `hermes hooks test` and `hermes hooks doctor`
+# is identical in shape to what it will see at runtime.
+_DEFAULT_PAYLOADS = {
+    "pre_tool_call": {
+        "tool_name": "terminal",
+        "args": {"command": "echo hello"},
+        "session_id": "test-session",
+        "task_id": "test-task",
+        "tool_call_id": "test-call",
+    },
+    "post_tool_call": {
+        "tool_name": "terminal",
+        "args": {"command": "echo hello"},
+        "session_id": "test-session",
+        "task_id": "test-task",
+        "tool_call_id": "test-call",
+        "result": '{"output": "hello"}',
+    },
+    "pre_llm_call": {
+        "session_id": "test-session",
+        "user_message": "What is the weather?",
+        "conversation_history": [],
+        "is_first_turn": True,
+        "model": "gpt-4",
+        "platform": "cli",
+    },
+    "post_llm_call": {
+        "session_id": "test-session",
+        "model": "gpt-4",
+        "platform": "cli",
+    },
+    "on_session_start": {"session_id": "test-session"},
+    "on_session_end": {"session_id": "test-session"},
+    "on_session_finalize": {"session_id": "test-session"},
+    "on_session_reset": {"session_id": "test-session"},
+    "pre_api_request": {
+        "session_id": "test-session",
+        "task_id": "test-task",
+        "platform": "cli",
+        "model": "claude-sonnet-4-6",
+        "provider": "anthropic",
+        "base_url": "https://api.anthropic.com",
+        "api_mode": "anthropic_messages",
+        "api_call_count": 1,
+        "message_count": 4,
+        "tool_count": 12,
+        "approx_input_tokens": 2048,
+        "request_char_count": 8192,
+        "max_tokens": 4096,
+    },
+    "post_api_request": {
+        "session_id": "test-session",
+        "task_id": "test-task",
+        "platform": "cli",
+        "model": "claude-sonnet-4-6",
+        "provider": "anthropic",
+        "base_url": "https://api.anthropic.com",
+        "api_mode": "anthropic_messages",
+        "api_call_count": 1,
+        "api_duration": 1.234,
+        "finish_reason": "stop",
+        "message_count": 4,
+        "response_model": "claude-sonnet-4-6",
+        "usage": {"input_tokens": 2048, "output_tokens": 512},
+        "assistant_content_chars": 1200,
+        "assistant_tool_call_count": 0,
+    },
+    "subagent_stop": {
+        "parent_session_id": "parent-sess",
+        "child_role": None,
+        "child_summary": "Synthetic summary for hooks test",
+        "child_status": "completed",
+        "duration_ms": 1234,
+    },
+}
+
+
+def _cmd_test(args) -> None:
+    from hermes_cli.config import load_config
+    from hermes_cli.plugins import VALID_HOOKS
+    from agent import shell_hooks
+
+    event = args.event
+    if event not in VALID_HOOKS:
+        print(f"Unknown event: {event!r}")
+        print(f"Valid events: {', '.join(sorted(VALID_HOOKS))}")
+        return
+
+    # Synthetic kwargs in the same shape invoke_hook() would pass.  Merged
+    # with --for-tool (overrides tool_name) and --payload-file (extra kwargs).
+    payload = dict(_DEFAULT_PAYLOADS.get(event, {"session_id": "test-session"}))
+
+    if getattr(args, "for_tool", None):
+        payload["tool_name"] = args.for_tool
+
+    if getattr(args, "payload_file", None):
+        try:
+            custom = json.loads(Path(args.payload_file).read_text())
+            if isinstance(custom, dict):
+                payload.update(custom)
+            else:
+                print(f"Warning: {args.payload_file} is not a JSON object; ignoring")
+        except Exception as exc:
+            print(f"Error reading payload file: {exc}")
+            return
+
+    specs = shell_hooks.iter_configured_hooks(load_config())
+    specs = [s for s in specs if s.event == event]
+
+    if getattr(args, "for_tool", None):
+        specs = [
+            s for s in specs
+            if s.event not in ("pre_tool_call", "post_tool_call")
+            or s.matches_tool(args.for_tool)
+        ]
+
+    if not specs:
+        print(f"No shell hooks configured for event: {event}")
+        if getattr(args, "for_tool", None):
+            print(f"(with matcher filter --for-tool={args.for_tool})")
+        return
+
+    print(f"Firing {len(specs)} hook(s) for event '{event}':\n")
+    for spec in specs:
+        print(f"  → {spec.command}")
+        result = shell_hooks.run_once(spec, payload)
+        _print_run_result(result)
+        print()
+
+
+def _print_run_result(result: Dict[str, Any]) -> None:
+    if result.get("error"):
+        print(f"      ✗ error: {result['error']}")
+        return
+    if result.get("timed_out"):
+        print(f"      ✗ timed out after {result['elapsed_seconds']}s")
+        return
+
+    rc = result.get("returncode")
+    elapsed = result.get("elapsed_seconds", 0)
+    print(f"      exit={rc}  elapsed={elapsed}s")
+
+    stdout = (result.get("stdout") or "").strip()
+    stderr = (result.get("stderr") or "").strip()
+    if stdout:
+        print(f"      stdout: {_truncate(stdout, 400)}")
+    if stderr:
+        print(f"      stderr: {_truncate(stderr, 400)}")
+
+    parsed = result.get("parsed")
+    if parsed:
+        print(f"      parsed (Hermes wire shape): {json.dumps(parsed)}")
+    else:
+        print("      parsed: <none — hook contributed nothing to the dispatcher>")
+
+
+def _truncate(s: str, n: int) -> str:
+    return s if len(s) <= n else s[: n - 3] + "..."
+
+
+# ---------------------------------------------------------------------------
+# revoke
+# ---------------------------------------------------------------------------
+
+def _cmd_revoke(args) -> None:
+    from agent import shell_hooks
+
+    removed = shell_hooks.revoke(args.command)
+    if removed == 0:
+        print(f"No allowlist entry found for command: {args.command}")
+        return
+    print(f"Removed {removed} allowlist entry/entries for: {args.command}")
+    print(
+        "Note: currently running CLI / gateway processes keep their "
+        "already-registered callbacks until they restart."
+    )
+
+
+# ---------------------------------------------------------------------------
+# doctor
+# ---------------------------------------------------------------------------
+
+def _cmd_doctor(_args) -> None:
+    from hermes_cli.config import load_config
+    from agent import shell_hooks
+
+    specs = shell_hooks.iter_configured_hooks(load_config())
+
+    if not specs:
+        print("No shell hooks configured — nothing to check.")
+        return
+
+    print(f"Checking {len(specs)} configured shell hook(s)...\n")
+
+    problems = 0
+    for spec in specs:
+        print(f"  [{spec.event}] {spec.command}")
+        problems += _doctor_one(spec, shell_hooks)
+        print()
+
+    if problems:
+        print(f"{problems} issue(s) found.  Fix before relying on these hooks.")
+    else:
+        print("All shell hooks look healthy.")
+
+
+def _doctor_one(spec, shell_hooks) -> int:
+    problems = 0
+
+    # 1. Script exists and is executable
+    if shell_hooks.script_is_executable(spec.command):
+        print("      ✓ script exists and is executable")
+    else:
+        problems += 1
+        print("      ✗ script missing or not executable "
+              "(chmod +x the file, or fix the path)")
+
+    # 2. Allowlist status
+    entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
+    if entry:
+        print(f"      ✓ allowlisted (approved {entry.get('approved_at', '?')})")
+    else:
+        problems += 1
+        print("      ✗ not allowlisted — hook will NOT fire at runtime "
+              "(run with --accept-hooks once, or confirm at the TTY prompt)")
+
+    # 3. Mtime drift
+    if entry and entry.get("script_mtime_at_approval"):
+        mtime_now = shell_hooks.script_mtime_iso(spec.command)
+        mtime_at = entry["script_mtime_at_approval"]
+        if mtime_now and mtime_at and mtime_now > mtime_at:
+            problems += 1
+            print(f"      ⚠ script modified since approval "
+                  f"(was {mtime_at}, now {mtime_now}) — review changes, "
+                  f"then `hermes hooks revoke` + re-approve to refresh")
+        elif mtime_now and mtime_at and mtime_now == mtime_at:
+            print("      ✓ script unchanged since approval")
+
+    # 4. Produces valid JSON for a synthetic payload — only when the entry
+    # is already allowlisted.  Otherwise `hermes hooks doctor` would execute
+    # every script listed in a freshly-pulled config before the user has
+    # reviewed them, which directly contradicts the documented workflow
+    # ("spot newly-added hooks *before they register*").
+    if not entry:
+        print("      ℹ skipped JSON smoke test — not allowlisted yet. "
+              "Approve the hook first (via TTY prompt or --accept-hooks), "
+              "then re-run `hermes hooks doctor`.")
+    elif shell_hooks.script_is_executable(spec.command):
+        payload = _DEFAULT_PAYLOADS.get(spec.event, {"extra": {}})
+        result = shell_hooks.run_once(spec, payload)
+        if result.get("timed_out"):
+            problems += 1
+            print(f"      ✗ timed out after {result['elapsed_seconds']}s "
+                  f"on synthetic payload (timeout={spec.timeout}s)")
+        elif result.get("error"):
+            problems += 1
+            print(f"      ✗ execution error: {result['error']}")
+        else:
+            rc = result.get("returncode")
+            elapsed = result.get("elapsed_seconds", 0)
+            stdout = (result.get("stdout") or "").strip()
+            if stdout:
+                try:
+                    json.loads(stdout)
+                    print(f"      ✓ produced valid JSON on synthetic payload "
+                          f"(exit={rc}, {elapsed}s)")
+                except json.JSONDecodeError:
+                    problems += 1
+                    print(f"      ✗ stdout was not valid JSON (exit={rc}, "
+                          f"{elapsed}s): {_truncate(stdout, 120)}")
+            else:
+                print(f"      ✓ ran clean with empty stdout "
+                      f"(exit={rc}, {elapsed}s) — hook is observer-only")
+
+    return problems
@@ -51,6 +51,19 @@ import sys
 from pathlib import Path
 from typing import Optional

+def _add_accept_hooks_flag(parser) -> None:
+    """Attach the ``--accept-hooks`` flag.  Shared across every agent
+    subparser so the flag works regardless of CLI position."""
+    parser.add_argument(
+        "--accept-hooks",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help=(
+            "Auto-approve unseen shell hooks without a TTY prompt "
+            "(equivalent to HERMES_ACCEPT_HOOKS=1 / hooks_auto_accept: true)."
+        ),
+    )
+

 def _require_tty(command_name: str) -> None:
    """Exit with a clear error if stdin is not a terminal.
@@ -180,7 +193,7 @@ import time as _time
 from datetime import datetime

 from hermes_cli import __version__, __release_date__
-from hermes_constants import OPENROUTER_BASE_URL
+from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)

@@ -605,7 +618,6 @@ def _exec_in_container(container_info: dict, cli_args: list):
        container_info: dict with backend, container_name, exec_user, hermes_bin
        cli_args: the original CLI arguments (everything after 'hermes')
    """
-    import shutil

    backend = container_info["backend"]
    container_name = container_info["container_name"]
@@ -1003,6 +1015,17 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
    )
    env.setdefault("HERMES_PYTHON", sys.executable)
    env.setdefault("HERMES_CWD", os.getcwd())
+    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
+    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
+    # large transcripts / reasoning blobs. Token-level merge: respect any
+    # user-supplied --max-old-space-size (they may have set it higher) and
+    # avoid duplicating --expose-gc.
+    _tokens = env.get("NODE_OPTIONS", "").split()
+    if not any(t.startswith("--max-old-space-size=") for t in _tokens):
+        _tokens.append("--max-old-space-size=8192")
+    if "--expose-gc" not in _tokens:
+        _tokens.append("--expose-gc")
+    env["NODE_OPTIONS"] = " ".join(_tokens)
    if resume_session_id:
        env["HERMES_TUI_RESUME"] = resume_session_id

@@ -1157,8 +1180,6 @@ def cmd_gateway(args):
 def cmd_whatsapp(args):
    """Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
    _require_tty("whatsapp")
-    import subprocess
-    from pathlib import Path
    from hermes_cli.config import get_env_value, save_env_value

    print()
@@ -1267,16 +1288,27 @@ def cmd_whatsapp(args):
        return

    if not (bridge_dir / "node_modules").exists():
-        print("\n→ Installing WhatsApp bridge dependencies...")
-        result = subprocess.run(
-            ["npm", "install"],
-            cwd=str(bridge_dir),
-            capture_output=True,
-            text=True,
-            timeout=120,
-        )
+        print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
+        npm = shutil.which("npm")
+        if not npm:
+            print("  ✗ npm not found on PATH — install Node.js first")
+            return
+        try:
+            result = subprocess.run(
+                [npm, "install", "--no-fund", "--no-audit", "--progress=false"],
+                cwd=str(bridge_dir),
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.PIPE,
+                text=True,
+            )
+        except KeyboardInterrupt:
+            print("\n  ✗ Install cancelled")
+            return
        if result.returncode != 0:
-            print(f"  ✗ npm install failed: {result.stderr}")
+            err = (result.stderr or "").strip()
+            preview = "\n".join(err.splitlines()[-30:]) if err else "(no output)"
+            print("  ✗ npm install failed:")
+            print(preview)
            return
        print("  ✓ Dependencies installed")
    else:
@@ -1295,8 +1327,6 @@ def cmd_whatsapp(args):
        except (EOFError, KeyboardInterrupt):
            response = "n"
        if response.lower() in ("y", "yes"):
-            import shutil
-
            shutil.rmtree(session_dir, ignore_errors=True)
            session_dir.mkdir(parents=True, exist_ok=True)
            print("  ✓ Session cleared")
@@ -1392,8 +1422,6 @@ def select_provider_and_model(args=None):

    # Read effective provider the same way the CLI does at startup:
    # config.yaml model.provider > env var > auto-detect
-    import os
-
    config_provider = None
    model_cfg = config.get("model")
    if isinstance(model_cfg, dict):
@@ -1504,6 +1532,8 @@ def select_provider_and_model(args=None):
    # Step 2: Provider-specific setup + model selection
    if selected_provider == "openrouter":
        _model_flow_openrouter(config, current_model)
+    elif selected_provider == "ai-gateway":
+        _model_flow_ai_gateway(config, current_model)
    elif selected_provider == "nous":
        _model_flow_nous(config, current_model, args=args)
    elif selected_provider == "openai-codex":
@@ -1536,8 +1566,12 @@ def select_provider_and_model(args=None):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
+    elif selected_provider == "stepfun":
+        _model_flow_stepfun(config, current_model)
    elif selected_provider == "bedrock":
        _model_flow_bedrock(config, current_model)
+    elif selected_provider in ("volcengine", "byteplus"):
+        _model_flow_contract_provider(config, selected_provider, current_model)
    elif selected_provider in (
        "gemini",
        "deepseek",
@@ -1549,7 +1583,6 @@ def select_provider_and_model(args=None):
        "kilocode",
        "opencode-zen",
        "opencode-go",
-        "ai-gateway",
        "alibaba",
        "huggingface",
        "xiaomi",
@@ -1923,7 +1956,7 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
    print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else ""))


-def _prompt_provider_choice(choices, *, default=0):
+def _prompt_provider_choice(choices, *, default=0, title="Select provider:"):
    """Show provider selection menu with curses arrow-key navigation.

    Falls back to a numbered list when curses is unavailable (e.g. piped
@@ -1932,8 +1965,7 @@ def _prompt_provider_choice(choices, *, default=0):
    """
    try:
        from hermes_cli.setup import _curses_prompt_choice
-
-        idx = _curses_prompt_choice("Select provider:", choices, default)
+        idx = _curses_prompt_choice(title, choices, default)
        if idx >= 0:
            print()
            return idx
@@ -1941,7 +1973,7 @@ def _prompt_provider_choice(choices, *, default=0):
        pass

    # Fallback: numbered list
-    print("Select provider:")
+    print(title)
    for i, c in enumerate(choices, 1):
        marker = "→" if i - 1 == default else " "
        print(f"  {marker} {i}. {c}")
@@ -2021,6 +2053,63 @@ def _model_flow_openrouter(config, current_model=""):
        print("No change.")


+def _model_flow_ai_gateway(config, current_model=""):
+    """Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
+    from hermes_cli.auth import (
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, save_env_value
+
+    api_key = get_env_value("AI_GATEWAY_API_KEY")
+    if not api_key:
+        print("No Vercel AI Gateway API key configured.")
+        print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
+        print("Add a payment method to get $5 in free credits.")
+        print()
+        try:
+            import getpass
+
+            key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip()
+        except (KeyboardInterrupt, EOFError):
+            print()
+            return
+        if not key:
+            print("Cancelled.")
+            return
+        save_env_value("AI_GATEWAY_API_KEY", key)
+        print("API key saved.")
+        print()
+
+    from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider
+
+    models_list = ai_gateway_model_ids(force_refresh=True)
+    pricing = get_pricing_for_provider("ai-gateway", force_refresh=True)
+
+    selected = _prompt_model_selection(
+        models_list, current_model=current_model, pricing=pricing
+    )
+    if selected:
+        _save_model_choice(selected)
+
+        from hermes_cli.config import load_config, save_config
+
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = "ai-gateway"
+        model["base_url"] = AI_GATEWAY_BASE_URL
+        model["api_mode"] = "chat_completions"
+        save_config(cfg)
+        deactivate_provider()
+        print(f"Default model set to: {selected} (via Vercel AI Gateway)")
+    else:
+        print("No change.")
+
+
 def _model_flow_nous(config, current_model="", args=None):
    """Nous Portal provider: ensure logged in, then pick model."""
    from hermes_cli.auth import (
@@ -2041,7 +2130,6 @@ def _model_flow_nous(config, current_model="", args=None):
        save_env_value,
    )
    from hermes_cli.nous_subscription import prompt_enable_tool_gateway
-    import argparse

    state = get_provider_auth_state("nous")
    if not state or not state.get("access_token"):
@@ -2080,7 +2168,6 @@ def _model_flow_nous(config, current_model="", args=None):
    from hermes_cli.models import (
        _PROVIDER_MODELS,
        get_pricing_for_provider,
-        filter_nous_free_models,
        check_nous_free_tier,
        partition_nous_models_by_tier,
    )
@@ -2123,10 +2210,8 @@ def _model_flow_nous(config, current_model="", args=None):
    # Check if user is on free tier
    free_tier = check_nous_free_tier()

-    # For both tiers: apply the allowlist filter first (removes non-allowlisted
-    # free models and allowlist models that aren't actually free).
-    # Then for free users: partition remaining models into selectable/unavailable.
-    model_ids = filter_nous_free_models(model_ids, pricing)
+    # For free users: partition models into selectable/unavailable based on
+    # whether they are free per the Portal-reported pricing.
    unavailable_models: list[str] = []
    if free_tier:
        model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -2209,7 +2294,6 @@ def _model_flow_openai_codex(config, current_model=""):
        DEFAULT_CODEX_BASE_URL,
    )
    from hermes_cli.codex_models import get_codex_model_ids
-    import argparse

    status = get_codex_auth_status()
    if not status.get("logged_in"):
@@ -2861,6 +2945,10 @@ def _model_flow_named_custom(config, provider_info):

 # Curated model lists for direct API-key providers — single source in models.py
 from hermes_cli.models import _PROVIDER_MODELS
+from hermes_cli.volcengine_byteplus import (
+    base_url_for_provider_model,
+    provider_models,
+)


 def _current_reasoning_effort(config) -> str:
@@ -3340,8 +3428,9 @@ def _model_flow_kimi(config, current_model=""):

    # Step 3: Model selection — show appropriate models for the endpoint
    if is_coding_plan:
-        # Coding Plan models (kimi-k2.5 first)
+        # Coding Plan models (kimi-k2.6 first)
        model_list = [
+            "kimi-k2.6",
            "kimi-k2.5",
            "kimi-for-coding",
            "kimi-k2-thinking",
@@ -3380,6 +3469,140 @@ def _model_flow_kimi(config, current_model=""):
        print("No change.")


+def _infer_stepfun_region(base_url: str) -> str:
+    """Infer the current StepFun region from the configured endpoint."""
+    normalized = (base_url or "").strip().lower()
+    if "api.stepfun.com" in normalized:
+        return "china"
+    return "international"
+
+
+def _stepfun_base_url_for_region(region: str) -> str:
+    from hermes_cli.auth import (
+        STEPFUN_STEP_PLAN_CN_BASE_URL,
+        STEPFUN_STEP_PLAN_INTL_BASE_URL,
+    )
+
+    return (
+        STEPFUN_STEP_PLAN_CN_BASE_URL
+        if region == "china"
+        else STEPFUN_STEP_PLAN_INTL_BASE_URL
+    )
+
+
+def _model_flow_stepfun(config, current_model=""):
+    """StepFun Step Plan flow with region-specific endpoints."""
+    from hermes_cli.auth import (
+        PROVIDER_REGISTRY,
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+    from hermes_cli.models import fetch_api_models
+
+    provider_id = "stepfun"
+    pconfig = PROVIDER_REGISTRY[provider_id]
+    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
+    base_url_env = pconfig.base_url_env_var or ""
+
+    existing_key = ""
+    for ev in pconfig.api_key_env_vars:
+        existing_key = get_env_value(ev) or os.getenv(ev, "")
+        if existing_key:
+            break
+
+    if not existing_key:
+        print(f"No {pconfig.name} API key configured.")
+        if key_env:
+            try:
+                import getpass
+                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
+            except (KeyboardInterrupt, EOFError):
+                print()
+                return
+            if not new_key:
+                print("Cancelled.")
+                return
+            save_env_value(key_env, new_key)
+            existing_key = new_key
+            print("API key saved.")
+            print()
+    else:
+        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
+        print()
+
+    current_base = ""
+    if base_url_env:
+        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
+    if not current_base:
+        model_cfg = config.get("model")
+        if isinstance(model_cfg, dict):
+            current_base = str(model_cfg.get("base_url") or "").strip()
+    current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
+
+    region_choices = [
+        ("international", f"International ({_stepfun_base_url_for_region('international')})"),
+        ("china", f"China ({_stepfun_base_url_for_region('china')})"),
+    ]
+    ordered_regions = []
+    for region_key, label in region_choices:
+        if region_key == current_region:
+            ordered_regions.insert(0, (region_key, f"{label}  ← currently active"))
+        else:
+            ordered_regions.append((region_key, label))
+    ordered_regions.append(("cancel", "Cancel"))
+
+    region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
+    if region_idx is None or ordered_regions[region_idx][0] == "cancel":
+        print("No change.")
+        return
+
+    selected_region = ordered_regions[region_idx][0]
+    effective_base = _stepfun_base_url_for_region(selected_region)
+    if base_url_env:
+        save_env_value(base_url_env, effective_base)
+
+    live_models = fetch_api_models(existing_key, effective_base)
+    if live_models:
+        model_list = live_models
+        print(f"  Found {len(model_list)} model(s) from {pconfig.name} API")
+    else:
+        model_list = _PROVIDER_MODELS.get(provider_id, [])
+        if model_list:
+            print(
+                f"  Could not auto-detect models from {pconfig.name} API — "
+                "showing Step Plan fallback catalog."
+            )
+
+    if model_list:
+        selected = _prompt_model_selection(model_list, current_model=current_model)
+    else:
+        try:
+            selected = input("Model name: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            selected = None
+
+    if selected:
+        _save_model_choice(selected)
+
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = provider_id
+        model["base_url"] = effective_base
+        model.pop("api_mode", None)
+        save_config(cfg)
+        deactivate_provider()
+
+        config["model"] = dict(model)
+        print(f"Default model set to: {selected} (via {pconfig.name})")
+    else:
+        print("No change.")
+
+
 def _model_flow_bedrock_api_key(config, region, current_model=""):
    """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.

@@ -3815,6 +4038,70 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        print("No change.")


+def _model_flow_contract_provider(config, provider_id, current_model=""):
+    """Provider flow for Volcengine / BytePlus contract-backed catalogs."""
+    from hermes_cli.auth import (
+        PROVIDER_REGISTRY,
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, load_config, save_config, save_env_value
+
+    pconfig = PROVIDER_REGISTRY[provider_id]
+    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
+    existing_key = ""
+    for env_var in pconfig.api_key_env_vars:
+        existing_key = get_env_value(env_var) or os.getenv(env_var, "")
+        if existing_key:
+            break
+
+    if not existing_key:
+        print(f"No {pconfig.name} API key configured.")
+        if key_env:
+            try:
+                import getpass
+
+                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
+            except (KeyboardInterrupt, EOFError):
+                print()
+                return
+            if not new_key:
+                print("Cancelled.")
+                return
+            save_env_value(key_env, new_key)
+            print("API key saved.")
+            print()
+    else:
+        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
+        print()
+
+    model_list = provider_models(provider_id)
+    if not model_list:
+        print(f"No curated model catalog found for {pconfig.name}.")
+        return
+
+    selected = _prompt_model_selection(model_list, current_model=current_model)
+    if not selected:
+        print("No change.")
+        return
+
+    _save_model_choice(selected)
+
+    cfg = load_config()
+    model = cfg.get("model")
+    if not isinstance(model, dict):
+        model = {"default": model} if model else {}
+        cfg["model"] = model
+    model["provider"] = provider_id
+    model["base_url"] = base_url_for_provider_model(provider_id, selected)
+    model.pop("api_mode", None)
+    save_config(cfg)
+    deactivate_provider()
+
+    print(f"Default model set to: {selected} (via {pconfig.name})")
+
+
 def _run_anthropic_oauth_flow(save_env_value):
    """Run the Claude OAuth setup-token flow. Returns True if credentials were saved."""
    from agent.anthropic_adapter import (
@@ -4080,6 +4367,12 @@ def cmd_webhook(args):
    webhook_command(args)


+def cmd_hooks(args):
+    """Shell-hook inspection and management."""
+    from hermes_cli.hooks import hooks_command
+    hooks_command(args)
+
+
 def cmd_doctor(args):
    """Check configuration and dependencies."""
    from hermes_cli.doctor import run_doctor
@@ -4189,9 +4482,7 @@ def _clear_bytecode_cache(root: Path) -> int:
        ]
        if os.path.basename(dirpath) == "__pycache__":
            try:
-                import shutil as _shutil
-
-                _shutil.rmtree(dirpath)
+                shutil.rmtree(dirpath)
                removed += 1
            except OSError:
                pass
@@ -4230,8 +4521,6 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
    tmp.replace(prompt_path)

    # Poll for response
-    import time as _time
-
    deadline = _time.monotonic() + timeout
    while _time.monotonic() < deadline:
        if response_path.exists():
@@ -4263,7 +4552,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
    """
    if not (web_dir / "package.json").exists():
        return True
-    import shutil

    npm = shutil.which("npm")
    if not npm:
@@ -4300,7 +4588,6 @@ def _update_via_zip(args):
    Used on Windows when git file I/O is broken (antivirus, NTFS filter
    drivers causing 'Invalid argument' errors on file creation).
    """
-    import shutil
    import tempfile
    import zipfile
    from urllib.request import urlretrieve
@@ -4377,7 +4664,6 @@ def _update_via_zip(args):
    # breaks on this machine, keep base deps and reinstall the remaining extras
    # individually so update does not silently strip working capabilities.
    print("→ Updating Python dependencies...")
-    import subprocess

    uv_bin = shutil.which("uv")
    if uv_bin:
@@ -5128,9 +5414,11 @@ def _install_hangup_protection(gateway_mode: bool = False):
    # (2) Mirror output to update.log and wrap stdio for broken-pipe
    # tolerance.  Any failure here is non-fatal; we just skip the wrap.
    try:
-        from hermes_cli.config import get_hermes_home
+        # Late-bound import so tests can monkeypatch
+        # hermes_cli.config.get_hermes_home to simulate setup failure.
+        from hermes_cli.config import get_hermes_home as _get_hermes_home

-        logs_dir = get_hermes_home() / "logs"
+        logs_dir = _get_hermes_home() / "logs"
        logs_dir.mkdir(parents=True, exist_ok=True)
        log_path = logs_dir / "update.log"
        log_file = open(log_path, "a", buffering=1, encoding="utf-8")
@@ -5705,8 +5993,6 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                    # Verify the service actually survived the
                                    # restart.  systemctl restart returns 0 even
                                    # if the new process crashes immediately.
-                                    import time as _time
-
                                    _time.sleep(3)
                                    verify = subprocess.run(
                                        scope_cmd + ["is-active", svc_name],
@@ -6359,6 +6645,17 @@ For more help on a command:
        default=False,
        help="Run in an isolated git worktree (for parallel agents)",
    )
+    parser.add_argument(
+        "--accept-hooks",
+        action="store_true",
+        default=False,
+        help=(
+            "Auto-approve any unseen shell hooks declared in config.yaml "
+            "without a TTY prompt.  Equivalent to HERMES_ACCEPT_HOOKS=1 or "
+            "hooks_auto_accept: true in config.yaml.  Use on CI / headless "
+            "runs that can't prompt."
+        ),
+    )
    parser.add_argument(
        "--skills",
        "-s",
@@ -6438,6 +6735,7 @@ For more help on a command:
            "zai",
            "kimi-coding",
            "kimi-coding-cn",
+            "stepfun",
            "minimax",
            "minimax-cn",
            "kilocode",
@@ -6481,6 +6779,16 @@ For more help on a command:
        default=argparse.SUPPRESS,
        help="Run in an isolated git worktree (for parallel agents on the same repo)",
    )
+    chat_parser.add_argument(
+        "--accept-hooks",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help=(
+            "Auto-approve any unseen shell hooks declared in config.yaml "
+            "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
+            "hooks_auto_accept: in config.yaml)."
+        ),
+    )
    chat_parser.add_argument(
        "--checkpoints",
        action="store_true",
@@ -6600,6 +6908,8 @@ For more help on a command:
        action="store_true",
        help="Replace any existing gateway instance (useful for systemd)",
    )
+    _add_accept_hooks_flag(gateway_run)
+    _add_accept_hooks_flag(gateway_parser)

    # gateway start
    gateway_start = gateway_subparsers.add_parser(
@@ -6964,6 +7274,7 @@ For more help on a command:
        "run", help="Run a job on the next scheduler tick"
    )
    cron_run.add_argument("job_id", help="Job ID to trigger")
+    _add_accept_hooks_flag(cron_run)

    cron_remove = cron_subparsers.add_parser(
        "remove", aliases=["rm", "delete"], help="Remove a scheduled job"
@@ -6974,8 +7285,9 @@ For more help on a command:
    cron_subparsers.add_parser("status", help="Check if cron scheduler is running")

    # cron tick (mostly for debugging)
-    cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
-
+    cron_tick = cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
+    _add_accept_hooks_flag(cron_tick)
+    _add_accept_hooks_flag(cron_parser)
    cron_parser.set_defaults(func=cmd_cron)

    # =========================================================================
@@ -7042,6 +7354,67 @@ For more help on a command:

    webhook_parser.set_defaults(func=cmd_webhook)

+    # =========================================================================
+    # hooks command — shell-hook inspection and management
+    # =========================================================================
+    hooks_parser = subparsers.add_parser(
+        "hooks",
+        help="Inspect and manage shell-script hooks",
+        description=(
+            "Inspect shell-script hooks declared in ~/.hermes/config.yaml, "
+            "test them against synthetic payloads, and manage the first-use "
+            "consent allowlist at ~/.hermes/shell-hooks-allowlist.json."
+        ),
+    )
+    hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")
+
+    hooks_subparsers.add_parser(
+        "list", aliases=["ls"],
+        help="List configured hooks with matcher, timeout, and consent status",
+    )
+
+    _hk_test = hooks_subparsers.add_parser(
+        "test",
+        help="Fire every hook matching <event> against a synthetic payload",
+    )
+    _hk_test.add_argument(
+        "event",
+        help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
+    )
+    _hk_test.add_argument(
+        "--for-tool", dest="for_tool", default=None,
+        help=(
+            "Only fire hooks whose matcher matches this tool name "
+            "(used for pre_tool_call / post_tool_call)"
+        ),
+    )
+    _hk_test.add_argument(
+        "--payload-file", dest="payload_file", default=None,
+        help=(
+            "Path to a JSON file whose contents are merged into the "
+            "synthetic payload before execution"
+        ),
+    )
+
+    _hk_revoke = hooks_subparsers.add_parser(
+        "revoke", aliases=["remove", "rm"],
+        help="Remove a command's allowlist entries (takes effect on next restart)",
+    )
+    _hk_revoke.add_argument(
+        "command",
+        help="The exact command string to revoke (as declared in config.yaml)",
+    )
+
+    hooks_subparsers.add_parser(
+        "doctor",
+        help=(
+            "Check each configured hook: exec bit, allowlist, mtime drift, "
+            "JSON validity, and synthetic run timing"
+        ),
+    )
+
+    hooks_parser.set_defaults(func=cmd_hooks)
+
    # =========================================================================
    # doctor command
    # =========================================================================
@@ -7507,9 +7880,7 @@ Examples:
            )
            cmd_info["setup_fn"](plugin_parser)
    except Exception as _exc:
-        import logging as _log
-
-        _log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
+        logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)

    # =========================================================================
    # memory command
@@ -7715,6 +8086,7 @@ Examples:
        action="store_true",
        help="Enable verbose logging on stderr",
    )
+    _add_accept_hooks_flag(mcp_serve_p)

    mcp_add_p = mcp_sub.add_parser(
        "add", help="Add an MCP server (discovery-first install)"
@@ -7753,6 +8125,8 @@ Examples:
    )
    mcp_login_p.add_argument("name", help="Server name to re-authenticate")

+    _add_accept_hooks_flag(mcp_parser)
+
    def cmd_mcp(args):
        from hermes_cli.mcp_config import mcp_command

@@ -7891,7 +8265,6 @@ Examples:
                    return
                line = _json.dumps(data, ensure_ascii=False) + "\n"
                if args.output == "-":
-                    import sys

                    sys.stdout.write(line)
                else:
@@ -7901,7 +8274,6 @@ Examples:
            else:
                sessions = db.export_all(source=args.source)
                if args.output == "-":
-                    import sys

                    for s in sessions:
                        sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n")
@@ -7972,8 +8344,6 @@ Examples:

            # Launch hermes --resume <id> by replacing the current process
            print(f"Resuming session: {selected_id}")
-            import shutil
-
            hermes_bin = shutil.which("hermes")
            if hermes_bin:
                os.execvp(hermes_bin, ["hermes", "--resume", selected_id])
@@ -8164,6 +8534,7 @@ Examples:
        help="Run Hermes Agent as an ACP (Agent Client Protocol) server",
        description="Start Hermes Agent in ACP mode for editor integration (VS Code, Zed, JetBrains)",
    )
+    _add_accept_hooks_flag(acp_parser)

    def cmd_acp(args):
        """Launch Hermes Agent as an ACP server."""
@@ -8437,6 +8808,42 @@ Examples:
        cmd_version(args)
        return

+    # Discover Python plugins and register shell hooks once, before any
+    # command that can fire lifecycle hooks.  Both are idempotent; gated
+    # so introspection/management commands (hermes hooks list, cron
+    # list, gateway status, mcp add, ...) don't pay discovery cost or
+    # trigger consent prompts for hooks the user is still inspecting.
+    # Groups with mixed admin/CRUD vs. agent-running entries narrow via
+    # the nested subcommand (dest varies by parser).
+    _AGENT_COMMANDS = {None, "chat", "acp", "rl"}
+    _AGENT_SUBCOMMANDS = {
+        "cron":    ("cron_command",    {"run", "tick"}),
+        "gateway": ("gateway_command", {"run"}),
+        "mcp":     ("mcp_action",      {"serve"}),
+    }
+    _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
+    if (
+        args.command in _AGENT_COMMANDS
+        or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
+    ):
+        _accept_hooks = bool(getattr(args, "accept_hooks", False))
+        try:
+            from hermes_cli.plugins import discover_plugins
+            discover_plugins()
+        except Exception:
+            logger.debug(
+                "plugin discovery failed at CLI startup", exc_info=True,
+            )
+        try:
+            from hermes_cli.config import load_config
+            from agent.shell_hooks import register_from_config
+            register_from_config(load_config(), accept_hooks=_accept_hooks)
+        except Exception:
+            logger.debug(
+                "shell-hook registration failed at CLI startup",
+                exc_info=True,
+            )
+
    # Handle top-level --resume / --continue as shortcut to chat
    if (args.resume or args.continue_last) and args.command is None:
        args.command = "chat"
@@ -97,6 +97,8 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "xiaomi",
    "arcee",
    "ollama-cloud",
+    "volcengine",
+    "byteplus",
    "custom",
 })

@@ -423,4 +425,3 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
 # ---------------------------------------------------------------------------
 # Batch / convenience helpers
 # ---------------------------------------------------------------------------
-
@@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = {
    # Z.AI / GLM
    "glm":       ModelIdentity("z-ai", "glm"),

-    # StepFun
+    # Step Plan (StepFun)
    "step":      ModelIdentity("stepfun", "step"),

    # Xiaomi
@@ -678,6 +678,7 @@ def switch_model(
        _da = DIRECT_ALIASES.get(resolved_alias)
        if _da is not None and _da.base_url:
            base_url = _da.base_url
+            api_mode = ""  # clear so determine_api_mode re-detects from URL
            if not api_key:
                api_key = "no-key-required"

@@ -1095,6 +1096,7 @@ def list_authenticated_providers(
                "api_url": api_url,
            })
            seen_slugs.add(ep_name.lower())
+            seen_slugs.add(custom_provider_slug(display_name).lower())
            _pair = (
                str(display_name).strip().lower(),
                str(api_url).strip().rstrip("/").lower(),
@@ -22,6 +22,12 @@ from hermes_cli import __version__ as _HERMES_VERSION
 # Check (error 1010) don't reject the default ``Python-urllib/*`` signature.
 _HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"

+from hermes_cli.volcengine_byteplus import (
+    BYTEPLUS_PROVIDER,
+    VOLCENGINE_PROVIDER,
+    provider_models,
+)
+
 COPILOT_BASE_URL = "https://api.githubcopilot.com"
 COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
 COPILOT_EDITOR_VERSION = "vscode/1.104.1"
@@ -32,7 +38,7 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
-    ("moonshotai/kimi-k2.5",            "recommended"),
+    ("moonshotai/kimi-k2.6",            "recommended"),
    ("anthropic/claude-opus-4.7",       ""),
    ("anthropic/claude-opus-4.6",       ""),
    ("anthropic/claude-sonnet-4.6",     ""),
@@ -53,6 +59,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("stepfun/step-3.5-flash",          ""),
    ("minimax/minimax-m2.7",            ""),
    ("minimax/minimax-m2.5",            ""),
+    ("minimax/minimax-m2.5:free",       "free"),
    ("z-ai/glm-5.1",                    ""),
    ("z-ai/glm-5v-turbo",               ""),
    ("z-ai/glm-5-turbo",                ""),
@@ -68,6 +75,31 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
 _openrouter_catalog_cache: list[tuple[str, str]] | None = None


+# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
+# OSS / open-weight models prioritized first, then closed-source by family.
+# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen,
+# zai/ and xai/ without hyphens).
+VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
+    ("moonshotai/kimi-k2.6",                 "recommended"),
+    ("alibaba/qwen3.6-plus",                 ""),
+    ("zai/glm-5.1",                          ""),
+    ("minimax/minimax-m2.7",                 ""),
+    ("anthropic/claude-sonnet-4.6",          ""),
+    ("anthropic/claude-opus-4.7",            ""),
+    ("anthropic/claude-opus-4.6",            ""),
+    ("anthropic/claude-haiku-4.5",           ""),
+    ("openai/gpt-5.4",                       ""),
+    ("openai/gpt-5.4-mini",                  ""),
+    ("openai/gpt-5.3-codex",                 ""),
+    ("google/gemini-3.1-pro-preview",        ""),
+    ("google/gemini-3-flash",                ""),
+    ("google/gemini-3.1-flash-lite-preview", ""),
+    ("xai/grok-4.20-reasoning",              ""),
+]
+
+_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None
+
+
 def _codex_curated_models() -> list[str]:
    """Derive the openai-codex curated list from codex_models.py.

@@ -81,7 +113,7 @@ def _codex_curated_models() -> list[str]:

 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
-        "moonshotai/kimi-k2.5",
+        "moonshotai/kimi-k2.6",
        "xiaomi/mimo-v2-pro",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
@@ -100,17 +132,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "stepfun/step-3.5-flash",
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
+        "minimax/minimax-m2.5:free",
        "z-ai/glm-5.1",
        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
-        "nvidia/nemotron-3-super-120b-a12b:free",
-        "arcee-ai/trinity-large-preview:free",
        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
-        "openrouter/elephant-alpha",
    ],
    "openai-codex": _codex_curated_models(),
    "copilot-acp": [
@@ -165,12 +195,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        # (map to OpenRouter defaults — users get familiar picks on NIM)
        "qwen/qwen3.5-397b-a17b",
        "deepseek-ai/deepseek-v3.2",
-        "moonshotai/kimi-k2.5",
+        "moonshotai/kimi-k2.6",
        "minimaxai/minimax-m2.5",
        "z-ai/glm5",
        "openai/gpt-oss-120b",
    ],
    "kimi-coding": [
+        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-for-coding",
        "kimi-k2-thinking",
@@ -179,12 +210,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-0905-preview",
    ],
    "kimi-coding-cn": [
+        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
+    "stepfun": [
+        "step-3.5-flash",
+        "step-3.5-flash-2603",
+    ],
    "moonshot": [
+        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
@@ -264,6 +301,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "big-pickle",
    ],
    "opencode-go": [
+        "kimi-k2.6",
        "kimi-k2.5",
        "glm-5.1",
        "glm-5",
@@ -271,20 +309,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "minimax-m2.7",
        "minimax-m2.5",
-    ],
-    "ai-gateway": [
-        "anthropic/claude-opus-4.6",
-        "anthropic/claude-sonnet-4.6",
-        "anthropic/claude-sonnet-4.5",
-        "anthropic/claude-haiku-4.5",
-        "openai/gpt-5",
-        "openai/gpt-4.1",
-        "openai/gpt-4.1-mini",
-        "google/gemini-3-pro-preview",
-        "google/gemini-3-flash",
-        "google/gemini-2.5-pro",
-        "google/gemini-2.5-flash",
-        "deepseek/deepseek-v3.2",
+        "qwen3.6-plus",
+        "qwen3.5-plus",
    ],
    "kilocode": [
        "anthropic/claude-opus-4.6",
@@ -318,6 +344,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "zai-org/GLM-5",
        "XiaomiMiMo/MiMo-V2-Flash",
        "moonshotai/Kimi-K2-Thinking",
+        "moonshotai/Kimi-K2.6",
    ],
    # AWS Bedrock — static fallback list used when dynamic discovery is
    # unavailable (no boto3, no credentials, or API error).  The agent
@@ -335,20 +362,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "us.meta.llama4-maverick-17b-instruct-v1:0",
        "us.meta.llama4-scout-17b-instruct-v1:0",
    ],
+    VOLCENGINE_PROVIDER: provider_models(VOLCENGINE_PROVIDER),
+    BYTEPLUS_PROVIDER: provider_models(BYTEPLUS_PROVIDER),
 }

+# Vercel AI Gateway: derive the bare-model-id catalog from the curated
+# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions)
+# and the static fallback catalog (bare ids) stay in sync from a single
+# source of truth.
+_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
+
 # ---------------------------------------------------------------------------
-# Nous Portal free-model filtering
+# Nous Portal free-model helper
 # ---------------------------------------------------------------------------
-# Models that are ALLOWED to appear when priced as free on Nous Portal.
-# Any other free model is hidden — prevents promotional/temporary free models
-# from cluttering the selection when users are paying subscribers.
-# Models in this list are ALSO filtered out if they are NOT free (i.e. they
-# should only appear in the menu when they are genuinely free).
-_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
-    "xiaomi/mimo-v2-pro",
-    "xiaomi/mimo-v2-omni",
-})
+# The Nous Portal models endpoint is the source of truth for which models
+# are currently offered (free or paid). We trust whatever it returns and
+# surface it to users as-is — no local allowlist filtering.


 def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
@@ -362,35 +391,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
        return False


-def filter_nous_free_models(
-    model_ids: list[str],
-    pricing: dict[str, dict[str, str]],
-) -> list[str]:
-    """Filter the Nous Portal model list according to free-model policy.
-
-    Rules:
-      • Paid models that are NOT in the allowlist → keep (normal case).
-      • Free models that are NOT in the allowlist → drop.
-      • Allowlist models that ARE free → keep.
-      • Allowlist models that are NOT free → drop.
-    """
-    if not pricing:
-        return model_ids  # no pricing data — can't filter, show everything
-
-    result: list[str] = []
-    for mid in model_ids:
-        free = _is_model_free(mid, pricing)
-        if mid in _NOUS_ALLOWED_FREE_MODELS:
-            # Allowlist model: only show when it's actually free
-            if free:
-                result.append(mid)
-        else:
-            # Regular model: keep only when it's NOT free
-            if not free:
-                result.append(mid)
-    return result
-
-
 # ---------------------------------------------------------------------------
 # Nous Portal account tier detection
 # ---------------------------------------------------------------------------
@@ -454,8 +454,7 @@ def partition_nous_models_by_tier(
 ) -> tuple[list[str], list[str]]:
    """Split Nous models into (selectable, unavailable) based on user tier.

-    For paid-tier users: all models are selectable, none unavailable
-    (free-model filtering is handled separately by ``filter_nous_free_models``).
+    For paid-tier users: all models are selectable, none unavailable.

    For free-tier users: only free models are selectable; paid models
    are returned as unavailable (shown grayed out in the menu).
@@ -494,8 +493,6 @@ def check_nous_free_tier() -> bool:
    Returns False (assume paid) on any error — never blocks paying users.
    """
    global _free_tier_cache
-    import time
-
    now = time.monotonic()
    if _free_tier_cache is not None:
        cached_result, cached_at = _free_tier_cache
@@ -527,6 +524,157 @@ def check_nous_free_tier() -> bool:
        return False  # default to paid on error — don't block users


+# ---------------------------------------------------------------------------
+# Nous Portal recommended models
+#
+# The Portal publishes a curated list of suggested models (separated into
+# paid and free tiers) plus dedicated recommendations for compaction (text
+# summarisation / auxiliary) and vision tasks. We fetch it once per process
+# with a TTL cache so callers can ask "what's the best aux model right now?"
+# without hitting the network on every lookup.
+#
+# Shape of the response (fields we care about):
+#   {
+#     "paidRecommendedModels":     [ {modelName, ...}, ... ],
+#     "freeRecommendedModels":     [ {modelName, ...}, ... ],
+#     "paidRecommendedCompactionModel":  {modelName, ...} | null,
+#     "paidRecommendedVisionModel":      {modelName, ...} | null,
+#     "freeRecommendedCompactionModel":  {modelName, ...} | null,
+#     "freeRecommendedVisionModel":      {modelName, ...} | null,
+#   }
+# ---------------------------------------------------------------------------
+
+NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
+_NOUS_RECOMMENDED_CACHE_TTL: int = 600  # seconds (10 minutes)
+# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
+_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
+
+
+def fetch_nous_recommended_models(
+    portal_base_url: str = "",
+    timeout: float = 5.0,
+    *,
+    force_refresh: bool = False,
+) -> dict[str, Any]:
+    """Fetch the Nous Portal's curated recommended-models payload.
+
+    Hits ``<portal>/api/nous/recommended-models``. The endpoint is public —
+    no auth is required. Results are cached per portal URL for
+    ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
+    bypass the cache.
+
+    Returns the parsed JSON dict on success, or ``{}`` on any failure
+    (network, parse, non-2xx). Callers must treat missing/null fields as
+    "no recommendation" and fall back to their own default.
+    """
+    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
+    now = time.monotonic()
+    cached = _nous_recommended_cache.get(base)
+    if not force_refresh and cached is not None:
+        payload, cached_at = cached
+        if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
+            return payload
+
+    url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
+    try:
+        req = urllib.request.Request(
+            url,
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+        if not isinstance(data, dict):
+            data = {}
+    except Exception:
+        data = {}
+
+    _nous_recommended_cache[base] = (data, now)
+    return data
+
+
+def _resolve_nous_portal_url() -> str:
+    """Best-effort lookup of the Portal base URL the user is authed against."""
+    try:
+        from hermes_cli.auth import (
+            DEFAULT_NOUS_PORTAL_URL,
+            get_provider_auth_state,
+        )
+        state = get_provider_auth_state("nous") or {}
+        portal = str(state.get("portal_base_url") or "").strip()
+        if portal:
+            return portal.rstrip("/")
+        return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
+    except Exception:
+        return "https://portal.nousresearch.com"
+
+
+def _extract_model_name(entry: Any) -> Optional[str]:
+    """Pull the ``modelName`` field from a recommended-model entry, else None."""
+    if not isinstance(entry, dict):
+        return None
+    model_name = entry.get("modelName")
+    if isinstance(model_name, str) and model_name.strip():
+        return model_name.strip()
+    return None
+
+
+def get_nous_recommended_aux_model(
+    *,
+    vision: bool = False,
+    free_tier: Optional[bool] = None,
+    portal_base_url: str = "",
+    force_refresh: bool = False,
+) -> Optional[str]:
+    """Return the Portal's recommended model name for an auxiliary task.
+
+    Picks the best field from the Portal's recommended-models payload:
+
+    * ``vision=True``  → ``paidRecommendedVisionModel``  (paid tier) or
+                         ``freeRecommendedVisionModel``  (free tier)
+    * ``vision=False`` → ``paidRecommendedCompactionModel`` or
+                         ``freeRecommendedCompactionModel``
+
+    When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
+    via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
+    detection — useful for tests or when the caller already knows the tier.
+
+    For paid-tier users we prefer the paid recommendation but gracefully fall
+    back to the free recommendation if the Portal returned ``null`` for the
+    paid field (common during the staged rollout of new paid models).
+
+    Returns ``None`` when every candidate is missing, null, or the fetch
+    fails — callers should fall back to their own default (currently
+    ``google/gemini-3-flash-preview``).
+    """
+    base = portal_base_url or _resolve_nous_portal_url()
+    payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
+    if not payload:
+        return None
+
+    if free_tier is None:
+        try:
+            free_tier = check_nous_free_tier()
+        except Exception:
+            # On any detection error, assume paid — paid users see both fields
+            # anyway so this is a safe default that maximises model quality.
+            free_tier = False
+
+    if vision:
+        paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
+    else:
+        paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"
+
+    # Preference order:
+    #   free tier  → free only
+    #   paid tier  → paid, then free (if paid field is null)
+    candidates = [free_key] if free_tier else [paid_key, free_key]
+    for key in candidates:
+        name = _extract_model_name(payload.get(key))
+        if name:
+            return name
+    return None
+
+
 # ---------------------------------------------------------------------------
 # Canonical provider list — single source of truth for provider identity.
 # Every code path that lists, displays, or iterates providers derives from
@@ -547,8 +695,11 @@ class ProviderEntry(NamedTuple):
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
+    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
+    ProviderEntry(VOLCENGINE_PROVIDER, "Volcengine",            "Volcengine (standard + Coding Plan catalogs)"),
+    ProviderEntry(BYTEPLUS_PROVIDER, "BytePlus",                "BytePlus (standard + Coding Plan catalogs)"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
    ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
@@ -562,6 +713,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("zai",            "Z.AI / GLM",               "Z.AI / GLM (Zhipu AI direct API)"),
    ProviderEntry("kimi-coding",    "Kimi / Kimi Coding Plan",  "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
+    ProviderEntry("stepfun",        "StepFun Step Plan",       "StepFun Step Plan (agent/coding models via Step Plan API)"),
    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
@@ -570,7 +722,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
-    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, pay-per-use)"),
    ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
 ]

@@ -578,7 +729,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider

-
 _PROVIDER_ALIASES = {
    "glm": "zai",
    "z-ai": "zai",
@@ -597,6 +747,8 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
+    "step": "stepfun",
+    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
    "minimax-china": "minimax-cn",
@@ -639,6 +791,10 @@ _PROVIDER_ALIASES = {
    "nemotron": "nvidia",
    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "ollama_cloud": "ollama-cloud",
+    "volcengine-coding-plan": VOLCENGINE_PROVIDER,
+    "volcengine_coding_plan": VOLCENGINE_PROVIDER,
+    "byteplus-coding-plan": BYTEPLUS_PROVIDER,
+    "byteplus_coding_plan": BYTEPLUS_PROVIDER,
 }


@@ -666,6 +822,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool:
        return False


+def _openrouter_model_supports_tools(item: Any) -> bool:
+    """Return True when the model's ``supported_parameters`` advertise tool calling.
+
+    hermes-agent is tool-calling-first — every provider path assumes the model
+    can invoke tools. Models that don't advertise ``tools`` in their
+    ``supported_parameters`` (e.g. image-only or completion-only models) cannot
+    be driven by the agent loop and would fail at the first tool call.
+
+    **Permissive when the field is missing.** Some OpenRouter-compatible gateways
+    (Nous Portal, private mirrors, older catalog snapshots) don't populate
+    ``supported_parameters`` at all. Treat that as "unknown capability → allow"
+    so the picker doesn't silently empty for those users. Only hide models
+    whose ``supported_parameters`` is an explicit list that omits ``tools``.
+
+    Ported from Kilo-Org/kilocode#9068.
+    """
+    if not isinstance(item, dict):
+        return True
+    params = item.get("supported_parameters")
+    if not isinstance(params, list):
+        # Field absent / malformed / None — be permissive.
+        return True
+    return "tools" in params
+
+
 def fetch_openrouter_models(
    timeout: float = 8.0,
    *,
@@ -708,6 +889,11 @@ def fetch_openrouter_models(
        live_item = live_by_id.get(preferred_id)
        if live_item is None:
            continue
+        # Hide models that don't advertise tool-calling support — hermes-agent
+        # requires it and surfacing them leads to immediate runtime failures
+        # when the user selects them. Ported from Kilo-Org/kilocode#9068.
+        if not _openrouter_model_supports_tools(live_item):
+            continue
        desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
        curated.append((preferred_id, desc))

@@ -725,6 +911,93 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
    return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]


+def _ai_gateway_model_is_free(pricing: Any) -> bool:
+    """Return True if an AI Gateway model has $0 input AND output pricing."""
+    if not isinstance(pricing, dict):
+        return False
+    try:
+        return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0
+    except (TypeError, ValueError):
+        return False
+
+
+def fetch_ai_gateway_models(
+    timeout: float = 8.0,
+    *,
+    force_refresh: bool = False,
+) -> list[tuple[str, str]]:
+    """Return the curated AI Gateway picker list, refreshed from the live catalog when possible."""
+    global _ai_gateway_catalog_cache
+
+    if _ai_gateway_catalog_cache is not None and not force_refresh:
+        return list(_ai_gateway_catalog_cache)
+
+    from hermes_constants import AI_GATEWAY_BASE_URL
+
+    fallback = list(VERCEL_AI_GATEWAY_MODELS)
+    preferred_ids = [mid for mid, _ in fallback]
+
+    try:
+        req = urllib.request.Request(
+            f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    live_items = payload.get("data", [])
+    if not isinstance(live_items, list):
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    live_by_id: dict[str, dict[str, Any]] = {}
+    for item in live_items:
+        if not isinstance(item, dict):
+            continue
+        mid = str(item.get("id") or "").strip()
+        if not mid:
+            continue
+        live_by_id[mid] = item
+
+    curated: list[tuple[str, str]] = []
+    for preferred_id in preferred_ids:
+        live_item = live_by_id.get(preferred_id)
+        if live_item is None:
+            continue
+        desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
+        curated.append((preferred_id, desc))
+
+    if not curated:
+        return list(_ai_gateway_catalog_cache or fallback)
+
+    # If the live catalog offers a free Moonshot model, auto-promote it to
+    # position #1 as "recommended" — dynamic discovery without a PR.
+    free_moonshot = next(
+        (
+            mid
+            for mid, item in live_by_id.items()
+            if mid.startswith("moonshotai/")
+            and _ai_gateway_model_is_free(item.get("pricing"))
+        ),
+        None,
+    )
+    if free_moonshot:
+        curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
+        curated.insert(0, (free_moonshot, "recommended"))
+    else:
+        first_id, _ = curated[0]
+        curated[0] = (first_id, "recommended")
+
+    _ai_gateway_catalog_cache = curated
+    return list(curated)
+
+
+def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
+    """Return just the AI Gateway model-id strings."""
+    return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)]
+
+


 # ---------------------------------------------------------------------------
@@ -869,6 +1142,56 @@ def fetch_models_with_pricing(
    return result


+def fetch_ai_gateway_pricing(
+    timeout: float = 8.0,
+    *,
+    force_refresh: bool = False,
+) -> dict[str, dict[str, str]]:
+    """Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing.
+
+    Vercel uses ``input`` / ``output`` field names; hermes's picker expects
+    ``prompt`` / ``completion``. This translates. Cache read/write field names
+    already match.
+    """
+    from hermes_constants import AI_GATEWAY_BASE_URL
+
+    cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
+    if not force_refresh and cache_key in _pricing_cache:
+        return _pricing_cache[cache_key]
+
+    try:
+        req = urllib.request.Request(
+            f"{cache_key}/models",
+            headers={"Accept": "application/json"},
+        )
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            payload = json.loads(resp.read().decode())
+    except Exception:
+        _pricing_cache[cache_key] = {}
+        return {}
+
+    result: dict[str, dict[str, str]] = {}
+    for item in payload.get("data", []):
+        if not isinstance(item, dict):
+            continue
+        mid = item.get("id")
+        pricing = item.get("pricing")
+        if not (mid and isinstance(pricing, dict)):
+            continue
+        entry: dict[str, str] = {
+            "prompt": str(pricing.get("input", "")),
+            "completion": str(pricing.get("output", "")),
+        }
+        if pricing.get("input_cache_read"):
+            entry["input_cache_read"] = str(pricing["input_cache_read"])
+        if pricing.get("input_cache_write"):
+            entry["input_cache_write"] = str(pricing["input_cache_write"])
+        result[mid] = entry
+
+    _pricing_cache[cache_key] = result
+    return result
+
+
 def _resolve_openrouter_api_key() -> str:
    """Best-effort OpenRouter API key for pricing fetch."""
    return os.getenv("OPENROUTER_API_KEY", "").strip()
@@ -887,7 +1210,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:


 def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
-    """Return live pricing for providers that support it (openrouter, nous)."""
+    """Return live pricing for providers that support it (openrouter, nous, ai-gateway)."""
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
        return fetch_models_with_pricing(
@@ -895,6 +1218,8 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
            base_url="https://openrouter.ai/api",
            force_refresh=force_refresh,
        )
+    if normalized == "ai-gateway":
+        return fetch_ai_gateway_pricing(force_refresh=force_refresh)
    if normalized == "nous":
        api_key, base_url = _resolve_nous_pricing_credentials()
        if base_url:
@@ -930,7 +1255,6 @@ def list_available_providers() -> list[dict[str, str]]:
    """
    # Derive display order from canonical list + custom
    provider_order = [p.slug for p in CANONICAL_PROVIDERS] + ["custom"]
-
    # Build reverse alias map
    aliases_for: dict[str, list[str]] = {}
    for alias, canonical in _PROVIDER_ALIASES.items():
@@ -946,7 +1270,7 @@ def list_available_providers() -> list[dict[str, str]]:
            from hermes_cli.auth import get_auth_status, has_usable_secret
            if pid == "custom":
                custom_base_url = _get_custom_base_url() or ""
-                has_creds = bool(custom_base_url.strip())
+                has_creds = bool(custom_base_url.strip()) and provider_for_base_url(custom_base_url) is None
            elif pid == "openrouter":
                has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
            else:
@@ -1012,6 +1336,29 @@ def _get_custom_base_url() -> str:
    return ""


+def provider_for_base_url(base_url: str) -> Optional[str]:
+    """Return a known built-in provider for a configured base URL, if any.
+
+    Uses the canonical _URL_TO_PROVIDER mapping from model_metadata plus
+    additional entries for providers not in that dict.
+    """
+    normalized = str(base_url or "").strip().rstrip("/")
+    if not normalized or "openrouter.ai" in normalized.lower():
+        return None
+
+    url_lower = normalized.lower()
+
+    # Primary source — shared with context-length resolution
+    from agent.model_metadata import _URL_TO_PROVIDER
+
+    for host, provider_id in _URL_TO_PROVIDER.items():
+        if host in url_lower:
+            canonical = normalize_provider(provider_id)
+            if canonical in _PROVIDER_LABELS and canonical != "custom":
+                return canonical
+    return None
+
+
 def curated_models_for_provider(
    provider: Optional[str],
    *,
@@ -1099,7 +1446,6 @@ def detect_provider_for_model(
            from hermes_cli.auth import PROVIDER_REGISTRY
            pconfig = PROVIDER_REGISTRY.get(direct_match)
            if pconfig:
-                import os
                for env_var in pconfig.api_key_env_vars:
                    if os.getenv(env_var, "").strip():
                        has_creds = True
@@ -1309,6 +1655,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                    return live
        except Exception:
            pass
+    if normalized == "stepfun":
+        try:
+            from hermes_cli.auth import resolve_api_key_provider_credentials
+
+            creds = resolve_api_key_provider_credentials("stepfun")
+            api_key = str(creds.get("api_key") or "").strip()
+            base_url = str(creds.get("base_url") or "").strip()
+            if api_key and base_url:
+                live = fetch_api_models(api_key, base_url)
+                if live:
+                    return live
+        except Exception:
+            pass
    if normalized == "anthropic":
        live = _fetch_anthropic_models()
        if live:
@@ -2236,13 +2595,70 @@ def validate_requested_model(
        except Exception:
            pass  # Fall through to generic warning

+    # Static-catalog fallback: when the /models probe was unreachable,
+    # validate against the curated list from provider_model_ids() — same
+    # pattern as the openai-codex and minimax branches above.  This fixes
+    # /model switches in the gateway for providers like opencode-go and
+    # opencode-zen whose /models endpoint returns 404 against the HTML
+    # marketing site.  Without this block, validate_requested_model would
+    # reject every model on such providers, switch_model() would return
+    # success=False, and the gateway would never write to
+    # _session_model_overrides.
    provider_label = _PROVIDER_LABELS.get(normalized, normalized)
+    try:
+        catalog_models = provider_model_ids(normalized)
+    except Exception:
+        catalog_models = []
+
+    if catalog_models:
+        catalog_lower = {m.lower(): m for m in catalog_models}
+        if requested_for_lookup.lower() in catalog_lower:
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": True,
+                "message": None,
+            }
+        catalog_lower_list = list(catalog_lower.keys())
+        auto = get_close_matches(
+            requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9
+        )
+        if auto:
+            corrected = catalog_lower[auto[0]]
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": True,
+                "corrected_model": corrected,
+                "message": f"Auto-corrected `{requested}` → `{corrected}`",
+            }
+        suggestions = get_close_matches(
+            requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5
+        )
+        suggestion_text = ""
+        if suggestions:
+            suggestion_text = "\n  Similar models: " + ", ".join(
+                f"`{catalog_lower[s]}`" for s in suggestions
+            )
+        return {
+            "accepted": True,
+            "persist": True,
+            "recognized": False,
+            "message": (
+                f"Note: `{requested}` was not found in the {provider_label} curated catalog "
+                f"and the /models endpoint was unreachable.{suggestion_text}"
+                f"\n  The model may still work if it exists on the provider."
+            ),
+        }
+
+    # No catalog available — accept with a warning, matching the comment's
+    # stated intent ("Accept and persist, but warn").
    return {
-        "accepted": False,
-        "persist": False,
+        "accepted": True,
+        "persist": True,
        "recognized": False,
        "message": (
-            f"Could not reach the {provider_label} API to validate `{requested}`. "
+            f"Note: could not reach the {provider_label} API to validate `{requested}`. "
            f"If the service isn't down, this model may not be valid."
        ),
    }
@@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status
 from hermes_cli.config import get_env_value, load_config
 from tools.managed_tool_gateway import is_managed_tool_gateway_ready
 from tools.tool_backend_helpers import (
+    fal_key_is_configured,
    has_direct_modal_credentials,
    managed_nous_tools_enabled,
    normalize_browser_cloud_provider,
@@ -271,7 +272,7 @@ def get_nous_subscription_features(
    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
    direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
-    direct_fal = bool(get_env_value("FAL_KEY"))
+    direct_fal = fal_key_is_configured()
    direct_openai_tts = bool(resolve_openai_audio_api_key())
    direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
    direct_camofox = bool(get_env_value("CAMOFOX_URL"))
@@ -520,7 +521,7 @@ def apply_nous_managed_defaults(
        browser_cfg["cloud_provider"] = "browser-use"
        changed.add("browser")

-    if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
+    if "image_gen" in selected_toolsets and not fal_key_is_configured():
        changed.add("image_gen")

    return changed
@@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
            or get_env_value("TAVILY_API_KEY")
            or get_env_value("EXA_API_KEY")
        ),
-        "image_gen": bool(get_env_value("FAL_KEY")),
+        "image_gen": fal_key_is_configured(),
        "tts": bool(
            resolve_openai_audio_api_key()
            or get_env_value("ELEVENLABS_API_KEY")
@@ -586,7 +587,6 @@ def get_gateway_eligible_tools(
        return [], [], []

    if config is None:
-        from hermes_cli.config import load_config
        config = load_config() or {}

    # Quick provider check without the heavy get_nous_subscription_features call
@@ -70,6 +70,7 @@ VALID_HOOKS: Set[str] = {
    "on_session_end",
    "on_session_finalize",
    "on_session_reset",
+    "subagent_stop",
 }

 ENTRY_POINTS_GROUP = "hermes_agent.plugins"
@@ -132,6 +133,9 @@ def _get_enabled_plugins() -> Optional[set]:
 # Data classes
 # ---------------------------------------------------------------------------

+_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"}
+
+
@dataclass
 class PluginManifest:
    """Parsed representation of a plugin.yaml manifest."""
@@ -145,6 +149,23 @@ class PluginManifest:
    provides_hooks: List[str] = field(default_factory=list)
    source: str = ""        # "user", "project", or "entrypoint"
    path: Optional[str] = None
+    # Plugin kind — see plugins.py module docstring for semantics.
+    # ``standalone`` (default): hooks/tools of its own; opt-in via
+    #                           ``plugins.enabled``.
+    # ``backend``: pluggable backend for an existing core tool (e.g.
+    #              image_gen). Built-in (bundled) backends auto-load;
+    #              user-installed still gated by ``plugins.enabled``.
+    # ``exclusive``: category with exactly one active provider (memory).
+    #              Selection via ``<category>.provider`` config key; the
+    #              category's own discovery system handles loading and the
+    #              general scanner skips these.
+    kind: str = "standalone"
+    # Registry key — path-derived, used by ``plugins.enabled``/``disabled``
+    # lookups and by ``hermes plugins list``. For a flat plugin at
+    # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested
+    # category plugin at ``plugins/image_gen/openai/`` the key is
+    # ``image_gen/openai``. When empty, falls back to ``name``.
+    key: str = ""


@dataclass
@@ -365,6 +386,33 @@ class PluginContext:
            self.manifest.name, engine.name,
        )

+    # -- image gen provider registration ------------------------------------
+
+    def register_image_gen_provider(self, provider) -> None:
+        """Register an image generation backend.
+
+        ``provider`` must be an instance of
+        :class:`agent.image_gen_provider.ImageGenProvider`. The
+        ``provider.name`` attribute is what ``image_gen.provider`` in
+        ``config.yaml`` matches against when routing ``image_generate``
+        tool calls.
+        """
+        from agent.image_gen_provider import ImageGenProvider
+        from agent.image_gen_registry import register_provider
+
+        if not isinstance(provider, ImageGenProvider):
+            logger.warning(
+                "Plugin '%s' tried to register an image_gen provider that does "
+                "not inherit from ImageGenProvider. Ignoring.",
+                self.manifest.name,
+            )
+            return
+        register_provider(provider)
+        logger.info(
+            "Plugin '%s' registered image_gen provider: %s",
+            self.manifest.name, provider.name,
+        )
+
    # -- hook registration --------------------------------------------------

    def register_hook(self, hook_name: str, callback: Callable) -> None:
@@ -464,11 +512,16 @@ class PluginManager:
        manifests: List[PluginManifest] = []

        # 1. Bundled plugins (<repo>/plugins/<name>/)
-        # Repo-shipped generic plugins live next to hermes_cli/.  Memory and
-        # context_engine subdirs are handled by their own discovery paths, so
-        # skip those names here.  Bundled plugins are discovered (so they
-        # show up in `hermes plugins`) but only loaded when added to
-        # `plugins.enabled` in config.yaml — opt-in like any other plugin.
+        #
+        # Repo-shipped plugins live next to hermes_cli/. Two layouts are
+        # supported (see ``_scan_directory`` for details):
+        #
+        #   - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone)
+        #   - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
+        #
+        # ``memory/`` and ``context_engine/`` are skipped at the top level —
+        # they have their own discovery systems. Porting those to the
+        # category-namespace ``kind: exclusive`` model is a future PR.
        repo_plugins = Path(__file__).resolve().parent.parent / "plugins"
        manifests.extend(
            self._scan_directory(
@@ -491,36 +544,69 @@ class PluginManager:
        manifests.extend(self._scan_entry_points())

        # Load each manifest (skip user-disabled plugins).
-        # Later sources override earlier ones on name collision — user plugins
-        # take precedence over bundled, project plugins take precedence over
-        # user.  Dedup here so we only load the final winner.
+        # Later sources override earlier ones on key collision — user
+        # plugins take precedence over bundled, project plugins take
+        # precedence over user. Dedup here so we only load the final
+        # winner. Keys are path-derived (``image_gen/openai``,
+        # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai``
+        # don't collide even when both manifests say ``name: openai``.
        disabled = _get_disabled_plugins()
        enabled = _get_enabled_plugins()  # None = opt-in default (nothing enabled)
        winners: Dict[str, PluginManifest] = {}
        for manifest in manifests:
-            winners[manifest.name] = manifest
+            winners[manifest.key or manifest.name] = manifest
        for manifest in winners.values():
-            # Explicit disable always wins.
-            if manifest.name in disabled:
+            lookup_key = manifest.key or manifest.name
+
+            # Explicit disable always wins (matches on key or on legacy
+            # bare name for back-compat with existing user configs).
+            if lookup_key in disabled or manifest.name in disabled:
                loaded = LoadedPlugin(manifest=manifest, enabled=False)
                loaded.error = "disabled via config"
-                self._plugins[manifest.name] = loaded
-                logger.debug("Skipping disabled plugin '%s'", manifest.name)
+                self._plugins[lookup_key] = loaded
+                logger.debug("Skipping disabled plugin '%s'", lookup_key)
                continue
-            # Opt-in gate: plugins must be in the enabled allow-list.
-            # If the allow-list is missing (None), treat as "nothing enabled"
-            # — users have to explicitly enable plugins to load them.
-            # Memory and context_engine providers are excluded from this gate
-            # since they have their own single-select config (memory.provider
-            # / context.engine), not the enabled list.
-            if enabled is None or manifest.name not in enabled:
+
+            # Exclusive plugins (memory providers) have their own
+            # discovery/activation path. The general loader records the
+            # manifest for introspection but does not load the module.
+            if manifest.kind == "exclusive":
                loaded = LoadedPlugin(manifest=manifest, enabled=False)
-                loaded.error = "not enabled in config (run `hermes plugins enable {}` to activate)".format(
-                    manifest.name
+                loaded.error = (
+                    "exclusive plugin — activate via <category>.provider config"
                )
-                self._plugins[manifest.name] = loaded
+                self._plugins[lookup_key] = loaded
                logger.debug(
-                    "Skipping '%s' (not in plugins.enabled)", manifest.name
+                    "Skipping '%s' (exclusive, handled by category discovery)",
+                    lookup_key,
+                )
+                continue
+
+            # Built-in backends auto-load — they ship with hermes and must
+            # just work. Selection among them (e.g. which image_gen backend
+            # services calls) is driven by ``<category>.provider`` config,
+            # enforced by the tool wrapper.
+            if manifest.kind == "backend" and manifest.source == "bundled":
+                self._load_plugin(manifest)
+                continue
+
+            # Everything else (standalone, user-installed backends,
+            # entry-point plugins) is opt-in via plugins.enabled.
+            # Accept both the path-derived key and the legacy bare name
+            # so existing configs keep working.
+            is_enabled = (
+                enabled is not None
+                and (lookup_key in enabled or manifest.name in enabled)
+            )
+            if not is_enabled:
+                loaded = LoadedPlugin(manifest=manifest, enabled=False)
+                loaded.error = (
+                    "not enabled in config (run `hermes plugins enable {}` to activate)"
+                    .format(lookup_key)
+                )
+                self._plugins[lookup_key] = loaded
+                logger.debug(
+                    "Skipping '%s' (not in plugins.enabled)", lookup_key
                )
                continue
            self._load_plugin(manifest)
@@ -544,9 +630,37 @@ class PluginManager:
    ) -> List[PluginManifest]:
        """Read ``plugin.yaml`` manifests from subdirectories of *path*.

-        *skip_names* is an optional allow-list of names to ignore (used
-        for the bundled scan to exclude ``memory`` / ``context_engine``
-        subdirs that have their own discovery path).
+        Supports two layouts, mixed freely:
+
+        * **Flat** — ``<root>/<plugin-name>/plugin.yaml``. Key is
+          ``<plugin-name>`` (e.g. ``disk-cleanup``).
+        * **Category** — ``<root>/<category>/<plugin-name>/plugin.yaml``,
+          where the ``<category>`` directory itself has no ``plugin.yaml``.
+          Key is ``<category>/<plugin-name>`` (e.g. ``image_gen/openai``).
+          Depth is capped at two segments.
+
+        *skip_names* is an optional allow-list of names to ignore at the
+        top level (kept for back-compat; the current call sites no longer
+        pass it now that categories are first-class).
+        """
+        return self._scan_directory_level(
+            path, source, skip_names=skip_names, prefix="", depth=0
+        )
+
+    def _scan_directory_level(
+        self,
+        path: Path,
+        source: str,
+        *,
+        skip_names: Optional[Set[str]],
+        prefix: str,
+        depth: int,
+    ) -> List[PluginManifest]:
+        """Recursive implementation of :meth:`_scan_directory`.
+
+        ``prefix`` is the category path already accumulated ("" at root,
+        "image_gen" one level in). ``depth`` is the recursion depth; we
+        cap at 2 so ``<root>/a/b/c/`` is ignored.
        """
        manifests: List[PluginManifest] = []
        if not path.is_dir():
@@ -555,37 +669,112 @@ class PluginManager:
        for child in sorted(path.iterdir()):
            if not child.is_dir():
                continue
-            if skip_names and child.name in skip_names:
+            if depth == 0 and skip_names and child.name in skip_names:
                continue
            manifest_file = child / "plugin.yaml"
            if not manifest_file.exists():
                manifest_file = child / "plugin.yml"
-            if not manifest_file.exists():
-                logger.debug("Skipping %s (no plugin.yaml)", child)
+
+            if manifest_file.exists():
+                manifest = self._parse_manifest(
+                    manifest_file, child, source, prefix
+                )
+                if manifest is not None:
+                    manifests.append(manifest)
                continue

-            try:
-                if yaml is None:
-                    logger.warning("PyYAML not installed – cannot load %s", manifest_file)
-                    continue
-                data = yaml.safe_load(manifest_file.read_text()) or {}
-                manifest = PluginManifest(
-                    name=data.get("name", child.name),
-                    version=str(data.get("version", "")),
-                    description=data.get("description", ""),
-                    author=data.get("author", ""),
-                    requires_env=data.get("requires_env", []),
-                    provides_tools=data.get("provides_tools", []),
-                    provides_hooks=data.get("provides_hooks", []),
-                    source=source,
-                    path=str(child),
+            # No manifest at this level. If we're still within the depth
+            # cap, treat this directory as a category namespace and recurse
+            # one level in looking for children with manifests.
+            if depth >= 1:
+                logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child)
+                continue
+
+            sub_prefix = f"{prefix}/{child.name}" if prefix else child.name
+            manifests.extend(
+                self._scan_directory_level(
+                    child,
+                    source,
+                    skip_names=None,
+                    prefix=sub_prefix,
+                    depth=depth + 1,
                )
-                manifests.append(manifest)
-            except Exception as exc:
-                logger.warning("Failed to parse %s: %s", manifest_file, exc)
+            )

        return manifests

+    def _parse_manifest(
+        self,
+        manifest_file: Path,
+        plugin_dir: Path,
+        source: str,
+        prefix: str,
+    ) -> Optional[PluginManifest]:
+        """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`.
+
+        Returns ``None`` on parse failure (logs a warning).
+        """
+        try:
+            if yaml is None:
+                logger.warning("PyYAML not installed – cannot load %s", manifest_file)
+                return None
+            data = yaml.safe_load(manifest_file.read_text()) or {}
+
+            name = data.get("name", plugin_dir.name)
+            key = f"{prefix}/{plugin_dir.name}" if prefix else name
+
+            raw_kind = data.get("kind", "standalone")
+            if not isinstance(raw_kind, str):
+                raw_kind = "standalone"
+            kind = raw_kind.strip().lower()
+            if kind not in _VALID_PLUGIN_KINDS:
+                logger.warning(
+                    "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'",
+                    key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)),
+                )
+                kind = "standalone"
+
+            # Auto-coerce user-installed memory providers to kind="exclusive"
+            # so they're routed to plugins/memory discovery instead of being
+            # loaded by the general PluginManager (which has no
+            # register_memory_provider on PluginContext). Mirrors the
+            # heuristic in plugins/memory/__init__.py:_is_memory_provider_dir.
+            # Bundled memory providers are already skipped via skip_names.
+            if kind == "standalone" and "kind" not in data:
+                init_file = plugin_dir / "__init__.py"
+                if init_file.exists():
+                    try:
+                        source_text = init_file.read_text(errors="replace")[:8192]
+                        if (
+                            "register_memory_provider" in source_text
+                            or "MemoryProvider" in source_text
+                        ):
+                            kind = "exclusive"
+                            logger.debug(
+                                "Plugin %s: detected memory provider, "
+                                "treating as kind='exclusive'",
+                                key,
+                            )
+                    except Exception:
+                        pass
+
+            return PluginManifest(
+                name=name,
+                version=str(data.get("version", "")),
+                description=data.get("description", ""),
+                author=data.get("author", ""),
+                requires_env=data.get("requires_env", []),
+                provides_tools=data.get("provides_tools", []),
+                provides_hooks=data.get("provides_hooks", []),
+                source=source,
+                path=str(plugin_dir),
+                kind=kind,
+                key=key,
+            )
+        except Exception as exc:
+            logger.warning("Failed to parse %s: %s", manifest_file, exc)
+            return None
+
    # -----------------------------------------------------------------------
    # Entry-point scanning
    # -----------------------------------------------------------------------
@@ -608,6 +797,7 @@ class PluginManager:
                    name=ep.name,
                    source="entrypoint",
                    path=ep.value,
+                    key=ep.name,
                )
                manifests.append(manifest)
        except Exception as exc:
@@ -669,10 +859,16 @@ class PluginManager:
            loaded.error = str(exc)
            logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)

-        self._plugins[manifest.name] = loaded
+        self._plugins[manifest.key or manifest.name] = loaded

    def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType:
-        """Import a directory-based plugin as ``hermes_plugins.<name>``."""
+        """Import a directory-based plugin as ``hermes_plugins.<slug>``.
+
+        The module slug is derived from ``manifest.key`` so category-namespaced
+        plugins (``image_gen/openai``) import as
+        ``hermes_plugins.image_gen__openai`` without colliding with any
+        future ``tts/openai``.
+        """
        plugin_dir = Path(manifest.path)  # type: ignore[arg-type]
        init_file = plugin_dir / "__init__.py"
        if not init_file.exists():
@@ -685,7 +881,9 @@ class PluginManager:
            ns_pkg.__package__ = _NS_PARENT
            sys.modules[_NS_PARENT] = ns_pkg

-        module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}"
+        key = manifest.key or manifest.name
+        slug = key.replace("/", "__").replace("-", "_")
+        module_name = f"{_NS_PARENT}.{slug}"
        spec = importlib.util.spec_from_file_location(
            module_name,
            init_file,
@@ -766,10 +964,12 @@ class PluginManager:
    def list_plugins(self) -> List[Dict[str, Any]]:
        """Return a list of info dicts for all discovered plugins."""
        result: List[Dict[str, Any]] = []
-        for name, loaded in sorted(self._plugins.items()):
+        for key, loaded in sorted(self._plugins.items()):
            result.append(
                {
-                    "name": name,
+                    "name": loaded.manifest.name,
+                    "key": loaded.manifest.key or loaded.manifest.name,
+                    "kind": loaded.manifest.kind,
                    "version": loaded.manifest.version,
                    "description": loaded.manifest.description,
                    "source": loaded.manifest.source,
@@ -23,6 +23,14 @@ import logging
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple

+from hermes_cli.volcengine_byteplus import (
+    BYTEPLUS_PROVIDER,
+    BYTEPLUS_STANDARD_BASE_URL,
+    VOLCENGINE_PROVIDER,
+    VOLCENGINE_STANDARD_BASE_URL,
+)
+from utils import base_url_host_matches, base_url_hostname
+
 logger = logging.getLogger(__name__)


@@ -92,6 +100,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="KIMI_BASE_URL",
    ),
+    "stepfun": HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("STEPFUN_API_KEY",),
+        base_url_override="https://api.stepfun.ai/step_plan/v1",
+        base_url_env_var="STEPFUN_BASE_URL",
+    ),
    "minimax": HermesOverlay(
        transport="anthropic_messages",
        base_url_env_var="MINIMAX_BASE_URL",
@@ -155,6 +169,16 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="OLLAMA_BASE_URL",
    ),
+    VOLCENGINE_PROVIDER: HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("VOLCENGINE_API_KEY",),
+        base_url_override=VOLCENGINE_STANDARD_BASE_URL,
+    ),
+    BYTEPLUS_PROVIDER: HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("BYTEPLUS_API_KEY",),
+        base_url_override=BYTEPLUS_STANDARD_BASE_URL,
+    ),
 }


@@ -208,6 +232,10 @@ ALIASES: Dict[str, str] = {
    "kimi-coding-cn": "kimi-for-coding",
    "moonshot": "kimi-for-coding",

+    # stepfun
+    "step": "stepfun",
+    "stepfun-coding-plan": "stepfun",
+
    # minimax-cn
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
@@ -261,6 +289,10 @@ ALIASES: Dict[str, str] = {
    # xiaomi
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
+    "volcengine-coding-plan": VOLCENGINE_PROVIDER,
+    "volcengine_coding_plan": VOLCENGINE_PROVIDER,
+    "byteplus-coding-plan": BYTEPLUS_PROVIDER,
+    "byteplus_coding_plan": BYTEPLUS_PROVIDER,

    # bedrock
    "aws": "bedrock",
@@ -292,7 +324,10 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "nous": "Nous Portal",
    "openai-codex": "OpenAI Codex",
    "copilot-acp": "GitHub Copilot ACP",
+    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
+    VOLCENGINE_PROVIDER: "Volcengine",
+    BYTEPLUS_PROVIDER: "BytePlus",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -425,6 +460,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
    """
    pdef = get_provider(provider)
    if pdef is not None:
+        # Even for known providers, check URL heuristics for special endpoints
+        # (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom')
+        if base_url:
+            url_lower = base_url.rstrip("/").lower()
+            if "api.kimi.com/coding" in url_lower:
+                return "anthropic_messages"
+            if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
+                return "anthropic_messages"
+            if "api.openai.com" in url_lower:
+                return "codex_responses"
        return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")

    # Direct provider checks for providers not in HERMES_OVERLAYS
@@ -434,11 +479,14 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
    # URL-based heuristics for custom / unknown providers
    if base_url:
        url_lower = base_url.rstrip("/").lower()
-        if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
+        hostname = base_url_hostname(base_url)
+        if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
            return "anthropic_messages"
-        if "api.openai.com" in url_lower:
+        if hostname == "api.kimi.com" and "/coding" in url_lower:
+            return "anthropic_messages"
+        if hostname == "api.openai.com":
            return "codex_responses"
-        if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
+        if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"):
            return "bedrock_converse"

    return "chat_completions"
@@ -29,6 +29,7 @@ from hermes_cli.auth import (
 )
 from hermes_cli.config import get_compatible_custom_providers, load_config
 from hermes_constants import OPENROUTER_BASE_URL
+from utils import base_url_host_matches, base_url_hostname


 def _normalize_custom_provider_name(value: str) -> str:
@@ -45,14 +46,20 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
      protocol under a ``/anthropic`` suffix — treat those as
      ``anthropic_messages`` transport instead of the default
      ``chat_completions``.
+    - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the
+      Anthropic Messages protocol (the /coding route accepts Claude
+      Code's native request shape).
    """
    normalized = (base_url or "").strip().lower().rstrip("/")
-    if "api.x.ai" in normalized:
+    hostname = base_url_hostname(base_url)
+    if hostname == "api.x.ai":
        return "codex_responses"
-    if "api.openai.com" in normalized and "openrouter" not in normalized:
+    if hostname == "api.openai.com":
        return "codex_responses"
    if normalized.endswith("/anthropic"):
        return "anthropic_messages"
+    if hostname == "api.kimi.com" and "/coding" in normalized:
+        return "anthropic_messages"
    return None


@@ -203,7 +210,8 @@ def _resolve_runtime_from_pool_entry(
            api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
-            # api.openai.com → codex_responses, api.x.ai → codex_responses).
+            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
+            # codex_responses).
            detected = _detect_api_mode_for_url(base_url)
            if detected:
                api_mode = detected
@@ -480,7 +488,7 @@ def _resolve_openrouter_runtime(
    # When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer
    # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
    # provider (issues #420, #560).
-    _is_openrouter_url = "openrouter.ai" in base_url
+    _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
    if _is_openrouter_url:
        api_key_candidates = [
            explicit_api_key,
@@ -490,8 +498,12 @@ def _resolve_openrouter_runtime(
    else:
        # Custom endpoint: use api_key from config when using config base_url (#1760).
        # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
-        # the canonical env var for ollama.com authentication.
-        _is_ollama_url = "ollama.com" in base_url.lower()
+        # the canonical env var for ollama.com authentication. Match on
+        # HOST, not substring — a custom base_url whose path contains
+        # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
+        # hostname is a look-alike (ollama.com.attacker.test) must not
+        # receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
+        _is_ollama_url = base_url_host_matches(base_url, "ollama.com")
        api_key_candidates = [
            explicit_api_key,
            (cfg_api_key if use_config_base_url else ""),
@@ -631,7 +643,7 @@ def _resolve_explicit_runtime(

        base_url = explicit_base_url
        if not base_url:
-            if provider in ("kimi-coding", "kimi-coding-cn"):
+            if provider in ("kimi-coding", "kimi-coding-cn", "volcengine", "byteplus"):
                creds = resolve_api_key_provider_credentials(provider)
                base_url = creds.get("base_url", "").rstrip("/")
            else:
@@ -654,7 +666,8 @@ def _resolve_explicit_runtime(
            if configured_mode:
                api_mode = configured_mode
            else:
-                # Auto-detect Anthropic-compatible endpoints (/anthropic suffix).
+                # Auto-detect from URL (Anthropic /anthropic suffix,
+                # api.openai.com → Responses, Kimi /coding, etc.).
                detected = _detect_api_mode_for_url(base_url)
                if detected:
                    api_mode = detected
@@ -904,8 +917,7 @@ def resolve_runtime_provider(
                code="no_aws_credentials",
            )
        # Read bedrock-specific config from config.yaml
-        from hermes_cli.config import load_config as _load_bedrock_config
-        _bedrock_cfg = _load_bedrock_config().get("bedrock", {})
+        _bedrock_cfg = load_config().get("bedrock", {})
        # Region priority: config.yaml bedrock.region → env var → us-east-1
        region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
        auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
@@ -22,6 +22,7 @@ from typing import Optional, Dict, Any

 from hermes_cli.nous_subscription import get_nous_subscription_features
 from tools.tool_backend_helpers import managed_nous_tools_enabled
+from utils import base_url_hostname
 from hermes_constants import get_optional_skills_dir

 logger = logging.getLogger(__name__)
@@ -93,15 +94,16 @@ _DEFAULT_PROVIDER_MODELS = {
        "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
    ],
    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
-    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
-    "kimi-coding-cn": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "kimi-coding": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "kimi-coding-cn": ["kimi-k2.6", "kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "stepfun": ["step-3.5-flash", "step-3.5-flash-2603"],
    "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
-    "opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
+    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -407,13 +409,36 @@ def _print_setup_summary(config: dict, hermes_home):
            ("Browser Automation", False, missing_browser_hint)
        )

-    # FAL (image generation)
+    # Image generation — FAL (direct or via Nous), or any plugin-registered
+    # provider (OpenAI, etc.)
    if subscription_features.image_gen.managed_by_nous:
        tool_status.append(("Image Generation (Nous subscription)", True, None))
    elif subscription_features.image_gen.available:
        tool_status.append(("Image Generation", True, None))
    else:
-        tool_status.append(("Image Generation", False, "FAL_KEY"))
+        # Fall back to probing plugin-registered providers so OpenAI-only
+        # setups don't show as "missing FAL_KEY".
+        _img_backend = None
+        try:
+            from agent.image_gen_registry import list_providers
+            from hermes_cli.plugins import _ensure_plugins_discovered
+
+            _ensure_plugins_discovered()
+            for _p in list_providers():
+                if _p.name == "fal":
+                    continue
+                try:
+                    if _p.is_available():
+                        _img_backend = _p.display_name
+                        break
+                except Exception:
+                    continue
+        except Exception:
+            pass
+        if _img_backend:
+            tool_status.append((f"Image Generation ({_img_backend})", True, None))
+        else:
+            tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY"))

    # TTS — show configured provider
    tts_provider = config.get("tts", {}).get("provider", "edge")
@@ -433,7 +458,6 @@ def _print_setup_summary(config: dict, hermes_home):
        tool_status.append(("Text-to-Speech (Google Gemini)", True, None))
    elif tts_provider == "neutts":
        try:
-            import importlib.util
            neutts_ok = importlib.util.find_spec("neutts") is not None
        except Exception:
            neutts_ok = False
@@ -441,6 +465,16 @@ def _print_setup_summary(config: dict, hermes_home):
            tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
        else:
            tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
+    elif tts_provider == "kittentts":
+        try:
+            import importlib.util
+            kittentts_ok = importlib.util.find_spec("kittentts") is not None
+        except Exception:
+            kittentts_ok = False
+        if kittentts_ok:
+            tool_status.append(("Text-to-Speech (KittenTTS local)", True, None))
+        else:
+            tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'"))
    else:
        tool_status.append(("Text-to-Speech (Edge TTS)", True, None))

@@ -771,6 +805,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
            "zai": "Z.AI / GLM",
            "kimi-coding": "Kimi / Moonshot",
            "kimi-coding-cn": "Kimi / Moonshot (China)",
+            "stepfun": "StepFun Step Plan",
            "minimax": "MiniMax",
            "minimax-cn": "MiniMax CN",
            "anthropic": "Anthropic",
@@ -803,7 +838,8 @@ def setup_model_provider(config: dict, *, quick: bool = False):
        elif _vision_idx == 1:  # OpenAI-compatible endpoint
            _base_url = prompt("  Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
            _api_key_label = "  API key"
-            if "api.openai.com" in _base_url.lower():
+            _is_native_openai = base_url_hostname(_base_url) == "api.openai.com"
+            if _is_native_openai:
                _api_key_label = "  OpenAI API key"
            _oai_key = prompt(_api_key_label, password=True).strip()
            if _oai_key:
@@ -811,7 +847,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
                # Save vision base URL to config (not .env — only secrets go there)
                _vaux = config.setdefault("auxiliary", {}).setdefault("vision", {})
                _vaux["base_url"] = _base_url
-                if "api.openai.com" in _base_url.lower():
+                if _is_native_openai:
                    _oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
                    _vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
                    _vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
@@ -847,7 +883,6 @@ def setup_model_provider(config: dict, *, quick: bool = False):

 def _check_espeak_ng() -> bool:
    """Check if espeak-ng is installed."""
-    import shutil
    return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None


@@ -901,6 +936,31 @@ def _install_neutts_deps() -> bool:
        return False


+def _install_kittentts_deps() -> bool:
+    """Install KittenTTS dependencies with user approval. Returns True on success."""
+    import subprocess
+    import sys
+
+    wheel_url = (
+        "https://github.com/KittenML/KittenTTS/releases/download/"
+        "0.8.1/kittentts-0.8.1-py3-none-any.whl"
+    )
+    print()
+    print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...")
+    print()
+    try:
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
+            check=True, timeout=300,
+        )
+        print_success("kittentts installed successfully")
+        return True
+    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+        print_error(f"Failed to install kittentts: {e}")
+        print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile")
+        return False
+
+
 def _setup_tts_provider(config: dict):
    """Interactive TTS provider selection with install flow for NeuTTS."""
    tts_config = config.get("tts", {})
@@ -916,6 +976,7 @@ def _setup_tts_provider(config: dict):
        "mistral": "Mistral Voxtral TTS",
        "gemini": "Google Gemini TTS",
        "neutts": "NeuTTS",
+        "kittentts": "KittenTTS",
    }
    current_label = provider_labels.get(current_provider, current_provider)

@@ -939,9 +1000,10 @@ def _setup_tts_provider(config: dict):
            "Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
            "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
            "NeuTTS (local on-device, free, ~300MB model download)",
+            "KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)",
        ]
    )
-    providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
+    providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"])
    choices.append(f"Keep current ({current_label})")
    keep_current_idx = len(choices) - 1
    idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@@ -962,7 +1024,6 @@ def _setup_tts_provider(config: dict):
    if selected == "neutts":
        # Check if already installed
        try:
-            import importlib.util
            already_installed = importlib.util.find_spec("neutts") is not None
        except Exception:
            already_installed = False
@@ -1061,6 +1122,29 @@ def _setup_tts_provider(config: dict):
                print_warning("No API key provided. Falling back to Edge TTS.")
                selected = "edge"

+    elif selected == "kittentts":
+        # Check if already installed
+        try:
+            import importlib.util
+            already_installed = importlib.util.find_spec("kittentts") is not None
+        except Exception:
+            already_installed = False
+
+        if already_installed:
+            print_success("KittenTTS is already installed")
+        else:
+            print()
+            print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).")
+            print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
+            print()
+            if prompt_yes_no("Install KittenTTS now?", True):
+                if not _install_kittentts_deps():
+                    print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.")
+                    selected = "edge"
+            else:
+                print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.")
+                selected = "edge"
+
    # Save the selection
    if "tts" not in config:
        config["tts"] = {}
@@ -1082,8 +1166,6 @@ def setup_tts(config: dict):
 def setup_terminal_backend(config: dict):
    """Configure the terminal execution backend."""
    import platform as _platform
-    import shutil
-
    print_header("Terminal Backend")
    print_info("Choose where Hermes runs shell commands and code.")
    print_info("This affects tool execution, file access, and isolation.")
@@ -2358,6 +2440,74 @@ def setup_tools(config: dict, first_install: bool = False):
 # =============================================================================


+def _model_section_has_credentials(config: dict) -> bool:
+    """Return True when any known inference provider has usable credentials.
+
+    Sources of truth:
+      * ``PROVIDER_REGISTRY`` in ``hermes_cli.auth`` — lists every supported
+        provider along with its ``api_key_env_vars``.
+      * ``active_provider`` in the auth store — covers OAuth device-code /
+        external-OAuth providers (Nous, Codex, Qwen, Gemini CLI, ...).
+      * The legacy OpenRouter aggregator env vars, which route generic
+        ``OPENAI_API_KEY`` / ``OPENROUTER_API_KEY`` values through OpenRouter.
+    """
+    try:
+        from hermes_cli.auth import get_active_provider
+        if get_active_provider():
+            return True
+    except Exception:
+        pass
+
+    try:
+        from hermes_cli.auth import PROVIDER_REGISTRY
+    except Exception:
+        PROVIDER_REGISTRY = {}  # type: ignore[assignment]
+
+    def _has_key(pconfig) -> bool:
+        for env_var in pconfig.api_key_env_vars:
+            # CLAUDE_CODE_OAUTH_TOKEN is set by Claude Code itself, not by
+            # the user — mirrors is_provider_explicitly_configured in auth.py.
+            if env_var == "CLAUDE_CODE_OAUTH_TOKEN":
+                continue
+            if get_env_value(env_var):
+                return True
+        return False
+
+    # Prefer the provider declared in config.yaml, avoids false positives
+    # from stray env vars (GH_TOKEN, etc.) when the user has already picked
+    # a different provider.
+    model_cfg = config.get("model") if isinstance(config, dict) else None
+    if isinstance(model_cfg, dict):
+        provider_id = (model_cfg.get("provider") or "").strip().lower()
+        if provider_id in PROVIDER_REGISTRY:
+            if _has_key(PROVIDER_REGISTRY[provider_id]):
+                return True
+        if provider_id == "openrouter":
+            for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
+                if get_env_value(env_var):
+                    return True
+
+    # OpenRouter aggregator fallback (no provider declared in config).
+    for env_var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY"):
+        if get_env_value(env_var):
+            return True
+
+    for pid, pconfig in PROVIDER_REGISTRY.items():
+        # Skip copilot in auto-detect: GH_TOKEN / GITHUB_TOKEN are
+        # commonly set for git tooling.  Mirrors resolve_provider in auth.py.
+        if pid == "copilot":
+            continue
+        if _has_key(pconfig):
+            return True
+    return False
+
+
+def _gateway_platform_short_label(label: str) -> str:
+    """Strip trailing parenthetical qualifiers from a gateway platform label."""
+    base = label.split("(", 1)[0].strip()
+    return base or label
+
+
 def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]:
    """Return a short summary if a setup section is already configured, else None.

@@ -2366,20 +2516,7 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
    so that test patches on ``setup_mod.get_env_value`` take effect.
    """
    if section_key == "model":
-        has_key = bool(
-            get_env_value("OPENROUTER_API_KEY")
-            or get_env_value("OPENAI_API_KEY")
-            or get_env_value("ANTHROPIC_API_KEY")
-        )
-        if not has_key:
-            # Check for OAuth providers
-            try:
-                from hermes_cli.auth import get_active_provider
-                if get_active_provider():
-                    has_key = True
-            except Exception:
-                pass
-        if not has_key:
+        if not _model_section_has_credentials(config):
            return None
        model = config.get("model")
        if isinstance(model, str) and model.strip():
@@ -2397,37 +2534,11 @@ def _get_section_config_summary(config: dict, section_key: str) -> Optional[str]
        return f"max turns: {max_turns}"

    elif section_key == "gateway":
-        platforms = []
-        if get_env_value("TELEGRAM_BOT_TOKEN"):
-            platforms.append("Telegram")
-        if get_env_value("DISCORD_BOT_TOKEN"):
-            platforms.append("Discord")
-        if get_env_value("SLACK_BOT_TOKEN"):
-            platforms.append("Slack")
-        if get_env_value("SIGNAL_ACCOUNT"):
-            platforms.append("Signal")
-        if get_env_value("EMAIL_ADDRESS"):
-            platforms.append("Email")
-        if get_env_value("TWILIO_ACCOUNT_SID"):
-            platforms.append("SMS")
-        if get_env_value("MATRIX_ACCESS_TOKEN") or get_env_value("MATRIX_PASSWORD"):
-            platforms.append("Matrix")
-        if get_env_value("MATTERMOST_TOKEN"):
-            platforms.append("Mattermost")
-        if get_env_value("WHATSAPP_PHONE_NUMBER_ID"):
-            platforms.append("WhatsApp")
-        if get_env_value("DINGTALK_CLIENT_ID"):
-            platforms.append("DingTalk")
-        if get_env_value("FEISHU_APP_ID"):
-            platforms.append("Feishu")
-        if get_env_value("WECOM_BOT_ID"):
-            platforms.append("WeCom")
-        if get_env_value("WEIXIN_ACCOUNT_ID"):
-            platforms.append("Weixin")
-        if get_env_value("BLUEBUBBLES_SERVER_URL"):
-            platforms.append("BlueBubbles")
-        if get_env_value("WEBHOOK_ENABLED"):
-            platforms.append("Webhooks")
+        platforms = [
+            _gateway_platform_short_label(label)
+            for label, env_var, _ in _GATEWAY_PLATFORMS
+            if get_env_value(env_var)
+        ]
        if platforms:
            return ", ".join(platforms)
        return None  # No platforms configured — section must run
@@ -122,6 +122,7 @@ def show_status(args):
        "OpenAI": "OPENAI_API_KEY",
        "Z.AI/GLM": "GLM_API_KEY",
        "Kimi": "KIMI_API_KEY",
+        "StepFun Step Plan": "STEPFUN_API_KEY",
        "MiniMax": "MINIMAX_API_KEY",
        "MiniMax-CN": "MINIMAX_CN_API_KEY",
        "Firecrawl": "FIRECRAWL_API_KEY",
@@ -252,6 +253,7 @@ def show_status(args):
    apikey_providers = {
        "Z.AI / GLM":       ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
        "Kimi / Moonshot":  ("KIMI_API_KEY",),
+        "StepFun Step Plan": ("STEPFUN_API_KEY",),
        "MiniMax":          ("MINIMAX_API_KEY",),
        "MiniMax (China)":  ("MINIMAX_CN_API_KEY",),
    }
@@ -127,7 +127,7 @@ TIPS = [

    # --- Tools & Capabilities ---
    "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
-    "delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.",
+    "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
    "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
    "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
    "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
@@ -24,7 +24,8 @@ from hermes_cli.nous_subscription import (
    apply_nous_managed_defaults,
    get_nous_subscription_features,
 )
-from tools.tool_backend_helpers import managed_nous_tools_enabled
+from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
+from utils import base_url_hostname

 logger = logging.getLogger(__name__)

@@ -181,6 +182,14 @@ TOOL_CATEGORIES = {
                ],
                "tts_provider": "gemini",
            },
+            {
+                "name": "KittenTTS",
+                "badge": "local · free",
+                "tag": "Lightweight local ONNX TTS (~25MB), no API key",
+                "env_vars": [],
+                "tts_provider": "kittentts",
+                "post_setup": "kittentts",
+            },
        ],
    },
    "web": {
@@ -422,6 +431,36 @@ def _run_post_setup(post_setup_key: str):
            _print_warning("    Node.js not found. Install Camofox via Docker:")
            _print_info("      docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")

+    elif post_setup_key == "kittentts":
+        try:
+            __import__("kittentts")
+            _print_success("    kittentts is already installed")
+            return
+        except ImportError:
+            pass
+        import subprocess
+        _print_info("    Installing kittentts (~25-80MB model, CPU-only)...")
+        wheel_url = (
+            "https://github.com/KittenML/KittenTTS/releases/download/"
+            "0.8.1/kittentts-0.8.1-py3-none-any.whl"
+        )
+        try:
+            result = subprocess.run(
+                [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
+                capture_output=True, text=True, timeout=300,
+            )
+            if result.returncode == 0:
+                _print_success("    kittentts installed")
+                _print_info("    Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
+                _print_info("    Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)")
+            else:
+                _print_warning("    kittentts install failed:")
+                _print_info(f"      {result.stderr.strip()[:300]}")
+                _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")
+        except subprocess.TimeoutExpired:
+            _print_warning("    kittentts install timed out (>5min)")
+            _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")
+
    elif post_setup_key == "rl_training":
        try:
            __import__("tinker_atropos")
@@ -546,6 +585,10 @@ def _get_platform_tools(
            ts_tools = set(resolve_toolset(ts_key))
            if ts_tools and ts_tools.issubset(all_tool_names):
                enabled_toolsets.add(ts_key)
+        default_off = set(_DEFAULT_OFF_TOOLSETS)
+        if platform in default_off:
+            default_off.remove(platform)
+        enabled_toolsets -= default_off

    # Plugin toolsets: enabled by default unless explicitly disabled.
    # A plugin toolset is "known" for a platform once `hermes tools`
@@ -804,6 +847,51 @@ def _configure_toolset(ts_key: str, config: dict):
        _configure_simple_requirements(ts_key)


+def _plugin_image_gen_providers() -> list[dict]:
+    """Build picker-row dicts from plugin-registered image gen providers.
+
+    Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider
+    row but carries an ``image_gen_plugin_name`` marker so downstream
+    code (config writing, model picker) knows to route through the
+    plugin registry instead of the in-tree FAL backend.
+
+    FAL is skipped — it's already exposed by the hardcoded
+    ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to
+    a plugin in a follow-up PR, the hardcoded entries go away and this
+    function surfaces it alongside OpenAI automatically.
+    """
+    try:
+        from agent.image_gen_registry import list_providers
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+        providers = list_providers()
+    except Exception:
+        return []
+
+    rows: list[dict] = []
+    for provider in providers:
+        if getattr(provider, "name", None) == "fal":
+            # FAL has its own hardcoded rows today.
+            continue
+        try:
+            schema = provider.get_setup_schema()
+        except Exception:
+            continue
+        if not isinstance(schema, dict):
+            continue
+        rows.append(
+            {
+                "name": schema.get("name", provider.display_name),
+                "badge": schema.get("badge", ""),
+                "tag": schema.get("tag", ""),
+                "env_vars": schema.get("env_vars", []),
+                "image_gen_plugin_name": provider.name,
+            }
+        )
+    return rows
+
+
 def _visible_providers(cat: dict, config: dict) -> list[dict]:
    """Return provider entries visible for the current auth/config state."""
    features = get_nous_subscription_features(config)
@@ -814,6 +902,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]:
        if provider.get("requires_nous_auth") and not features.nous_auth_present:
            continue
        visible.append(provider)
+
+    # Inject plugin-registered image_gen backends (OpenAI today, more
+    # later) so the picker lists them alongside FAL / Nous Subscription.
+    if cat.get("name") == "Image Generation":
+        visible.extend(_plugin_image_gen_providers())
+
    return visible


@@ -833,7 +927,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
        browser_cfg = config.get("browser", {})
        return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
    if ts_key == "image_gen":
-        return not get_env_value("FAL_KEY")
+        # Satisfied when the in-tree FAL backend is configured OR any
+        # plugin-registered image gen provider is available.
+        if fal_key_is_configured():
+            return False
+        try:
+            from agent.image_gen_registry import list_providers
+            from hermes_cli.plugins import _ensure_plugins_discovered
+
+            _ensure_plugins_discovered()
+            for provider in list_providers():
+                try:
+                    if provider.is_available():
+                        return False
+                except Exception:
+                    continue
+        except Exception:
+            pass
+        return True

    return not _toolset_has_keys(ts_key, config)

@@ -1052,6 +1163,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None:
    _print_success(f"  Model set to: {chosen}")


+def _plugin_image_gen_catalog(plugin_name: str):
+    """Return ``(catalog_dict, default_model_id)`` for a plugin provider.
+
+    ``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table —
+    ``{model_id: {"display", "speed", "strengths", "price", ...}}`` —
+    so the existing picker code paths work without change. Returns
+    ``({}, None)`` if the provider isn't registered or has no models.
+    """
+    try:
+        from agent.image_gen_registry import get_provider
+        from hermes_cli.plugins import _ensure_plugins_discovered
+
+        _ensure_plugins_discovered()
+        provider = get_provider(plugin_name)
+    except Exception:
+        return {}, None
+    if provider is None:
+        return {}, None
+    try:
+        models = provider.list_models() or []
+        default = provider.default_model()
+    except Exception:
+        return {}, None
+    catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m}
+    return catalog, default
+
+
+def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None:
+    """Prompt the user to pick a model for a plugin-registered backend.
+
+    Writes selection to ``image_gen.model``. Mirrors
+    :func:`_configure_imagegen_model` but sources its catalog from the
+    plugin registry instead of :data:`IMAGEGEN_BACKENDS`.
+    """
+    catalog, default_model = _plugin_image_gen_catalog(plugin_name)
+    if not catalog:
+        return
+
+    cur_cfg = config.setdefault("image_gen", {})
+    if not isinstance(cur_cfg, dict):
+        cur_cfg = {}
+        config["image_gen"] = cur_cfg
+    current_model = cur_cfg.get("model") or default_model
+    if current_model not in catalog:
+        current_model = default_model
+
+    model_ids = list(catalog.keys())
+    ordered = [current_model] + [m for m in model_ids if m != current_model]
+
+    widths = {
+        "model": max(len(m) for m in model_ids),
+        "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6),
+        "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0),
+    }
+
+    print()
+    header = (
+        f"  {'Model':<{widths['model']}}  "
+        f"{'Speed':<{widths['speed']}}  "
+        f"{'Strengths':<{widths['strengths']}}  "
+        f"Price"
+    )
+    print(color(header, Colors.CYAN))
+
+    rows = []
+    for mid in ordered:
+        row = _format_imagegen_model_row(mid, catalog[mid], widths)
+        if mid == current_model:
+            row += "  ← currently in use"
+        rows.append(row)
+
+    idx = _prompt_choice(
+        f"  Choose {plugin_name} model:",
+        rows,
+        default=0,
+    )
+
+    chosen = ordered[idx]
+    cur_cfg["model"] = chosen
+    _print_success(f"  Model set to: {chosen}")
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -1108,10 +1301,28 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+        # Plugin-registered image_gen provider: write image_gen.provider
+        # and route model selection to the plugin's own catalog.
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            img_cfg = config.setdefault("image_gen", {})
+            if not isinstance(img_cfg, dict):
+                img_cfg = {}
+                config["image_gen"] = img_cfg
+            img_cfg["provider"] = plugin_name
+            _print_success(f"  image_gen.provider set to: {plugin_name}")
+            _configure_imagegen_model_for_plugin(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection after backend pick.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            # In-tree FAL is the only non-plugin backend today. Keep
+            # image_gen.provider clear so the dispatch shim falls through
+            # to the legacy FAL path.
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
+                img_cfg["provider"] = "fal"
        return

    # Prompt for each required env var
@@ -1146,10 +1357,23 @@ def _configure_provider(provider: dict, config: dict):

    if all_configured:
        _print_success(f"  {provider['name']} configured!")
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            img_cfg = config.setdefault("image_gen", {})
+            if not isinstance(img_cfg, dict):
+                img_cfg = {}
+                config["image_gen"] = img_cfg
+            img_cfg["provider"] = plugin_name
+            _print_success(f"  image_gen.provider set to: {plugin_name}")
+            _configure_imagegen_model_for_plugin(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection after env vars are in.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"):
+                img_cfg["provider"] = "fal"


 def _configure_simple_requirements(ts_key: str):
@@ -1175,17 +1399,17 @@ def _configure_simple_requirements(ts_key: str):
                _print_warning("    Skipped")
        elif idx == 1:
            base_url = _prompt("    OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
-            key_label = "    OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else "    API key"
+            is_native_openai = base_url_hostname(base_url) == "api.openai.com"
+            key_label = "    OPENAI_API_KEY" if is_native_openai else "    API key"
            api_key = _prompt(key_label, password=True)
            if api_key and api_key.strip():
                save_env_value("OPENAI_API_KEY", api_key.strip())
                # Save vision base URL to config (not .env — only secrets go there)
-                from hermes_cli.config import load_config, save_config
                _cfg = load_config()
                _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
                _aux["base_url"] = base_url
                save_config(_cfg)
-                if "api.openai.com" in base_url.lower():
+                if is_native_openai:
                    save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
                _print_success("    Saved")
            else:
@@ -0,0 +1,134 @@
+"""Source-of-truth contracts for built-in providers without models.dev catalogs."""
+
+from __future__ import annotations
+
+from typing import Dict, List, Tuple
+
+VOLCENGINE_PROVIDER = "volcengine"
+BYTEPLUS_PROVIDER = "byteplus"
+
+VOLCENGINE_STANDARD_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
+VOLCENGINE_CODING_PLAN_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
+BYTEPLUS_STANDARD_BASE_URL = "https://ark.ap-southeast.bytepluses.com/api/v3"
+BYTEPLUS_CODING_PLAN_BASE_URL = "https://ark.ap-southeast.bytepluses.com/api/coding/v3"
+
+VOLCENGINE_STANDARD_MODELS: Tuple[str, ...] = (
+    "doubao-seed-2-0-pro-260215",
+    "doubao-seed-2-0-lite-260215",
+    "doubao-seed-2-0-mini-260215",
+    "doubao-seed-2-0-code-preview-260215",
+    "kimi-k2-5-260127",
+    "glm-4-7-251222",
+    "deepseek-v3-2-251201",
+)
+
+VOLCENGINE_CODING_PLAN_MODELS: Tuple[str, ...] = (
+    "doubao-seed-2.0-code",
+    "doubao-seed-2.0-pro",
+    "doubao-seed-2.0-lite",
+    "doubao-seed-code",
+    "minimax-m2.5",
+    "glm-4.7",
+    "deepseek-v3.2",
+    "kimi-k2.5",
+)
+
+BYTEPLUS_STANDARD_MODELS: Tuple[str, ...] = (
+    "seed-2-0-pro-260328",
+    "seed-2-0-lite-260228",
+    "seed-2-0-mini-260215",
+    "kimi-k2-5-260127",
+    "glm-4-7-251222",
+)
+
+BYTEPLUS_CODING_PLAN_MODELS: Tuple[str, ...] = (
+    "dola-seed-2.0-pro",
+    "dola-seed-2.0-lite",
+    "bytedance-seed-code",
+    "glm-4.7",
+    "kimi-k2.5",
+    "gpt-oss-120b",
+)
+
+VOLCENGINE_STANDARD_MODEL_REFS: Tuple[str, ...] = tuple(
+    f"{VOLCENGINE_PROVIDER}/{model_id}" for model_id in VOLCENGINE_STANDARD_MODELS
+)
+VOLCENGINE_CODING_PLAN_MODEL_REFS: Tuple[str, ...] = tuple(
+    f"{VOLCENGINE_PROVIDER}-coding-plan/{model_id}" for model_id in VOLCENGINE_CODING_PLAN_MODELS
+)
+BYTEPLUS_STANDARD_MODEL_REFS: Tuple[str, ...] = tuple(
+    f"{BYTEPLUS_PROVIDER}/{model_id}" for model_id in BYTEPLUS_STANDARD_MODELS
+)
+BYTEPLUS_CODING_PLAN_MODEL_REFS: Tuple[str, ...] = tuple(
+    f"{BYTEPLUS_PROVIDER}-coding-plan/{model_id}" for model_id in BYTEPLUS_CODING_PLAN_MODELS
+)
+
+PROVIDER_MODEL_CATALOGS: Dict[str, Tuple[str, ...]] = {
+    VOLCENGINE_PROVIDER: VOLCENGINE_STANDARD_MODEL_REFS + VOLCENGINE_CODING_PLAN_MODEL_REFS,
+    BYTEPLUS_PROVIDER: BYTEPLUS_STANDARD_MODEL_REFS + BYTEPLUS_CODING_PLAN_MODEL_REFS,
+}
+
+MODEL_CONTEXT_WINDOWS: Dict[str, int] = {
+    "doubao-seed-2-0-pro-260215": 256000,
+    "doubao-seed-2-0-lite-260215": 256000,
+    "doubao-seed-2-0-mini-260215": 256000,
+    "doubao-seed-2-0-code-preview-260215": 256000,
+    "kimi-k2-5-260127": 256000,
+    "glm-4-7-251222": 200000,
+    "deepseek-v3-2-251201": 128000,
+    "doubao-seed-2.0-code": 256000,
+    "doubao-seed-2.0-pro": 256000,
+    "doubao-seed-2.0-lite": 256000,
+    "doubao-seed-code": 256000,
+    "minimax-m2.5": 200000,
+    "glm-4.7": 200000,
+    "deepseek-v3.2": 128000,
+    "kimi-k2.5": 256000,
+    "seed-2-0-pro-260328": 256000,
+    "seed-2-0-lite-260228": 256000,
+    "seed-2-0-mini-260215": 256000,
+}
+
+
+def provider_models(provider_id: str) -> List[str]:
+    """Return the full user-facing model catalog for a provider."""
+    return list(PROVIDER_MODEL_CATALOGS.get(provider_id, ()))
+
+
+def _bare_model_name(model_name: str) -> str:
+    value = (model_name or "").strip()
+    if not value:
+        return ""
+    if "/" in value:
+        return value.split("/", 1)[1].strip()
+    return value
+
+
+def is_coding_plan_model(provider_id: str, model_name: str) -> bool:
+    """Return True when a model belongs to the coding-plan catalog."""
+    raw = (model_name or "").strip()
+    bare = _bare_model_name(raw)
+    if provider_id == VOLCENGINE_PROVIDER:
+        return raw in VOLCENGINE_CODING_PLAN_MODEL_REFS or bare in VOLCENGINE_CODING_PLAN_MODELS
+    if provider_id == BYTEPLUS_PROVIDER:
+        return raw in BYTEPLUS_CODING_PLAN_MODEL_REFS or bare in BYTEPLUS_CODING_PLAN_MODELS
+    return False
+
+
+def base_url_for_provider_model(provider_id: str, model_name: str) -> str:
+    """Resolve the source-of-truth base URL for a provider+model pair."""
+    if provider_id == VOLCENGINE_PROVIDER:
+        if is_coding_plan_model(provider_id, model_name):
+            return VOLCENGINE_CODING_PLAN_BASE_URL
+        return VOLCENGINE_STANDARD_BASE_URL
+    if provider_id == BYTEPLUS_PROVIDER:
+        if is_coding_plan_model(provider_id, model_name):
+            return BYTEPLUS_CODING_PLAN_BASE_URL
+        return BYTEPLUS_STANDARD_BASE_URL
+    return ""
+
+
+def model_context_window(model_name: str) -> int | None:
+    """Return a known context window for a model, if specified by the contract."""
+    bare = _bare_model_name(model_name)
+    return MODEL_CONTEXT_WINDOWS.get(bare)
@@ -16,6 +16,7 @@ import json
 import logging
 import os
 import secrets
+import subprocess
 import sys
 import threading
 import time
@@ -114,6 +115,91 @@ def _require_token(request: Request) -> None:
        raise HTTPException(status_code=401, detail="Unauthorized")


+# Accepted Host header values for loopback binds. DNS rebinding attacks
+# point a victim browser at an attacker-controlled hostname (evil.test)
+# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin
+# checks because the browser now considers evil.test and our dashboard
+# "same origin". Validating the Host header at the app layer rejects any
+# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7.
+_LOOPBACK_HOST_VALUES: frozenset = frozenset({
+    "localhost", "127.0.0.1", "::1",
+})
+
+
+def _is_accepted_host(host_header: str, bound_host: str) -> bool:
+    """True if the Host header targets the interface we bound to.
+
+    Accepts:
+    - Exact bound host (with or without port suffix)
+    - Loopback aliases when bound to loopback
+    - Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback,
+      no protection possible at this layer)
+    """
+    if not host_header:
+        return False
+    # Strip port suffix. IPv6 addresses use bracket notation:
+    #   [::1]         — no port
+    #   [::1]:9119    — with port
+    # Plain hosts/v4:
+    #   localhost:9119
+    #   127.0.0.1:9119
+    h = host_header.strip()
+    if h.startswith("["):
+        # IPv6 bracketed — port (if any) follows "]:"
+        close = h.find("]")
+        if close != -1:
+            host_only = h[1:close]  # strip brackets
+        else:
+            host_only = h.strip("[]")
+    else:
+        host_only = h.rsplit(":", 1)[0] if ":" in h else h
+    host_only = host_only.lower()
+
+    # 0.0.0.0 bind means operator explicitly opted into all-interfaces
+    # (requires --insecure per web_server.start_server). No Host-layer
+    # defence can protect that mode; rely on operator network controls.
+    if bound_host in ("0.0.0.0", "::"):
+        return True
+
+    # Loopback bind: accept the loopback names
+    bound_lc = bound_host.lower()
+    if bound_lc in _LOOPBACK_HOST_VALUES:
+        return host_only in _LOOPBACK_HOST_VALUES
+
+    # Explicit non-loopback bind: require exact host match
+    return host_only == bound_lc
+
+
+@app.middleware("http")
+async def host_header_middleware(request: Request, call_next):
+    """Reject requests whose Host header doesn't match the bound interface.
+
+    Defends against DNS rebinding: a victim browser on a localhost
+    dashboard is tricked into fetching from an attacker hostname that
+    TTL-flips to 127.0.0.1. CORS and same-origin checks don't help —
+    the browser now treats the attacker origin as same-origin with the
+    dashboard. Host-header validation at the app layer catches it.
+
+    See GHSA-ppp5-vxwm-4cf7.
+    """
+    # Store the bound host on app.state so this middleware can read it —
+    # set by start_server() at listen time.
+    bound_host = getattr(app.state, "bound_host", None)
+    if bound_host:
+        host_header = request.headers.get("host", "")
+        if not _is_accepted_host(host_header, bound_host):
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "detail": (
+                        "Invalid Host header. Dashboard requests must use "
+                        "the hostname the server was bound to."
+                    ),
+                },
+            )
+    return await call_next(request)
+
+
@app.middleware("http")
 async def auth_middleware(request: Request, call_next):
    """Require the session token on all /api/ routes except the public list."""
@@ -476,6 +562,138 @@ async def get_status():
    }


+# ---------------------------------------------------------------------------
+# Gateway + update actions (invoked from the Status page).
+#
+# Both commands are spawned as detached subprocesses so the HTTP request
+# returns immediately.  stdin is closed (``DEVNULL``) so any stray ``input()``
+# calls fail fast with EOF rather than hanging forever.  stdout/stderr are
+# streamed to a per-action log file under ``~/.hermes/logs/<action>.log`` so
+# the dashboard can tail them back to the user.
+# ---------------------------------------------------------------------------
+
+_ACTION_LOG_DIR: Path = get_hermes_home() / "logs"
+
+# Short ``name`` (from the URL) → absolute log file path.
+_ACTION_LOG_FILES: Dict[str, str] = {
+    "gateway-restart": "gateway-restart.log",
+    "hermes-update": "hermes-update.log",
+}
+
+# ``name`` → most recently spawned Popen handle.  Used so ``status`` can
+# report liveness and exit code without shelling out to ``ps``.
+_ACTION_PROCS: Dict[str, subprocess.Popen] = {}
+
+
+def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen:
+    """Spawn ``hermes <subcommand>`` detached and record the Popen handle.
+
+    Uses the running interpreter's ``hermes_cli.main`` module so the action
+    inherits the same venv/PYTHONPATH the web server is using.
+    """
+    log_file_name = _ACTION_LOG_FILES[name]
+    _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True)
+    log_path = _ACTION_LOG_DIR / log_file_name
+    log_file = open(log_path, "ab", buffering=0)
+    log_file.write(
+        f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode()
+    )
+
+    cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand]
+
+    popen_kwargs: Dict[str, Any] = {
+        "cwd": str(PROJECT_ROOT),
+        "stdin": subprocess.DEVNULL,
+        "stdout": log_file,
+        "stderr": subprocess.STDOUT,
+        "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"},
+    }
+    if sys.platform == "win32":
+        popen_kwargs["creationflags"] = (
+            subprocess.CREATE_NEW_PROCESS_GROUP  # type: ignore[attr-defined]
+            | getattr(subprocess, "DETACHED_PROCESS", 0)
+        )
+    else:
+        popen_kwargs["start_new_session"] = True
+
+    proc = subprocess.Popen(cmd, **popen_kwargs)
+    _ACTION_PROCS[name] = proc
+    return proc
+
+
+def _tail_lines(path: Path, n: int) -> List[str]:
+    """Return the last ``n`` lines of ``path``.  Reads the whole file — fine
+    for our small per-action logs.  Binary-decoded with ``errors='replace'``
+    so log corruption doesn't 500 the endpoint."""
+    if not path.exists():
+        return []
+    try:
+        text = path.read_text(errors="replace")
+    except OSError:
+        return []
+    lines = text.splitlines()
+    return lines[-n:] if n > 0 else lines
+
+
+@app.post("/api/gateway/restart")
+async def restart_gateway():
+    """Kick off a ``hermes gateway restart`` in the background."""
+    try:
+        proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart")
+    except Exception as exc:
+        _log.exception("Failed to spawn gateway restart")
+        raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}")
+    return {
+        "ok": True,
+        "pid": proc.pid,
+        "name": "gateway-restart",
+    }
+
+
+@app.post("/api/hermes/update")
+async def update_hermes():
+    """Kick off ``hermes update`` in the background."""
+    try:
+        proc = _spawn_hermes_action(["update"], "hermes-update")
+    except Exception as exc:
+        _log.exception("Failed to spawn hermes update")
+        raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}")
+    return {
+        "ok": True,
+        "pid": proc.pid,
+        "name": "hermes-update",
+    }
+
+
+@app.get("/api/actions/{name}/status")
+async def get_action_status(name: str, lines: int = 200):
+    """Tail an action log and report whether the process is still running."""
+    log_file_name = _ACTION_LOG_FILES.get(name)
+    if log_file_name is None:
+        raise HTTPException(status_code=404, detail=f"Unknown action: {name}")
+
+    log_path = _ACTION_LOG_DIR / log_file_name
+    tail = _tail_lines(log_path, min(max(lines, 1), 2000))
+
+    proc = _ACTION_PROCS.get(name)
+    if proc is None:
+        running = False
+        exit_code: Optional[int] = None
+        pid: Optional[int] = None
+    else:
+        exit_code = proc.poll()
+        running = exit_code is None
+        pid = proc.pid
+
+    return {
+        "name": name,
+        "running": running,
+        "exit_code": exit_code,
+        "pid": pid,
+        "lines": tail,
+    }
+
+
@app.get("/api/sessions")
 async def get_sessions(limit: int = 20, offset: int = 0):
    try:
@@ -1971,7 +2189,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(reasoning_tokens) as reasoning_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
                   COALESCE(SUM(actual_cost_usd), 0) as actual_cost,
-                   COUNT(*) as sessions
+                   COUNT(*) as sessions,
+                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ?
            GROUP BY day ORDER BY day
        """, (cutoff,))
@@ -1982,7 +2201,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(input_tokens) as input_tokens,
                   SUM(output_tokens) as output_tokens,
                   COALESCE(SUM(estimated_cost_usd), 0) as estimated_cost,
-                   COUNT(*) as sessions
+                   COUNT(*) as sessions,
+                   SUM(COALESCE(api_call_count, 0)) as api_calls
            FROM sessions WHERE started_at > ? AND model IS NOT NULL
            GROUP BY model ORDER BY SUM(input_tokens) + SUM(output_tokens) DESC
        """, (cutoff,))
@@ -1995,7 +2215,8 @@ async def get_usage_analytics(days: int = 30):
                   SUM(reasoning_tokens) as total_reasoning,
                   COALESCE(SUM(estimated_cost_usd), 0) as total_estimated_cost,
                   COALESCE(SUM(actual_cost_usd), 0) as total_actual_cost,
-                   COUNT(*) as total_sessions
+                   COUNT(*) as total_sessions,
+                   SUM(COALESCE(api_call_count, 0)) as total_api_calls
            FROM sessions WHERE started_at > ?
        """, (cutoff,))
        totals = dict(cur3.fetchone())
@@ -2323,13 +2544,15 @@ def start_server(
            "authentication. Only use on trusted networks.", host,
        )

+    # Record the bound host so host_header_middleware can validate incoming
+    # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
+    app.state.bound_host = host
+
    if open_browser:
-        import threading
        import webbrowser

        def _open():
-            import time as _t
-            _t.sleep(1.0)
+            time.sleep(1.0)
            webbrowser.open(f"http://{host}:{port}")

        threading.Thread(target=_open, daemon=True).start()
@@ -31,7 +31,7 @@ T = TypeVar("T")

 DEFAULT_DB_PATH = get_hermes_home() / "state.db"

-SCHEMA_VERSION = 6
+SCHEMA_VERSION = 8

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -65,6 +65,7 @@ CREATE TABLE IF NOT EXISTS sessions (
    cost_source TEXT,
    pricing_version TEXT,
    title TEXT,
+    api_call_count INTEGER DEFAULT 0,
    FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
 );

@@ -80,10 +81,16 @@ CREATE TABLE IF NOT EXISTS messages (
    token_count INTEGER,
    finish_reason TEXT,
    reasoning TEXT,
+    reasoning_content TEXT,
    reasoning_details TEXT,
    codex_reasoning_items TEXT
 );

+CREATE TABLE IF NOT EXISTS state_meta (
+    key TEXT PRIMARY KEY,
+    value TEXT
+);
+
 CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source);
 CREATE INDEX IF NOT EXISTS idx_sessions_parent ON sessions(parent_session_id);
 CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC);
@@ -329,6 +336,26 @@ class SessionDB:
                    except sqlite3.OperationalError:
                        pass  # Column already exists
                cursor.execute("UPDATE schema_version SET version = 6")
+            if current_version < 7:
+                # v7: preserve provider-native reasoning_content separately from
+                # normalized reasoning text. Kimi/Moonshot replay can require
+                # this field on assistant tool-call messages when thinking is on.
+                try:
+                    cursor.execute('ALTER TABLE messages ADD COLUMN "reasoning_content" TEXT')
+                except sqlite3.OperationalError:
+                    pass  # Column already exists
+                cursor.execute("UPDATE schema_version SET version = 7")
+            if current_version < 8:
+                # v8: add api_call_count column to sessions — tracks the number
+                # of individual LLM API calls made within a session (as opposed
+                # to the session count itself).
+                try:
+                    cursor.execute(
+                        'ALTER TABLE sessions ADD COLUMN "api_call_count" INTEGER DEFAULT 0'
+                    )
+                except sqlite3.OperationalError:
+                    pass  # Column already exists
+                cursor.execute("UPDATE schema_version SET version = 8")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -435,6 +462,7 @@ class SessionDB:
        billing_provider: Optional[str] = None,
        billing_base_url: Optional[str] = None,
        billing_mode: Optional[str] = None,
+        api_call_count: int = 0,
        absolute: bool = False,
    ) -> None:
        """Update token counters and backfill model if not already set.
@@ -464,7 +492,8 @@ class SessionDB:
                   billing_provider = COALESCE(billing_provider, ?),
                   billing_base_url = COALESCE(billing_base_url, ?),
                   billing_mode = COALESCE(billing_mode, ?),
-                   model = COALESCE(model, ?)
+                   model = COALESCE(model, ?),
+                   api_call_count = ?
                   WHERE id = ?"""
        else:
            sql = """UPDATE sessions SET
@@ -484,7 +513,8 @@ class SessionDB:
                   billing_provider = COALESCE(billing_provider, ?),
                   billing_base_url = COALESCE(billing_base_url, ?),
                   billing_mode = COALESCE(billing_mode, ?),
-                   model = COALESCE(model, ?)
+                   model = COALESCE(model, ?),
+                   api_call_count = COALESCE(api_call_count, 0) + ?
                   WHERE id = ?"""
        params = (
            input_tokens,
@@ -502,6 +532,7 @@ class SessionDB:
            billing_base_url,
            billing_mode,
            model,
+            api_call_count,
            session_id,
        )
        def _do(conn):
@@ -922,6 +953,7 @@ class SessionDB:
        token_count: int = None,
        finish_reason: str = None,
        reasoning: str = None,
+        reasoning_content: str = None,
        reasoning_details: Any = None,
        codex_reasoning_items: Any = None,
    ) -> int:
@@ -951,8 +983,8 @@ class SessionDB:
            cursor = conn.execute(
                """INSERT INTO messages (session_id, role, content, tool_call_id,
                   tool_calls, tool_name, timestamp, token_count, finish_reason,
-                   reasoning, reasoning_details, codex_reasoning_items)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   reasoning, reasoning_content, reasoning_details, codex_reasoning_items)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    role,
@@ -964,6 +996,7 @@ class SessionDB:
                    token_count,
                    finish_reason,
                    reasoning,
+                    reasoning_content,
                    reasoning_details_json,
                    codex_items_json,
                ),
@@ -1014,7 +1047,7 @@ class SessionDB:
        with self._lock:
            cursor = self._conn.execute(
                "SELECT role, content, tool_call_id, tool_calls, tool_name, "
-                "reasoning, reasoning_details, codex_reasoning_items "
+                "reasoning, reasoning_content, reasoning_details, codex_reasoning_items "
                "FROM messages WHERE session_id = ? ORDER BY timestamp, id",
                (session_id,),
            )
@@ -1038,6 +1071,8 @@ class SessionDB:
            if row["role"] == "assistant":
                if row["reasoning"]:
                    msg["reasoning"] = row["reasoning"]
+                if row["reasoning_content"] is not None:
+                    msg["reasoning_content"] = row["reasoning_content"]
                if row["reasoning_details"]:
                    try:
                        msg["reasoning_details"] = json.loads(row["reasoning_details"])
@@ -1441,3 +1476,116 @@ class SessionDB:
            return len(session_ids)

        return self._execute_write(_do)
+
+    # ── Meta key/value (for scheduler bookkeeping) ──
+
+    def get_meta(self, key: str) -> Optional[str]:
+        """Read a value from the state_meta key/value store."""
+        with self._lock:
+            row = self._conn.execute(
+                "SELECT value FROM state_meta WHERE key = ?", (key,)
+            ).fetchone()
+        if row is None:
+            return None
+        return row["value"] if isinstance(row, sqlite3.Row) else row[0]
+
+    def set_meta(self, key: str, value: str) -> None:
+        """Write a value to the state_meta key/value store."""
+        def _do(conn):
+            conn.execute(
+                "INSERT INTO state_meta (key, value) VALUES (?, ?) "
+                "ON CONFLICT(key) DO UPDATE SET value = excluded.value",
+                (key, value),
+            )
+        self._execute_write(_do)
+
+    # ── Space reclamation ──
+
+    def vacuum(self) -> None:
+        """Run VACUUM to reclaim disk space after large deletes.
+
+        SQLite does not shrink the database file when rows are deleted —
+        freed pages just get reused on the next insert. After a prune that
+        removed hundreds of sessions, the file stays bloated unless we
+        explicitly VACUUM.
+
+        VACUUM rewrites the entire DB, so it's expensive (seconds per
+        100MB) and cannot run inside a transaction. It also acquires an
+        exclusive lock, so callers must ensure no other writers are
+        active. Safe to call at startup before the gateway/CLI starts
+        serving traffic.
+        """
+        # VACUUM cannot be executed inside a transaction.
+        with self._lock:
+            # Best-effort WAL checkpoint first, then VACUUM.
+            try:
+                self._conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
+            except Exception:
+                pass
+            self._conn.execute("VACUUM")
+
+    def maybe_auto_prune_and_vacuum(
+        self,
+        retention_days: int = 90,
+        min_interval_hours: int = 24,
+        vacuum: bool = True,
+    ) -> Dict[str, Any]:
+        """Idempotent auto-maintenance: prune old sessions + optional VACUUM.
+
+        Records the last run timestamp in state_meta so subsequent calls
+        within ``min_interval_hours`` no-op. Designed to be called once at
+        startup from long-lived entrypoints (CLI, gateway, cron scheduler).
+
+        Never raises. On any failure, logs a warning and returns a dict
+        with ``"error"`` set.
+
+        Returns a dict with keys:
+          - ``"skipped"`` (bool) — true if within min_interval_hours of last run
+          - ``"pruned"`` (int)   — number of sessions deleted
+          - ``"vacuumed"`` (bool) — true if VACUUM ran
+          - ``"error"`` (str, optional) — present only on failure
+        """
+        result: Dict[str, Any] = {"skipped": False, "pruned": 0, "vacuumed": False}
+        try:
+            # Skip if another process/call did maintenance recently.
+            last_raw = self.get_meta("last_auto_prune")
+            now = time.time()
+            if last_raw:
+                try:
+                    last_ts = float(last_raw)
+                    if now - last_ts < min_interval_hours * 3600:
+                        result["skipped"] = True
+                        return result
+                except (TypeError, ValueError):
+                    pass  # corrupt meta; treat as no prior run
+
+            pruned = self.prune_sessions(older_than_days=retention_days)
+            result["pruned"] = pruned
+
+            # Only VACUUM if we actually freed rows — VACUUM on a tight DB
+            # is wasted I/O. Threshold keeps small DBs from paying the cost.
+            if vacuum and pruned > 0:
+                try:
+                    self.vacuum()
+                    result["vacuumed"] = True
+                except Exception as exc:
+                    logger.warning("state.db VACUUM failed: %s", exc)
+
+            # Record the attempt even if pruned == 0, so we don't retry
+            # every startup within the min_interval_hours window.
+            self.set_meta("last_auto_prune", str(now))
+
+            if pruned > 0:
+                logger.info(
+                    "state.db auto-maintenance: pruned %d session(s) older than %d days%s",
+                    pruned,
+                    retention_days,
+                    " + VACUUM" if result["vacuumed"] else "",
+                )
+        except Exception as exc:
+            # Maintenance must never block startup. Log and return error marker.
+            logger.warning("state.db auto-maintenance failed: %s", exc)
+            result["error"] = str(exc)
+
+        return result
+
@@ -47,12 +47,19 @@ def _effective_temperature_for_model(
    model: str,
    base_url: Optional[str] = None,
 ) -> Optional[float]:
-    """Return a fixed temperature for models with strict sampling contracts."""
+    """Return a fixed temperature for models with strict sampling contracts.
+
+    Returns ``None`` when the model manages temperature server-side (Kimi);
+    callers must omit the ``temperature`` kwarg entirely in that case.
+    """
    try:
-        from agent.auxiliary_client import _fixed_temperature_for_model
+        from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
    except Exception:
        return None
-    return _fixed_temperature_for_model(model, base_url)
+    result = _fixed_temperature_for_model(model, base_url)
+    if result is OMIT_TEMPERATURE:
+        return None  # caller must omit temperature
+    return result



@@ -7,7 +7,8 @@
    let
      hermes-agent = inputs.self.packages.${system}.default;
      hermes-tui = inputs.self.packages.${system}.tui;
-      packages = [ hermes-agent hermes-tui ];
+      hermes-web = inputs.self.packages.${system}.web;
+      packages = [ hermes-agent hermes-tui hermes-web ];
    in {
      devShells.default = pkgs.mkShell {
        inputsFrom = packages;
@@ -0,0 +1,217 @@
+# nix/lib.nix — Shared helpers for nix stuff
+{ pkgs, npm-lockfile-fix }:
+{
+  # Returns a buildNpmPackage-compatible attrs set that provides:
+  #   patchPhase          — ensures lockfile has exactly one trailing newline
+  #   nativeBuildInputs   — [ updateLockfileScript ] (list, prepend with ++ for more)
+  #   passthru.devShellHook  — stamp-checked npm install + hash auto-update
+  #   passthru.npmLockfile   — metadata for mkFixLockfiles
+  #
+  # NOTE: npmConfigHook runs `diff` between the source lockfile and the
+  # npm-deps cache lockfile. fetchNpmDeps preserves whatever trailing
+  # newlines the lockfile has. The patchPhase normalizes to exactly one
+  # trailing newline so both sides always match.
+  #
+  # Usage:
+  #   npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
+  #   pkgs.buildNpmPackage (npm // { ... } # or:
+  #   pkgs.buildNpmPackage ({ ... } // npm)
+  mkNpmPassthru =
+    {
+      folder, # repo-relative folder with package.json, e.g. "ui-tui"
+      attr, # flake package attr, e.g. "tui"
+      pname, # e.g. "hermes-tui"
+      nixFile ? "nix/${attr}.nix", # defaults to nix/<attr>.nix
+    }:
+    {
+      patchPhase = ''
+        runHook prePatch
+        # Normalize trailing newlines so source and npm-deps always match,
+        # regardless of what fetchNpmDeps preserves.
+        sed -i -z 's/\n*$/\n/' package-lock.json
+
+        # Make npmConfigHook's byte-for-byte diff newline-agnostic by
+        # replacing its hardcoded /nix/store/.../diff with a wrapper that
+        # normalizes trailing newlines on both sides before comparing.
+        mkdir -p "$TMPDIR/bin"
+        cat > "$TMPDIR/bin/diff" << DIFFWRAP
+        #!/bin/sh
+        f1=\$(mktemp) && sed -z 's/\n*$/\n/' "\$1" > "\$f1"
+        f2=\$(mktemp) && sed -z 's/\n*$/\n/' "\$2" > "\$f2"
+        ${pkgs.diffutils}/bin/diff "\$f1" "\$f2" && rc=0 || rc=\$?
+        rm -f "\$f1" "\$f2"
+        exit \$rc
+        DIFFWRAP
+        chmod +x "$TMPDIR/bin/diff"
+        export PATH="$TMPDIR/bin:$PATH"
+
+        runHook postPatch
+      '';
+
+      nativeBuildInputs = [
+        (pkgs.writeShellScriptBin "update_${attr}_lockfile" ''
+          set -euox pipefail
+
+          REPO_ROOT=$(git rev-parse --show-toplevel)
+
+          cd "$REPO_ROOT/${folder}"
+          rm -rf node_modules/
+          npm cache clean --force
+          CI=true npm install
+          ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
+
+          NIX_FILE="$REPO_ROOT/${nixFile}"
+          sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
+          NIX_OUTPUT=$(nix build .#${attr} 2>&1 || true)
+          NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
+          echo got new hash $NEW_HASH
+          sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
+          nix build .#${attr}
+          echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
+        '')
+      ];
+
+      passthru = {
+        devShellHook = pkgs.writeShellScript "npm-dev-hook-${pname}" ''
+          REPO_ROOT=$(git rev-parse --show-toplevel)
+
+          _hermes_npm_stamp() {
+            sha256sum "${folder}/package.json" "${folder}/package-lock.json" \
+              2>/dev/null | sha256sum | awk '{print $1}'
+          }
+          STAMP=".nix-stamps/${pname}"
+          STAMP_VALUE="$(_hermes_npm_stamp)"
+          if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
+            echo "${pname}: installing npm dependencies..."
+            ( cd ${folder} && CI=true npm install --silent --no-fund --no-audit 2>/dev/null )
+
+            # Auto-update the nix hash so it stays in sync with the lockfile
+            echo "${pname}: prefetching npm deps..."
+            NIX_FILE="$REPO_ROOT/${nixFile}"
+            if NEW_HASH=$(${pkgs.lib.getExe pkgs.prefetch-npm-deps} "${folder}/package-lock.json" 2>/dev/null); then
+              sed -i "s|hash = \"sha256-[A-Za-z0-9+/=]+\"|hash = \"$NEW_HASH\";|" "$NIX_FILE"
+              echo "${pname}: updated hash to $NEW_HASH"
+            else
+              echo "${pname}: warning: prefetch failed, run 'nix run .#fix-lockfiles -- --apply' manually" >&2
+            fi
+
+            mkdir -p .nix-stamps
+            _hermes_npm_stamp > "$STAMP"
+          fi
+          unset -f _hermes_npm_stamp
+        '';
+
+        npmLockfile = {
+          inherit attr folder nixFile;
+        };
+      };
+    };
+
+  # Aggregate `fix-lockfiles` bin from a list of packages carrying
+  #   passthru.npmLockfile = { attr; folder; nixFile; };
+  # Invocations:
+  #   fix-lockfiles --check   # exit 1 if any hash is stale
+  #   fix-lockfiles --apply   # rewrite stale hashes in place
+  # Writes machine-readable fields (stale, changed, report) to $GITHUB_OUTPUT
+  # when set, so CI workflows can post a sticky PR comment directly.
+  mkFixLockfiles =
+    {
+      packages, # list of packages with passthru.npmLockfile
+    }:
+    let
+      entries = map (p: p.passthru.npmLockfile) packages;
+      entryArgs = pkgs.lib.concatMapStringsSep " " (e: "\"${e.attr}:${e.folder}:${e.nixFile}\"") entries;
+    in
+    pkgs.writeShellScriptBin "fix-lockfiles" ''
+      set -uox pipefail
+      MODE="''${1:---check}"
+      case "$MODE" in
+        --check|--apply) ;;
+        -h|--help)
+          echo "usage: fix-lockfiles [--check|--apply]"
+          exit 0 ;;
+        *)
+          echo "usage: fix-lockfiles [--check|--apply]" >&2
+          exit 2 ;;
+      esac
+
+      ENTRIES=(${entryArgs})
+
+      REPO_ROOT="$(git rev-parse --show-toplevel)"
+      cd "$REPO_ROOT"
+
+      # When running in GH Actions, emit Markdown links in the report pointing
+      # at the offending line of the nix file (and the lockfile) at the exact
+      # commit that was checked. LINK_SHA should be set by the workflow to the
+      # PR head SHA; falls back to GITHUB_SHA (which on pull_request is the
+      # test-merge commit, still browseable).
+      LINK_SERVER="''${GITHUB_SERVER_URL:-https://github.com}"
+      LINK_REPO="''${GITHUB_REPOSITORY:-}"
+      LINK_SHA="''${LINK_SHA:-''${GITHUB_SHA:-}}"
+
+      STALE=0
+      FIXED=0
+      REPORT=""
+
+      for entry in "''${ENTRIES[@]}"; do
+        IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
+        echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
+        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
+        STATUS=$?
+        if [ "$STATUS" -eq 0 ]; then
+          echo "    ok"
+          continue
+        fi
+
+        NEW_HASH=$(echo "$OUTPUT" | awk '/got:/ {print $2; exit}')
+        if [ -z "$NEW_HASH" ]; then
+          echo "    build failed with no hash mismatch:" >&2
+          echo "$OUTPUT" | tail -40 >&2
+          exit 1
+        fi
+
+        HASH_LINE=$(grep -n 'hash = "sha256-' "$NIX_FILE" | head -1 | cut -d: -f1)
+        OLD_HASH=$(grep -oE 'hash = "sha256-[^"]+"' "$NIX_FILE" | head -1 \
+          | sed -E 's/hash = "(.*)"/\1/')
+        LOCK_FILE="$FOLDER/package-lock.json"
+        echo "    stale: $NIX_FILE:$HASH_LINE $OLD_HASH -> $NEW_HASH"
+        STALE=1
+
+        if [ -n "$LINK_REPO" ] && [ -n "$LINK_SHA" ]; then
+          NIX_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$NIX_FILE#L$HASH_LINE"
+          LOCK_URL="$LINK_SERVER/$LINK_REPO/blob/$LINK_SHA/$LOCK_FILE"
+          REPORT+="- [\`$NIX_FILE:$HASH_LINE\`]($NIX_URL) (\`.#$ATTR\`): \`$OLD_HASH\` → \`$NEW_HASH\` — lockfile: [\`$LOCK_FILE\`]($LOCK_URL)"$'\n'
+        else
+          REPORT+="- \`$NIX_FILE:$HASH_LINE\` (\`.#$ATTR\`): \`$OLD_HASH\` → \`$NEW_HASH\`"$'\n'
+        fi
+
+        if [ "$MODE" = "--apply" ]; then
+          sed -i "s|hash = \"sha256-[^\"]*\";|hash = \"$NEW_HASH\";|" "$NIX_FILE"
+          nix build ".#$ATTR.npmDeps" --no-link --print-build-logs
+          FIXED=1
+          echo "    fixed"
+        fi
+      done
+
+      if [ -n "''${GITHUB_OUTPUT:-}" ]; then
+        {
+          [ "$STALE" -eq 1 ] && echo "stale=true" || echo "stale=false"
+          [ "$FIXED" -eq 1 ] && echo "changed=true" || echo "changed=false"
+          if [ -n "$REPORT" ]; then
+            echo "report<<REPORT_EOF"
+            printf "%s" "$REPORT"
+            echo "REPORT_EOF"
+          fi
+        } >> "$GITHUB_OUTPUT"
+      fi
+
+      if [ "$STALE" -eq 1 ] && [ "$MODE" = "--check" ]; then
+        echo
+        echo "Stale lockfile hashes detected. Run:"
+        echo "  nix run .#fix-lockfiles -- --apply"
+        exit 1
+      fi
+
+      exit 0
+    '';
+}
@@ -8,10 +8,14 @@
        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
      };

-      hermesTui = pkgs.callPackage ./tui.nix {
+      hermesNpmLib = pkgs.callPackage ./lib.nix {
        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
      };

+      hermesTui = pkgs.callPackage ./tui.nix {
+        inherit hermesNpmLib;
+      };
+
      # Import bundled skills, excluding runtime caches
      bundledSkills = pkgs.lib.cleanSourceWith {
        src = ../skills;
@@ -19,7 +23,7 @@
      };

      hermesWeb = pkgs.callPackage ./web.nix {
-        npm-lockfile-fix = inputs'.npm-lockfile-fix.packages.default;
+        inherit hermesNpmLib;
      };

      runtimeDeps = with pkgs; [
@@ -111,6 +115,10 @@

        tui = hermesTui;
        web = hermesWeb;
+
+        fix-lockfiles = hermesNpmLib.mkFixLockfiles {
+          packages = [ hermesTui hermesWeb ];
+        };
      };
    };
 }
@@ -1,18 +1,18 @@
 # nix/tui.nix — Hermes TUI (Ink/React) compiled with tsc and bundled
-{ pkgs, npm-lockfile-fix, ... }:
+{ pkgs, hermesNpmLib, ... }:
 let
  src = ../ui-tui;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-mG3vpgGi4ljt4X3XIf3I/5mIcm+rVTUAmx2DQ6YVA90=";
+    hash = "sha256-RU4qSHgJPMyfRSEJDzkG4+MReDZDc6QbTD2wisa5QE0=";
  };

+  npm = hermesNpmLib.mkNpmPassthru { folder = "ui-tui"; attr = "tui"; pname = "hermes-tui"; };
+
  packageJson = builtins.fromJSON (builtins.readFile (src + "/package.json"));
  version = packageJson.version;
-
-  npmLockHash = builtins.hashString "sha256" (builtins.readFile ../ui-tui/package-lock.json);
 in
-pkgs.buildNpmPackage {
+pkgs.buildNpmPackage (npm // {
  pname = "hermes-tui";
  inherit src npmDeps version;

@@ -37,41 +37,4 @@ pkgs.buildNpmPackage {

    runHook postInstall
  '';
-
-  nativeBuildInputs = [
-    (pkgs.writeShellScriptBin "update_tui_lockfile" ''
-      set -euox pipefail
-
-      # get root of repo
-      REPO_ROOT=$(git rev-parse --show-toplevel)
-
-      # cd into ui-tui and reinstall
-      cd "$REPO_ROOT/ui-tui"
-      rm -rf node_modules/
-      npm cache clean --force
-      CI=true npm install # ci env var to suppress annoying unicode install banner lag
-      ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
-
-      NIX_FILE="$REPO_ROOT/nix/tui.nix"
-      # compute the new hash
-      sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
-      NIX_OUTPUT=$(nix build .#tui 2>&1 || true)
-      NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}') 
-      echo got new hash $NEW_HASH
-      sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
-      nix build .#tui
-      echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
-    '')
-  ];
-
-  passthru.devShellHook = ''
-    STAMP=".nix-stamps/hermes-tui"
-    STAMP_VALUE="${npmLockHash}"
-    if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
-      echo "hermes-tui: installing npm dependencies..."
-      cd ui-tui && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd ..
-      mkdir -p .nix-stamps
-      echo "$STAMP_VALUE" > "$STAMP"
-    fi
-  '';
-}
+})
@@ -1,15 +1,15 @@
 # nix/web.nix — Hermes Web Dashboard (Vite/React) frontend build
-{ pkgs, npm-lockfile-fix, ... }:
+{ pkgs, hermesNpmLib, ... }:
 let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-Y0pOzdFG8BLjfvCLmsvqYpjxFjAQabXp1i7X9W/cCU4=";
+    hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
  };

-  npmLockHash = builtins.hashString "sha256" (builtins.readFile ../web/package-lock.json);
+  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
 in
-pkgs.buildNpmPackage {
+pkgs.buildNpmPackage (npm // {
  pname = "hermes-web";
  version = "0.0.0";
  inherit src npmDeps;
@@ -26,38 +26,4 @@ pkgs.buildNpmPackage {
    cp -r dist $out
    runHook postInstall
  '';
-
-  nativeBuildInputs = [
-    (pkgs.writeShellScriptBin "update_web_lockfile" ''
-      set -euox pipefail
-
-      REPO_ROOT=$(git rev-parse --show-toplevel)
-
-      cd "$REPO_ROOT/web"
-      rm -rf node_modules/
-      npm cache clean --force
-      CI=true npm install
-      ${pkgs.lib.getExe npm-lockfile-fix} ./package-lock.json
-
-      NIX_FILE="$REPO_ROOT/nix/web.nix"
-      sed -i "s/hash = \"[^\"]*\";/hash = \"\";/" $NIX_FILE
-      NIX_OUTPUT=$(nix build .#web 2>&1 || true)
-      NEW_HASH=$(echo "$NIX_OUTPUT" | grep 'got:' | awk '{print $2}')
-      echo got new hash $NEW_HASH
-      sed -i "s|hash = \"[^\"]*\";|hash = \"$NEW_HASH\";|" $NIX_FILE
-      nix build .#web
-      echo "Updated npm hash in $NIX_FILE to $NEW_HASH"
-    '')
-  ];
-
-  passthru.devShellHook = ''
-    STAMP=".nix-stamps/hermes-web"
-    STAMP_VALUE="${npmLockHash}"
-    if [ ! -f "$STAMP" ] || [ "$(cat "$STAMP")" != "$STAMP_VALUE" ]; then
-      echo "hermes-web: installing npm dependencies..."
-      cd web && CI=true npm install --silent --no-fund --no-audit 2>/dev/null && cd ..
-      mkdir -p .nix-stamps
-      echo "$STAMP_VALUE" > "$STAMP"
-    fi
-  '';
-}
+})
@@ -0,0 +1,3 @@
+# Dogfood — Advanced QA & Testing Skills
+
+Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses.
@@ -0,0 +1,190 @@
+---
+name: adversarial-ux-test
+description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only.
+version: 1.0.0
+author: Omni @ Comelse
+license: MIT
+metadata:
+  hermes:
+    tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing]
+    related_skills: [dogfood]
+---
+
+# Adversarial UX Test
+
+Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise.
+
+Think of it as an automated "mom test" — but angry.
+
+## Why This Works
+
+Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches:
+- Confusing terminology that makes sense to developers but not users
+- Too many steps to accomplish basic tasks
+- Missing onboarding or "aha moments"
+- Accessibility issues (font size, contrast, click targets)
+- Cold-start problems (empty states, no demo content)
+- Paywall/signup friction that kills conversion
+
+The **pragmatism filter** (Phase 3) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs.
+
+## How to Use
+
+Tell the agent:
+```
+"Run an adversarial UX test on [URL]"
+"Be a grumpy [persona type] and test [app name]"
+"Do an asshole user test on my staging site"
+```
+
+You can provide a persona or let the agent generate one based on your product's target audience.
+
+## Step 1: Define the Persona
+
+If no persona is provided, generate one by answering:
+
+1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way")
+2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email)
+3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list)
+4. **What would make them give up?** (too many clicks, jargon, slow, confusing)
+5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing)
+
+### Good Persona Example
+> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords.
+
+### Bad Persona Example
+> "A user who doesn't like the app" — too vague, no constraints, no voice.
+
+The persona must be **specific enough to stay in character** for 20 minutes of testing.
+
+## Step 2: Become the Asshole (Browse as the Persona)
+
+1. Read any available project docs for app context and URLs
+2. **Fully inhabit the persona** — their frustrations, limitations, goals
+3. Navigate to the app using browser tools
+4. **Attempt the persona's ACTUAL TASKS** (not a feature tour):
+   - Can they do what they came to do?
+   - How many clicks/screens to accomplish it?
+   - What confuses them?
+   - What makes them angry?
+   - Where do they get lost?
+   - What would make them give up and go back to their old way?
+
+5. Test these friction categories:
+   - **First impression** — would they even bother past the landing page?
+   - **Core workflow** — the ONE thing they need to do most often
+   - **Error recovery** — what happens when they do something wrong?
+   - **Readability** — text size, contrast, information density
+   - **Speed** — does it feel faster than their current method?
+   - **Terminology** — any jargon they wouldn't understand?
+   - **Navigation** — can they find their way back? do they know where they are?
+
+6. Take screenshots of every pain point
+7. Check browser console for JS errors on every page
+
+## Step 3: The Rant (Write Feedback in Character)
+
+Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting.
+
+```
+[PERSONA NAME]'s Review of [PRODUCT]
+
+Overall: [Would they keep using it? Yes/No/Maybe with conditions]
+
+THE GOOD (grudging admission):
+- [things even they have to admit work]
+
+THE BAD (legitimate UX issues):
+- [real problems that would stop them from using the product]
+
+THE UGLY (showstoppers):
+- [things that would make them uninstall/cancel immediately]
+
+SPECIFIC COMPLAINTS:
+1. [Page/feature]: "[quote in persona voice]" — [what happened, expected]
+2. ...
+
+VERDICT: "[one-line persona quote summarizing their experience]"
+```
+
+## Step 4: The Pragmatism Filter (Critical — Do Not Skip)
+
+Step OUT of the persona. Evaluate each complaint as a product person:
+
+- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it.
+- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it.
+- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it.
+- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it.
+
+### Filter Criteria
+1. Would a 35-year-old competent-but-busy user have the same complaint? → RED
+2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED
+3. Is this "I want it to work like paper" resistance to digital? → WHITE
+4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED
+5. Would fixing this add complexity for the 80% who are fine? → WHITE
+6. Does the complaint reveal a missing onboarding moment? → GREEN
+
+**This filter is MANDATORY.** Never ship raw persona complaints as tickets.
+
+## Step 5: Create Tickets
+
+For **RED** and **GREEN** items only:
+- Clear, actionable title
+- Include the persona's verbatim quote (entertaining + memorable)
+- The real UX issue underneath (objective)
+- A suggested fix (actionable)
+- Tag/label: "ux-review"
+
+For **YELLOW** items: one catch-all ticket with all notes.
+
+**WHITE** items appear in the report only. No tickets.
+
+**Max 10 tickets per session** — focus on the worst issues.
+
+## Step 6: Report
+
+Deliver:
+1. The persona rant (Step 3) — entertaining and visceral
+2. The filtered assessment (Step 4) — pragmatic and actionable
+3. Tickets created (Step 5) — with links
+4. Screenshots of key issues
+
+## Tips
+
+- **One persona per session.** Don't mix perspectives.
+- **Stay in character during Steps 2-3.** Break character only at Step 4.
+- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages.
+- **Empty states are gold.** New user experience reveals the most friction.
+- **The best findings are RED items the persona found accidentally** while trying to do something else.
+- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways.
+- **Run this before demos, launches, or after shipping a batch of features.**
+- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives.
+- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona.
+- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain.
+- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage.
+- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level.
+
+## Example Personas by Industry
+
+These are starting points — customize for your specific product:
+
+| Product Type | Persona | Age | Key Trait |
+|-------------|---------|-----|-----------|
+| CRM | Retirement home director | 68 | Filing cabinet is the current CRM |
+| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper |
+| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups |
+| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes |
+| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions |
+| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls |
+| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer |
+| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders |
+
+## Rules
+
+- Stay in character during Steps 2-3
+- Be genuinely mean but fair — find real problems, not manufactured ones
+- The pragmatism filter (Step 4) is **MANDATORY**
+- Screenshots required for every complaint
+- Max 10 tickets per session
+- Test on staging/deployed app, not local dev
+- One persona, one session, one report
@@ -0,0 +1,5 @@
+# Web Development
+
+Optional skills for client-side web development workflows — embedding agents, copilots, and AI-native UX patterns into user-facing web apps.
+
+These are distinct from Hermes' own browser automation (Browserbase, Camofox), which operate *on* websites from outside. Web-development skills here help users build *into* their own websites.
@@ -0,0 +1,189 @@
+---
+name: page-agent
+description: Embed alibaba/page-agent into your own web application — a pure-JavaScript in-page GUI agent that ships as a single <script> tag or npm package and lets end-users of your site drive the UI with natural language ("click login, fill username as John"). No Python, no headless browser, no extension required. Use this skill when the user is a web developer who wants to add an AI copilot to their SaaS / admin panel / B2B tool, make a legacy web app accessible via natural language, or evaluate page-agent against a local (Ollama) or cloud (Qwen / OpenAI / OpenRouter) LLM. NOT for server-side browser automation — point those users to Hermes' built-in browser tool instead.
+version: 1.0.0
+author: Hermes Agent
+license: MIT
+metadata:
+  hermes:
+    tags: [web, javascript, agent, browser, gui, alibaba, embed, copilot, saas]
+    category: web-development
+---
+
+# page-agent
+
+alibaba/page-agent (https://github.com/alibaba/page-agent, 17k+ stars, MIT) is an in-page GUI agent written in TypeScript. It lives inside a webpage, reads the DOM as text (no screenshots, no multi-modal LLM), and executes natural-language instructions like "click the login button, then fill username as John" against the current page. Pure client-side — the host site just includes a script and passes an OpenAI-compatible LLM endpoint.
+
+## When to use this skill
+
+Load this skill when a user wants to:
+
+- **Ship an AI copilot inside their own web app** (SaaS, admin panel, B2B tool, ERP, CRM) — "users on my dashboard should be able to type 'create invoice for Acme Corp and email it' instead of clicking through five screens"
+- **Modernize a legacy web app** without rewriting the frontend — page-agent drops on top of existing DOM
+- **Add accessibility via natural language** — voice / screen-reader users drive the UI by describing what they want
+- **Demo or evaluate page-agent** against a local (Ollama) or hosted (Qwen, OpenAI, OpenRouter) LLM
+- **Build interactive training / product demos** — let an AI walk a user through "how to submit an expense report" live in the real UI
+
+## When NOT to use this skill
+
+- User wants **Hermes itself to drive a browser** → use Hermes' built-in browser tool (Browserbase / Camofox). page-agent is the *opposite* direction.
+- User wants **cross-tab automation without embedding** → use Playwright, browser-use, or the page-agent Chrome extension
+- User needs **visual grounding / screenshots** → page-agent is text-DOM only; use a multimodal browser agent instead
+
+## Prerequisites
+
+- Node 22.13+ or 24+, npm 10+ (docs claim 11+ but 10.9 works fine)
+- An OpenAI-compatible LLM endpoint: Qwen (DashScope), OpenAI, Ollama, OpenRouter, or anything speaking `/v1/chat/completions`
+- Browser with devtools (for debugging)
+
+## Path 1 — 30-second demo via CDN (no install)
+
+Fastest way to see it work. Uses alibaba's free testing LLM proxy — **for evaluation only**, subject to their terms.
+
+Add to any HTML page (or paste into the devtools console as a bookmarklet):
+
+```html
+<script src="https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js" crossorigin="true"></script>
+```
+
+A panel appears. Type an instruction. Done.
+
+Bookmarklet form (drop into bookmarks bar, click on any page):
+
+```javascript
+javascript:(function(){var s=document.createElement('script');s.src='https://cdn.jsdelivr.net/npm/page-agent@1.8.0/dist/iife/page-agent.demo.js';document.head.appendChild(s);})();
+```
+
+## Path 2 — npm install into your own web app (production use)
+
+Inside an existing web project (React / Vue / Svelte / plain):
+
+```bash
+npm install page-agent
+```
+
+Wire it up with your own LLM endpoint — **never ship the demo CDN to real users**:
+
+```javascript
+import { PageAgent } from 'page-agent'
+
+const agent = new PageAgent({
+    model: 'qwen3.5-plus',
+    baseURL: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
+    apiKey: process.env.LLM_API_KEY,   // never hardcode
+    language: 'en-US',
+})
+
+// Show the panel for end users:
+agent.panel.show()
+
+// Or drive it programmatically:
+await agent.execute('Click submit button, then fill username as John')
+```
+
+Provider examples (any OpenAI-compatible endpoint works):
+
+| Provider | `baseURL` | `model` |
+|----------|-----------|---------|
+| Qwen / DashScope | `https://dashscope.aliyuncs.com/compatible-mode/v1` | `qwen3.5-plus` |
+| OpenAI | `https://api.openai.com/v1` | `gpt-4o-mini` |
+| Ollama (local) | `http://localhost:11434/v1` | `qwen3:14b` |
+| OpenRouter | `https://openrouter.ai/api/v1` | `anthropic/claude-sonnet-4.6` |
+
+**Key config fields** (passed to `new PageAgent({...})`):
+
+- `model`, `baseURL`, `apiKey` — LLM connection
+- `language` — UI language (`en-US`, `zh-CN`, etc.)
+- Allowlist and data-masking hooks exist for locking down what the agent can touch — see https://alibaba.github.io/page-agent/ for the full option list
+
+**Security.** Don't put your `apiKey` in client-side code for a real deployment — proxy LLM calls through your backend and point `baseURL` at your proxy. The demo CDN exists because alibaba runs that proxy for evaluation.
+
+## Path 3 — clone the source repo (contributing, or hacking on it)
+
+Use this when the user wants to modify page-agent itself, test it against arbitrary sites via a local IIFE bundle, or develop the browser extension.
+
+```bash
+git clone https://github.com/alibaba/page-agent.git
+cd page-agent
+npm ci              # exact lockfile install (or `npm i` to allow updates)
+```
+
+Create `.env` in the repo root with an LLM endpoint. Example:
+
+```
+LLM_MODEL_NAME=gpt-4o-mini
+LLM_API_KEY=sk-...
+LLM_BASE_URL=https://api.openai.com/v1
+```
+
+Ollama flavor:
+
+```
+LLM_BASE_URL=http://localhost:11434/v1
+LLM_API_KEY=NA
+LLM_MODEL_NAME=qwen3:14b
+```
+
+Common commands:
+
+```bash
+npm start           # docs/website dev server
+npm run build       # build every package
+npm run dev:demo    # serve IIFE bundle at http://localhost:5174/page-agent.demo.js
+npm run dev:ext     # develop the browser extension (WXT + React)
+npm run build:ext   # build the extension
+```
+
+**Test on any website** using the local IIFE bundle. Add this bookmarklet:
+
+```javascript
+javascript:(function(){var s=document.createElement('script');s.src=`http://localhost:5174/page-agent.demo.js?t=${Math.random()}`;s.onload=()=>console.log('PageAgent ready!');document.head.appendChild(s);})();
+```
+
+Then: `npm run dev:demo`, click the bookmarklet on any page, and the local build injects. Auto-rebuilds on save.
+
+**Warning:** your `.env` `LLM_API_KEY` is inlined into the IIFE bundle during dev builds. Don't share the bundle. Don't commit it. Don't paste the URL into Slack. (Verified: grepping the public dev bundle returns the literal values from `.env`.)
+
+## Repo layout (Path 3)
+
+Monorepo with npm workspaces. Key packages:
+
+| Package | Path | Purpose |
+|---------|------|---------|
+| `page-agent` | `packages/page-agent/` | Main entry with UI panel |
+| `@page-agent/core` | `packages/core/` | Core agent logic, no UI |
+| `@page-agent/mcp` | `packages/mcp/` | MCP server (beta) |
+| — | `packages/llms/` | LLM client |
+| — | `packages/page-controller/` | DOM ops + visual feedback |
+| — | `packages/ui/` | Panel + i18n |
+| — | `packages/extension/` | Chrome/Firefox extension |
+| — | `packages/website/` | Docs + landing site |
+
+## Verifying it works
+
+After Path 1 or Path 2:
+1. Open the page in a browser with devtools open
+2. You should see a floating panel. If not, check the console for errors (most common: CORS on the LLM endpoint, wrong `baseURL`, or a bad API key)
+3. Type a simple instruction matching something visible on the page ("click the Login link")
+4. Watch the Network tab — you should see a request to your `baseURL`
+
+After Path 3:
+1. `npm run dev:demo` prints `Accepting connections at http://localhost:5174`
+2. `curl -I http://localhost:5174/page-agent.demo.js` returns `HTTP/1.1 200 OK` with `Content-Type: application/javascript`
+3. Click the bookmarklet on any site; panel appears
+
+## Pitfalls
+
+- **Demo CDN in production** — don't. It's rate-limited, uses alibaba's free proxy, and their terms forbid production use.
+- **API key exposure** — any key passed to `new PageAgent({apiKey: ...})` ships in your JS bundle. Always proxy through your own backend for real deployments.
+- **Non-OpenAI-compatible endpoints** fail silently or with cryptic errors. If your provider needs native Anthropic/Gemini formatting, use an OpenAI-compatibility proxy (LiteLLM, OpenRouter) in front.
+- **CSP blocks** — sites with strict Content-Security-Policy may refuse to load the CDN script or disallow inline eval. In that case, self-host from your origin.
+- **Restart dev server** after editing `.env` in Path 3 — Vite only reads env at startup.
+- **Node version** — the repo declares `^22.13.0 || >=24`. Node 20 will fail `npm ci` with engine errors.
+- **npm 10 vs 11** — docs say npm 11+; npm 10.9 actually works fine.
+
+## Reference
+
+- Repo: https://github.com/alibaba/page-agent
+- Docs: https://alibaba.github.io/page-agent/
+- License: MIT (built on browser-use's DOM processing internals, Copyright 2024 Gregor Zunic)
@@ -1069,6 +1069,7 @@
        }
      ],
      "license": "MIT",
+      "peer": true,
      "dependencies": {
        "baseline-browser-mapping": "^2.10.12",
        "caniuse-lite": "^1.0.30001782",
@@ -3911,6 +3912,7 @@
      "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz",
      "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==",
      "license": "Apache-2.0",
+      "peer": true,
      "dependencies": {
        "playwright-core": "1.59.1"
      },
@@ -3929,6 +3931,7 @@
      "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz",
      "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==",
      "license": "Apache-2.0",
+      "peer": true,
      "bin": {
        "playwright-core": "cli.js"
      },
@@ -0,0 +1,303 @@
+"""OpenAI image generation backend.
+
+Exposes OpenAI's ``gpt-image-2`` model at three quality tiers as an
+:class:`ImageGenProvider` implementation. The tiers are implemented as
+three virtual model IDs so the ``hermes tools`` model picker and the
+``image_gen.model`` config key behave like any other multi-model backend:
+
+    gpt-image-2-low     ~15s   fastest, good for iteration
+    gpt-image-2-medium  ~40s   default — balanced
+    gpt-image-2-high    ~2min  slowest, highest fidelity
+
+All three hit the same underlying API model (``gpt-image-2``) with a
+different ``quality`` parameter. Output is base64 JSON → saved under
+``$HERMES_HOME/cache/images/``.
+
+Selection precedence (first hit wins):
+
+1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
+2. ``image_gen.openai.model`` in ``config.yaml``
+3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs)
+4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium``
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Dict, List, Optional, Tuple
+
+from agent.image_gen_provider import (
+    DEFAULT_ASPECT_RATIO,
+    ImageGenProvider,
+    error_response,
+    resolve_aspect_ratio,
+    save_b64_image,
+    success_response,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Model catalog
+# ---------------------------------------------------------------------------
+#
+# All three IDs resolve to the same underlying API model with a different
+# ``quality`` setting. ``api_model`` is what gets sent to OpenAI;
+# ``quality`` is the knob that changes generation time and output fidelity.
+
+API_MODEL = "gpt-image-2"
+
+_MODELS: Dict[str, Dict[str, Any]] = {
+    "gpt-image-2-low": {
+        "display": "GPT Image 2 (Low)",
+        "speed": "~15s",
+        "strengths": "Fast iteration, lowest cost",
+        "quality": "low",
+    },
+    "gpt-image-2-medium": {
+        "display": "GPT Image 2 (Medium)",
+        "speed": "~40s",
+        "strengths": "Balanced — default",
+        "quality": "medium",
+    },
+    "gpt-image-2-high": {
+        "display": "GPT Image 2 (High)",
+        "speed": "~2min",
+        "strengths": "Highest fidelity, strongest prompt adherence",
+        "quality": "high",
+    },
+}
+
+DEFAULT_MODEL = "gpt-image-2-medium"
+
+_SIZES = {
+    "landscape": "1536x1024",
+    "square": "1024x1024",
+    "portrait": "1024x1536",
+}
+
+
+def _load_openai_config() -> Dict[str, Any]:
+    """Read ``image_gen`` from config.yaml (returns {} on any failure)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        return section if isinstance(section, dict) else {}
+    except Exception as exc:
+        logger.debug("Could not load image_gen config: %s", exc)
+        return {}
+
+
+def _resolve_model() -> Tuple[str, Dict[str, Any]]:
+    """Decide which tier to use and return ``(model_id, meta)``."""
+    env_override = os.environ.get("OPENAI_IMAGE_MODEL")
+    if env_override and env_override in _MODELS:
+        return env_override, _MODELS[env_override]
+
+    cfg = _load_openai_config()
+    openai_cfg = cfg.get("openai") if isinstance(cfg.get("openai"), dict) else {}
+    candidate: Optional[str] = None
+    if isinstance(openai_cfg, dict):
+        value = openai_cfg.get("model")
+        if isinstance(value, str) and value in _MODELS:
+            candidate = value
+    if candidate is None:
+        top = cfg.get("model")
+        if isinstance(top, str) and top in _MODELS:
+            candidate = top
+
+    if candidate is not None:
+        return candidate, _MODELS[candidate]
+
+    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
+
+
+# ---------------------------------------------------------------------------
+# Provider
+# ---------------------------------------------------------------------------
+
+
+class OpenAIImageGenProvider(ImageGenProvider):
+    """OpenAI ``images.generate`` backend — gpt-image-2 at low/medium/high."""
+
+    @property
+    def name(self) -> str:
+        return "openai"
+
+    @property
+    def display_name(self) -> str:
+        return "OpenAI"
+
+    def is_available(self) -> bool:
+        if not os.environ.get("OPENAI_API_KEY"):
+            return False
+        try:
+            import openai  # noqa: F401
+        except ImportError:
+            return False
+        return True
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        return [
+            {
+                "id": model_id,
+                "display": meta["display"],
+                "speed": meta["speed"],
+                "strengths": meta["strengths"],
+                "price": "varies",
+            }
+            for model_id, meta in _MODELS.items()
+        ]
+
+    def default_model(self) -> Optional[str]:
+        return DEFAULT_MODEL
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        return {
+            "name": "OpenAI",
+            "badge": "paid",
+            "tag": "gpt-image-2 at low/medium/high quality tiers",
+            "env_vars": [
+                {
+                    "key": "OPENAI_API_KEY",
+                    "prompt": "OpenAI API key",
+                    "url": "https://platform.openai.com/api-keys",
+                },
+            ],
+        }
+
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        prompt = (prompt or "").strip()
+        aspect = resolve_aspect_ratio(aspect_ratio)
+
+        if not prompt:
+            return error_response(
+                error="Prompt is required and must be a non-empty string",
+                error_type="invalid_argument",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        if not os.environ.get("OPENAI_API_KEY"):
+            return error_response(
+                error=(
+                    "OPENAI_API_KEY not set. Run `hermes tools` → Image "
+                    "Generation → OpenAI to configure, or `hermes setup` "
+                    "to add the key."
+                ),
+                error_type="auth_required",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        try:
+            import openai
+        except ImportError:
+            return error_response(
+                error="openai Python package not installed (pip install openai)",
+                error_type="missing_dependency",
+                provider="openai",
+                aspect_ratio=aspect,
+            )
+
+        tier_id, meta = _resolve_model()
+        size = _SIZES.get(aspect, _SIZES["square"])
+
+        # gpt-image-2 returns b64_json unconditionally and REJECTS
+        # ``response_format`` as an unknown parameter. Don't send it.
+        payload: Dict[str, Any] = {
+            "model": API_MODEL,
+            "prompt": prompt,
+            "size": size,
+            "n": 1,
+            "quality": meta["quality"],
+        }
+
+        try:
+            client = openai.OpenAI()
+            response = client.images.generate(**payload)
+        except Exception as exc:
+            logger.debug("OpenAI image generation failed", exc_info=True)
+            return error_response(
+                error=f"OpenAI image generation failed: {exc}",
+                error_type="api_error",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        data = getattr(response, "data", None) or []
+        if not data:
+            return error_response(
+                error="OpenAI returned no image data",
+                error_type="empty_response",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        first = data[0]
+        b64 = getattr(first, "b64_json", None)
+        url = getattr(first, "url", None)
+        revised_prompt = getattr(first, "revised_prompt", None)
+
+        if b64:
+            try:
+                saved_path = save_b64_image(b64, prefix=f"openai_{tier_id}")
+            except Exception as exc:
+                return error_response(
+                    error=f"Could not save image to cache: {exc}",
+                    error_type="io_error",
+                    provider="openai",
+                    model=tier_id,
+                    prompt=prompt,
+                    aspect_ratio=aspect,
+                )
+            image_ref = str(saved_path)
+        elif url:
+            # Defensive — gpt-image-2 returns b64 today, but fall back
+            # gracefully if the API ever changes.
+            image_ref = url
+        else:
+            return error_response(
+                error="OpenAI response contained neither b64_json nor URL",
+                error_type="empty_response",
+                provider="openai",
+                model=tier_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        extra: Dict[str, Any] = {"size": size, "quality": meta["quality"]}
+        if revised_prompt:
+            extra["revised_prompt"] = revised_prompt
+
+        return success_response(
+            image=image_ref,
+            model=tier_id,
+            prompt=prompt,
+            aspect_ratio=aspect,
+            provider="openai",
+            extra=extra,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+
+def register(ctx) -> None:
+    """Plugin entry point — wire ``OpenAIImageGenProvider`` into the registry."""
+    ctx.register_image_gen_provider(OpenAIImageGenProvider())
@@ -0,0 +1,7 @@
+name: openai
+version: 1.0.0
+description: "OpenAI image generation backend (gpt-image-2). Saves generated images to $HERMES_HOME/cache/images/."
+author: NousResearch
+kind: backend
+requires_env:
+  - OPENAI_API_KEY
@@ -84,7 +84,10 @@ Config file: `~/.hermes/hindsight/config.json`
 | `retain_async` | `true` | Process retain asynchronously on the Hindsight server |
 | `retain_every_n_turns` | `1` | Retain every N turns (1 = every turn) |
 | `retain_context` | `conversation between Hermes Agent and the User` | Context label for retained memories |
-| `tags` | — | Tags applied when storing memories |
+| `retain_tags` | — | Default tags applied to retained memories; merged with per-call tool tags |
+| `retain_source` | — | Optional `metadata.source` attached to retained memories |
+| `retain_user_prefix` | `User` | Label used before user turns in auto-retained transcripts |
+| `retain_assistant_prefix` | `Assistant` | Label used before assistant turns in auto-retained transcripts |

 ### Integration

@@ -113,7 +116,7 @@ Available in `hybrid` and `tools` memory modes:

 | Tool | Description |
 |------|-------------|
-| `hindsight_retain` | Store information with auto entity extraction |
+| `hindsight_retain` | Store information with auto entity extraction; supports optional per-call `tags` |
 | `hindsight_recall` | Multi-strategy search (semantic + entity graph) |
 | `hindsight_reflect` | Cross-memory synthesis (LLM-powered) |

@@ -6,11 +6,15 @@ retrieval. Supports cloud (API key) and local modes.
 Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.

 Config via environment variables:
-  HINDSIGHT_API_KEY   — API key for Hindsight Cloud
-  HINDSIGHT_BANK_ID   — memory bank identifier (default: hermes)
-  HINDSIGHT_BUDGET    — recall budget: low/mid/high (default: mid)
-  HINDSIGHT_API_URL   — API endpoint
-  HINDSIGHT_MODE      — cloud or local (default: cloud)
+  HINDSIGHT_API_KEY                — API key for Hindsight Cloud
+  HINDSIGHT_BANK_ID                — memory bank identifier (default: hermes)
+  HINDSIGHT_BUDGET                 — recall budget: low/mid/high (default: mid)
+  HINDSIGHT_API_URL                — API endpoint
+  HINDSIGHT_MODE                   — cloud or local (default: cloud)
+  HINDSIGHT_RETAIN_TAGS            — comma-separated tags attached to retained memories
+  HINDSIGHT_RETAIN_SOURCE          — metadata source value attached to retained memories
+  HINDSIGHT_RETAIN_USER_PREFIX     — label used before user turns in retained transcripts
+  HINDSIGHT_RETAIN_ASSISTANT_PREFIX — label used before assistant turns in retained transcripts

 Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
 ~/.hindsight/config.json (legacy, shared) for backward compatibility.
@@ -24,7 +28,7 @@ import logging
 import os
 import threading

-from hermes_constants import get_hermes_home
+from datetime import datetime, timezone
 from typing import Any, Dict, List

 from agent.memory_provider import MemoryProvider
@@ -99,6 +103,11 @@ RETAIN_SCHEMA = {
        "properties": {
            "content": {"type": "string", "description": "The information to store."},
            "context": {"type": "string", "description": "Short label (e.g. 'user preference', 'project decision')."},
+            "tags": {
+                "type": "array",
+                "items": {"type": "string"},
+                "description": "Optional per-call tags to merge with configured default retain tags.",
+            },
        },
        "required": ["content"],
    },
@@ -168,6 +177,10 @@ def _load_config() -> dict:
    return {
        "mode": os.environ.get("HINDSIGHT_MODE", "cloud"),
        "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""),
+        "retain_tags": os.environ.get("HINDSIGHT_RETAIN_TAGS", ""),
+        "retain_source": os.environ.get("HINDSIGHT_RETAIN_SOURCE", ""),
+        "retain_user_prefix": os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User"),
+        "retain_assistant_prefix": os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant"),
        "banks": {
            "hermes": {
                "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"),
@@ -178,6 +191,48 @@ def _load_config() -> dict:
    }


+def _normalize_retain_tags(value: Any) -> List[str]:
+    """Normalize tag config/tool values to a deduplicated list of strings."""
+    if value is None:
+        return []
+
+    raw_items: list[Any]
+    if isinstance(value, list):
+        raw_items = value
+    elif isinstance(value, str):
+        text = value.strip()
+        if not text:
+            return []
+        if text.startswith("["):
+            try:
+                parsed = json.loads(text)
+            except Exception:
+                parsed = None
+            if isinstance(parsed, list):
+                raw_items = parsed
+            else:
+                raw_items = text.split(",")
+        else:
+            raw_items = text.split(",")
+    else:
+        raw_items = [value]
+
+    normalized = []
+    seen = set()
+    for item in raw_items:
+        tag = str(item).strip()
+        if not tag or tag in seen:
+            continue
+        seen.add(tag)
+        normalized.append(tag)
+    return normalized
+
+
+def _utc_timestamp() -> str:
+    """Return current UTC timestamp in ISO-8601 with milliseconds and Z suffix."""
+    return datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z")
+
+
 # ---------------------------------------------------------------------------
 # MemoryProvider implementation
 # ---------------------------------------------------------------------------
@@ -195,6 +250,19 @@ class HindsightMemoryProvider(MemoryProvider):
        self._llm_base_url = ""
        self._memory_mode = "hybrid"  # "context", "tools", or "hybrid"
        self._prefetch_method = "recall"  # "recall" or "reflect"
+        self._retain_tags: List[str] = []
+        self._retain_source = ""
+        self._retain_user_prefix = "User"
+        self._retain_assistant_prefix = "Assistant"
+        self._platform = ""
+        self._user_id = ""
+        self._user_name = ""
+        self._chat_id = ""
+        self._chat_name = ""
+        self._chat_type = ""
+        self._thread_id = ""
+        self._agent_identity = ""
+        self._turn_index = 0
        self._client = None
        self._prefetch_result = ""
        self._prefetch_lock = threading.Lock()
@@ -210,6 +278,7 @@ class HindsightMemoryProvider(MemoryProvider):
        # Retain controls
        self._auto_retain = True
        self._retain_every_n_turns = 1
+        self._retain_async = True
        self._retain_context = "conversation between Hermes Agent and the User"
        self._turn_counter = 0
        self._session_turns: list[str] = []  # accumulates ALL turns for the session
@@ -224,7 +293,6 @@ class HindsightMemoryProvider(MemoryProvider):
        # Bank
        self._bank_mission = ""
        self._bank_retain_mission: str | None = None
-        self._retain_async = True

    @property
    def name(self) -> str:
@@ -423,7 +491,10 @@ class HindsightMemoryProvider(MemoryProvider):
            {"key": "recall_budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
            {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]},
            {"key": "recall_prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
-            {"key": "tags", "description": "Tags applied when storing memories (comma-separated)", "default": ""},
+            {"key": "retain_tags", "description": "Default tags applied to retained memories (comma-separated)", "default": ""},
+            {"key": "retain_source", "description": "Metadata source value attached to retained memories", "default": ""},
+            {"key": "retain_user_prefix", "description": "Label used before user turns in retained transcripts", "default": "User"},
+            {"key": "retain_assistant_prefix", "description": "Label used before assistant turns in retained transcripts", "default": "Assistant"},
            {"key": "recall_tags", "description": "Tags to filter when searching memories (comma-separated)", "default": ""},
            {"key": "recall_tags_match", "description": "Tag matching mode for recall", "default": "any", "choices": ["any", "all", "any_strict", "all_strict"]},
            {"key": "auto_recall", "description": "Automatically recall memories before each turn", "default": True},
@@ -467,7 +538,7 @@ class HindsightMemoryProvider(MemoryProvider):
        return self._client

    def initialize(self, session_id: str, **kwargs) -> None:
-        self._session_id = session_id
+        self._session_id = str(session_id or "").strip()

        # Check client version and auto-upgrade if needed
        try:
@@ -496,6 +567,16 @@ class HindsightMemoryProvider(MemoryProvider):
            pass  # packaging not available or other issue — proceed anyway

        self._config = _load_config()
+        self._platform = str(kwargs.get("platform") or "").strip()
+        self._user_id = str(kwargs.get("user_id") or "").strip()
+        self._user_name = str(kwargs.get("user_name") or "").strip()
+        self._chat_id = str(kwargs.get("chat_id") or "").strip()
+        self._chat_name = str(kwargs.get("chat_name") or "").strip()
+        self._chat_type = str(kwargs.get("chat_type") or "").strip()
+        self._thread_id = str(kwargs.get("thread_id") or "").strip()
+        self._agent_identity = str(kwargs.get("agent_identity") or "").strip()
+        self._turn_index = 0
+        self._session_turns = []
        self._mode = self._config.get("mode", "cloud")
        # "local" is a legacy alias for "local_embedded"
        if self._mode == "local":
@@ -513,7 +594,7 @@ class HindsightMemoryProvider(MemoryProvider):
        memory_mode = self._config.get("memory_mode", "hybrid")
        self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"

-        prefetch_method = self._config.get("recall_prefetch_method", "recall")
+        prefetch_method = self._config.get("recall_prefetch_method") or self._config.get("prefetch_method", "recall")
        self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"

        # Bank options
@@ -521,9 +602,22 @@ class HindsightMemoryProvider(MemoryProvider):
        self._bank_retain_mission = self._config.get("bank_retain_mission") or None

        # Tags
-        self._tags = self._config.get("tags") or None
+        self._retain_tags = _normalize_retain_tags(
+            self._config.get("retain_tags")
+            or os.environ.get("HINDSIGHT_RETAIN_TAGS", "")
+        )
+        self._tags = self._retain_tags or None
        self._recall_tags = self._config.get("recall_tags") or None
        self._recall_tags_match = self._config.get("recall_tags_match", "any")
+        self._retain_source = str(
+            self._config.get("retain_source") or os.environ.get("HINDSIGHT_RETAIN_SOURCE", "")
+        ).strip()
+        self._retain_user_prefix = str(
+            self._config.get("retain_user_prefix") or os.environ.get("HINDSIGHT_RETAIN_USER_PREFIX", "User")
+        ).strip() or "User"
+        self._retain_assistant_prefix = str(
+            self._config.get("retain_assistant_prefix") or os.environ.get("HINDSIGHT_RETAIN_ASSISTANT_PREFIX", "Assistant")
+        ).strip() or "Assistant"

        # Retain controls
        self._auto_retain = self._config.get("auto_retain", True)
@@ -547,11 +641,9 @@ class HindsightMemoryProvider(MemoryProvider):
        logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s, client=%s",
                     self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method, _client_version)
        logger.debug("Hindsight config: auto_retain=%s, auto_recall=%s, retain_every_n=%d, "
-                     "retain_async=%s, retain_context=%s, "
-                     "recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
+                     "retain_async=%s, retain_context=%s, recall_max_tokens=%d, recall_max_input_chars=%d, tags=%s, recall_tags=%s",
                     self._auto_retain, self._auto_recall, self._retain_every_n_turns,
-                     self._retain_async, self._retain_context,
-                     self._recall_max_tokens, self._recall_max_input_chars,
+                     self._retain_async, self._retain_context, self._recall_max_tokens, self._recall_max_input_chars,
                     self._tags, self._recall_tags)

        # For local mode, start the embedded daemon in the background so it
@@ -712,6 +804,78 @@ class HindsightMemoryProvider(MemoryProvider):
        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch")
        self._prefetch_thread.start()

+    def _build_turn_messages(self, user_content: str, assistant_content: str) -> List[Dict[str, str]]:
+        now = datetime.now(timezone.utc).isoformat()
+        return [
+            {
+                "role": "user",
+                "content": f"{self._retain_user_prefix}: {user_content}",
+                "timestamp": now,
+            },
+            {
+                "role": "assistant",
+                "content": f"{self._retain_assistant_prefix}: {assistant_content}",
+                "timestamp": now,
+            },
+        ]
+
+    def _build_metadata(self, *, message_count: int, turn_index: int) -> Dict[str, str]:
+        metadata: Dict[str, str] = {
+            "retained_at": _utc_timestamp(),
+            "message_count": str(message_count),
+            "turn_index": str(turn_index),
+        }
+        if self._retain_source:
+            metadata["source"] = self._retain_source
+        if self._session_id:
+            metadata["session_id"] = self._session_id
+        if self._platform:
+            metadata["platform"] = self._platform
+        if self._user_id:
+            metadata["user_id"] = self._user_id
+        if self._user_name:
+            metadata["user_name"] = self._user_name
+        if self._chat_id:
+            metadata["chat_id"] = self._chat_id
+        if self._chat_name:
+            metadata["chat_name"] = self._chat_name
+        if self._chat_type:
+            metadata["chat_type"] = self._chat_type
+        if self._thread_id:
+            metadata["thread_id"] = self._thread_id
+        if self._agent_identity:
+            metadata["agent_identity"] = self._agent_identity
+        return metadata
+
+    def _build_retain_kwargs(
+        self,
+        content: str,
+        *,
+        context: str | None = None,
+        document_id: str | None = None,
+        metadata: Dict[str, str] | None = None,
+        tags: List[str] | None = None,
+        retain_async: bool | None = None,
+    ) -> Dict[str, Any]:
+        kwargs: Dict[str, Any] = {
+            "bank_id": self._bank_id,
+            "content": content,
+            "metadata": metadata or self._build_metadata(message_count=1, turn_index=self._turn_index),
+        }
+        if context is not None:
+            kwargs["context"] = context
+        if document_id:
+            kwargs["document_id"] = document_id
+        if retain_async is not None:
+            kwargs["retain_async"] = retain_async
+        merged_tags = _normalize_retain_tags(self._retain_tags)
+        for tag in _normalize_retain_tags(tags):
+            if tag not in merged_tags:
+                merged_tags.append(tag)
+        if merged_tags:
+            kwargs["tags"] = merged_tags
+        return kwargs
+
    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
        """Retain conversation turn in background (non-blocking).

@@ -721,19 +885,14 @@ class HindsightMemoryProvider(MemoryProvider):
            logger.debug("sync_turn: skipped (auto_retain disabled)")
            return

-        from datetime import datetime, timezone
-        now = datetime.now(timezone.utc).isoformat()
+        if session_id:
+            self._session_id = str(session_id).strip()

-        messages = [
-            {"role": "user", "content": user_content, "timestamp": now},
-            {"role": "assistant", "content": assistant_content, "timestamp": now},
-        ]
-
-        turn = json.dumps(messages)
+        turn = json.dumps(self._build_turn_messages(user_content, assistant_content))
        self._session_turns.append(turn)
        self._turn_counter += 1
+        self._turn_index = self._turn_counter

-        # Only retain every N turns
        if self._turn_counter % self._retain_every_n_turns != 0:
            logger.debug("sync_turn: buffered turn %d (will retain at turn %d)",
                         self._turn_counter, self._turn_counter + (self._retain_every_n_turns - self._turn_counter % self._retain_every_n_turns))
@@ -741,19 +900,21 @@ class HindsightMemoryProvider(MemoryProvider):

        logger.debug("sync_turn: retaining %d turns, total session content %d chars",
                     len(self._session_turns), sum(len(t) for t in self._session_turns))
-        # Send the ENTIRE session as a single JSON array (document_id deduplicates).
-        # Each element in _session_turns is a JSON string of that turn's messages.
        content = "[" + ",".join(self._session_turns) + "]"

        def _sync():
            try:
                client = self._get_client()
-                item: dict = {
-                    "content": content,
-                    "context": self._retain_context,
-                }
-                if self._tags:
-                    item["tags"] = self._tags
+                item = self._build_retain_kwargs(
+                    content,
+                    context=self._retain_context,
+                    metadata=self._build_metadata(
+                        message_count=len(self._session_turns) * 2,
+                        turn_index=self._turn_index,
+                    ),
+                )
+                item.pop("bank_id", None)
+                item.pop("retain_async", None)
                logger.debug("Hindsight retain: bank=%s, doc=%s, async=%s, content_len=%d, num_turns=%d",
                             self._bank_id, self._session_id, self._retain_async, len(content), len(self._session_turns))
                _run_sync(client.aretain_batch(
@@ -789,11 +950,11 @@ class HindsightMemoryProvider(MemoryProvider):
                return tool_error("Missing required parameter: content")
            context = args.get("context")
            try:
-                retain_kwargs: dict = {
-                    "bank_id": self._bank_id, "content": content, "context": context,
-                }
-                if self._tags:
-                    retain_kwargs["tags"] = self._tags
+                retain_kwargs = self._build_retain_kwargs(
+                    content,
+                    context=context,
+                    tags=args.get("tags"),
+                )
                logger.debug("Tool hindsight_retain: bank=%s, content_len=%d, context=%s",
                             self._bank_id, len(content), context)
                _run_sync(client.aretain(**retain_kwargs))
@@ -126,7 +126,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector
 hermes_cli = ["web_dist/**/*"]

 [tool.setuptools.packages.find]
-include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]

 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -1,36 +0,0 @@
-# NOTE: This file is maintained for convenience only.
-# The canonical dependency list is in pyproject.toml.
-# Preferred install: pip install -e ".[all]"
-
-# Core dependencies
-openai
-python-dotenv
-fire
-httpx
-rich
-tenacity
-prompt_toolkit
-pyyaml
-requests
-jinja2
-pydantic>=2.0
-PyJWT[crypto]
-debugpy
-
-# Web tools
-firecrawl-py
-parallel-web>=0.4.2
-
-# Image generation
-fal-client
-
-# Text-to-speech (Edge TTS is free, no API key needed)
-edge-tts
-
-# Optional: For cron expression parsing (cronjob scheduling)
-croniter
-
-# Optional: For messaging platform integrations (gateway)
-python-telegram-bot[webhooks]>=22.6
-discord.py>=2.0
-aiohttp>=3.9.0
--- a/Show More
+++ b/Show More