refactor: extract codex_responses logic into dedicated adapter

Move 10 Responses API format-conversion and normalization functions from run_agent.py into agent/codex_responses_adapter.py. All functions are now stateless module-level functions that make no reference to self. The AIAgent methods remain as thin one-line wrappers that delegate to the adapter, so all callers (tests, gateway, CLI) are unchanged.

Functions extracted:
- _deterministic_call_id: deterministic tool call ID generation
- _split_responses_tool_id: composite ID splitting
- _derive_responses_function_call_id: call_ to fc_ prefix conversion
- _responses_tools: chat completions tool schema → Responses format
- _chat_messages_to_responses_input: message format conversion
- _preflight_codex_input_items: input item normalization
- _preflight_codex_api_kwargs: API kwargs validation/cleaning
- _extract_responses_message_text: text extraction from response items
- _extract_responses_reasoning_text: reasoning extraction
- _normalize_codex_response: full response normalization

This brings codex_responses in line with anthropic_adapter.py and bedrock_adapter.py, which already have their own adapter files.

run_agent.py: 12410 → 11845 lines (-565 net)
2026-04-20 16:32:43 +05:30
529 changed files with 9054 additions and 53339 deletions
@@ -14,6 +14,3 @@ node_modules
 .env

 *.md
-
-# Runtime data (bind-mounted at /opt/data; must not leak into build context)
-data/
@@ -1,8 +0,0 @@
-name: 'Setup Nix'
-description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
-
-runs:
-  using: composite
-  steps:
-    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
-    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
@@ -1,68 +0,0 @@
-name: Nix Lockfile Check
-
-on:
-  pull_request:
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-check-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  check:
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Resolve head SHA
-        id: sha
-        shell: bash
-        run: |
-          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
-          echo "full=$FULL" >> "$GITHUB_OUTPUT"
-          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
-
-      - name: Check lockfile hashes
-        id: check
-        continue-on-error: true
-        env:
-          LINK_SHA: ${{ steps.sha.outputs.full }}
-        run: nix run .#fix-lockfiles -- --check
-
-      - name: Post sticky PR comment (stale)
-        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          message: |
-            ### ⚠️ npm lockfile hash out of date
-
-            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
-
-            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
-
-            ${{ steps.check.outputs.report }}
-
-            #### Apply the fix
-
-            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
-            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
-            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
-
-      - name: Clear sticky PR comment (resolved)
-        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          delete: true
-
-      - name: Fail if stale
-        if: steps.check.outputs.stale == 'true'
-        run: exit 1
@@ -1,149 +0,0 @@
-name: Nix Lockfile Fix
-
-on:
-  workflow_dispatch:
-    inputs:
-      pr_number:
-        description: 'PR number to fix (leave empty to run on the selected branch)'
-        required: false
-        type: string
-  issue_comment:
-    types: [edited]
-
-permissions:
-  contents: write
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
-  cancel-in-progress: false
-
-jobs:
-  fix:
-    # Run on manual dispatch OR when a task-list checkbox in the sticky
-    # lockfile-check comment flips from `[ ]` to `[x]`.
-    if: |
-      github.event_name == 'workflow_dispatch' ||
-      (github.event_name == 'issue_comment'
-       && github.event.issue.pull_request != null
-       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
-       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    steps:
-      - name: Authorize & resolve PR
-        id: resolve
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
-        with:
-          script: |
-            // 1. Verify the actor has write access — applies to both checkbox
-            //    clicks and manual dispatch.
-            const { data: perm } =
-              await github.rest.repos.getCollaboratorPermissionLevel({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                username: context.actor,
-              });
-            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
-              core.setFailed(
-                `${context.actor} lacks write access (has: ${perm.permission})`
-              );
-              return;
-            }
-
-            // 2. Resolve which ref to check out.
-            let prNumber = '';
-            if (context.eventName === 'issue_comment') {
-              prNumber = String(context.payload.issue.number);
-            } else if (context.eventName === 'workflow_dispatch') {
-              prNumber = context.payload.inputs.pr_number || '';
-            }
-
-            if (!prNumber) {
-              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
-              core.setOutput('repo', context.repo.repo);
-              core.setOutput('owner', context.repo.owner);
-              core.setOutput('pr', '');
-              return;
-            }
-
-            const { data: pr } = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: Number(prNumber),
-            });
-            core.setOutput('ref', pr.head.ref);
-            core.setOutput('repo', pr.head.repo.name);
-            core.setOutput('owner', pr.head.repo.owner.login);
-            core.setOutput('pr', String(pr.number));
-
-      # Wipe the sticky lockfile-check comment to a "running" state as soon
-      # as the job is authorized, so the user sees their click was picked up
-      # before the ~minute of nix build work.
-      - name: Mark sticky as running
-        if: steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### 🔄 Applying lockfile fix…
-
-            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-        with:
-          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
-          ref: ${{ steps.resolve.outputs.ref }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          fetch-depth: 0
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles -- --apply
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/tui.nix nix/web.nix
-          git commit -m "fix(nix): refresh npm lockfile hashes"
-          git push
-
-      - name: Update sticky (applied)
-        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile fix applied
-
-            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (already current)
-        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile hashes already current
-
-            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (failed)
-        if: failure() && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ❌ Lockfile fix failed
-
-            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
@@ -4,6 +4,15 @@ on:
  push:
    branches: [main]
  pull_request:
+    paths:
+      - 'flake.nix'
+      - 'flake.lock'
+      - 'nix/**'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'hermes_cli/**'
+      - 'run_agent.py'
+      - 'acp_adapter/**'

 permissions:
  contents: read
@@ -20,8 +29,9 @@ jobs:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: ./.github/actions/nix-setup
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25  # v22
+      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39  # v13
      - name: Check flake
        if: runner.os == 'Linux'
        run: nix flake check --print-build-logs
@@ -1,4 +1,3 @@
-.DS_Store
 /venv/
 /_pycache/
 *.pyc*
@@ -566,52 +566,3 @@ python -m pytest tests/ -q -n 4
 Worker count above 4 will surface test-ordering flakes that CI never sees.

 Always run the full suite before pushing changes.
-
-### Don't write change-detector tests
-
-A test is a **change-detector** if it fails whenever data that is **expected
-to change** gets updated — model catalogs, config version numbers,
-enumeration counts, hardcoded lists of provider models. These tests add no
-behavioral coverage; they just guarantee that routine source updates break
-CI and cost engineering time to "fix."
-
-**Do not write:**
-
-```python
-# catalog snapshot — breaks every model release
-assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
-assert "MiniMax-M2.7" in models
-
-# config version literal — breaks every schema bump
-assert DEFAULT_CONFIG["_config_version"] == 21
-
-# enumeration count — breaks every time a skill/provider is added
-assert len(_PROVIDER_MODELS["huggingface"]) == 8
-```
-
-**Do write:**
-
-```python
-# behavior: does the catalog plumbing work at all?
-assert "gemini" in _PROVIDER_MODELS
-assert len(_PROVIDER_MODELS["gemini"]) >= 1
-
-# behavior: does migration bump the user's version to current latest?
-assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
-
-# invariant: no plan-only model leaks into the legacy list
-assert not (set(moonshot_models) & coding_plan_only_models)
-
-# invariant: every model in the catalog has a context-length entry
-for m in _PROVIDER_MODELS["huggingface"]:
-    assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
-```
-
-The rule: if the test reads like a snapshot of current data, delete it. If
-it reads like a contract about how two pieces of data must relate, keep it.
-When a PR adds a new provider/model and you want a test, make the test
-assert the relationship (e.g. "catalog entries all have context lengths"),
-not the specific names.
-
-Reviewers should reject new change-detector tests; authors should convert
-them into invariants before re-requesting review.
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite

 | Requirement | Notes |
 |-------------|-------|
-| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
+| **Git** | With `--recurse-submodules` support |
 | **Python 3.11+** | uv will install it if missing |
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
-| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
+| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |

 ### Clone and install

@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
 touch ~/.hermes/.env

 # Add at minimum an LLM provider key:
-echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
+echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
 ```

 ### Run
@@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # Install system dependencies in one layer, clear APT cache
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -27,10 +27,12 @@ WORKDIR /opt/hermes
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
+COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
 COPY web/package.json web/package-lock.json web/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
+    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
    (cd web && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

@@ -50,6 +52,5 @@ RUN uv venv && \
 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
-ENV PATH="/opt/data/.local/bin:${PATH}"
 VOLUME [ "/opt/data" ]
 ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
@@ -173,6 +173,7 @@ python -m pytest tests/ -q
 - 💬 [Discord](https://discord.gg/NousResearch)
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
+- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---
@@ -63,9 +63,6 @@ def make_approval_callback(
            logger.warning("Permission request timed out or failed: %s", exc)
            return "deny"

-        if response is None:
-            return "deny"
-
        outcome = response.outcome
        if isinstance(outcome, AllowedOutcome):
            option_id = outcome.option_id
@@ -4,7 +4,6 @@ from __future__ import annotations

 import asyncio
 import logging
-import os
 from collections import defaultdict, deque
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Deque, Optional
@@ -52,7 +51,7 @@ try:
 except ImportError:
    from acp.schema import AuthMethod as AuthMethodAgent  # type: ignore[attr-defined]

-from acp_adapter.auth import detect_provider
+from acp_adapter.auth import detect_provider, has_provider
 from acp_adapter.events import (
    make_message_cb,
    make_step_cb,
@@ -72,11 +71,6 @@ except Exception:
 # Thread pool for running AIAgent (synchronous) in parallel.
 _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")

-# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
-# does not expose a client-side limit, so this is a fixed cap that clients
-# paginate against using `cursor` / `next_cursor`.
-_LIST_SESSIONS_PAGE_SIZE = 50
-

 def _extract_text(
    prompt: list[
@@ -357,18 +351,9 @@ class HermesACPAgent(acp.Agent):
        )

    async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
-        # Only accept authenticate() calls whose method_id matches the
-        # provider we advertised in initialize(). Without this check,
-        # authenticate() would acknowledge any method_id as long as the
-        # server has provider credentials configured — harmless under
-        # Hermes' threat model (ACP is stdio-only, local-trust), but poor
-        # API hygiene and confusing if ACP ever grows multi-method auth.
-        provider = detect_provider()
-        if not provider:
-            return None
-        if not isinstance(method_id, str) or method_id.strip().lower() != provider:
-            return None
-        return AuthenticateResponse()
+        if has_provider():
+            return AuthenticateResponse()
+        return None

    # ---- Session management -------------------------------------------------

@@ -452,28 +437,7 @@ class HermesACPAgent(acp.Agent):
        cwd: str | None = None,
        **kwargs: Any,
    ) -> ListSessionsResponse:
-        """List ACP sessions with optional ``cwd`` filtering and cursor pagination.
-
-        ``cwd`` is passed through to ``SessionManager.list_sessions`` which already
-        normalizes and filters by working directory. ``cursor`` is a ``session_id``
-        previously returned as ``next_cursor``; results resume after that entry.
-        Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
-        results remain, ``next_cursor`` is set to the last returned ``session_id``.
-        """
        infos = self.session_manager.list_sessions(cwd=cwd)
-
-        if cursor:
-            for idx, s in enumerate(infos):
-                if s["session_id"] == cursor:
-                    infos = infos[idx + 1:]
-                    break
-            else:
-                # Unknown cursor -> empty page (do not fall back to full list).
-                infos = []
-
-        has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
-        infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
-
        sessions = []
        for s in infos:
            updated_at = s.get("updated_at")
@@ -487,9 +451,7 @@ class HermesACPAgent(acp.Agent):
                    updated_at=updated_at,
                )
            )
-
-        next_cursor = sessions[-1].session_id if has_more and sessions else None
-        return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
+        return ListSessionsResponse(sessions=sessions)

    # ---- Prompt (core) ------------------------------------------------------

@@ -555,32 +517,15 @@ class HermesACPAgent(acp.Agent):
        agent.step_callback = step_cb
        agent.message_callback = message_cb

-        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
-        # Set it INSIDE _run_agent so the TLS write happens in the executor
-        # thread — setting it here would write to the event-loop thread's TLS,
-        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
-        # takes the CLI-interactive path (which calls the registered
-        # callback via prompt_dangerous_approval) instead of the
-        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
-        # ACP's conn.request_permission maps cleanly to the interactive
-        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
-        # which requires a notify_cb registered in _gateway_notify_cbs.
-        previous_approval_cb = None
-        previous_interactive = None
+        if approval_cb:
+            try:
+                from tools import terminal_tool as _terminal_tool
+                previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
+                _terminal_tool.set_approval_callback(approval_cb)
+            except Exception:
+                logger.debug("Could not set ACP approval callback", exc_info=True)

        def _run_agent() -> dict:
-            nonlocal previous_approval_cb, previous_interactive
-            if approval_cb:
-                try:
-                    from tools import terminal_tool as _terminal_tool
-                    previous_approval_cb = _terminal_tool._get_approval_callback()
-                    _terminal_tool.set_approval_callback(approval_cb)
-                except Exception:
-                    logger.debug("Could not set ACP approval callback", exc_info=True)
-            # Signal to tools.approval that we have an interactive callback
-            # and the non-interactive auto-approve path must not fire.
-            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
-            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
                    user_message=user_text,
@@ -592,11 +537,6 @@ class HermesACPAgent(acp.Agent):
                logger.exception("Agent error in session %s", session_id)
                return {"final_response": f"Error: {e}", "messages": state.history}
            finally:
-                # Restore HERMES_INTERACTIVE.
-                if previous_interactive is None:
-                    os.environ.pop("HERMES_INTERACTIVE", None)
-                else:
-                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
                if approval_cb:
                    try:
                        from tools import terminal_tool as _terminal_tool
@@ -673,8 +613,8 @@ class HermesACPAgent(acp.Agent):
            await self._conn.session_update(
                session_id=session_id,
                update=AvailableCommandsUpdate(
-                    session_update="available_commands_update",
-                    available_commands=self._available_commands(),
+                    sessionUpdate="available_commands_update",
+                    availableCommands=self._available_commands(),
                ),
            )
        except Exception:
@@ -1,326 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from typing import Any, Optional
-
-import httpx
-
-from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
-from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
-from hermes_cli.runtime_provider import resolve_runtime_provider
-
-
-def _utc_now() -> datetime:
-    return datetime.now(timezone.utc)
-
-
-@dataclass(frozen=True)
-class AccountUsageWindow:
-    label: str
-    used_percent: Optional[float] = None
-    reset_at: Optional[datetime] = None
-    detail: Optional[str] = None
-
-
-@dataclass(frozen=True)
-class AccountUsageSnapshot:
-    provider: str
-    source: str
-    fetched_at: datetime
-    title: str = "Account limits"
-    plan: Optional[str] = None
-    windows: tuple[AccountUsageWindow, ...] = ()
-    details: tuple[str, ...] = ()
-    unavailable_reason: Optional[str] = None
-
-    @property
-    def available(self) -> bool:
-        return bool(self.windows or self.details) and not self.unavailable_reason
-
-
-def _title_case_slug(value: Optional[str]) -> Optional[str]:
-    cleaned = str(value or "").strip()
-    if not cleaned:
-        return None
-    return cleaned.replace("_", " ").replace("-", " ").title()
-
-
-def _parse_dt(value: Any) -> Optional[datetime]:
-    if value in (None, ""):
-        return None
-    if isinstance(value, (int, float)):
-        return datetime.fromtimestamp(float(value), tz=timezone.utc)
-    if isinstance(value, str):
-        text = value.strip()
-        if not text:
-            return None
-        if text.endswith("Z"):
-            text = text[:-1] + "+00:00"
-        try:
-            dt = datetime.fromisoformat(text)
-            return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
-        except ValueError:
-            return None
-    return None
-
-
-def _format_reset(dt: Optional[datetime]) -> str:
-    if not dt:
-        return "unknown"
-    local_dt = dt.astimezone()
-    delta = dt - _utc_now()
-    total_seconds = int(delta.total_seconds())
-    if total_seconds <= 0:
-        return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
-    hours, rem = divmod(total_seconds, 3600)
-    minutes = rem // 60
-    if hours >= 24:
-        days, hours = divmod(hours, 24)
-        rel = f"in {days}d {hours}h"
-    elif hours > 0:
-        rel = f"in {hours}h {minutes}m"
-    else:
-        rel = f"in {minutes}m"
-    return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
-
-
-def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
-    if not snapshot:
-        return []
-    header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
-    lines = [header]
-    if snapshot.plan:
-        lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
-    else:
-        lines.append(f"Provider: {snapshot.provider}")
-    for window in snapshot.windows:
-        if window.used_percent is None:
-            base = f"{window.label}: unavailable"
-        else:
-            remaining = max(0, round(100 - float(window.used_percent)))
-            used = max(0, round(float(window.used_percent)))
-            base = f"{window.label}: {remaining}% remaining ({used}% used)"
-        if window.reset_at:
-            base += f" • resets {_format_reset(window.reset_at)}"
-        elif window.detail:
-            base += f" • {window.detail}"
-        lines.append(base)
-    for detail in snapshot.details:
-        lines.append(detail)
-    if snapshot.unavailable_reason:
-        lines.append(f"Unavailable: {snapshot.unavailable_reason}")
-    return lines
-
-
-def _resolve_codex_usage_url(base_url: str) -> str:
-    normalized = (base_url or "").strip().rstrip("/")
-    if not normalized:
-        normalized = "https://chatgpt.com/backend-api/codex"
-    if normalized.endswith("/codex"):
-        normalized = normalized[: -len("/codex")]
-    if "/backend-api" in normalized:
-        return normalized + "/wham/usage"
-    return normalized + "/api/codex/usage"
-
-
-def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
-    creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
-    token_data = _read_codex_tokens()
-    tokens = token_data.get("tokens") or {}
-    account_id = str(tokens.get("account_id", "") or "").strip() or None
-    headers = {
-        "Authorization": f"Bearer {creds['api_key']}",
-        "Accept": "application/json",
-        "User-Agent": "codex-cli",
-    }
-    if account_id:
-        headers["ChatGPT-Account-Id"] = account_id
-    with httpx.Client(timeout=15.0) as client:
-        response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
-        response.raise_for_status()
-    payload = response.json() or {}
-    rate_limit = payload.get("rate_limit") or {}
-    windows: list[AccountUsageWindow] = []
-    for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
-        window = rate_limit.get(key) or {}
-        used = window.get("used_percent")
-        if used is None:
-            continue
-        windows.append(
-            AccountUsageWindow(
-                label=label,
-                used_percent=float(used),
-                reset_at=_parse_dt(window.get("reset_at")),
-            )
-        )
-    details: list[str] = []
-    credits = payload.get("credits") or {}
-    if credits.get("has_credits"):
-        balance = credits.get("balance")
-        if isinstance(balance, (int, float)):
-            details.append(f"Credits balance: ${float(balance):.2f}")
-        elif credits.get("unlimited"):
-            details.append("Credits balance: unlimited")
-    return AccountUsageSnapshot(
-        provider="openai-codex",
-        source="usage_api",
-        fetched_at=_utc_now(),
-        plan=_title_case_slug(payload.get("plan_type")),
-        windows=tuple(windows),
-        details=tuple(details),
-    )
-
-
-def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
-    token = (resolve_anthropic_token() or "").strip()
-    if not token:
-        return None
-    if not _is_oauth_token(token):
-        return AccountUsageSnapshot(
-            provider="anthropic",
-            source="oauth_usage_api",
-            fetched_at=_utc_now(),
-            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
-        )
-    headers = {
-        "Authorization": f"Bearer {token}",
-        "Accept": "application/json",
-        "Content-Type": "application/json",
-        "anthropic-beta": "oauth-2025-04-20",
-        "User-Agent": "claude-code/2.1.0",
-    }
-    with httpx.Client(timeout=15.0) as client:
-        response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
-        response.raise_for_status()
-    payload = response.json() or {}
-    windows: list[AccountUsageWindow] = []
-    mapping = (
-        ("five_hour", "Current session"),
-        ("seven_day", "Current week"),
-        ("seven_day_opus", "Opus week"),
-        ("seven_day_sonnet", "Sonnet week"),
-    )
-    for key, label in mapping:
-        window = payload.get(key) or {}
-        util = window.get("utilization")
-        if util is None:
-            continue
-        used = float(util) * 100 if float(util) <= 1 else float(util)
-        windows.append(
-            AccountUsageWindow(
-                label=label,
-                used_percent=used,
-                reset_at=_parse_dt(window.get("resets_at")),
-            )
-        )
-    details: list[str] = []
-    extra = payload.get("extra_usage") or {}
-    if extra.get("is_enabled"):
-        used_credits = extra.get("used_credits")
-        monthly_limit = extra.get("monthly_limit")
-        currency = extra.get("currency") or "USD"
-        if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
-            details.append(
-                f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
-            )
-    return AccountUsageSnapshot(
-        provider="anthropic",
-        source="oauth_usage_api",
-        fetched_at=_utc_now(),
-        windows=tuple(windows),
-        details=tuple(details),
-    )
-
-
-def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
-    runtime = resolve_runtime_provider(
-        requested="openrouter",
-        explicit_base_url=base_url,
-        explicit_api_key=api_key,
-    )
-    token = str(runtime.get("api_key", "") or "").strip()
-    if not token:
-        return None
-    normalized = str(runtime.get("base_url", "") or "").rstrip("/")
-    credits_url = f"{normalized}/credits"
-    key_url = f"{normalized}/key"
-    headers = {
-        "Authorization": f"Bearer {token}",
-        "Accept": "application/json",
-    }
-    with httpx.Client(timeout=10.0) as client:
-        credits_resp = client.get(credits_url, headers=headers)
-        credits_resp.raise_for_status()
-        credits = (credits_resp.json() or {}).get("data") or {}
-        try:
-            key_resp = client.get(key_url, headers=headers)
-            key_resp.raise_for_status()
-            key_data = (key_resp.json() or {}).get("data") or {}
-        except Exception:
-            key_data = {}
-    total_credits = float(credits.get("total_credits") or 0.0)
-    total_usage = float(credits.get("total_usage") or 0.0)
-    details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
-    windows: list[AccountUsageWindow] = []
-    limit = key_data.get("limit")
-    limit_remaining = key_data.get("limit_remaining")
-    limit_reset = str(key_data.get("limit_reset") or "").strip()
-    usage = key_data.get("usage")
-    if (
-        isinstance(limit, (int, float))
-        and float(limit) > 0
-        and isinstance(limit_remaining, (int, float))
-        and 0 <= float(limit_remaining) <= float(limit)
-    ):
-        limit_value = float(limit)
-        remaining_value = float(limit_remaining)
-        used_percent = ((limit_value - remaining_value) / limit_value) * 100
-        detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
-        if limit_reset:
-            detail_parts.append(f"resets {limit_reset}")
-        windows.append(
-            AccountUsageWindow(
-                label="API key quota",
-                used_percent=used_percent,
-                detail=" • ".join(detail_parts),
-            )
-        )
-    if isinstance(usage, (int, float)):
-        usage_parts = [f"API key usage: ${float(usage):.2f} total"]
-        for value, label in (
-            (key_data.get("usage_daily"), "today"),
-            (key_data.get("usage_weekly"), "this week"),
-            (key_data.get("usage_monthly"), "this month"),
-        ):
-            if isinstance(value, (int, float)) and float(value) > 0:
-                usage_parts.append(f"${float(value):.2f} {label}")
-        details.append(" • ".join(usage_parts))
-    return AccountUsageSnapshot(
-        provider="openrouter",
-        source="credits_api",
-        fetched_at=_utc_now(),
-        windows=tuple(windows),
-        details=tuple(details),
-    )
-
-
-def fetch_account_usage(
-    provider: Optional[str],
-    *,
-    base_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-) -> Optional[AccountUsageSnapshot]:
-    normalized = str(provider or "").strip().lower()
-    if normalized in {"", "auto", "custom"}:
-        return None
-    try:
-        if normalized == "openai-codex":
-            return _fetch_codex_account_usage()
-        if normalized == "anthropic":
-            return _fetch_anthropic_account_usage()
-        if normalized == "openrouter":
-            return _fetch_openrouter_account_usage(base_url, api_key)
-    except Exception:
-        return None
-    return None
@@ -17,8 +17,8 @@ import os
 from pathlib import Path

 from hermes_constants import get_hermes_home
+from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
-from utils import normalize_proxy_env_vars

 try:
    import anthropic as _anthropic_sdk
@@ -116,63 +116,6 @@ def _get_anthropic_max_output(model: str) -> int:
    return best_val


-def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
-    """Return ``value`` floored to a positive int, or ``None`` if it is not a
-    finite positive number. Ported from openclaw/openclaw#66664.
-
-    Anthropic's Messages API rejects ``max_tokens`` values that are 0,
-    negative, non-integer, or non-finite with HTTP 400. Python's ``or``
-    idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
-    negative ints and fractional floats (``-1``, ``0.5``) through to the
-    API, producing a user-visible failure instead of a local error.
-    """
-    # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
-    # silently become 1 and ``False`` doesn't become 0.
-    if isinstance(value, bool):
-        return None
-    if not isinstance(value, (int, float)):
-        return None
-    try:
-        import math
-        if not math.isfinite(value):
-            return None
-    except Exception:
-        return None
-    floored = int(value)  # truncates toward zero for floats
-    return floored if floored > 0 else None
-
-
-def _resolve_anthropic_messages_max_tokens(
-    requested,
-    model: str,
-    context_length: Optional[int] = None,
-) -> int:
-    """Resolve the ``max_tokens`` budget for an Anthropic Messages call.
-
-    Prefers ``requested`` when it is a positive finite number; otherwise
-    falls back to the model's output ceiling. Raises ``ValueError`` if no
-    positive budget can be resolved (should not happen with current model
-    table defaults, but guards against a future regression where
-    ``_get_anthropic_max_output`` could return ``0``).
-
-    Separately, callers apply a context-window clamp — this resolver does
-    not, to keep the positive-value contract independent of endpoint
-    specifics.
-
-    Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
-    """
-    resolved = _resolve_positive_anthropic_max_tokens(requested)
-    if resolved is not None:
-        return resolved
-    fallback = _get_anthropic_max_output(model)
-    if fallback > 0:
-        return fallback
-    raise ValueError(
-        f"Anthropic Messages adapter requires a positive max_tokens value for "
-        f"model {model!r}; got {requested!r} and no model default resolved."
-    )
-
-
 def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6+ models that support adaptive thinking."""
    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -322,14 +265,6 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    return True  # Any other endpoint is a third-party proxy


-def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
-    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
-    normalized = _normalize_base_url_text(base_url)
-    if not normalized:
-        return False
-    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
-
-
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -357,7 +292,7 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional[float] = None):
+def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
@@ -373,9 +308,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
-
-    normalize_proxy_env_vars()
-
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
@@ -387,18 +319,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional
        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

-    if _is_kimi_coding_endpoint(base_url):
-        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
-        # to be recognized as a valid Coding Agent. Without it, returns 403.
-        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
-        kwargs["api_key"] = api_key
-        kwargs["default_headers"] = {
-            "User-Agent": "claude-code/0.1.0",
-            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
-        }
-    elif _requires_bearer_auth(normalized_base_url):
+    if _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer *** for regular API keys. Route those endpoints
+        # Authorization: Bearer even for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1139,31 +1062,6 @@ def convert_messages_to_anthropic(
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
-            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
-            # tool-call messages to carry reasoning_content when thinking is
-            # enabled server-side.  Preserve it as a thinking block so Kimi
-            # can validate the message history.  See hermes-agent#13848.
-            #
-            # Accept empty string "" — _copy_reasoning_content_for_api()
-            # injects "" as a tier-3 fallback for Kimi tool-call messages
-            # that had no reasoning.  Kimi requires the field to exist, even
-            # if empty.
-            #
-            # Prepend (not append): Anthropic protocol requires thinking
-            # blocks before text and tool_use blocks.
-            #
-            # Guard: only add when reasoning_details didn't already contribute
-            # thinking blocks.  On native Anthropic, reasoning_details produces
-            # signed thinking blocks — adding another unsigned one from
-            # reasoning_content would create a duplicate (same text) that gets
-            # downgraded to a spurious text block on the last assistant message.
-            reasoning_content = m.get("reasoning_content")
-            _already_has_thinking = any(
-                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
-                for b in blocks
-            )
-            if isinstance(reasoning_content, str) and not _already_has_thinking:
-                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
@@ -1319,7 +1217,6 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    _is_kimi = _is_kimi_coding_endpoint(base_url)

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1331,25 +1228,7 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_kimi:
-            # Kimi's /coding endpoint enables thinking server-side and
-            # requires unsigned thinking blocks on replayed assistant
-            # tool-call messages.  Strip signed Anthropic blocks (Kimi
-            # can't validate signatures) but preserve the unsigned ones
-            # we synthesised from reasoning_content above.
-            new_content = []
-            for b in m["content"]:
-                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
-                    new_content.append(b)
-                    continue
-                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — Kimi can't validate, strip
-                    continue
-                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: Kimi needs it for message-history validation.
-                new_content.append(b)
-            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
-        elif _is_third_party or idx != last_assistant_idx:
+        if _is_third_party or idx != last_assistant_idx:
            # Third-party endpoint: strip ALL thinking blocks from every
            # assistant message — signatures are Anthropic-proprietary.
            # Direct Anthropic: strip from non-latest assistant messages only.
@@ -1447,12 +1326,7 @@ def build_anthropic_kwargs(

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
-    # Use the resolver helper so non-positive values (negative ints,
-    # fractional floats, NaN, non-numeric) fail locally with a clear error
-    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
-    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
-        max_tokens, model, context_length=context_length
-    )
+    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
@@ -1531,25 +1405,11 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
-    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
-    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
-    # validates the message history and requires every prior assistant
-    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
-    # Anthropic path never populates that field, and
-    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
-    # on third-party endpoints — so the request fails with HTTP 400
-    # "thinking is enabled but reasoning_content is missing in assistant
-    # tool call message at index N".  Kimi's reasoning is driven server-side
-    # on the /coding route, so skip Anthropic's thinking parameter entirely
-    # for that host.  (Kimi on chat_completions enables thinking via
-    # extra_body in the ChatCompletionsTransport — see #13503.)
-    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
-    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
+    if reasoning_config and isinstance(reasoning_config, dict):
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
@@ -1598,4 +1458,70 @@ def build_anthropic_kwargs(
    return kwargs


+def normalize_anthropic_response(
+    response,
+    strip_tool_prefix: bool = False,
+) -> Tuple[SimpleNamespace, str]:
+    """Normalize Anthropic response to match the shape expected by AIAgent.

+    Returns (assistant_message, finish_reason) where assistant_message has
+    .content, .tool_calls, and .reasoning attributes.
+
+    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
+    added to tool names for OAuth Claude Code compatibility.
+    """
+    text_parts = []
+    reasoning_parts = []
+    reasoning_details = []
+    tool_calls = []
+
+    for block in response.content:
+        if block.type == "text":
+            text_parts.append(block.text)
+        elif block.type == "thinking":
+            reasoning_parts.append(block.thinking)
+            block_dict = _to_plain_data(block)
+            if isinstance(block_dict, dict):
+                reasoning_details.append(block_dict)
+        elif block.type == "tool_use":
+            name = block.name
+            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
+                name = name[len(_MCP_TOOL_PREFIX):]
+            tool_calls.append(
+                SimpleNamespace(
+                    id=block.id,
+                    type="function",
+                    function=SimpleNamespace(
+                        name=name,
+                        arguments=json.dumps(block.input),
+                    ),
+                )
+            )
+
+    # Map Anthropic stop_reason to OpenAI finish_reason.
+    # Newer stop reasons added in Claude 4.5+ / 4.7:
+    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
+    #   - model_context_window_exceeded: hit context limit (not max_tokens)
+    # Both need distinct handling upstream — a refusal should surface to the
+    # user with a clear message, and a context-window overflow should trigger
+    # compression/truncation rather than be treated as normal end-of-turn.
+    stop_reason_map = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+    finish_reason = stop_reason_map.get(response.stop_reason, "stop")
+
+    return (
+        SimpleNamespace(
+            content="\n".join(text_parts) if text_parts else None,
+            tool_calls=tool_calls or None,
+            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
+            reasoning_content=None,
+            reasoning_details=reasoning_details or None,
+        ),
+        finish_reason,
+    )
@@ -41,17 +41,13 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple

 from openai import OpenAI

-if TYPE_CHECKING:
-    from agent.gemini_native_adapter import GeminiNativeClient
-
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
-from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars

 logger = logging.getLogger(__name__)

@@ -99,37 +95,84 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
    return _PROVIDER_ALIASES.get(normalized, normalized)


-# Sentinel: when returned by _fixed_temperature_for_model(), callers must
-# strip the ``temperature`` key from API kwargs entirely so the provider's
-# server-side default applies.  Kimi/Moonshot models manage temperature
-# internally — sending *any* value (even the "correct" one) can conflict
-# with gateway-side mode selection (thinking → 1.0, non-thinking → 0.6).
-OMIT_TEMPERATURE: object = object()
+_FIXED_TEMPERATURE_MODELS: Dict[str, float] = {
+    "kimi-for-coding": 0.6,
+}

+# Moonshot's kimi-for-coding endpoint (api.kimi.com/coding) documents:
+# "k2.5 model will use a fixed value 1.0, non-thinking mode will use a fixed
+# value 0.6.  Any other value will result in an error."  The same lock applies
+# to the other k2.* models served on that endpoint.  Enumerated explicitly so
+# non-coding siblings like `kimi-k2-instruct` (variable temperature, served on
+# the standard chat API and third parties) are NOT clamped.
+# Source: https://platform.kimi.ai/docs/guide/kimi-k2-5-quickstart
+_KIMI_INSTANT_MODELS: frozenset = frozenset({
+    "kimi-k2.5",
+    "kimi-k2-turbo-preview",
+    "kimi-k2-0905-preview",
+})
+_KIMI_THINKING_MODELS: frozenset = frozenset({
+    "kimi-k2-thinking",
+    "kimi-k2-thinking-turbo",
+})

-def _is_kimi_model(model: Optional[str]) -> bool:
-    """True for any Kimi / Moonshot model that manages temperature server-side."""
-    bare = (model or "").strip().lower().rsplit("/", 1)[-1]
-    return bare.startswith("kimi-") or bare == "kimi"
+# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different
+# temperature contract than the Coding Plan endpoint above.  Empirically,
+# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400
+# "invalid temperature: only 1 is allowed for this model" — the Coding Plan
+# lock (0.6 for non-thinking) does not apply.  `kimi-k2-turbo-preview` and the
+# thinking variants already match the Coding Plan contract on the public
+# endpoint, so we only override the models that diverge.
+# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the
+# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see
+# hermes_cli/auth.py:_kimi_base_url_for_key).
+_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = {
+    "kimi-k2.5": 1.0,
+}


 def _fixed_temperature_for_model(
    model: Optional[str],
    base_url: Optional[str] = None,
-) -> "Optional[float] | object":
-    """Return a temperature directive for models with strict contracts.
+) -> Optional[float]:
+    """Return a required temperature override for models with strict contracts.

-    Returns:
-        ``OMIT_TEMPERATURE`` — caller must remove the ``temperature`` key so the
-            provider chooses its own default.  Used for all Kimi / Moonshot
-            models whose gateway selects temperature server-side.
-        ``float`` — a specific value the caller must use (reserved for future
-            models with fixed-temperature contracts).
-        ``None`` — no override; caller should use its own default.
+    Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on
+    the k2.5 family.  Non-thinking variants require exactly 0.6; thinking
+    variants require 1.0.  An optional ``vendor/`` prefix (e.g.
+    ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings.
+
+    When ``base_url`` points to Moonshot's public chat endpoint
+    (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public
+    API only accepts ``temperature=1``, not 0.6.  That override takes precedence
+    over the Coding Plan defaults above.
+
+    Returns ``None`` for every other model, including ``kimi-k2-instruct*``
+    which is the separate non-coding K2 family with variable temperature.
    """
-    if _is_kimi_model(model):
-        logger.debug("Omitting temperature for Kimi model %r (server-managed)", model)
-        return OMIT_TEMPERATURE
+    normalized = (model or "").strip().lower()
+    bare = normalized.rsplit("/", 1)[-1]
+
+    # Public Moonshot API has a stricter contract for some models than the
+    # Coding Plan endpoint — check it first so it wins on conflict.
+    if base_url and ("api.moonshot.ai" in base_url.lower() or "api.moonshot.cn" in base_url.lower()):
+        public = _KIMI_PUBLIC_API_OVERRIDES.get(bare)
+        if public is not None:
+            logger.debug(
+                "Forcing temperature=%s for %r on public Moonshot API", public, model
+            )
+            return public
+
+    fixed = _FIXED_TEMPERATURE_MODELS.get(normalized)
+    if fixed is not None:
+        logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model)
+        return fixed
+    if bare in _KIMI_THINKING_MODELS:
+        logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model)
+        return 1.0
+    if bare in _KIMI_INSTANT_MODELS:
+        logger.debug("Forcing temperature=0.6 for kimi instant model %r", model)
+        return 0.6
    return None

 # Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
@@ -137,7 +180,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
-    "stepfun": "step-3.5-flash",
    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
@@ -165,16 +207,6 @@ _OR_HEADERS = {
    "X-OpenRouter-Categories": "productivity,cli-agent",
 }

-# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
-# referrerUrl and X-Title maps to appName in the gateway's analytics.
-from hermes_cli import __version__ as _HERMES_VERSION
-
-_AI_GATEWAY_HEADERS = {
-    "HTTP-Referer": "https://hermes-agent.nousresearch.com",
-    "X-Title": "Hermes Agent",
-    "User-Agent": f"HermesAgent/{_HERMES_VERSION}",
-}
-
 # Nous Portal extra_body for product attribution.
 # Callers should pass this as extra_body in chat.completions.create()
 # when the auxiliary client is backed by Nous Portal.
@@ -186,6 +218,8 @@ auxiliary_is_nous: bool = False
 # Default auxiliary models per provider
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "google/gemini-3-flash-preview"
+_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni"
+_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
@@ -576,8 +610,7 @@ class _AnthropicCompletionsAdapter:
        self._is_oauth = is_oauth

    def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs
-        from agent.transports import get_transport
+        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response

        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
@@ -614,19 +647,7 @@ class _AnthropicCompletionsAdapter:
                anthropic_kwargs["temperature"] = temperature

        response = self._client.messages.create(**anthropic_kwargs)
-        _transport = get_transport("anthropic_messages")
-        _nr = _transport.normalize_response(
-            response, strip_tool_prefix=self._is_oauth
-        )
-
-        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
-        # .function.arguments) via properties, so no wrapping needed.
-        assistant_message = SimpleNamespace(
-            content=_nr.content,
-            tool_calls=_nr.tool_calls,
-            reasoning=_nr.reasoning,
-        )
-        finish_reason = _nr.finish_reason
+        assistant_message, finish_reason = normalize_anthropic_response(response)

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -743,33 +764,6 @@ def _nous_base_url() -> str:
    return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)


-def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]:
-    """Return fresh Nous runtime credentials when available.
-
-    This mirrors the main agent's 401 recovery path and keeps auxiliary
-    clients aligned with the singleton auth store + mint flow instead of
-    relying only on whatever raw tokens happen to be sitting in auth.json
-    or the credential pool.
-    """
-    try:
-        from hermes_cli.auth import resolve_nous_runtime_credentials
-
-        creds = resolve_nous_runtime_credentials(
-            min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
-            timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-            force_mint=force_refresh,
-        )
-    except Exception as exc:
-        logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc)
-        return None
-
-    api_key = str(creds.get("api_key") or "").strip()
-    base_url = str(creds.get("base_url") or "").strip().rstrip("/")
-    if not api_key or not base_url:
-        return None
-    return api_key, base_url
-
-
 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store.

@@ -813,11 +807,7 @@ def _read_codex_access_token() -> Optional[str]:
        return None


-# TODO(refactor): This function has messy types and duplicated logic (pool vs direct creds).
-#     Ideal fix: (1) define an AuxiliaryClient Protocol both OpenAI/GeminiNativeClient satisfy,
-#     (2) return a NamedTuple or dataclass instead of raw tuple, (3) extract the repeated
-#     Gemini/Kimi/Copilot client-building into a helper.
-def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeClient"]], Optional[str]]:
+def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Try each API-key provider in PROVIDER_REGISTRY order.

    Returns (client, model) for the first provider with usable runtime
@@ -863,9 +853,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli
                if is_native_gemini_base_url(base_url):
                    return GeminiNativeClient(api_key=api_key, base_url=base_url), model
            extra = {}
-            if base_url_host_matches(base_url, "api.kimi.com"):
-                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-            elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+            if "api.kimi.com" in base_url.lower():
+                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+            elif "api.githubcopilot.com" in base_url.lower():
                from hermes_cli.models import copilot_default_headers

                extra["default_headers"] = copilot_default_headers()
@@ -889,9 +879,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli
            if is_native_gemini_base_url(base_url):
                return GeminiNativeClient(api_key=api_key, base_url=base_url), model
        extra = {}
-        if base_url_host_matches(base_url, "api.kimi.com"):
-            extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+        if "api.kimi.com" in base_url.lower():
+            extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+        elif "api.githubcopilot.com" in base_url.lower():
            from hermes_cli.models import copilot_default_headers

            extra["default_headers"] = copilot_default_headers()
@@ -940,50 +930,29 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
        pass

    nous = _read_nous_auth()
-    runtime = _resolve_nous_runtime_api(force_refresh=False)
-    if runtime is None and not nous:
+    if not nous:
        return None, None
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-
-    # Ask the Portal which model it currently recommends for this task type.
-    # The /api/nous/recommended-models endpoint is the authoritative source:
-    # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model
-    # auto-detects the caller's tier via check_nous_free_tier().  Fall back to
-    # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable
-    # or returns a null recommendation for this task type.
-    model = _NOUS_MODEL
-    try:
-        from hermes_cli.models import get_nous_recommended_aux_model
-        recommended = get_nous_recommended_aux_model(vision=vision)
-        if recommended:
-            model = recommended
-            logger.debug(
-                "Auxiliary/%s: using Portal-recommended model %s",
-                "vision" if vision else "text", model,
-            )
-        else:
-            logger.debug(
-                "Auxiliary/%s: no Portal recommendation, falling back to %s",
-                "vision" if vision else "text", model,
-            )
-    except Exception as exc:
-        logger.debug(
-            "Auxiliary/%s: recommended-models lookup failed (%s); "
-            "falling back to %s",
-            "vision" if vision else "text", exc, model,
-        )
-
-    if runtime is not None:
-        api_key, base_url = runtime
+    if nous.get("source") == "pool":
+        model = "gemini-3-flash"
    else:
-        api_key = _nous_api_key(nous or {})
-        base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
+        model = _NOUS_MODEL
+    # Free-tier users can't use paid auxiliary models — use the free
+    # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
+    try:
+        from hermes_cli.models import check_nous_free_tier
+        if check_nous_free_tier():
+            model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
+            logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
+                         model, "vision" if vision else "text")
+    except Exception:
+        pass
    return (
        OpenAI(
-            api_key=api_key,
-            base_url=base_url,
+            api_key=_nous_api_key(nous),
+            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
        ),
        model,
    )
@@ -1061,7 +1030,7 @@ def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[st
        return None, None, None

    custom_base = custom_base.strip().rstrip("/")
-    if base_url_host_matches(custom_base, "openrouter.ai"):
+    if "openrouter.ai" in custom_base.lower():
        # requested='custom' falls back to OpenRouter when no custom endpoint is
        # configured. Treat that as "no custom endpoint" for auxiliary routing.
        return None, None, None
@@ -1095,8 +1064,6 @@ def _validate_proxy_env_urls() -> None:
    """
    from urllib.parse import urlparse

-    normalize_proxy_env_vars()
-
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = str(os.environ.get(key) or "").strip()
@@ -1327,15 +1294,6 @@ def _is_connection_error(exc: Exception) -> bool:
    return False


-def _is_auth_error(exc: Exception) -> bool:
-    """Detect auth failures that should trigger provider-specific refresh."""
-    status = getattr(exc, "status_code", None)
-    if status == 401:
-        return True
-    err_lower = str(exc).lower()
-    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()
-
-
 def _try_payment_fallback(
    failed_provider: str,
    task: str = None,
@@ -1511,15 +1469,15 @@ def _to_async_client(sync_client, model: str):
        "api_key": sync_client.api_key,
        "base_url": str(sync_client.base_url),
    }
-    sync_base_url = str(sync_client.base_url)
-    if base_url_host_matches(sync_base_url, "openrouter.ai"):
+    base_lower = str(sync_client.base_url).lower()
+    if "openrouter" in base_lower:
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
-    elif base_url_host_matches(sync_base_url, "api.githubcopilot.com"):
+    elif "api.githubcopilot.com" in base_lower:
        from hermes_cli.models import copilot_default_headers

        async_kwargs["default_headers"] = copilot_default_headers()
-    elif base_url_host_matches(sync_base_url, "api.kimi.com"):
-        async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+    elif "api.kimi.com" in base_lower:
+        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
    return AsyncOpenAI(**async_kwargs), model


@@ -1595,7 +1553,8 @@ def resolve_provider_client(
        # Auto-detect: api.openai.com + codex model name pattern
        if api_mode and api_mode != "codex_responses":
            return False  # explicit non-codex mode
-        if base_url_hostname(base_url_str) == "api.openai.com":
+        normalized_base = (base_url_str or "").strip().lower()
+        if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
            model_lower = (model_str or "").lower()
            if "codex" in model_lower:
                return True
@@ -1643,13 +1602,7 @@ def resolve_provider_client(

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
    if provider == "nous":
-        # Detect vision tasks: either explicit model override from
-        # _PROVIDER_VISION_MODELS, or caller passed a known vision model.
-        _is_vision = (
-            model in _PROVIDER_VISION_MODELS.values()
-            or (model or "").strip().lower() == "mimo-v2-omni"
-        )
-        client, default = _try_nous(vision=_is_vision)
+        client, default = _try_nous()
        if client is None:
            logger.warning("resolve_provider_client: nous requested "
                           "but Nous Portal not configured (run: hermes auth)")
@@ -1705,9 +1658,9 @@ def resolve_provider_client(
                provider,
            )
            extra = {}
-            if base_url_host_matches(custom_base, "api.kimi.com"):
-                extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
-            elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
+            if "api.kimi.com" in custom_base.lower():
+                extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"}
+            elif "api.githubcopilot.com" in custom_base.lower():
                from hermes_cli.models import copilot_default_headers
                extra["default_headers"] = copilot_default_headers()
            client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
@@ -1812,9 +1765,9 @@ def resolve_provider_client(

        # Provider-specific headers
        headers = {}
-        if base_url_host_matches(base_url, "api.kimi.com"):
-            headers["User-Agent"] = "claude-code/0.1.0"
-        elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+        if "api.kimi.com" in base_url.lower():
+            headers["User-Agent"] = "KimiCLI/1.30.0"
+        elif "api.githubcopilot.com" in base_url.lower():
            from hermes_cli.models import copilot_default_headers

            headers.update(copilot_default_headers())
@@ -2045,35 +1998,24 @@ def resolve_vision_provider_client(
        #      _PROVIDER_VISION_MODELS provides per-provider vision model
        #      overrides when the provider has a dedicated multimodal model
        #      that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
-        #      zai → glm-5v-turbo). Nous is the exception: it has a dedicated
-        #      strict vision backend with tier-aware defaults, so it must not
-        #      fall through to the user's text chat model here.
+        #      zai → glm-5v-turbo).
        #   2. OpenRouter  (vision-capable aggregator fallback)
        #   3. Nous Portal (vision-capable aggregator fallback)
        #   4. Stop
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
-            if main_provider == "nous":
-                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
-                if sync_client is not None:
-                    logger.info(
-                        "Vision auto-detect: using main provider %s (%s)",
-                        main_provider, default_model or resolved_model or main_model,
-                    )
-                    return _finalize(main_provider, sync_client, default_model)
-            else:
-                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
-                rpc_client, rpc_model = resolve_provider_client(
-                    main_provider, vision_model,
-                    api_mode=resolved_api_mode)
-                if rpc_client is not None:
-                    logger.info(
-                        "Vision auto-detect: using main provider %s (%s)",
-                        main_provider, rpc_model or vision_model,
-                    )
-                    return _finalize(
-                        main_provider, rpc_client, rpc_model or vision_model)
+            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
+            rpc_client, rpc_model = resolve_provider_client(
+                main_provider, vision_model,
+                api_mode=resolved_api_mode)
+            if rpc_client is not None:
+                logger.info(
+                    "Vision auto-detect: using main provider %s (%s)",
+                    main_provider, rpc_model or vision_model,
+                )
+                return _finalize(
+                    main_provider, rpc_client, rpc_model or vision_model)

        # Fall back through aggregators (uses their dedicated vision model,
        # not the user's main model) when main provider has no client.
@@ -2120,7 +2062,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:
    # Only use max_completion_tokens for direct OpenAI custom endpoints
    if (not or_key
            and _read_nous_auth() is None
-            and base_url_hostname(custom_base) == "api.openai.com"):
+            and "api.openai.com" in custom_base.lower()):
        return {"max_completion_tokens": value}
    return {"max_tokens": value}

@@ -2148,76 +2090,6 @@ _client_cache_lock = threading.Lock()
 _CLIENT_CACHE_MAX_SIZE = 64  # safety belt — evict oldest when exceeded


-def _client_cache_key(
-    provider: str,
-    *,
-    async_mode: bool,
-    base_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-    api_mode: Optional[str] = None,
-    main_runtime: Optional[Dict[str, Any]] = None,
-) -> tuple:
-    runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
-
-
-def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None:
-    with _client_cache_lock:
-        old_entry = _client_cache.get(cache_key)
-        if old_entry is not None and old_entry[0] is not client:
-            _force_close_async_httpx(old_entry[0])
-            try:
-                close_fn = getattr(old_entry[0], "close", None)
-                if callable(close_fn):
-                    close_fn()
-            except Exception:
-                pass
-        _client_cache[cache_key] = (client, default_model, bound_loop)
-
-
-def _refresh_nous_auxiliary_client(
-    *,
-    cache_provider: str,
-    model: Optional[str],
-    async_mode: bool,
-    base_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-    api_mode: Optional[str] = None,
-    main_runtime: Optional[Dict[str, Any]] = None,
-) -> Tuple[Optional[Any], Optional[str]]:
-    """Refresh Nous runtime creds, rebuild the client, and replace the cache entry."""
-    runtime = _resolve_nous_runtime_api(force_refresh=True)
-    if runtime is None:
-        return None, model
-
-    fresh_key, fresh_base_url = runtime
-    sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url)
-    final_model = model
-
-    current_loop = None
-    if async_mode:
-        try:
-            import asyncio as _aio
-            current_loop = _aio.get_event_loop()
-        except RuntimeError:
-            pass
-        client, final_model = _to_async_client(sync_client, final_model or "")
-    else:
-        client = sync_client
-
-    cache_key = _client_cache_key(
-        cache_provider,
-        async_mode=async_mode,
-        base_url=base_url,
-        api_key=api_key,
-        api_mode=api_mode,
-        main_runtime=main_runtime,
-    )
-    _store_cached_client(cache_key, client, final_model, bound_loop=current_loop)
-    return client, final_model
-
-
 def neuter_async_httpx_del() -> None:
    """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op.

@@ -2319,7 +2191,7 @@ def cleanup_stale_async_clients() -> None:

 def _is_openrouter_client(client: Any) -> bool:
    for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
-        if obj and base_url_host_matches(str(getattr(obj, "base_url", "") or ""), "openrouter.ai"):
+        if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
            return True
    return False

@@ -2371,14 +2243,8 @@ def _get_cached_client(
        except RuntimeError:
            pass
    runtime = _normalize_main_runtime(main_runtime)
-    cache_key = _client_cache_key(
-        provider,
-        async_mode=async_mode,
-        base_url=base_url,
-        api_key=api_key,
-        api_mode=api_mode,
-        main_runtime=main_runtime,
-    )
+    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
+    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
    with _client_cache_lock:
        if cache_key in _client_cache:
            cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2609,9 +2475,7 @@ def _build_call_kwargs(
    }

    fixed_temperature = _fixed_temperature_for_model(model, base_url)
-    if fixed_temperature is OMIT_TEMPERATURE:
-        temperature = None  # strip — let server choose
-    elif fixed_temperature is not None:
+    if fixed_temperature is not None:
        temperature = fixed_temperature

    # Opus 4.7+ rejects any non-default temperature/top_p/top_k — silently
@@ -2631,7 +2495,7 @@ def _build_call_kwargs(
        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
        if provider == "custom":
            custom_base = base_url or _current_custom_base_url()
-            if base_url_hostname(custom_base) == "api.openai.com":
+            if "api.openai.com" in custom_base.lower():
                kwargs["max_completion_tokens"] = max_tokens
            else:
                kwargs["max_tokens"] = max_tokens
@@ -2826,29 +2690,6 @@ def call_llm(
                    raise
                first_err = retry_err

-        # ── Nous auth refresh parity with main agent ──────────────────
-        client_is_nous = (
-            resolved_provider == "nous"
-            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
-        )
-        if _is_auth_error(first_err) and client_is_nous:
-            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
-                cache_provider=resolved_provider or "nous",
-                model=final_model,
-                async_mode=False,
-                base_url=resolved_base_url,
-                api_key=resolved_api_key,
-                api_mode=resolved_api_mode,
-                main_runtime=main_runtime,
-            )
-            if refreshed_client is not None:
-                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
-                            task or "call")
-                if refreshed_model and refreshed_model != kwargs.get("model"):
-                    kwargs["model"] = refreshed_model
-                return _validate_llm_response(
-                    refreshed_client.chat.completions.create(**kwargs), task)
-
        # ── Payment / credit exhaustion fallback ──────────────────────
        # When the resolved provider returns 402 or a credit-related error,
        # try alternative providers instead of giving up.  This handles the
@@ -3047,28 +2888,6 @@ async def async_call_llm(
                    raise
                first_err = retry_err

-        # ── Nous auth refresh parity with main agent ──────────────────
-        client_is_nous = (
-            resolved_provider == "nous"
-            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
-        )
-        if _is_auth_error(first_err) and client_is_nous:
-            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
-                cache_provider=resolved_provider or "nous",
-                model=final_model,
-                async_mode=True,
-                base_url=resolved_base_url,
-                api_key=resolved_api_key,
-                api_mode=resolved_api_mode,
-            )
-            if refreshed_client is not None:
-                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
-                            task or "call")
-                if refreshed_model and refreshed_model != kwargs.get("model"):
-                    kwargs["model"] = refreshed_model
-                return _validate_llm_response(
-                    await refreshed_client.chat.completions.create(**kwargs), task)
-
        # ── Payment / connection fallback (mirrors sync call_llm) ─────
        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
        is_auto = resolved_provider in ("auto", "", None)
@@ -22,98 +22,6 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY

 logger = logging.getLogger(__name__)

-
-# ---------------------------------------------------------------------------
-# Multimodal content helpers
-# ---------------------------------------------------------------------------
-
-def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
-    """Convert chat-style multimodal content to Responses API input parts.
-
-    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
-    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
-
-    Returns an empty list when ``content`` is not a list or contains no
-    recognized parts — callers fall back to the string path.
-    """
-    if not isinstance(content, list):
-        return []
-    converted: List[Dict[str, Any]] = []
-    for part in content:
-        if isinstance(part, str):
-            if part:
-                converted.append({"type": "input_text", "text": part})
-            continue
-        if not isinstance(part, dict):
-            continue
-        ptype = str(part.get("type") or "").strip().lower()
-        if ptype in {"text", "input_text", "output_text"}:
-            text = part.get("text")
-            if isinstance(text, str) and text:
-                converted.append({"type": "input_text", "text": text})
-            continue
-        if ptype in {"image_url", "input_image"}:
-            image_ref = part.get("image_url")
-            detail = part.get("detail")
-            if isinstance(image_ref, dict):
-                url = image_ref.get("url")
-                detail = image_ref.get("detail", detail)
-            else:
-                url = image_ref
-            if not isinstance(url, str) or not url:
-                continue
-            image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
-            if isinstance(detail, str) and detail.strip():
-                image_part["detail"] = detail.strip()
-            converted.append(image_part)
-    return converted
-
-
-def _summarize_user_message_for_log(content: Any) -> str:
-    """Return a short text summary of a user message for logging/trajectory.
-
-    Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
-    parts from the API server.  Logging, spinner previews, and trajectory
-    files all want a plain string — this helper extracts the first chunk of
-    text and notes any attached images.  Returns an empty string for empty
-    lists and ``str(content)`` for unexpected scalar types.
-    """
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        text_bits: List[str] = []
-        image_count = 0
-        for part in content:
-            if isinstance(part, str):
-                if part:
-                    text_bits.append(part)
-                continue
-            if not isinstance(part, dict):
-                continue
-            ptype = str(part.get("type") or "").strip().lower()
-            if ptype in {"text", "input_text", "output_text"}:
-                text = part.get("text")
-                if isinstance(text, str) and text:
-                    text_bits.append(text)
-            elif ptype in {"image_url", "input_image"}:
-                image_count += 1
-        summary = " ".join(text_bits).strip()
-        if image_count:
-            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
-            summary = f"{note} {summary}" if summary else note
-        return summary
-    try:
-        return str(content)
-    except Exception:
-        return ""
-
-
-# ---------------------------------------------------------------------------
-# ID helpers
-# ---------------------------------------------------------------------------
-
 def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
    """Generate a deterministic call_id from tool call content.

@@ -172,17 +80,14 @@ def _derive_responses_function_call_id(
    return f"fc_{digest}"


-# ---------------------------------------------------------------------------
-# Schema conversion
-# ---------------------------------------------------------------------------
-
 def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
    """Convert chat-completions tool schemas to Responses function-tool schemas."""
-    if not tools:
+    source_tools = tools
+    if not source_tools:
        return None

    converted: List[Dict[str, Any]] = []
-    for item in tools:
+    for item in source_tools:
        fn = item.get("function", {}) if isinstance(item, dict) else {}
        name = fn.get("name")
        if not isinstance(name, str) or not name.strip():
@@ -197,10 +102,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
    return converted or None


-# ---------------------------------------------------------------------------
-# Message format conversion
-# ---------------------------------------------------------------------------
-
 def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert internal chat-style messages to Responses input items."""
    items: List[Dict[str, Any]] = []
@@ -215,14 +116,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di

        if role in {"user", "assistant"}:
            content = msg.get("content", "")
-            if isinstance(content, list):
-                content_parts = _chat_content_to_responses_parts(content)
-                content_text = "".join(
-                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
-                )
-            else:
-                content_parts = []
-                content_text = str(content) if content is not None else ""
+            content_text = str(content) if content is not None else ""

            if role == "assistant":
                # Replay encrypted reasoning items from previous turns
@@ -245,9 +139,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                                seen_item_ids.add(item_id)
                            has_codex_reasoning = True

-                if content_parts:
-                    items.append({"role": "assistant", "content": content_parts})
-                elif content_text.strip():
+                if content_text.strip():
                    items.append({"role": "assistant", "content": content_text})
                elif has_codex_reasoning:
                    # The Responses API requires a following item after each
@@ -300,12 +192,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                        })
                continue

-            # Non-assistant (user) role: emit multimodal parts when present,
-            # otherwise fall back to the text payload.
-            if content_parts:
-                items.append({"role": role, "content": content_parts})
-            else:
-                items.append({"role": role, "content": content_text})
+            items.append({"role": role, "content": content_text})
            continue

        if role == "tool":
@@ -325,10 +212,6 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
    return items


-# ---------------------------------------------------------------------------
-# Input preflight / validation
-# ---------------------------------------------------------------------------
-
 def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
    if not isinstance(raw_items, list):
        raise ValueError("Codex Responses input must be a list of input items.")
@@ -410,46 +293,6 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
            content = item.get("content", "")
            if content is None:
                content = ""
-            if isinstance(content, list):
-                # Multimodal content from ``_chat_messages_to_responses_input``
-                # is already in Responses format (``input_text`` / ``input_image``).
-                # Validate each part and pass through.
-                validated: List[Dict[str, Any]] = []
-                for part_idx, part in enumerate(content):
-                    if isinstance(part, str):
-                        if part:
-                            validated.append({"type": "input_text", "text": part})
-                        continue
-                    if not isinstance(part, dict):
-                        raise ValueError(
-                            f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
-                        )
-                    ptype = str(part.get("type") or "").strip().lower()
-                    if ptype in {"input_text", "text", "output_text"}:
-                        text = part.get("text", "")
-                        if not isinstance(text, str):
-                            text = str(text or "")
-                        validated.append({"type": "input_text", "text": text})
-                    elif ptype in {"input_image", "image_url"}:
-                        image_ref = part.get("image_url", "")
-                        detail = part.get("detail")
-                        if isinstance(image_ref, dict):
-                            url = image_ref.get("url", "")
-                            detail = image_ref.get("detail", detail)
-                        else:
-                            url = image_ref
-                        if not isinstance(url, str):
-                            url = str(url or "")
-                        image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
-                        if isinstance(detail, str) and detail.strip():
-                            image_part["detail"] = detail.strip()
-                        validated.append(image_part)
-                    else:
-                        raise ValueError(
-                            f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
-                        )
-                normalized.append({"role": role, "content": validated})
-                continue
            if not isinstance(content, str):
                content = str(content)

@@ -606,10 +449,6 @@ def _preflight_codex_api_kwargs(
    return normalized


-# ---------------------------------------------------------------------------
-# Response extraction helpers
-# ---------------------------------------------------------------------------
-
 def _extract_responses_message_text(item: Any) -> str:
    """Extract assistant text from a Responses message output item."""
    content = getattr(item, "content", None)
@@ -644,10 +483,6 @@ def _extract_responses_reasoning_text(item: Any) -> str:
    return ""


-# ---------------------------------------------------------------------------
-# Full response normalization
-# ---------------------------------------------------------------------------
-
 def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    """Normalize a Responses API object to an assistant_message-like object."""
    output = getattr(response, "output", None)
@@ -811,3 +646,5 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    else:
        finish_reason = "stop"
    return assistant_message, finish_reason
+
+
@@ -31,7 +31,6 @@ from agent.model_metadata import (
    get_model_context_length,
    estimate_messages_tokens_rough,
 )
-from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

@@ -64,47 +63,6 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


-def _content_text_for_contains(content: Any) -> str:
-    """Return a best-effort text view of message content.
-
-    Used only for substring checks when we need to know whether we've already
-    appended a note to a message. Keeps multimodal lists intact elsewhere.
-    """
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-            elif isinstance(item, dict):
-                text = item.get("text")
-                if isinstance(text, str):
-                    parts.append(text)
-        return "\n".join(part for part in parts if part)
-    return str(content)
-
-
-def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
-    """Append or prepend plain text to message content safely.
-
-    Compression sometimes needs to add a note or merge a summary into an
-    existing message. Message content may be plain text or a multimodal list of
-    blocks, so direct string concatenation is not always safe.
-    """
-    if content is None:
-        return text
-    if isinstance(content, str):
-        return text + content if prepend else content + text
-    if isinstance(content, list):
-        text_block = {"type": "text", "text": text}
-        return [text_block, *content] if prepend else [*content, text_block]
-    rendered = str(content)
-    return text + rendered if prepend else rendered + text
-
-
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -592,15 +550,11 @@ class ContextCompressor(ContextEngine):
        Includes tool call arguments and result content (up to
        ``_CONTENT_MAX`` chars per message) so the summarizer can preserve
        specific details like file paths, commands, and outputs.
-
-        All content is redacted before serialization to prevent secrets
-        (API keys, tokens, passwords) from leaking into the summary that
-        gets sent to the auxiliary model and persisted across compactions.
        """
        parts = []
        for msg in turns:
            role = msg.get("role", "unknown")
-            content = redact_sensitive_text(msg.get("content") or "")
+            content = msg.get("content") or ""

            # Tool results: keep enough content for the summarizer
            if role == "tool":
@@ -621,7 +575,7 @@ class ContextCompressor(ContextEngine):
                        if isinstance(tc, dict):
                            fn = tc.get("function", {})
                            name = fn.get("name", "?")
-                            args = redact_sensitive_text(fn.get("arguments", ""))
+                            args = fn.get("arguments", "")
                            # Truncate long arguments but keep enough for context
                            if len(args) > self._TOOL_ARGS_MAX:
                                args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -681,11 +635,7 @@ class ContextCompressor(ContextEngine):
            "only output the structured summary. "
            "Do NOT include any preamble, greeting, or prefix. "
            "Write the summary in the same language the user was using in the "
-            "conversation — do not translate or switch to English. "
-            "NEVER include API keys, tokens, passwords, secrets, credentials, "
-            "or connection strings in the summary — replace any that appear "
-            "with [REDACTED]. Note that the user had credentials present, but "
-            "do not preserve their values."
+            "conversation — do not translate or switch to English."
        )

        # Shared structured template (used by both paths).
@@ -742,7 +692,7 @@ Be specific with file paths, commands, line numbers, and results.]
 [What remains to be done — framed as context, not instructions]

 ## Critical Context
-[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
+[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]

 Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

@@ -782,7 +732,7 @@ Use this exact structure:
            prompt += f"""

 FOCUS TOPIC: "{focus_topic}"
-The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""

        try:
            call_kwargs = {
@@ -805,9 +755,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Handle cases where content is not a string (e.g., dict from llama.cpp)
            if not isinstance(content, str):
                content = str(content) if content else ""
-            # Redact the summary output as well — the summarizer LLM may
-            # ignore prompt instructions and echo back secrets verbatim.
-            summary = redact_sensitive_text(content.strip())
+            summary = content.strip()
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
@@ -848,7 +796,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately
+                return self._generate_summary(messages, summary_budget)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
@@ -1185,13 +1133,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content")
+                existing = msg.get("content") or ""
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in _content_text_for_contains(existing):
-                    msg["content"] = _append_text_to_content(
-                        existing,
-                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
-                    )
+                if _compression_note not in existing:
+                    msg["content"] = existing + "\n\n" + _compression_note
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1235,15 +1180,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                merged_prefix = (
+                original = msg.get("content") or ""
+                msg["content"] = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                )
-                msg["content"] = _append_text_to_content(
-                    msg.get("content"),
-                    merged_prefix,
-                    prepend=True,
+                    + original
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -21,9 +21,6 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Any

-from agent.file_safety import get_read_block_error, is_write_denied
-from agent.redact import redact_sensitive_text
-
 ACP_MARKER_BASE_URL = "acp://copilot"
 _DEFAULT_TIMEOUT_SECONDS = 900.0

@@ -57,18 +54,6 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    }


-def _permission_denied(message_id: Any) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "result": {
-            "outcome": {
-                "outcome": "cancelled",
-            }
-        },
-    }
-
-
 def _format_messages_as_prompt(
    messages: list[dict[str, Any]],
    model: str | None = None,
@@ -401,8 +386,6 @@ class CopilotACPClient:
        stderr_tail: deque[str] = deque(maxlen=40)

        def _stdout_reader() -> None:
-            if proc.stdout is None:
-                return
            for line in proc.stdout:
                try:
                    inbox.put(json.loads(line))
@@ -550,13 +533,18 @@ class CopilotACPClient:
        params = msg.get("params") or {}

        if method == "session/request_permission":
-            response = _permission_denied(message_id)
+            response = {
+                "jsonrpc": "2.0",
+                "id": message_id,
+                "result": {
+                    "outcome": {
+                        "outcome": "allow_once",
+                    }
+                },
+            }
        elif method == "fs/read_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                block_error = get_read_block_error(str(path))
-                if block_error:
-                    raise PermissionError(block_error)
                content = path.read_text() if path.exists() else ""
                line = params.get("line")
                limit = params.get("limit")
@@ -565,8 +553,6 @@ class CopilotACPClient:
                    start = line - 1
                    end = start + limit if isinstance(limit, int) and limit > 0 else None
                    content = "".join(lines[start:end])
-                if content:
-                    content = redact_sensitive_text(content)
                response = {
                    "jsonrpc": "2.0",
                    "id": message_id,
@@ -579,10 +565,6 @@ class CopilotACPClient:
        elif method == "fs/write_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                if is_write_denied(str(path)):
-                    raise PermissionError(
-                        f"Write denied: '{path}' is a protected system/credential file."
-                    )
                path.parent.mkdir(parents=True, exist_ok=True)
                path.write_text(str(params.get("content") or ""))
                response = {
@@ -29,7 +29,6 @@ from hermes_cli.auth import (
    _save_auth_store,
    _save_provider_state,
    read_credential_pool,
-    read_provider_credentials,
    write_credential_pool,
 )

@@ -322,7 +321,7 @@ def get_custom_provider_pool_key(base_url: str) -> Optional[str]:

 def list_custom_pool_providers() -> List[str]:
    """Return all 'custom:*' pool keys that have entries in auth.json."""
-    pool_data = read_credential_pool()
+    pool_data = read_credential_pool(None)
    return sorted(
        key for key in pool_data
        if key.startswith(CUSTOM_POOL_PREFIX)
@@ -876,20 +875,6 @@ class CredentialPool:
            self._current_id = None
        return removed

-    def remove_entry(self, entry_id: str) -> Optional[PooledCredential]:
-        for idx, entry in enumerate(self._entries):
-            if entry.id == entry_id:
-                removed = self._entries.pop(idx)
-                self._entries = [
-                    replace(e, priority=new_priority)
-                    for new_priority, e in enumerate(self._entries)
-                ]
-                self._persist()
-                if self._current_id == removed.id:
-                    self._current_id = None
-                return removed
-        return None
-
    def resolve_target(self, target: Any) -> Tuple[Optional[int], Optional[PooledCredential], Optional[str]]:
        raw = str(target or "").strip()
        if not raw:
@@ -998,14 +983,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
    active_sources: Set[str] = set()
    auth_store = _load_auth_store()

-    # Shared suppression gate — used at every upsert site so
-    # `hermes auth remove <provider> <N>` is stable across all source types.
-    try:
-        from hermes_cli.auth import is_source_suppressed as _is_suppressed
-    except ImportError:
-        def _is_suppressed(_p, _s):  # type: ignore[misc]
-            return False
-
    if provider == "anthropic":
        # Only auto-discover external credentials (Claude Code, Hermes PKCE)
        # when the user has explicitly configured anthropic as their provider.
@@ -1025,8 +1002,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            ("claude_code", read_claude_code_credentials()),
        ):
            if creds and creds.get("accessToken"):
-                if _is_suppressed(provider, source_name):
-                    continue
+                # Check if user explicitly removed this source
+                try:
+                    from hermes_cli.auth import is_source_suppressed
+                    if is_source_suppressed(provider, source_name):
+                        continue
+                except ImportError:
+                    pass
                active_sources.add(source_name)
                changed |= _upsert_entry(
                    entries,
@@ -1044,7 +1026,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup

    elif provider == "nous":
        state = _load_provider_state(auth_store, "nous")
-        if state and not _is_suppressed(provider, "device_code"):
+        if state:
            active_sources.add("device_code")
            # Prefer a user-supplied label embedded in the singleton state
            # (set by persist_nous_credentials(label=...) when the user ran
@@ -1085,21 +1067,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token, source = resolve_copilot_token()
            if token:
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
-                if not _is_suppressed(provider, source_name):
-                    active_sources.add(source_name)
-                    pconfig = PROVIDER_REGISTRY.get(provider)
-                    changed |= _upsert_entry(
-                        entries,
-                        provider,
-                        source_name,
-                        {
-                            "source": source_name,
-                            "auth_type": AUTH_TYPE_API_KEY,
-                            "access_token": token,
-                            "base_url": pconfig.inference_base_url if pconfig else "",
-                            "label": source,
-                        },
-                    )
+                active_sources.add(source_name)
+                pconfig = PROVIDER_REGISTRY.get(provider)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_API_KEY,
+                        "access_token": token,
+                        "base_url": pconfig.inference_base_url if pconfig else "",
+                        "label": source,
+                    },
+                )
        except Exception as exc:
            logger.debug("Copilot token seed failed: %s", exc)

@@ -1115,21 +1096,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token = creds.get("api_key", "")
            if token:
                source_name = creds.get("source", "qwen-cli")
-                if not _is_suppressed(provider, source_name):
-                    active_sources.add(source_name)
-                    changed |= _upsert_entry(
-                        entries,
-                        provider,
-                        source_name,
-                        {
-                            "source": source_name,
-                            "auth_type": AUTH_TYPE_OAUTH,
-                            "access_token": token,
-                            "expires_at_ms": creds.get("expires_at_ms"),
-                            "base_url": creds.get("base_url", ""),
-                            "label": creds.get("auth_file", source_name),
-                        },
-                    )
+                active_sources.add(source_name)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_OAUTH,
+                        "access_token": token,
+                        "expires_at_ms": creds.get("expires_at_ms"),
+                        "base_url": creds.get("base_url", ""),
+                        "label": creds.get("auth_file", source_name),
+                    },
+                )
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

@@ -1138,7 +1118,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # the device_code source as suppressed so it won't be re-seeded from
        # the Hermes auth store.  Without this gate the removal is instantly
        # undone on the next load_pool() call.
-        if _is_suppressed(provider, "device_code"):
+        codex_suppressed = False
+        try:
+            from hermes_cli.auth import is_source_suppressed
+            codex_suppressed = is_source_suppressed(provider, "device_code")
+        except ImportError:
+            pass
+        if codex_suppressed:
            return changed, active_sources

        state = _load_provider_state(auth_store, "openai-codex")
@@ -1172,22 +1158,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
    changed = False
    active_sources: Set[str] = set()
-    # Honour user suppression — `hermes auth remove <provider> <N>` for an
-    # env-seeded credential marks the env:<VAR> source as suppressed so it
-    # won't be re-seeded from the user's shell environment or ~/.hermes/.env.
-    # Without this gate the removal is silently undone on the next
-    # load_pool() call whenever the var is still exported by the shell.
-    try:
-        from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
-    except ImportError:
-        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
-            return False
    if provider == "openrouter":
        token = os.getenv("OPENROUTER_API_KEY", "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
-            if _is_source_suppressed(provider, source):
-                return changed, active_sources
            active_sources.add(source)
            changed |= _upsert_entry(
                entries,
@@ -1224,8 +1198,6 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        if not token:
            continue
        source = f"env:{env_var}"
-        if _is_source_suppressed(provider, source):
-            continue
        active_sources.add(source)
        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
@@ -1270,13 +1242,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
    changed = False
    active_sources: Set[str] = set()

-    # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
-    try:
-        from hermes_cli.auth import is_source_suppressed as _is_suppressed
-    except ImportError:
-        def _is_suppressed(_p, _s):  # type: ignore[misc]
-            return False
-
    # Seed from the custom_providers config entry's api_key field
    cp_config = _get_custom_provider_config(pool_key)
    if cp_config:
@@ -1285,20 +1250,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
        name = str(cp_config.get("name") or "").strip()
        if api_key:
            source = f"config:{name}"
-            if not _is_suppressed(pool_key, source):
-                active_sources.add(source)
-                changed |= _upsert_entry(
-                    entries,
-                    pool_key,
-                    source,
-                    {
-                        "source": source,
-                        "auth_type": AUTH_TYPE_API_KEY,
-                        "access_token": api_key,
-                        "base_url": base_url,
-                        "label": name or source,
-                    },
-                )
+            active_sources.add(source)
+            changed |= _upsert_entry(
+                entries,
+                pool_key,
+                source,
+                {
+                    "source": source,
+                    "auth_type": AUTH_TYPE_API_KEY,
+                    "access_token": api_key,
+                    "base_url": base_url,
+                    "label": name or source,
+                },
+            )

    # Seed from model.api_key if model.provider=='custom' and model.base_url matches
    try:
@@ -1318,20 +1282,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
                matched_key = get_custom_provider_pool_key(model_base_url)
                if matched_key == pool_key:
                    source = "model_config"
-                    if not _is_suppressed(pool_key, source):
-                        active_sources.add(source)
-                        changed |= _upsert_entry(
-                            entries,
-                            pool_key,
-                            source,
-                            {
-                                "source": source,
-                                "auth_type": AUTH_TYPE_API_KEY,
-                                "access_token": model_api_key,
-                                "base_url": model_base_url,
-                                "label": "model_config",
-                            },
-                        )
+                    active_sources.add(source)
+                    changed |= _upsert_entry(
+                        entries,
+                        pool_key,
+                        source,
+                        {
+                            "source": source,
+                            "auth_type": AUTH_TYPE_API_KEY,
+                            "access_token": model_api_key,
+                            "base_url": model_base_url,
+                            "label": "model_config",
+                        },
+                    )
    except Exception:
        pass

@@ -1340,7 +1303,7 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b

 def load_pool(provider: str) -> CredentialPool:
    provider = (provider or "").strip().lower()
-    raw_entries = read_provider_credentials(provider)
+    raw_entries = read_credential_pool(provider)
    entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries]

    if provider.startswith(CUSTOM_POOL_PREFIX):
@@ -1,401 +0,0 @@
-"""Unified removal contract for every credential source Hermes reads from.
-
-Hermes seeds its credential pool from many places:
-
-    env:<VAR>     — os.environ / ~/.hermes/.env
-    claude_code   — ~/.claude/.credentials.json
-    hermes_pkce   — ~/.hermes/.anthropic_oauth.json
-    device_code   — auth.json providers.<provider> (nous, openai-codex, ...)
-    qwen-cli      — ~/.qwen/oauth_creds.json
-    gh_cli        — gh auth token
-    config:<name> — custom_providers config entry
-    model_config  — model.api_key when model.provider == "custom"
-    manual        — user ran `hermes auth add`
-
-Each source has its own reader inside ``agent.credential_pool._seed_from_*``
-(which keep their existing shape — we haven't restructured them).  What we
-unify here is **removal**:
-
-    ``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
-
-Before this module, every source had an ad-hoc removal branch in
-``auth_remove_command``, and several sources had no branch at all — so
-``auth remove`` silently reverted on the next ``load_pool()`` call for
-qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
-custom-config sources.
-
-Now every source registers a ``RemovalStep`` that does exactly three things
-in the same shape:
-
-    1. Clean up whatever externally-readable state the source reads from
-       (.env line, auth.json block, OAuth file, etc.)
-    2. Suppress the ``(provider, source_id)`` in auth.json so the
-       corresponding ``_seed_from_*`` branch skips the upsert on re-load
-    3. Return ``RemovalResult`` describing what was cleaned and any
-       diagnostic hints the user should see (shell-exported env vars,
-       external credential files we deliberately don't delete, etc.)
-
-Adding a new credential source is:
-    - wire up a reader branch in ``_seed_from_*`` (existing pattern)
-    - gate that reader behind ``is_source_suppressed(provider, source_id)``
-    - register a ``RemovalStep`` here
-
-No more per-source if/elif chain in ``auth_remove_command``.
-"""
-
-from __future__ import annotations
-
-import os
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Callable, List, Optional
-
-
-@dataclass
-class RemovalResult:
-    """Outcome of removing a credential source.
-
-    Attributes:
-        cleaned: Short strings describing external state that was actually
-            mutated (``"Cleared XAI_API_KEY from .env"``,
-            ``"Cleared openai-codex OAuth tokens from auth store"``).
-            Printed as plain lines to the user.
-        hints: Diagnostic lines ABOUT state the user may need to clean up
-            themselves or is deliberately left intact (shell-exported env
-            var, Claude Code credential file we don't delete, etc.).
-            Printed as plain lines to the user.  Always non-destructive.
-        suppress: Whether to call ``suppress_credential_source`` after
-            cleanup so future ``load_pool`` calls skip this source.
-            Default True — almost every source needs this to stay sticky.
-            The only legitimate False is ``manual`` entries, which aren't
-            seeded from anywhere external.
-    """
-
-    cleaned: List[str] = field(default_factory=list)
-    hints: List[str] = field(default_factory=list)
-    suppress: bool = True
-
-
-@dataclass
-class RemovalStep:
-    """How to remove one specific credential source cleanly.
-
-    Attributes:
-        provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
-            Special value ``"*"`` means "matches any provider" — used for
-            sources like ``manual`` that aren't provider-specific.
-        source_id: Source identifier as it appears in
-            ``PooledCredential.source``.  May be a literal (``"claude_code"``)
-            or a prefix pattern matched via ``match_fn``.
-        match_fn: Optional predicate overriding literal ``source_id``
-            matching.  Gets the removed entry's source string.  Used for
-            ``env:*`` (any env-seeded key), ``config:*`` (any custom
-            pool), and ``manual:*`` (any manual-source variant).
-        remove_fn: ``(provider, removed_entry) -> RemovalResult``.  Does the
-            actual cleanup and returns what happened for the user.
-        description: One-line human-readable description for docs / tests.
-    """
-
-    provider: str
-    source_id: str
-    remove_fn: Callable[..., RemovalResult]
-    match_fn: Optional[Callable[[str], bool]] = None
-    description: str = ""
-
-    def matches(self, provider: str, source: str) -> bool:
-        if self.provider != "*" and self.provider != provider:
-            return False
-        if self.match_fn is not None:
-            return self.match_fn(source)
-        return source == self.source_id
-
-
-_REGISTRY: List[RemovalStep] = []
-
-
-def register(step: RemovalStep) -> RemovalStep:
-    _REGISTRY.append(step)
-    return step
-
-
-def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
-    """Return the first matching RemovalStep, or None if unregistered.
-
-    Unregistered sources fall through to the default remove path in
-    ``auth_remove_command``: the pool entry is already gone (that happens
-    before dispatch), no external cleanup, no suppression.  This is the
-    correct behaviour for ``manual`` entries — they were only ever stored
-    in the pool, nothing external to clean up.
-    """
-    for step in _REGISTRY:
-        if step.matches(provider, source):
-            return step
-    return None
-
-
-# ---------------------------------------------------------------------------
-# Individual RemovalStep implementations — one per source.
-# ---------------------------------------------------------------------------
-# Each remove_fn is intentionally small and single-purpose.  Adding a new
-# credential source means adding ONE entry here — no other changes to
-# auth_remove_command.
-
-
-def _remove_env_source(provider: str, removed) -> RemovalResult:
-    """env:<VAR> — the most common case.
-
-    Handles three user situations:
-      1. Var lives only in ~/.hermes/.env  → clear it
-      2. Var lives only in the user's shell (shell profile, systemd
-         EnvironmentFile, launchd plist) → hint them where to unset it
-      3. Var lives in both → clear from .env, hint about shell
-    """
-    from hermes_cli.config import get_env_path, remove_env_value
-
-    result = RemovalResult()
-    env_var = removed.source[len("env:"):]
-    if not env_var:
-        return result
-
-    # Detect shell vs .env BEFORE remove_env_value pops os.environ.
-    env_in_process = bool(os.getenv(env_var))
-    env_in_dotenv = False
-    try:
-        env_path = get_env_path()
-        if env_path.exists():
-            env_in_dotenv = any(
-                line.strip().startswith(f"{env_var}=")
-                for line in env_path.read_text(errors="replace").splitlines()
-            )
-    except OSError:
-        pass
-    shell_exported = env_in_process and not env_in_dotenv
-
-    cleared = remove_env_value(env_var)
-    if cleared:
-        result.cleaned.append(f"Cleared {env_var} from .env")
-
-    if shell_exported:
-        result.hints.extend([
-            f"Note: {env_var} is still set in your shell environment "
-            f"(not in ~/.hermes/.env).",
-            "  Unset it there (shell profile, systemd EnvironmentFile, "
-            "launchd plist, etc.) or it will keep being visible to Hermes.",
-            f"  The pool entry is now suppressed — Hermes will ignore "
-            f"{env_var} until you run `hermes auth add {provider}`.",
-        ])
-    else:
-        result.hints.append(
-            f"Suppressed env:{env_var} — it will not be re-seeded even "
-            f"if the variable is re-exported later."
-        )
-    return result
-
-
-def _remove_claude_code(provider: str, removed) -> RemovalResult:
-    """~/.claude/.credentials.json is owned by Claude Code itself.
-
-    We don't delete it — the user's Claude Code install still needs to
-    work.  We just suppress it so Hermes stops reading it.
-    """
-    return RemovalResult(hints=[
-        "Suppressed claude_code credential — it will not be re-seeded.",
-        "Note: Claude Code credentials still live in ~/.claude/.credentials.json",
-        "Run `hermes auth add anthropic` to re-enable if needed.",
-    ])
-
-
-def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
-    """~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
-    from hermes_constants import get_hermes_home
-
-    result = RemovalResult()
-    oauth_file = get_hermes_home() / ".anthropic_oauth.json"
-    if oauth_file.exists():
-        try:
-            oauth_file.unlink()
-            result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
-        except OSError as exc:
-            result.hints.append(f"Could not delete {oauth_file}: {exc}")
-    return result
-
-
-def _clear_auth_store_provider(provider: str) -> bool:
-    """Delete auth_store.providers[provider].  Returns True if deleted."""
-    from hermes_cli.auth import (
-        _auth_store_lock,
-        _load_auth_store,
-        _save_auth_store,
-    )
-
-    with _auth_store_lock():
-        auth_store = _load_auth_store()
-        providers_dict = auth_store.get("providers")
-        if isinstance(providers_dict, dict) and provider in providers_dict:
-            del providers_dict[provider]
-            _save_auth_store(auth_store)
-            return True
-    return False
-
-
-def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
-    """Nous OAuth lives in auth.json providers.nous — clear it and suppress.
-
-    We suppress in addition to clearing because nothing else stops the
-    user's next `hermes login` run from writing providers.nous again
-    before they decide to.  Suppression forces them to go through
-    `hermes auth add nous` to re-engage, which is the documented re-add
-    path and clears the suppression atomically.
-    """
-    result = RemovalResult()
-    if _clear_auth_store_provider(provider):
-        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
-    return result
-
-
-def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
-    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
-
-    refresh_codex_oauth_pure() writes both every time, so clearing only
-    the Hermes auth store is not enough — _seed_from_singletons() would
-    re-import from ~/.codex/auth.json on the next load_pool() call and
-    the removal would be instantly undone.  We suppress instead of
-    deleting Codex CLI's file, so the Codex CLI itself keeps working.
-
-    The canonical source name in ``_seed_from_singletons`` is
-    ``"device_code"`` (no prefix).  Entries may show up in the pool as
-    either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
-    via ``hermes auth add openai-codex``), but in both cases the re-seed
-    gate lives at the ``"device_code"`` suppression key.  We suppress
-    that canonical key here; the central dispatcher also suppresses
-    ``removed.source`` which is fine — belt-and-suspenders, idempotent.
-    """
-    from hermes_cli.auth import suppress_credential_source
-
-    result = RemovalResult()
-    if _clear_auth_store_provider(provider):
-        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
-    # Suppress the canonical re-seed source, not just whatever source the
-    # removed entry had.  Otherwise `manual:device_code` removals wouldn't
-    # block the `device_code` re-seed path.
-    suppress_credential_source(provider, "device_code")
-    result.hints.extend([
-        "Suppressed openai-codex device_code source — it will not be re-seeded.",
-        "Note: Codex CLI credentials still live in ~/.codex/auth.json",
-        "Run `hermes auth add openai-codex` to re-enable if needed.",
-    ])
-    return result
-
-
-def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
-    """~/.qwen/oauth_creds.json is owned by the Qwen CLI.
-
-    Same pattern as claude_code — suppress, don't delete.  The user's
-    Qwen CLI install still reads from that file.
-    """
-    return RemovalResult(hints=[
-        "Suppressed qwen-cli credential — it will not be re-seeded.",
-        "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
-        "Run `hermes auth add qwen-oauth` to re-enable if needed.",
-    ])
-
-
-def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
-    """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
-
-    Copilot is special: the same token can be seeded as multiple source
-    entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
-    ``_seed_from_env``), so removing one entry without suppressing the
-    others lets the duplicates resurrect.  We suppress ALL known copilot
-    sources here so removal is stable regardless of which entry the
-    user clicked.
-
-    We don't touch the user's gh CLI or shell state — just suppress so
-    Hermes stops picking the token up.
-    """
-    # Suppress ALL copilot source variants up-front so no path resurrects
-    # the pool entry.  The central dispatcher in auth_remove_command will
-    # ALSO suppress removed.source, but it's idempotent so double-calling
-    # is harmless.
-    from hermes_cli.auth import suppress_credential_source
-    suppress_credential_source(provider, "gh_cli")
-    for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
-        suppress_credential_source(provider, f"env:{env_var}")
-
-    return RemovalResult(hints=[
-        "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
-        "Note: Your gh CLI / shell environment is unchanged.",
-        "Run `hermes auth add copilot` to re-enable if needed.",
-    ])
-
-
-def _remove_custom_config(provider: str, removed) -> RemovalResult:
-    """Custom provider pools are seeded from custom_providers config or
-    model.api_key.  Both are in config.yaml — modifying that from here
-    is more invasive than suppression.  We suppress; the user can edit
-    config.yaml if they want to remove the key from disk entirely.
-    """
-    source_label = removed.source
-    return RemovalResult(hints=[
-        f"Suppressed {source_label} — it will not be re-seeded.",
-        "Note: The underlying value in config.yaml is unchanged.  Edit it "
-        "directly if you want to remove the credential from disk.",
-    ])
-
-
-def _register_all_sources() -> None:
-    """Called once on module import.
-
-    ORDER MATTERS — ``find_removal_step`` returns the first match.  Put
-    provider-specific steps before the generic ``env:*`` step so that e.g.
-    copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
-    doesn't touch the user's shell), not the generic env-var removal
-    (which would try to clear .env).
-    """
-    register(RemovalStep(
-        provider="copilot", source_id="gh_cli",
-        match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
-        remove_fn=_remove_copilot_gh,
-        description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
-    ))
-    register(RemovalStep(
-        provider="*", source_id="env:",
-        match_fn=lambda src: src.startswith("env:"),
-        remove_fn=_remove_env_source,
-        description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
-    ))
-    register(RemovalStep(
-        provider="anthropic", source_id="claude_code",
-        remove_fn=_remove_claude_code,
-        description="~/.claude/.credentials.json",
-    ))
-    register(RemovalStep(
-        provider="anthropic", source_id="hermes_pkce",
-        remove_fn=_remove_hermes_pkce,
-        description="~/.hermes/.anthropic_oauth.json",
-    ))
-    register(RemovalStep(
-        provider="nous", source_id="device_code",
-        remove_fn=_remove_nous_device_code,
-        description="auth.json providers.nous",
-    ))
-    register(RemovalStep(
-        provider="openai-codex", source_id="device_code",
-        match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
-        remove_fn=_remove_codex_device_code,
-        description="auth.json providers.openai-codex + ~/.codex/auth.json",
-    ))
-    register(RemovalStep(
-        provider="qwen-oauth", source_id="qwen-cli",
-        remove_fn=_remove_qwen_cli,
-        description="~/.qwen/oauth_creds.json",
-    ))
-    register(RemovalStep(
-        provider="*", source_id="config:",
-        match_fn=lambda src: src.startswith("config:") or src == "model_config",
-        remove_fn=_remove_custom_config,
-        description="Custom provider config.yaml api_key field",
-    ))
-
-
-_register_all_sources()
@@ -729,7 +729,6 @@ class KawaiiSpinner:
                time.sleep(0.1)
                continue
            frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
-            assert self.start_time is not None  # start() sets it before thread starts
            elapsed = time.time() - self.start_time
            if wings:
                left, right = wings[self.frame_idx % len(wings)]
@@ -220,25 +220,12 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
-    # SSL/TLS transport errors — transient mid-stream handshake/record
-    # failures that should retry rather than surface as a stalled session.
-    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
-    # the type names here so provider-wrapped SSL errors (e.g. when the
-    # SDK re-raises without preserving the exception chain) still classify
-    # as transport rather than falling through to the unknown bucket.
-    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
-    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level).
-# These are the "ambiguous" patterns — a plain connection close could be
-# transient transport hiccup OR server-side context overflow rejection
-# (common when the API gateway disconnects instead of returning an HTTP
-# error for oversized requests).  A large session + one of these patterns
-# triggers the context-overflow-with-compression recovery path.
+# Server disconnect patterns (no status code, but transport-level)
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -249,40 +236,6 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

-# SSL/TLS transient failure patterns — intentionally distinct from
-# _SERVER_DISCONNECT_PATTERNS above.
-#
-# An SSL alert mid-stream is almost always a transport-layer hiccup
-# (flaky network, mid-session TLS renegotiation failure, load balancer
-# dropping the connection) — NOT a server-side context overflow signal.
-# So we want the retry path but NOT the compression path; lumping these
-# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
-# expensive) context compression on any large-session SSL hiccup.
-#
-# The OpenSSL library constructs error codes by prepending a format string
-# to the uppercased alert reason; OpenSSL 3.x changed the separator
-# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
-# which silently stopped matching anything explicit.  Matching on the
-# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
-# survives future OpenSSL format churn without code changes.
-_SSL_TRANSIENT_PATTERNS = [
-    # Space-separated (human-readable form, Python ssl module, most SDKs)
-    "bad record mac",
-    "ssl alert",
-    "tls alert",
-    "ssl handshake failure",
-    "tlsv1 alert",
-    "sslv3 alert",
-    # Underscore-separated (OpenSSL error code tokens, e.g.
-    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
-    "bad_record_mac",
-    "ssl_alert",
-    "tls_alert",
-    "tls_alert_internal_error",
-    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
-    "[ssl:",
-]
-

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -302,10 +255,9 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. SSL/TLS transient alert patterns → retry as timeout
+      5. Transport error heuristics
      6. Server disconnect + large session → context overflow
-      7. Transport error heuristics
-      8. Fallback: unknown (retryable with backoff)
+      7. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -436,18 +388,7 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
-    # SSL alerts mid-stream are transport hiccups, not server-side context
-    # overflow signals.  Classify before the disconnect check so a large
-    # session doesn't incorrectly trigger context compression when the real
-    # cause is a flaky TLS handshake.  Also matches when the error is
-    # wrapped in a generic exception whose message string carries the SSL
-    # alert text but the type isn't ssl.SSLError (happens with some SDKs
-    # that re-raise without chaining).
-    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
-        return _result(FailoverReason.timeout, retryable=True)
-
-    # ── 6. Server disconnect + large session → context overflow ─────
+    # ── 5. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -464,12 +405,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Transport / timeout heuristics ───────────────────────────
+    # ── 6. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 8. Fallback: unknown ────────────────────────────────────────
+    # ── 7. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -529,16 +470,11 @@ def _classify_by_status(
                retryable=False,
                should_fallback=True,
            )
-        # Generic 404 with no "model not found" signal — could be a wrong
-        # endpoint path (common with local llama.cpp / Ollama / vLLM when
-        # the URL is slightly misconfigured), a proxy routing glitch, or
-        # a transient backend issue.  Classifying these as model_not_found
-        # silently falls back to a different provider and tells the model
-        # the model is missing, which is wrong and wastes a turn.  Treat
-        # as unknown so the retry loop surfaces the real error instead.
+        # Generic 404 — could be model or endpoint
        return result_fn(
-            FailoverReason.unknown,
-            retryable=True,
+            FailoverReason.model_not_found,
+            retryable=False,
+            should_fallback=True,
        )

    if status_code == 413:
@@ -1,111 +0,0 @@
-"""Shared file safety rules used by both tools and ACP shims."""
-
-from __future__ import annotations
-
-import os
-from pathlib import Path
-from typing import Optional
-
-
-def _hermes_home_path() -> Path:
-    """Resolve the active HERMES_HOME (profile-aware) without circular imports."""
-    try:
-        from hermes_constants import get_hermes_home  # local import to avoid cycles
-        return get_hermes_home()
-    except Exception:
-        return Path(os.path.expanduser("~/.hermes"))
-
-
-def build_write_denied_paths(home: str) -> set[str]:
-    """Return exact sensitive paths that must never be written."""
-    hermes_home = _hermes_home_path()
-    return {
-        os.path.realpath(p)
-        for p in [
-            os.path.join(home, ".ssh", "authorized_keys"),
-            os.path.join(home, ".ssh", "id_rsa"),
-            os.path.join(home, ".ssh", "id_ed25519"),
-            os.path.join(home, ".ssh", "config"),
-            str(hermes_home / ".env"),
-            os.path.join(home, ".bashrc"),
-            os.path.join(home, ".zshrc"),
-            os.path.join(home, ".profile"),
-            os.path.join(home, ".bash_profile"),
-            os.path.join(home, ".zprofile"),
-            os.path.join(home, ".netrc"),
-            os.path.join(home, ".pgpass"),
-            os.path.join(home, ".npmrc"),
-            os.path.join(home, ".pypirc"),
-            "/etc/sudoers",
-            "/etc/passwd",
-            "/etc/shadow",
-        ]
-    }
-
-
-def build_write_denied_prefixes(home: str) -> list[str]:
-    """Return sensitive directory prefixes that must never be written."""
-    return [
-        os.path.realpath(p) + os.sep
-        for p in [
-            os.path.join(home, ".ssh"),
-            os.path.join(home, ".aws"),
-            os.path.join(home, ".gnupg"),
-            os.path.join(home, ".kube"),
-            "/etc/sudoers.d",
-            "/etc/systemd",
-            os.path.join(home, ".docker"),
-            os.path.join(home, ".azure"),
-            os.path.join(home, ".config", "gh"),
-        ]
-    ]
-
-
-def get_safe_write_root() -> Optional[str]:
-    """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
-    root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
-    if not root:
-        return None
-    try:
-        return os.path.realpath(os.path.expanduser(root))
-    except Exception:
-        return None
-
-
-def is_write_denied(path: str) -> bool:
-    """Return True if path is blocked by the write denylist or safe root."""
-    home = os.path.realpath(os.path.expanduser("~"))
-    resolved = os.path.realpath(os.path.expanduser(str(path)))
-
-    if resolved in build_write_denied_paths(home):
-        return True
-    for prefix in build_write_denied_prefixes(home):
-        if resolved.startswith(prefix):
-            return True
-
-    safe_root = get_safe_write_root()
-    if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
-        return True
-
-    return False
-
-
-def get_read_block_error(path: str) -> Optional[str]:
-    """Return an error message when a read targets internal Hermes cache files."""
-    resolved = Path(path).expanduser().resolve()
-    hermes_home = _hermes_home_path().resolve()
-    blocked_dirs = [
-        hermes_home / "skills" / ".hub" / "index-cache",
-        hermes_home / "skills" / ".hub",
-    ]
-    for blocked in blocked_dirs:
-        try:
-            resolved.relative_to(blocked)
-        except ValueError:
-            continue
-        return (
-            f"Access denied: {path} is an internal Hermes cache file "
-            "and cannot be read directly to prevent prompt injection. "
-            "Use the skills_list or skill_view tools instead."
-        )
-    return None
@@ -799,8 +799,7 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    _raw_details = err_obj.get("details")
-    err_details_list = _raw_details if isinstance(_raw_details, list) else []
+    err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []

    # Extract google.rpc.ErrorInfo reason + metadata.  There may be more
    # than one ErrorInfo (rare), so we pick the first one with a reason.
@@ -613,8 +613,7 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    _raw_details = err_obj.get("details")
-    details_list = _raw_details if isinstance(_raw_details, list) else []
+    details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []

    reason = ""
    retry_after: Optional[float] = None
@@ -1,242 +0,0 @@
-"""
-Image Generation Provider ABC
-=============================
-
-Defines the pluggable-backend interface for image generation. Providers register
-instances via ``PluginContext.register_image_gen_provider()``; the active one
-(selected via ``image_gen.provider`` in ``config.yaml``) services every
-``image_generate`` tool call.
-
-Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
-as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
-via ``plugins.enabled``).
-
-Response shape
--------------
-All providers return a dict that :func:`success_response` / :func:`error_response`
-produce. The tool wrapper JSON-serializes it. Keys:
-
-    success        bool
-    image          str | None       URL or absolute file path
-    model          str              provider-specific model identifier
-    prompt         str              echoed prompt
-    aspect_ratio   str              "landscape" | "square" | "portrait"
-    provider       str              provider name (for diagnostics)
-    error          str              only when success=False
-    error_type     str              only when success=False
-"""
-
-from __future__ import annotations
-
-import abc
-import base64
-import datetime
-import logging
-import uuid
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
-
-logger = logging.getLogger(__name__)
-
-
-VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
-DEFAULT_ASPECT_RATIO = "landscape"
-
-
-# ---------------------------------------------------------------------------
-# ABC
-# ---------------------------------------------------------------------------
-
-
-class ImageGenProvider(abc.ABC):
-    """Abstract base class for an image generation backend.
-
-    Subclasses must implement :meth:`generate`. Everything else has sane
-    defaults — override only what your provider needs.
-    """
-
-    @property
-    @abc.abstractmethod
-    def name(self) -> str:
-        """Stable short identifier used in ``image_gen.provider`` config.
-
-        Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
-        """
-
-    @property
-    def display_name(self) -> str:
-        """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
-        return self.name.title()
-
-    def is_available(self) -> bool:
-        """Return True when this provider can service calls.
-
-        Typically checks for a required API key. Default: True
-        (providers with no external dependencies are always available).
-        """
-        return True
-
-    def list_models(self) -> List[Dict[str, Any]]:
-        """Return catalog entries for ``hermes tools`` model picker.
-
-        Each entry::
-
-            {
-                "id": "gpt-image-1.5",               # required
-                "display": "GPT Image 1.5",          # optional; defaults to id
-                "speed": "~10s",                     # optional
-                "strengths": "...",                  # optional
-                "price": "$...",                     # optional
-            }
-
-        Default: empty list (provider has no user-selectable models).
-        """
-        return []
-
-    def get_setup_schema(self) -> Dict[str, Any]:
-        """Return provider metadata for the ``hermes tools`` picker.
-
-        Used by ``tools_config.py`` to inject this provider as a row in
-        the Image Generation provider list. Shape::
-
-            {
-                "name": "OpenAI",                     # picker label
-                "badge": "paid",                      # optional short tag
-                "tag": "One-line description...",     # optional subtitle
-                "env_vars": [                         # keys to prompt for
-                    {"key": "OPENAI_API_KEY",
-                     "prompt": "OpenAI API key",
-                     "url": "https://platform.openai.com/api-keys"},
-                ],
-            }
-
-        Default: minimal entry derived from ``display_name``. Override to
-        expose API key prompts and custom badges.
-        """
-        return {
-            "name": self.display_name,
-            "badge": "",
-            "tag": "",
-            "env_vars": [],
-        }
-
-    def default_model(self) -> Optional[str]:
-        """Return the default model id, or None if not applicable."""
-        models = self.list_models()
-        if models:
-            return models[0].get("id")
-        return None
-
-    @abc.abstractmethod
-    def generate(
-        self,
-        prompt: str,
-        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
-        **kwargs: Any,
-    ) -> Dict[str, Any]:
-        """Generate an image.
-
-        Implementations should return the dict from :func:`success_response`
-        or :func:`error_response`. ``kwargs`` may contain forward-compat
-        parameters future versions of the schema will expose — implementations
-        should ignore unknown keys.
-        """
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def resolve_aspect_ratio(value: Optional[str]) -> str:
-    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
-
-    Invalid values are coerced rather than rejected so the tool surface is
-    forgiving of agent mistakes.
-    """
-    if not isinstance(value, str):
-        return DEFAULT_ASPECT_RATIO
-    v = value.strip().lower()
-    if v in VALID_ASPECT_RATIOS:
-        return v
-    return DEFAULT_ASPECT_RATIO
-
-
-def _images_cache_dir() -> Path:
-    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
-    from hermes_constants import get_hermes_home
-
-    path = get_hermes_home() / "cache" / "images"
-    path.mkdir(parents=True, exist_ok=True)
-    return path
-
-
-def save_b64_image(
-    b64_data: str,
-    *,
-    prefix: str = "image",
-    extension: str = "png",
-) -> Path:
-    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
-
-    Returns the absolute :class:`Path` to the saved file.
-
-    Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
-    """
-    raw = base64.b64decode(b64_data)
-    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    short = uuid.uuid4().hex[:8]
-    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
-    path.write_bytes(raw)
-    return path
-
-
-def success_response(
-    *,
-    image: str,
-    model: str,
-    prompt: str,
-    aspect_ratio: str,
-    provider: str,
-    extra: Optional[Dict[str, Any]] = None,
-) -> Dict[str, Any]:
-    """Build a uniform success response dict.
-
-    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
-    providers like OpenAI). Callers that need to pass through additional
-    backend-specific fields can supply ``extra``.
-    """
-    payload: Dict[str, Any] = {
-        "success": True,
-        "image": image,
-        "model": model,
-        "prompt": prompt,
-        "aspect_ratio": aspect_ratio,
-        "provider": provider,
-    }
-    if extra:
-        for k, v in extra.items():
-            payload.setdefault(k, v)
-    return payload
-
-
-def error_response(
-    *,
-    error: str,
-    error_type: str = "provider_error",
-    provider: str = "",
-    model: str = "",
-    prompt: str = "",
-    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
-) -> Dict[str, Any]:
-    """Build a uniform error response dict."""
-    return {
-        "success": False,
-        "image": None,
-        "error": error,
-        "error_type": error_type,
-        "model": model,
-        "prompt": prompt,
-        "aspect_ratio": aspect_ratio,
-        "provider": provider,
-    }
@@ -1,120 +0,0 @@
-"""
-Image Generation Provider Registry
-==================================
-
-Central map of registered providers. Populated by plugins at import-time via
-``PluginContext.register_image_gen_provider()``; consumed by the
-``image_generate`` tool to dispatch each call to the active backend.
-
-Active selection
----------------
-The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
-If unset, :func:`get_active_provider` applies fallback logic:
-
-1. If exactly one provider is registered, use it.
-2. Otherwise if a provider named ``fal`` is registered, use it (legacy
-   default — matches pre-plugin behavior).
-3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
-   the user at ``hermes tools``).
-"""
-
-from __future__ import annotations
-
-import logging
-import threading
-from typing import Dict, List, Optional
-
-from agent.image_gen_provider import ImageGenProvider
-
-logger = logging.getLogger(__name__)
-
-
-_providers: Dict[str, ImageGenProvider] = {}
-_lock = threading.Lock()
-
-
-def register_provider(provider: ImageGenProvider) -> None:
-    """Register an image generation provider.
-
-    Re-registration (same ``name``) overwrites the previous entry and logs
-    a debug message — this makes hot-reload scenarios (tests, dev loops)
-    behave predictably.
-    """
-    if not isinstance(provider, ImageGenProvider):
-        raise TypeError(
-            f"register_provider() expects an ImageGenProvider instance, "
-            f"got {type(provider).__name__}"
-        )
-    name = provider.name
-    if not isinstance(name, str) or not name.strip():
-        raise ValueError("Image gen provider .name must be a non-empty string")
-    with _lock:
-        existing = _providers.get(name)
-        _providers[name] = provider
-    if existing is not None:
-        logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
-    else:
-        logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
-
-
-def list_providers() -> List[ImageGenProvider]:
-    """Return all registered providers, sorted by name."""
-    with _lock:
-        items = list(_providers.values())
-    return sorted(items, key=lambda p: p.name)
-
-
-def get_provider(name: str) -> Optional[ImageGenProvider]:
-    """Return the provider registered under *name*, or None."""
-    if not isinstance(name, str):
-        return None
-    with _lock:
-        return _providers.get(name.strip())
-
-
-def get_active_provider() -> Optional[ImageGenProvider]:
-    """Resolve the currently-active provider.
-
-    Reads ``image_gen.provider`` from config.yaml; falls back per the
-    module docstring.
-    """
-    configured: Optional[str] = None
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config()
-        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
-        if isinstance(section, dict):
-            raw = section.get("provider")
-            if isinstance(raw, str) and raw.strip():
-                configured = raw.strip()
-    except Exception as exc:
-        logger.debug("Could not read image_gen.provider from config: %s", exc)
-
-    with _lock:
-        snapshot = dict(_providers)
-
-    if configured:
-        provider = snapshot.get(configured)
-        if provider is not None:
-            return provider
-        logger.debug(
-            "image_gen.provider='%s' configured but not registered; falling back",
-            configured,
-        )
-
-    # Fallback: single-provider case
-    if len(snapshot) == 1:
-        return next(iter(snapshot.values()))
-
-    # Fallback: prefer legacy FAL for backward compat
-    if "fal" in snapshot:
-        return snapshot["fal"]
-
-    return None
-
-
-def _reset_for_tests() -> None:
-    """Clear the registry. **Test-only.**"""
-    with _lock:
-        _providers.clear()
@@ -124,7 +124,6 @@ class InsightsEngine:
        # Gather raw data
        sessions = self._get_sessions(cutoff, source)
        tool_usage = self._get_tool_usage(cutoff, source)
-        skill_usage = self._get_skill_usage(cutoff, source)
        message_stats = self._get_message_stats(cutoff, source)

        if not sessions:
@@ -136,15 +135,6 @@ class InsightsEngine:
                "models": [],
                "platforms": [],
                "tools": [],
-                "skills": {
-                    "summary": {
-                        "total_skill_loads": 0,
-                        "total_skill_edits": 0,
-                        "total_skill_actions": 0,
-                        "distinct_skills_used": 0,
-                    },
-                    "top_skills": [],
-                },
                "activity": {},
                "top_sessions": [],
            }
@@ -154,7 +144,6 @@ class InsightsEngine:
        models = self._compute_model_breakdown(sessions)
        platforms = self._compute_platform_breakdown(sessions)
        tools = self._compute_tool_breakdown(tool_usage)
-        skills = self._compute_skill_breakdown(skill_usage)
        activity = self._compute_activity_patterns(sessions)
        top_sessions = self._compute_top_sessions(sessions)

@@ -167,7 +156,6 @@ class InsightsEngine:
            "models": models,
            "platforms": platforms,
            "tools": tools,
-            "skills": skills,
            "activity": activity,
            "top_sessions": top_sessions,
        }
@@ -296,82 +284,6 @@ class InsightsEngine:
            for name, count in tool_counts.most_common()
        ]

-    def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
-        """Extract per-skill usage from assistant tool calls."""
-        skill_counts: Dict[str, Dict[str, Any]] = {}
-
-        if source:
-            cursor = self._conn.execute(
-                """SELECT m.tool_calls, m.timestamp
-                   FROM messages m
-                   JOIN sessions s ON s.id = m.session_id
-                   WHERE s.started_at >= ? AND s.source = ?
-                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
-                (cutoff, source),
-            )
-        else:
-            cursor = self._conn.execute(
-                """SELECT m.tool_calls, m.timestamp
-                   FROM messages m
-                   JOIN sessions s ON s.id = m.session_id
-                   WHERE s.started_at >= ?
-                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
-                (cutoff,),
-            )
-
-        for row in cursor.fetchall():
-            try:
-                calls = row["tool_calls"]
-                if isinstance(calls, str):
-                    calls = json.loads(calls)
-                if not isinstance(calls, list):
-                    continue
-            except (json.JSONDecodeError, TypeError):
-                continue
-
-            timestamp = row["timestamp"]
-            for call in calls:
-                if not isinstance(call, dict):
-                    continue
-                func = call.get("function", {})
-                tool_name = func.get("name")
-                if tool_name not in {"skill_view", "skill_manage"}:
-                    continue
-
-                args = func.get("arguments")
-                if isinstance(args, str):
-                    try:
-                        args = json.loads(args)
-                    except (json.JSONDecodeError, TypeError):
-                        continue
-                if not isinstance(args, dict):
-                    continue
-
-                skill_name = args.get("name")
-                if not isinstance(skill_name, str) or not skill_name.strip():
-                    continue
-
-                entry = skill_counts.setdefault(
-                    skill_name,
-                    {
-                        "skill": skill_name,
-                        "view_count": 0,
-                        "manage_count": 0,
-                        "last_used_at": None,
-                    },
-                )
-                if tool_name == "skill_view":
-                    entry["view_count"] += 1
-                else:
-                    entry["manage_count"] += 1
-
-                if timestamp is not None and (
-                    entry["last_used_at"] is None or timestamp > entry["last_used_at"]
-                ):
-                    entry["last_used_at"] = timestamp
-
-        return list(skill_counts.values())
-
    def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
        """Get aggregate message statistics."""
        if source:
@@ -563,46 +475,6 @@ class InsightsEngine:
            })
        return result

-    def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
-        """Process per-skill usage into summary + ranked list."""
-        total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
-        total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
-        total_skill_actions = total_skill_loads + total_skill_edits
-
-        top_skills = []
-        for skill in skill_usage:
-            total_count = skill["view_count"] + skill["manage_count"]
-            percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
-            top_skills.append({
-                "skill": skill["skill"],
-                "view_count": skill["view_count"],
-                "manage_count": skill["manage_count"],
-                "total_count": total_count,
-                "percentage": percentage,
-                "last_used_at": skill.get("last_used_at"),
-            })
-
-        top_skills.sort(
-            key=lambda s: (
-                s["total_count"],
-                s["view_count"],
-                s["manage_count"],
-                s["last_used_at"] or 0,
-                s["skill"],
-            ),
-            reverse=True,
-        )
-
-        return {
-            "summary": {
-                "total_skill_loads": total_skill_loads,
-                "total_skill_edits": total_skill_edits,
-                "total_skill_actions": total_skill_actions,
-                "distinct_skills_used": len(skill_usage),
-            },
-            "top_skills": top_skills,
-        }
-
    def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
        """Analyze activity patterns by day of week and hour."""
        day_counts = Counter()  # 0=Monday ... 6=Sunday
@@ -798,28 +670,6 @@ class InsightsEngine:
                lines.append(f"  ... and {len(report['tools']) - 15} more tools")
            lines.append("")

-        # Skill usage
-        skills = report.get("skills", {})
-        top_skills = skills.get("top_skills", [])
-        if top_skills:
-            lines.append("  🧠 Top Skills")
-            lines.append("  " + "─" * 56)
-            lines.append(f"  {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
-            for skill in top_skills[:10]:
-                last_used = "—"
-                if skill.get("last_used_at"):
-                    last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
-                lines.append(
-                    f"  {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
-                )
-            summary = skills.get("summary", {})
-            lines.append(
-                f"  Distinct skills: {summary.get('distinct_skills_used', 0)}  "
-                f"Loads: {summary.get('total_skill_loads', 0):,}  "
-                f"Edits: {summary.get('total_skill_edits', 0):,}"
-            )
-            lines.append("")
-
        # Activity patterns
        act = report.get("activity", {})
        if act.get("by_day"):
@@ -903,18 +753,6 @@ class InsightsEngine:
                lines.append(f"  {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)")
            lines.append("")

-        skills = report.get("skills", {})
-        if skills.get("top_skills"):
-            lines.append("**🧠 Top Skills:**")
-            for skill in skills["top_skills"][:5]:
-                suffix = ""
-                if skill.get("last_used_at"):
-                    suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
-                lines.append(
-                    f"  {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
-                )
-            lines.append("")
-
        # Activity summary
        act = report.get("activity", {})
        if act.get("busiest_day") and act.get("busiest_hour"):
@@ -4,7 +4,6 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

-import ipaddress
 import logging
 import re
 import time
@@ -15,8 +14,6 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from utils import base_url_host_matches, base_url_hostname
-
 from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)
@@ -26,7 +23,7 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
@@ -37,7 +34,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
    "ollama",
-    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
+    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
    "xai", "x-ai", "x.ai", "grok",
@@ -52,13 +49,6 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


-# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
-# block, so without an explicit check Ollama reached over Tailscale (e.g.
-# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
-# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
-_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
-
-
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -126,6 +116,7 @@ DEFAULT_CONTEXT_LENGTHS = {
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
+    "gpt-5.3-codex-spark": 128000,    # Spark variant has reduced 128k context
    "gpt-5.1-chat": 128000,           # Chat variant has 128k context
    "gpt-5": 400000,                  # GPT-5.x base, mini, codex variants (400k)
    "gpt-4.1": 1047576,
@@ -133,8 +124,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
-    "gemma-4": 256000,  # Gemma 4 family
-    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
@@ -180,15 +169,12 @@ DEFAULT_CONTEXT_LENGTHS = {
    "Qwen/Qwen3.5-35B-A3B": 131072,
    "deepseek-ai/DeepSeek-V3.2": 65536,
    "moonshotai/Kimi-K2.5": 262144,
-    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
    "XiaomiMiMo/MiMo-V2-Flash": 256000,
    "mimo-v2-pro": 1000000,
    "mimo-v2-omni": 256000,
    "mimo-v2-flash": 256000,
-    "mimo-v2.5-pro": 1000000,
-    "mimo-v2.5": 1000000,
    "zai-org/GLM-5": 202752,
 }

@@ -203,7 +189,6 @@ _CONTEXT_LENGTH_KEYS = (
    "max_seq_len",
    "n_ctx_train",
    "n_ctx",
-    "ctx_size",
 )

 _MAX_COMPLETION_KEYS = (
@@ -226,15 +211,8 @@ def _normalize_base_url(base_url: str) -> str:
    return (base_url or "").strip().rstrip("/")


-def _auth_headers(api_key: str = "") -> Dict[str, str]:
-    token = str(api_key or "").strip()
-    if not token:
-        return {}
-    return {"Authorization": f"Bearer {token}"}
-
-
 def _is_openrouter_base_url(base_url: str) -> bool:
-    return base_url_host_matches(base_url, "openrouter.ai")
+    return "openrouter.ai" in _normalize_base_url(base_url).lower()


 def _is_custom_endpoint(base_url: str) -> bool:
@@ -247,12 +225,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "chatgpt.com": "openai",
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
-    "open.bigmodel.cn": "zai",
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
-    "api.stepfun.ai": "stepfun",
-    "api.stepfun.com": "stepfun",
    "api.arcee.ai": "arcee",
    "api.minimax": "minimax",
    "dashscope.aliyuncs.com": "alibaba",
@@ -297,15 +272,7 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine.
-
-    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
-    container-internal DNS names (``host.docker.internal`` et al.),
-    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
-    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
-    is included so remote-but-trusted Ollama boxes reached over a
-    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
-    """
+    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -320,17 +287,14 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
+    # RFC-1918 private ranges and link-local
+    import ipaddress
    try:
        addr = ipaddress.ip_address(host)
-        if addr.is_private or addr.is_loopback or addr.is_link_local:
-            return True
-        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
-            return True
+        return addr.is_private or addr.is_loopback or addr.is_link_local
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
-    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -341,14 +305,12 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
-            if first == 100 and 64 <= second <= 127:
-                return True
        except ValueError:
            pass
    return False


-def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
+def detect_local_server_type(base_url: str) -> Optional[str]:
    """Detect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -360,10 +322,8 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

-    headers = _auth_headers(api_key)
-
    try:
-        with httpx.Client(timeout=2.0, headers=headers) as client:
+        with httpx.Client(timeout=2.0) as client:
            # LM Studio exposes /api/v1/models — check first (most specific)
            try:
                r = client.get(f"{server_url}/api/v1/models")
@@ -550,59 +510,6 @@ def fetch_endpoint_model_metadata(
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    last_error: Optional[Exception] = None

-    if is_local_endpoint(normalized):
-        try:
-            if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
-                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
-                response = requests.get(
-                    server_url.rstrip("/") + "/api/v1/models",
-                    headers=headers,
-                    timeout=10,
-                )
-                response.raise_for_status()
-                payload = response.json()
-                cache: Dict[str, Dict[str, Any]] = {}
-                for model in payload.get("models", []):
-                    if not isinstance(model, dict):
-                        continue
-                    model_id = model.get("key") or model.get("id")
-                    if not model_id:
-                        continue
-                    entry: Dict[str, Any] = {"name": model.get("name", model_id)}
-
-                    context_length = None
-                    for inst in model.get("loaded_instances", []) or []:
-                        if not isinstance(inst, dict):
-                            continue
-                        cfg = inst.get("config", {})
-                        ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
-                        if isinstance(ctx, int) and ctx > 0:
-                            context_length = ctx
-                            break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
-                    if context_length is not None:
-                        entry["context_length"] = context_length
-
-                    max_completion_tokens = _extract_max_completion_tokens(model)
-                    if max_completion_tokens is not None:
-                        entry["max_completion_tokens"] = max_completion_tokens
-
-                    pricing = _extract_pricing(model)
-                    if pricing:
-                        entry["pricing"] = pricing
-
-                    _add_model_aliases(cache, model_id, entry)
-                    alt_id = model.get("id")
-                    if isinstance(alt_id, str) and alt_id and alt_id != model_id:
-                        _add_model_aliases(cache, alt_id, entry)
-
-                _endpoint_model_metadata_cache[normalized] = cache
-                _endpoint_model_metadata_cache_time[normalized] = time.time()
-                return cache
-        except Exception as exc:
-            last_error = exc
-
    for candidate in candidates:
        url = candidate.rstrip("/") + "/models"
        try:
@@ -809,7 +716,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
    return False


-def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
+def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
    """Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -827,16 +734,14 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
        server_url = server_url[:-3]

    try:
-        server_type = detect_local_server_type(base_url, api_key=api_key)
+        server_type = detect_local_server_type(base_url)
    except Exception:
        return None
    if server_type != "ollama":
        return None

-    headers = _auth_headers(api_key)
-
    try:
-        with httpx.Client(timeout=3.0, headers=headers) as client:
+        with httpx.Client(timeout=3.0) as client:
            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
            if resp.status_code != 200:
                return None
@@ -864,7 +769,7 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
    return None


-def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
+def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
    """Query a local server for the model's context length."""
    import httpx

@@ -877,15 +782,13 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

-    headers = _auth_headers(api_key)
-
    try:
-        server_type = detect_local_server_type(base_url, api_key=api_key)
+        server_type = detect_local_server_type(base_url)
    except Exception:
        server_type = None

    try:
-        with httpx.Client(timeout=3.0, headers=headers) as client:
+        with httpx.Client(timeout=3.0) as client:
            # Ollama: /api/show returns model details with context info
            if server_type == "ollama":
                resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -1096,7 +999,7 @@ def get_model_context_length(
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
-                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
+                local_ctx = _query_local_context_length(model, base_url)
                if local_ctx and local_ctx > 0:
                    save_context_length(model, base_url, local_ctx)
                    return local_ctx
@@ -1110,7 +1013,7 @@ def get_model_context_length(

    # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
    if provider == "anthropic" or (
-        base_url and base_url_hostname(base_url) == "api.anthropic.com"
+        base_url and "api.anthropic.com" in base_url
    ):
        ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
        if ctx:
@@ -1119,11 +1022,7 @@ def get_model_context_length(
    # 4b. AWS Bedrock — use static context length table.
    # Bedrock's ListFoundationModels doesn't expose context window sizes,
    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (
-        base_url
-        and base_url_hostname(base_url).startswith("bedrock-runtime.")
-        and base_url_host_matches(base_url, "amazonaws.com")
-    ):
+    if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
        try:
            from agent.bedrock_adapter import get_bedrock_context_length
            return get_bedrock_context_length(model)
@@ -1170,7 +1069,7 @@ def get_model_context_length(

    # 9. Query local server as last resort
    if base_url and is_local_endpoint(base_url):
-        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
+        local_ctx = _query_local_context_length(model, base_url)
        if local_ctx and local_ctx > 0:
            save_context_length(model, base_url, local_ctx)
            return local_ctx
@@ -146,7 +146,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
-    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
    "minimax-cn": "minimax-cn",
@@ -418,9 +417,6 @@ def list_provider_models(provider: str) -> List[str]:

    Returns an empty list if the provider is unknown or has no data.
    """
-    from hermes_cli.models import normalize_provider
-    provider = normalize_provider(provider) or provider
-    
    models = _get_provider_models(provider)
    if models is None:
        return []
@@ -350,13 +350,7 @@ PLATFORM_HINTS = {
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
-        "renderable inside a terminal. "
-        "File delivery: there is no attachment channel — the user reads your "
-        "response directly in their terminal. Do NOT emit MEDIA:/path tags "
-        "(those are only intercepted on messaging platforms like Telegram, "
-        "Discord, Slack, etc.; on the CLI they render as literal text). "
-        "When referring to a file you created or changed, just state its "
-        "absolute path in plain text; the user can open it from there."
+        "renderable inside a terminal."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
@@ -370,32 +364,6 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
-    "mattermost": (
-        "You are in a Mattermost workspace communicating with your user. "
-        "Mattermost renders standard Markdown — headings, bold, italic, code "
-        "blocks, and tables all work. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
-        "attachments, audio and video as file attachments. "
-        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
-    ),
-    "matrix": (
-        "You are in a Matrix room communicating with your user. "
-        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
-        "the adapter converts your Markdown to HTML for rich display. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
-        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
-        "and other files as downloadable attachments."
-    ),
-    "feishu": (
-        "You are in a Feishu (Lark) workspace communicating with your user. "
-        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
-        "links are supported. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
-        "inline, audio files as voice messages, and other files as attachments."
-    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -13,48 +13,6 @@ import re

 logger = logging.getLogger(__name__)

-# Sensitive query-string parameter names (case-insensitive exact match).
-# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
-# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
-_SENSITIVE_QUERY_PARAMS = frozenset({
-    "access_token",
-    "refresh_token",
-    "id_token",
-    "token",
-    "api_key",
-    "apikey",
-    "client_secret",
-    "password",
-    "auth",
-    "jwt",
-    "session",
-    "secret",
-    "key",
-    "code",           # OAuth authorization codes
-    "signature",      # pre-signed URL signatures
-    "x-amz-signature",
-})
-
-# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
-# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
-# Ported from nearai/ironclaw#2529.
-_SENSITIVE_BODY_KEYS = frozenset({
-    "access_token",
-    "refresh_token",
-    "id_token",
-    "token",
-    "api_key",
-    "apikey",
-    "client_secret",
-    "password",
-    "auth",
-    "jwt",
-    "secret",
-    "private_key",
-    "authorization",
-    "key",
-})
-
 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
 # `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
 _REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -150,30 +108,6 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")

-# URLs containing query strings — matches `scheme://...?...[# or end]`.
-# Used to scan text for URLs whose query params may contain secrets.
-# Ported from nearai/ironclaw#2529.
-_URL_WITH_QUERY_RE = re.compile(
-    r"(https?|wss?|ftp)://"          # scheme
-    r"([^\s/?#]+)"                    # authority (may include userinfo)
-    r"([^\s?#]*)"                     # path
-    r"\?([^\s#]+)"                    # query (required)
-    r"(#\S*)?",                       # optional fragment
-)
-
-# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
-# (not just DB protocols already covered by _DB_CONNSTR_RE above).
-# Catches things like `https://user:token@api.example.com/v1/foo`.
-_URL_USERINFO_RE = re.compile(
-    r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
-)
-
-# Form-urlencoded body detection: conservative — only applies when the entire
-# text looks like a query string (k=v&k=v pattern with no newlines).
-_FORM_BODY_RE = re.compile(
-    r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
-)
-
 # Compile known prefix patterns into one alternation
 _PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -187,72 +121,6 @@ def _mask_token(token: str) -> str:
    return f"{token[:6]}...{token[-4:]}"


-def _redact_query_string(query: str) -> str:
-    """Redact sensitive parameter values in a URL query string.
-
-    Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
-    replaced with `***`. Non-sensitive keys pass through unchanged.
-    Empty or malformed pairs are preserved as-is.
-    """
-    if not query:
-        return query
-    parts = []
-    for pair in query.split("&"):
-        if "=" not in pair:
-            parts.append(pair)
-            continue
-        key, _, value = pair.partition("=")
-        if key.lower() in _SENSITIVE_QUERY_PARAMS:
-            parts.append(f"{key}=***")
-        else:
-            parts.append(pair)
-    return "&".join(parts)
-
-
-def _redact_url_query_params(text: str) -> str:
-    """Scan text for URLs with query strings and redact sensitive params.
-
-    Catches opaque tokens that don't match vendor prefix regexes, e.g.
-    `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`.
-    """
-    def _sub(m: re.Match) -> str:
-        scheme = m.group(1)
-        authority = m.group(2)
-        path = m.group(3)
-        query = _redact_query_string(m.group(4))
-        fragment = m.group(5) or ""
-        return f"{scheme}://{authority}{path}?{query}{fragment}"
-    return _URL_WITH_QUERY_RE.sub(_sub, text)
-
-
-def _redact_url_userinfo(text: str) -> str:
-    """Strip `user:password@` from HTTP/WS/FTP URLs.
-
-    DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
-    separately by `_DB_CONNSTR_RE`.
-    """
-    return _URL_USERINFO_RE.sub(
-        lambda m: f"{m.group(1)}://{m.group(2)}:***@",
-        text,
-    )
-
-
-def _redact_form_body(text: str) -> str:
-    """Redact sensitive values in a form-urlencoded body.
-
-    Only applies when the entire input looks like a pure form body
-    (k=v&k=v with no newlines, no other text). Single-line non-form
-    text passes through unchanged. This is a conservative pass — the
-    `_redact_url_query_params` function handles embedded query strings.
-    """
-    if not text or "\n" in text or "&" not in text:
-        return text
-    # The body-body form check is strict: only trigger on clean k=v&k=v.
-    if not _FORM_BODY_RE.match(text.strip()):
-        return text
-    return _redact_query_string(text.strip())
-
-
 def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

@@ -305,16 +173,6 @@ def redact_sensitive_text(text: str) -> str:
    # JWT tokens (eyJ... — base64-encoded JSON headers)
    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)

-    # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
-    # DB schemes are handled above by _DB_CONNSTR_RE.
-    text = _redact_url_userinfo(text)
-
-    # URL query params containing opaque tokens (?access_token=…&code=…)
-    text = _redact_url_query_params(text)
-
-    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
-    text = _redact_form_body(text)
-
    # Discord user/role mentions (<@snowflake_id>)
    text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)

@@ -1,831 +0,0 @@
-"""
-Shell-script hooks bridge.
-
-Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
-consent on first use of each ``(event, command)`` pair, and registers
-callbacks on the existing plugin hook manager so every existing
-``invoke_hook()`` site dispatches to the configured shell scripts — with
-zero changes to call sites.
-
-Design notes
------------
-* Python plugins and shell hooks compose naturally: both flow through
-  :func:`hermes_cli.plugins.invoke_hook` and its aggregators.  Python
-  plugins are registered first (via ``discover_and_load()``) so their
-  block decisions win ties over shell-hook blocks.
-* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
-  with ``shell=False`` — no shell injection footguns.  Users that need
-  pipes/redirection wrap their logic in a script.
-* First-use consent is gated by the allowlist under
-  ``~/.hermes/shell-hooks-allowlist.json``.  Non-TTY callers must pass
-  ``accept_hooks=True`` (resolved from ``--accept-hooks``,
-  ``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
-  for registration to succeed without a prompt.
-* Registration is idempotent — safe to invoke from both the CLI entry
-  point (``hermes_cli/main.py``) and the gateway entry point
-  (``gateway/run.py``).
-
-Wire protocol
-------------
-**stdin** (JSON, piped to the script)::
-
-    {
-        "hook_event_name": "pre_tool_call",
-        "tool_name":       "terminal",
-        "tool_input":      {"command": "rm -rf /"},
-        "session_id":      "sess_abc123",
-        "cwd":             "/home/user/project",
-        "extra":           {...}   # event-specific kwargs
-    }
-
-**stdout** (JSON, optional — anything else is ignored)::
-
-    # Block a pre_tool_call (either shape accepted; normalised internally):
-    {"decision": "block", "reason":  "Forbidden command"}   # Claude-Code-style
-    {"action":   "block", "message": "Forbidden command"}   # Hermes-canonical
-
-    # Inject context for pre_llm_call:
-    {"context": "Today is Friday"}
-
-    # Silent no-op:
-    <empty or any non-matching JSON object>
-"""
-
-from __future__ import annotations
-
-import difflib
-import json
-import logging
-import os
-import re
-import shlex
-import subprocess
-import sys
-import tempfile
-import threading
-import time
-from contextlib import contextmanager
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
-
-try:
-    import fcntl  # POSIX only; Windows falls back to best-effort without flock.
-except ImportError:  # pragma: no cover
-    fcntl = None  # type: ignore[assignment]
-
-from hermes_constants import get_hermes_home
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_TIMEOUT_SECONDS = 60
-MAX_TIMEOUT_SECONDS = 300
-ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
-
-# (event, matcher, command) triples that have been wired to the plugin
-# manager in the current process.  Matcher is part of the key because
-# the same script can legitimately register for different matchers under
-# the same event (e.g. one entry per tool the user wants to gate).
-# Second registration attempts for the exact same triple become no-ops
-# so the CLI and gateway can both call register_from_config() safely.
-_registered: Set[Tuple[str, Optional[str], str]] = set()
-_registered_lock = threading.Lock()
-
-# Intra-process lock for allowlist read-modify-write on platforms that
-# lack ``fcntl`` (non-POSIX).  Kept separate from ``_registered_lock``
-# because ``register_from_config`` already holds ``_registered_lock`` when
-# it triggers ``_record_approval`` — reusing it here would self-deadlock
-# (``threading.Lock`` is non-reentrant).  POSIX callers use the sibling
-# ``.lock`` file via ``fcntl.flock`` and bypass this.
-_allowlist_write_lock = threading.Lock()
-
-
-@dataclass
-class ShellHookSpec:
-    """Parsed and validated representation of a single ``hooks:`` entry."""
-
-    event: str
-    command: str
-    matcher: Optional[str] = None
-    timeout: int = DEFAULT_TIMEOUT_SECONDS
-    compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
-
-    def __post_init__(self) -> None:
-        # Strip whitespace introduced by YAML quirks (e.g. multi-line string
-        # folding) — a matcher of " terminal" would otherwise silently fail
-        # to match "terminal" without any diagnostic.
-        if isinstance(self.matcher, str):
-            stripped = self.matcher.strip()
-            self.matcher = stripped if stripped else None
-        if self.matcher:
-            try:
-                self.compiled_matcher = re.compile(self.matcher)
-            except re.error as exc:
-                logger.warning(
-                    "shell hook matcher %r is invalid (%s) — treating as "
-                    "literal equality", self.matcher, exc,
-                )
-                self.compiled_matcher = None
-
-    def matches_tool(self, tool_name: Optional[str]) -> bool:
-        if not self.matcher:
-            return True
-        if tool_name is None:
-            return False
-        if self.compiled_matcher is not None:
-            return self.compiled_matcher.fullmatch(tool_name) is not None
-        # compiled_matcher is None only when the regex failed to compile,
-        # in which case we already warned and fall back to literal equality.
-        return tool_name == self.matcher
-
-
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
-
-def register_from_config(
-    cfg: Optional[Dict[str, Any]],
-    *,
-    accept_hooks: bool = False,
-) -> List[ShellHookSpec]:
-    """Register every configured shell hook on the plugin manager.
-
-    ``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
-    output).  The ``hooks:`` key is read out of it.  Missing, empty, or
-    non-dict ``hooks`` is treated as zero configured hooks.
-
-    ``accept_hooks=True`` skips the TTY consent prompt — the caller is
-    promising that the user has opted in via a flag, env var, or config
-    setting.  ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
-    also honored inside this function so either CLI or gateway call sites
-    pick them up.
-
-    Returns the list of :class:`ShellHookSpec` entries that ended up wired
-    up on the plugin manager.  Skipped entries (unknown events, malformed,
-    not allowlisted, already registered) are logged but not returned.
-    """
-    if not isinstance(cfg, dict):
-        return []
-
-    effective_accept = _resolve_effective_accept(cfg, accept_hooks)
-
-    specs = _parse_hooks_block(cfg.get("hooks"))
-    if not specs:
-        return []
-
-    registered: List[ShellHookSpec] = []
-
-    # Import lazily — avoids circular imports at module-load time.
-    from hermes_cli.plugins import get_plugin_manager
-
-    manager = get_plugin_manager()
-
-    # Idempotence + allowlist read happen under the lock; the TTY
-    # prompt runs outside so other threads aren't parked on a blocking
-    # input().  Mutation re-takes the lock with a defensive idempotence
-    # re-check in case two callers ever race through the prompt.
-    for spec in specs:
-        key = (spec.event, spec.matcher, spec.command)
-        with _registered_lock:
-            if key in _registered:
-                continue
-            already_allowlisted = _is_allowlisted(spec.event, spec.command)
-
-        if not already_allowlisted:
-            if not _prompt_and_record(
-                spec.event, spec.command, accept_hooks=effective_accept,
-            ):
-                logger.warning(
-                    "shell hook for %s (%s) not allowlisted — skipped. "
-                    "Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
-                    "hooks_auto_accept: true, or approve at the TTY "
-                    "prompt next run.",
-                    spec.event, spec.command,
-                )
-                continue
-
-        with _registered_lock:
-            if key in _registered:
-                continue
-            manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
-            _registered.add(key)
-            registered.append(spec)
-            logger.info(
-                "shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
-                spec.event, spec.command, spec.matcher, spec.timeout,
-            )
-
-    return registered
-
-
-def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
-    """Return the parsed ``ShellHookSpec`` entries from config without
-    registering anything.  Used by ``hermes hooks list`` and ``doctor``."""
-    if not isinstance(cfg, dict):
-        return []
-    return _parse_hooks_block(cfg.get("hooks"))
-
-
-def reset_for_tests() -> None:
-    """Clear the idempotence set.  Test-only helper."""
-    with _registered_lock:
-        _registered.clear()
-
-
-# ---------------------------------------------------------------------------
-# Config parsing
-# ---------------------------------------------------------------------------
-
-def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
-    """Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
-
-    Malformed entries warn-and-skip — we never raise from config parsing
-    because a broken hook must not crash the agent.
-    """
-    from hermes_cli.plugins import VALID_HOOKS
-
-    if not isinstance(hooks_cfg, dict):
-        return []
-
-    specs: List[ShellHookSpec] = []
-
-    for event_name, entries in hooks_cfg.items():
-        if event_name not in VALID_HOOKS:
-            suggestion = difflib.get_close_matches(
-                str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
-            )
-            if suggestion:
-                logger.warning(
-                    "unknown hook event %r in hooks: config — did you mean %r?",
-                    event_name, suggestion[0],
-                )
-            else:
-                logger.warning(
-                    "unknown hook event %r in hooks: config (valid: %s)",
-                    event_name, ", ".join(sorted(VALID_HOOKS)),
-                )
-            continue
-
-        if entries is None:
-            continue
-
-        if not isinstance(entries, list):
-            logger.warning(
-                "hooks.%s must be a list of hook definitions; got %s",
-                event_name, type(entries).__name__,
-            )
-            continue
-
-        for i, raw in enumerate(entries):
-            spec = _parse_single_entry(event_name, i, raw)
-            if spec is not None:
-                specs.append(spec)
-
-    return specs
-
-
-def _parse_single_entry(
-    event: str, index: int, raw: Any,
-) -> Optional[ShellHookSpec]:
-    if not isinstance(raw, dict):
-        logger.warning(
-            "hooks.%s[%d] must be a mapping with a 'command' key; got %s",
-            event, index, type(raw).__name__,
-        )
-        return None
-
-    command = raw.get("command")
-    if not isinstance(command, str) or not command.strip():
-        logger.warning(
-            "hooks.%s[%d] is missing a non-empty 'command' field",
-            event, index,
-        )
-        return None
-
-    matcher = raw.get("matcher")
-    if matcher is not None and not isinstance(matcher, str):
-        logger.warning(
-            "hooks.%s[%d].matcher must be a string regex; ignoring",
-            event, index,
-        )
-        matcher = None
-
-    if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
-        logger.warning(
-            "hooks.%s[%d].matcher=%r will be ignored at runtime — the "
-            "matcher field is only honored for pre_tool_call / "
-            "post_tool_call.  The hook will fire on every %s event.",
-            event, index, matcher, event,
-        )
-        matcher = None
-
-    timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
-    try:
-        timeout = int(timeout_raw)
-    except (TypeError, ValueError):
-        logger.warning(
-            "hooks.%s[%d].timeout must be an int (got %r); using default %ds",
-            event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
-        )
-        timeout = DEFAULT_TIMEOUT_SECONDS
-
-    if timeout < 1:
-        logger.warning(
-            "hooks.%s[%d].timeout must be >=1; using default %ds",
-            event, index, DEFAULT_TIMEOUT_SECONDS,
-        )
-        timeout = DEFAULT_TIMEOUT_SECONDS
-
-    if timeout > MAX_TIMEOUT_SECONDS:
-        logger.warning(
-            "hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
-            event, index, timeout, MAX_TIMEOUT_SECONDS,
-        )
-        timeout = MAX_TIMEOUT_SECONDS
-
-    return ShellHookSpec(
-        event=event,
-        command=command.strip(),
-        matcher=matcher,
-        timeout=timeout,
-    )
-
-
-# ---------------------------------------------------------------------------
-# Subprocess callback
-# ---------------------------------------------------------------------------
-
-_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
-
-
-def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
-    """Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
-
-    Returns a diagnostic dict with the same keys for every outcome
-    (``returncode``, ``stdout``, ``stderr``, ``timed_out``,
-    ``elapsed_seconds``, ``error``).  This is the single place the
-    subprocess is actually invoked — both the live callback path
-    (:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
-    go through it.
-    """
-    result: Dict[str, Any] = {
-        "returncode": None,
-        "stdout": "",
-        "stderr": "",
-        "timed_out": False,
-        "elapsed_seconds": 0.0,
-        "error": None,
-    }
-    try:
-        argv = shlex.split(os.path.expanduser(spec.command))
-    except ValueError as exc:
-        result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
-        return result
-    if not argv:
-        result["error"] = "empty command"
-        return result
-
-    t0 = time.monotonic()
-    try:
-        proc = subprocess.run(
-            argv,
-            input=stdin_json,
-            capture_output=True,
-            timeout=spec.timeout,
-            text=True,
-            shell=False,
-        )
-    except subprocess.TimeoutExpired:
-        result["timed_out"] = True
-        result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
-        return result
-    except FileNotFoundError:
-        result["error"] = "command not found"
-        return result
-    except PermissionError:
-        result["error"] = "command not executable"
-        return result
-    except Exception as exc:  # pragma: no cover — defensive
-        result["error"] = str(exc)
-        return result
-
-    result["returncode"] = proc.returncode
-    result["stdout"] = proc.stdout or ""
-    result["stderr"] = proc.stderr or ""
-    result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
-    return result
-
-
-def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
-    """Build the closure that ``invoke_hook()`` will call per firing."""
-
-    def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
-        # Matcher gate — only meaningful for tool-scoped events.
-        if spec.event in ("pre_tool_call", "post_tool_call"):
-            if not spec.matches_tool(kwargs.get("tool_name")):
-                return None
-
-        r = _spawn(spec, _serialize_payload(spec.event, kwargs))
-
-        if r["error"]:
-            logger.warning(
-                "shell hook failed (event=%s command=%s): %s",
-                spec.event, spec.command, r["error"],
-            )
-            return None
-        if r["timed_out"]:
-            logger.warning(
-                "shell hook timed out after %.2fs (event=%s command=%s)",
-                r["elapsed_seconds"], spec.event, spec.command,
-            )
-            return None
-
-        stderr = r["stderr"].strip()
-        if stderr:
-            logger.debug(
-                "shell hook stderr (event=%s command=%s): %s",
-                spec.event, spec.command, stderr[:400],
-            )
-        # Non-zero exits: log but still parse stdout so scripts that
-        # signal failure via exit code can also return a block directive.
-        if r["returncode"] != 0:
-            logger.warning(
-                "shell hook exited %d (event=%s command=%s); stderr=%s",
-                r["returncode"], spec.event, spec.command, stderr[:400],
-            )
-        return _parse_response(spec.event, r["stdout"])
-
-    _callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
-    _callback.__qualname__ = _callback.__name__
-    return _callback
-
-
-def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
-    """Render the stdin JSON payload.  Unserialisable values are
-    stringified via ``default=str`` rather than dropped."""
-    extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
-    try:
-        cwd = str(Path.cwd())
-    except OSError:
-        cwd = ""
-    payload = {
-        "hook_event_name": event,
-        "tool_name": kwargs.get("tool_name"),
-        "tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
-        "session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
-        "cwd": cwd,
-        "extra": extras,
-    }
-    return json.dumps(payload, ensure_ascii=False, default=str)
-
-
-def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
-    """Translate stdout JSON into a Hermes wire-shape dict.
-
-    For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
-    "reason": "..."}`` payload is translated into the canonical Hermes
-    ``{"action": "block", "message": "..."}`` shape expected by
-    :func:`hermes_cli.plugins.get_pre_tool_call_block_message`.  This is
-    the single most important correctness invariant in this module —
-    skipping the translation silently breaks every ``pre_tool_call``
-    block directive.
-
-    For ``pre_llm_call``, ``{"context": "..."}`` is passed through
-    unchanged to match the existing plugin-hook contract.
-
-    Anything else returns ``None``.
-    """
-    stdout = (stdout or "").strip()
-    if not stdout:
-        return None
-
-    try:
-        data = json.loads(stdout)
-    except json.JSONDecodeError:
-        logger.warning(
-            "shell hook stdout was not valid JSON (event=%s): %s",
-            event, stdout[:200],
-        )
-        return None
-
-    if not isinstance(data, dict):
-        return None
-
-    if event == "pre_tool_call":
-        if data.get("action") == "block":
-            message = data.get("message") or data.get("reason") or ""
-            if isinstance(message, str) and message:
-                return {"action": "block", "message": message}
-        if data.get("decision") == "block":
-            message = data.get("reason") or data.get("message") or ""
-            if isinstance(message, str) and message:
-                return {"action": "block", "message": message}
-        return None
-
-    context = data.get("context")
-    if isinstance(context, str) and context.strip():
-        return {"context": context}
-
-    return None
-
-
-# ---------------------------------------------------------------------------
-# Allowlist / consent
-# ---------------------------------------------------------------------------
-
-def allowlist_path() -> Path:
-    """Path to the per-user shell-hook allowlist file."""
-    return get_hermes_home() / ALLOWLIST_FILENAME
-
-
-def load_allowlist() -> Dict[str, Any]:
-    """Return the parsed allowlist, or an empty skeleton if absent."""
-    try:
-        raw = json.loads(allowlist_path().read_text())
-    except (FileNotFoundError, json.JSONDecodeError, OSError):
-        return {"approvals": []}
-    if not isinstance(raw, dict):
-        return {"approvals": []}
-    approvals = raw.get("approvals")
-    if not isinstance(approvals, list):
-        raw["approvals"] = []
-    return raw
-
-
-def save_allowlist(data: Dict[str, Any]) -> None:
-    """Atomically persist the allowlist via per-process ``mkstemp`` +
-    ``os.replace``.  Cross-process read-modify-write races are handled
-    by :func:`_locked_update_approvals` (``fcntl.flock``).  On OSError
-    the failure is logged; the in-process hook still registers but
-    the approval won't survive across runs."""
-    p = allowlist_path()
-    try:
-        p.parent.mkdir(parents=True, exist_ok=True)
-        fd, tmp_path = tempfile.mkstemp(
-            prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
-        )
-        try:
-            with os.fdopen(fd, "w") as fh:
-                fh.write(json.dumps(data, indent=2, sort_keys=True))
-            os.replace(tmp_path, p)
-        except Exception:
-            try:
-                os.unlink(tmp_path)
-            except OSError:
-                pass
-            raise
-    except OSError as exc:
-        logger.warning(
-            "Failed to persist shell hook allowlist to %s: %s. "
-            "The approval is in-memory for this run, but the next "
-            "startup will re-prompt (or skip registration on non-TTY "
-            "runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
-            p, exc,
-        )
-
-
-def _is_allowlisted(event: str, command: str) -> bool:
-    data = load_allowlist()
-    return any(
-        isinstance(e, dict)
-        and e.get("event") == event
-        and e.get("command") == command
-        for e in data.get("approvals", [])
-    )
-
-
-@contextmanager
-def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
-    """Serialise read-modify-write on the allowlist across processes.
-
-    Holds an exclusive ``flock`` on a sibling lock file for the duration
-    of the update so concurrent ``_record_approval``/``revoke`` callers
-    cannot clobber each other's changes (the race Codex reproduced with
-    20–50 simultaneous writers).  Falls back to an in-process lock on
-    platforms without ``fcntl``.
-    """
-    p = allowlist_path()
-    p.parent.mkdir(parents=True, exist_ok=True)
-    lock_path = p.with_suffix(p.suffix + ".lock")
-
-    if fcntl is None:  # pragma: no cover — non-POSIX fallback
-        with _allowlist_write_lock:
-            data = load_allowlist()
-            yield data
-            save_allowlist(data)
-        return
-
-    with open(lock_path, "a+") as lock_fh:
-        fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
-        try:
-            data = load_allowlist()
-            yield data
-            save_allowlist(data)
-        finally:
-            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
-
-
-def _prompt_and_record(
-    event: str, command: str, *, accept_hooks: bool,
-) -> bool:
-    """Decide whether to approve an unseen ``(event, command)`` pair.
-    Returns ``True`` iff the approval was granted and recorded.
-    """
-    if accept_hooks:
-        _record_approval(event, command)
-        logger.info(
-            "shell hook auto-approved via --accept-hooks / env / config: "
-            "%s -> %s", event, command,
-        )
-        return True
-
-    if not sys.stdin.isatty():
-        return False
-
-    print(
-        f"\n⚠ Hermes is about to register a shell hook that will run a\n"
-        f"  command on your behalf.\n\n"
-        f"    Event:   {event}\n"
-        f"    Command: {command}\n\n"
-        f"  Commands run with your full user credentials.  Only approve\n"
-        f"  commands you trust."
-    )
-    try:
-        answer = input("Allow this hook to run? [y/N]: ").strip().lower()
-    except (EOFError, KeyboardInterrupt):
-        print()  # keep the terminal tidy after ^C
-        return False
-
-    if answer in ("y", "yes"):
-        _record_approval(event, command)
-        return True
-
-    return False
-
-
-def _record_approval(event: str, command: str) -> None:
-    entry = {
-        "event": event,
-        "command": command,
-        "approved_at": _utc_now_iso(),
-        "script_mtime_at_approval": script_mtime_iso(command),
-    }
-    with _locked_update_approvals() as data:
-        data["approvals"] = [
-            e for e in data.get("approvals", [])
-            if not (
-                isinstance(e, dict)
-                and e.get("event") == event
-                and e.get("command") == command
-            )
-        ] + [entry]
-
-
-def _utc_now_iso() -> str:
-    return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
-
-
-def revoke(command: str) -> int:
-    """Remove every allowlist entry matching ``command``.
-
-    Returns the number of entries removed.  Does not unregister any
-    callbacks that are already live on the plugin manager in the current
-    process — restart the CLI / gateway to drop them.
-    """
-    with _locked_update_approvals() as data:
-        before = len(data.get("approvals", []))
-        data["approvals"] = [
-            e for e in data.get("approvals", [])
-            if not (isinstance(e, dict) and e.get("command") == command)
-        ]
-        after = len(data["approvals"])
-    return before - after
-
-
-_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
-    ".sh", ".bash", ".zsh", ".fish",
-    ".py", ".pyw",
-    ".rb", ".pl", ".lua",
-    ".js", ".mjs", ".cjs", ".ts",
-)
-
-
-def _command_script_path(command: str) -> str:
-    """Return the script path from ``command`` for doctor / drift checks.
-
-    Prefers a token ending in a known script extension, then a token
-    containing ``/`` or leading ``~``, then the first token.  Handles
-    ``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
-    common bare-path form.
-    """
-    try:
-        parts = shlex.split(command)
-    except ValueError:
-        return command
-    if not parts:
-        return command
-    for part in parts:
-        if part.lower().endswith(_SCRIPT_EXTENSIONS):
-            return part
-    for part in parts:
-        if "/" in part or part.startswith("~"):
-            return part
-    return parts[0]
-
-
-# ---------------------------------------------------------------------------
-# Helpers for accept-hooks resolution
-# ---------------------------------------------------------------------------
-
-def _resolve_effective_accept(
-    cfg: Dict[str, Any], accept_hooks_arg: bool,
-) -> bool:
-    """Combine all three opt-in channels into a single boolean.
-
-    Precedence (any truthy source flips us on):
-      1. ``--accept-hooks`` flag (CLI) / explicit argument
-      2. ``HERMES_ACCEPT_HOOKS`` env var
-      3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
-    """
-    if accept_hooks_arg:
-        return True
-    env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
-    if env in ("1", "true", "yes", "on"):
-        return True
-    cfg_val = cfg.get("hooks_auto_accept", False)
-    return bool(cfg_val)
-
-
-# ---------------------------------------------------------------------------
-# Introspection (used by `hermes hooks` CLI)
-# ---------------------------------------------------------------------------
-
-def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
-    """Return the allowlist record for this pair, if any."""
-    for e in load_allowlist().get("approvals", []):
-        if (
-            isinstance(e, dict)
-            and e.get("event") == event
-            and e.get("command") == command
-        ):
-            return e
-    return None
-
-
-def script_mtime_iso(command: str) -> Optional[str]:
-    """ISO-8601 mtime of the resolved script path, or ``None`` if the
-    script is missing."""
-    path = _command_script_path(command)
-    if not path:
-        return None
-    try:
-        expanded = os.path.expanduser(path)
-        return datetime.fromtimestamp(
-            os.path.getmtime(expanded), tz=timezone.utc,
-        ).isoformat().replace("+00:00", "Z")
-    except OSError:
-        return None
-
-
-def script_is_executable(command: str) -> bool:
-    """Return ``True`` iff ``command`` is runnable as configured.
-
-    For a bare invocation (``/path/hook.sh``) the script itself must be
-    executable.  For interpreter-prefixed commands (``python3
-    /path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
-    to be readable — the interpreter doesn't care about the ``X_OK``
-    bit.  Mirrors what ``_spawn`` would actually do at runtime."""
-    path = _command_script_path(command)
-    if not path:
-        return False
-    expanded = os.path.expanduser(path)
-    if not os.path.isfile(expanded):
-        return False
-    try:
-        argv = shlex.split(command)
-    except ValueError:
-        return False
-    is_bare_invocation = bool(argv) and argv[0] == path
-    required = os.X_OK if is_bare_invocation else os.R_OK
-    return os.access(expanded, required)
-
-
-def run_once(
-    spec: ShellHookSpec, kwargs: Dict[str, Any],
-) -> Dict[str, Any]:
-    """Fire a single shell-hook invocation with a synthetic payload.
-    Used by ``hermes hooks test`` and ``hermes hooks doctor``.
-
-    ``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
-    would pass at runtime.  It is routed through :func:`_serialize_payload`
-    so the synthetic stdin exactly matches what a real hook firing would
-    produce — otherwise scripts tested via ``hermes hooks test`` could
-    diverge silently from production behaviour.
-
-    Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
-    holding the canonical Hermes-wire-shape response."""
-    stdin_json = _serialize_payload(spec.event, kwargs)
-    result = _spawn(spec, stdin_json)
-    result["parsed"] = _parse_response(spec.event, result["stdout"])
-    return result
@@ -8,7 +8,6 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
 import json
 import logging
 import re
-import subprocess
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -23,110 +22,6 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

-# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
-# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
-# left as-is so the user can debug them.
-_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
-
-# Matches inline shell snippets like:  !`date +%Y-%m-%d`
-# Non-greedy, single-line only — no newlines inside the backticks.
-_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
-
-# Cap inline-shell output so a runaway command can't blow out the context.
-_INLINE_SHELL_MAX_OUTPUT = 4000
-
-
-def _load_skills_config() -> dict:
-    """Load the ``skills`` section of config.yaml (best-effort)."""
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config() or {}
-        skills_cfg = cfg.get("skills")
-        if isinstance(skills_cfg, dict):
-            return skills_cfg
-    except Exception:
-        logger.debug("Could not read skills config", exc_info=True)
-    return {}
-
-
-def _substitute_template_vars(
-    content: str,
-    skill_dir: Path | None,
-    session_id: str | None,
-) -> str:
-    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
-
-    Only substitutes tokens for which a concrete value is available —
-    unresolved tokens are left in place so the author can spot them.
-    """
-    if not content:
-        return content
-
-    skill_dir_str = str(skill_dir) if skill_dir else None
-
-    def _replace(match: re.Match) -> str:
-        token = match.group(1)
-        if token == "HERMES_SKILL_DIR" and skill_dir_str:
-            return skill_dir_str
-        if token == "HERMES_SESSION_ID" and session_id:
-            return str(session_id)
-        return match.group(0)
-
-    return _SKILL_TEMPLATE_RE.sub(_replace, content)
-
-
-def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
-    """Execute a single inline-shell snippet and return its stdout (trimmed).
-
-    Failures return a short ``[inline-shell error: ...]`` marker instead of
-    raising, so one bad snippet can't wreck the whole skill message.
-    """
-    try:
-        completed = subprocess.run(
-            ["bash", "-c", command],
-            cwd=str(cwd) if cwd else None,
-            capture_output=True,
-            text=True,
-            timeout=max(1, int(timeout)),
-            check=False,
-        )
-    except subprocess.TimeoutExpired:
-        return f"[inline-shell timeout after {timeout}s: {command}]"
-    except FileNotFoundError:
-        return f"[inline-shell error: bash not found]"
-    except Exception as exc:
-        return f"[inline-shell error: {exc}]"
-
-    output = (completed.stdout or "").rstrip("\n")
-    if not output and completed.stderr:
-        output = completed.stderr.rstrip("\n")
-    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
-        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
-    return output
-
-
-def _expand_inline_shell(
-    content: str,
-    skill_dir: Path | None,
-    timeout: int,
-) -> str:
-    """Replace every !`cmd` snippet in ``content`` with its stdout.
-
-    Runs each snippet with the skill directory as CWD so relative paths in
-    the snippet work the way the author expects.
-    """
-    if "!`" not in content:
-        return content
-
-    def _replace(match: re.Match) -> str:
-        cmd = match.group(1).strip()
-        if not cmd:
-            return ""
-        return _run_inline_shell(cmd, skill_dir, timeout)
-
-    return _INLINE_SHELL_RE.sub(_replace, content)
-

 def build_plan_path(
    user_instruction: str = "",
@@ -238,36 +133,14 @@ def _build_skill_message(
    activation_note: str,
    user_instruction: str = "",
    runtime_note: str = "",
-    session_id: str | None = None,
 ) -> str:
    """Format a loaded skill into a user/system message payload."""
    from tools.skills_tool import SKILLS_DIR

    content = str(loaded_skill.get("content") or "")

-    # ── Template substitution and inline-shell expansion ──
-    # Done before anything else so downstream blocks (setup notes,
-    # supporting-file hints) see the expanded content.
-    skills_cfg = _load_skills_config()
-    if skills_cfg.get("template_vars", True):
-        content = _substitute_template_vars(content, skill_dir, session_id)
-    if skills_cfg.get("inline_shell", False):
-        timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
-        content = _expand_inline_shell(content, skill_dir, timeout)
-
    parts = [activation_note, "", content.strip()]

-    # ── Inject the absolute skill directory so the agent can reference
-    #    bundled scripts without an extra skill_view() round-trip. ──
-    if skill_dir:
-        parts.append("")
-        parts.append(f"[Skill directory: {skill_dir}]")
-        parts.append(
-            "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
-            "`templates/config.yaml`) against that directory, then run them "
-            "with the terminal tool using the absolute path."
-        )
-
    # ── Inject resolved skill config values ──
    _inject_skill_config(loaded_skill, parts)

@@ -315,13 +188,11 @@ def _build_skill_message(
            # Skill is from an external dir — use the skill name instead
            skill_view_target = skill_dir.name
        parts.append("")
-        parts.append("[This skill has supporting files:]")
+        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
        for sf in supporting:
-            parts.append(f"- {sf}  ->  {skill_dir / sf}")
+            parts.append(f"- {sf}")
        parts.append(
-            f'\nLoad any of these with skill_view(name="{skill_view_target}", '
-            f'file_path="<path>"), or run scripts directly by absolute path '
-            f"(e.g. `node {skill_dir}/scripts/foo.js`)."
+            f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
        )

    if user_instruction:
@@ -461,7 +332,6 @@ def build_skill_invocation_message(
        activation_note,
        user_instruction=user_instruction,
        runtime_note=runtime_note,
-        session_id=task_id,
    )


@@ -500,7 +370,6 @@ def build_preloaded_skills_prompt(
                loaded_skill,
                skill_dir,
                activation_note,
-                session_id=task_id,
            )
        )
        loaded_names.append(skill_name)
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
    Excludes ``.git``, ``.github``, ``.hub`` directories.
    """
    matches = []
-    for root, dirs, files in os.walk(skills_dir, followlinks=True):
+    for root, dirs, files in os.walk(skills_dir):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
@@ -455,8 +455,7 @@ def parse_qualified_name(name: str) -> Tuple[Optional[str], str]:
    """
    if ":" not in name:
        return None, name
-    ns, bare = name.split(":", 1)
-    return ns, bare
+    return tuple(name.split(":", 1))  # type: ignore[return-value]


 def is_valid_namespace(candidate: Optional[str]) -> bool:
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
        response = call_llm(
            task="title_generation",
            messages=messages,
-            max_tokens=500,
+            max_tokens=30,
            temperature=0.3,
            timeout=timeout,
        )
@@ -1,51 +0,0 @@
-"""Transport layer types and registry for provider response normalization.
-
-Usage:
-    from agent.transports import get_transport
-    transport = get_transport("anthropic_messages")
-    result = transport.normalize_response(raw_response)
-"""
-
-from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401
-
-_REGISTRY: dict = {}
-
-
-def register_transport(api_mode: str, transport_cls: type) -> None:
-    """Register a transport class for an api_mode string."""
-    _REGISTRY[api_mode] = transport_cls
-
-
-def get_transport(api_mode: str):
-    """Get a transport instance for the given api_mode.
-
-    Returns None if no transport is registered for this api_mode.
-    This allows gradual migration — call sites can check for None
-    and fall back to the legacy code path.
-    """
-    if not _REGISTRY:
-        _discover_transports()
-    cls = _REGISTRY.get(api_mode)
-    if cls is None:
-        return None
-    return cls()
-
-
-def _discover_transports() -> None:
-    """Import all transport modules to trigger auto-registration."""
-    try:
-        import agent.transports.anthropic  # noqa: F401
-    except ImportError:
-        pass
-    try:
-        import agent.transports.codex  # noqa: F401
-    except ImportError:
-        pass
-    try:
-        import agent.transports.chat_completions  # noqa: F401
-    except ImportError:
-        pass
-    try:
-        import agent.transports.bedrock  # noqa: F401
-    except ImportError:
-        pass
@@ -1,177 +0,0 @@
-"""Anthropic Messages API transport.
-
-Delegates to the existing adapter functions in agent/anthropic_adapter.py.
-This transport owns format conversion and normalization — NOT client lifecycle.
-"""
-
-from typing import Any, Dict, List, Optional
-
-from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse
-
-
-class AnthropicTransport(ProviderTransport):
-    """Transport for api_mode='anthropic_messages'.
-
-    Wraps the existing functions in anthropic_adapter.py behind the
-    ProviderTransport ABC.  Each method delegates — no logic is duplicated.
-    """
-
-    @property
-    def api_mode(self) -> str:
-        return "anthropic_messages"
-
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI messages to Anthropic (system, messages) tuple.
-
-        kwargs:
-            base_url: Optional[str] — affects thinking signature handling.
-        """
-        from agent.anthropic_adapter import convert_messages_to_anthropic
-
-        base_url = kwargs.get("base_url")
-        return convert_messages_to_anthropic(messages, base_url=base_url)
-
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
-        """Convert OpenAI tool schemas to Anthropic input_schema format."""
-        from agent.anthropic_adapter import convert_tools_to_anthropic
-
-        return convert_tools_to_anthropic(tools)
-
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build Anthropic messages.create() kwargs.
-
-        Calls convert_messages and convert_tools internally.
-
-        params (all optional):
-            max_tokens: int
-            reasoning_config: dict | None
-            tool_choice: str | None
-            is_oauth: bool
-            preserve_dots: bool
-            context_length: int | None
-            base_url: str | None
-            fast_mode: bool
-        """
-        from agent.anthropic_adapter import build_anthropic_kwargs
-
-        return build_anthropic_kwargs(
-            model=model,
-            messages=messages,
-            tools=tools,
-            max_tokens=params.get("max_tokens", 16384),
-            reasoning_config=params.get("reasoning_config"),
-            tool_choice=params.get("tool_choice"),
-            is_oauth=params.get("is_oauth", False),
-            preserve_dots=params.get("preserve_dots", False),
-            context_length=params.get("context_length"),
-            base_url=params.get("base_url"),
-            fast_mode=params.get("fast_mode", False),
-        )
-
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize Anthropic response to NormalizedResponse.
-
-        Parses content blocks (text, thinking, tool_use), maps stop_reason
-        to OpenAI finish_reason, and collects reasoning_details in provider_data.
-        """
-        import json
-        from agent.anthropic_adapter import _to_plain_data
-        from agent.transports.types import ToolCall
-
-        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        _MCP_PREFIX = "mcp_"
-
-        text_parts = []
-        reasoning_parts = []
-        reasoning_details = []
-        tool_calls = []
-
-        for block in response.content:
-            if block.type == "text":
-                text_parts.append(block.text)
-            elif block.type == "thinking":
-                reasoning_parts.append(block.thinking)
-                block_dict = _to_plain_data(block)
-                if isinstance(block_dict, dict):
-                    reasoning_details.append(block_dict)
-            elif block.type == "tool_use":
-                name = block.name
-                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
-                    name = name[len(_MCP_PREFIX):]
-                tool_calls.append(
-                    ToolCall(
-                        id=block.id,
-                        name=name,
-                        arguments=json.dumps(block.input),
-                    )
-                )
-
-        finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
-
-        provider_data = {}
-        if reasoning_details:
-            provider_data["reasoning_details"] = reasoning_details
-
-        return NormalizedResponse(
-            content="\n".join(text_parts) if text_parts else None,
-            tool_calls=tool_calls or None,
-            finish_reason=finish_reason,
-            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
-            usage=None,
-            provider_data=provider_data or None,
-        )
-
-    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid.
-
-        An empty content list is legitimate when ``stop_reason == "end_turn"``
-        — the model's canonical way of signalling "nothing more to add" after
-        a tool turn that already delivered the user-facing text. Treating it
-        as invalid falsely retries a completed response.
-        """
-        if response is None:
-            return False
-        content_blocks = getattr(response, "content", None)
-        if not isinstance(content_blocks, list):
-            return False
-        if not content_blocks:
-            return getattr(response, "stop_reason", None) == "end_turn"
-        return True
-
-    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
-        """Extract Anthropic cache_read and cache_creation token counts."""
-        usage = getattr(response, "usage", None)
-        if usage is None:
-            return None
-        cached = getattr(usage, "cache_read_input_tokens", 0) or 0
-        written = getattr(usage, "cache_creation_input_tokens", 0) or 0
-        if cached or written:
-            return {"cached_tokens": cached, "creation_tokens": written}
-        return None
-
-    # Promote the adapter's canonical mapping to module level so it's shared
-    _STOP_REASON_MAP = {
-        "end_turn": "stop",
-        "tool_use": "tool_calls",
-        "max_tokens": "length",
-        "stop_sequence": "stop",
-        "refusal": "content_filter",
-        "model_context_window_exceeded": "length",
-    }
-
-    def map_finish_reason(self, raw_reason: str) -> str:
-        """Map Anthropic stop_reason to OpenAI finish_reason."""
-        return self._STOP_REASON_MAP.get(raw_reason, "stop")
-
-
-# Auto-register on import
-from agent.transports import register_transport  # noqa: E402
-
-register_transport("anthropic_messages", AnthropicTransport)
@@ -1,89 +0,0 @@
-"""Abstract base for provider transports.
-
-A transport owns the data path for one api_mode:
-  convert_messages → convert_tools → build_kwargs → normalize_response
-
-It does NOT own: client construction, streaming, credential refresh,
-prompt caching, interrupt handling, or retry logic.  Those stay on AIAgent.
-"""
-
-from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional
-
-from agent.transports.types import NormalizedResponse
-
-
-class ProviderTransport(ABC):
-    """Base class for provider-specific format conversion and normalization."""
-
-    @property
-    @abstractmethod
-    def api_mode(self) -> str:
-        """The api_mode string this transport handles (e.g. 'anthropic_messages')."""
-        ...
-
-    @abstractmethod
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI-format messages to provider-native format.
-
-        Returns provider-specific structure (e.g. (system, messages) for Anthropic,
-        or the messages list unchanged for chat_completions).
-        """
-        ...
-
-    @abstractmethod
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
-        """Convert OpenAI-format tool definitions to provider-native format.
-
-        Returns provider-specific tool list (e.g. Anthropic input_schema format).
-        """
-        ...
-
-    @abstractmethod
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build the complete API call kwargs dict.
-
-        This is the primary entry point — it typically calls convert_messages()
-        and convert_tools() internally, then adds model-specific config.
-
-        Returns a dict ready to be passed to the provider's SDK client.
-        """
-        ...
-
-    @abstractmethod
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize a raw provider response to the shared NormalizedResponse type.
-
-        This is the only method that returns a transport-layer type.
-        """
-        ...
-
-    def validate_response(self, response: Any) -> bool:
-        """Optional: check if the raw response is structurally valid.
-
-        Returns True if valid, False if the response should be treated as invalid.
-        Default implementation always returns True.
-        """
-        return True
-
-    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
-        """Optional: extract provider-specific cache hit/creation stats.
-
-        Returns dict with 'cached_tokens' and 'creation_tokens', or None.
-        Default returns None.
-        """
-        return None
-
-    def map_finish_reason(self, raw_reason: str) -> str:
-        """Optional: map provider-specific stop reason to OpenAI equivalent.
-
-        Default returns the raw reason unchanged.  Override for providers
-        with different stop reason vocabularies.
-        """
-        return raw_reason
@@ -1,154 +0,0 @@
-"""AWS Bedrock Converse API transport.
-
-Delegates to the existing adapter functions in agent/bedrock_adapter.py.
-Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
-owns format conversion and normalization, while client construction and
-boto3 calls stay on AIAgent.
-"""
-
-from typing import Any, Dict, List, Optional
-
-from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
-
-
-class BedrockTransport(ProviderTransport):
-    """Transport for api_mode='bedrock_converse'."""
-
-    @property
-    def api_mode(self) -> str:
-        return "bedrock_converse"
-
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI messages to Bedrock Converse format."""
-        from agent.bedrock_adapter import convert_messages_to_converse
-        return convert_messages_to_converse(messages)
-
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
-        """Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
-        from agent.bedrock_adapter import convert_tools_to_converse
-        return convert_tools_to_converse(tools)
-
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build Bedrock converse() kwargs.
-
-        Calls convert_messages and convert_tools internally.
-
-        params:
-            max_tokens: int — output token limit (default 4096)
-            temperature: float | None
-            guardrail_config: dict | None — Bedrock guardrails
-            region: str — AWS region (default 'us-east-1')
-        """
-        from agent.bedrock_adapter import build_converse_kwargs
-
-        region = params.get("region", "us-east-1")
-        guardrail = params.get("guardrail_config")
-
-        kwargs = build_converse_kwargs(
-            model=model,
-            messages=messages,
-            tools=tools,
-            max_tokens=params.get("max_tokens", 4096),
-            temperature=params.get("temperature"),
-            guardrail_config=guardrail,
-        )
-        # Sentinel keys for dispatch — agent pops these before the boto3 call
-        kwargs["__bedrock_converse__"] = True
-        kwargs["__bedrock_region__"] = region
-        return kwargs
-
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize Bedrock response to NormalizedResponse.
-
-        Handles two shapes:
-        1. Raw boto3 dict (from direct converse() calls)
-        2. Already-normalized SimpleNamespace with .choices (from dispatch site)
-        """
-        from agent.bedrock_adapter import normalize_converse_response
-
-        # Normalize to OpenAI-compatible SimpleNamespace
-        if hasattr(response, "choices") and response.choices:
-            # Already normalized at dispatch site
-            ns = response
-        else:
-            # Raw boto3 dict
-            ns = normalize_converse_response(response)
-
-        choice = ns.choices[0]
-        msg = choice.message
-        finish_reason = choice.finish_reason or "stop"
-
-        tool_calls = None
-        if msg.tool_calls:
-            tool_calls = [
-                ToolCall(
-                    id=tc.id,
-                    name=tc.function.name,
-                    arguments=tc.function.arguments,
-                )
-                for tc in msg.tool_calls
-            ]
-
-        usage = None
-        if hasattr(ns, "usage") and ns.usage:
-            u = ns.usage
-            usage = Usage(
-                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
-                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
-                total_tokens=getattr(u, "total_tokens", 0) or 0,
-            )
-
-        reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
-
-        return NormalizedResponse(
-            content=msg.content,
-            tool_calls=tool_calls,
-            finish_reason=finish_reason,
-            reasoning=reasoning,
-            usage=usage,
-        )
-
-    def validate_response(self, response: Any) -> bool:
-        """Check Bedrock response structure.
-
-        After normalize_converse_response, the response has OpenAI-compatible
-        .choices — same check as chat_completions.
-        """
-        if response is None:
-            return False
-        # Raw Bedrock dict response — check for 'output' key
-        if isinstance(response, dict):
-            return "output" in response
-        # Already-normalized SimpleNamespace
-        if hasattr(response, "choices"):
-            return bool(response.choices)
-        return False
-
-    def map_finish_reason(self, raw_reason: str) -> str:
-        """Map Bedrock stop reason to OpenAI finish_reason.
-
-        The adapter already does this mapping inside normalize_converse_response,
-        so this is only used for direct access to raw responses.
-        """
-        _MAP = {
-            "end_turn": "stop",
-            "tool_use": "tool_calls",
-            "max_tokens": "length",
-            "stop_sequence": "stop",
-            "guardrail_intervened": "content_filter",
-            "content_filtered": "content_filter",
-        }
-        return _MAP.get(raw_reason, "stop")
-
-
-# Auto-register on import
-from agent.transports import register_transport  # noqa: E402
-
-register_transport("bedrock_converse", BedrockTransport)
@@ -1,387 +0,0 @@
-"""OpenAI Chat Completions transport.
-
-Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
-providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
-
-Messages and tools are already in OpenAI format — convert_messages and
-convert_tools are near-identity.  The complexity lives in build_kwargs
-which has provider-specific conditionals for max_tokens defaults,
-reasoning configuration, temperature handling, and extra_body assembly.
-"""
-
-import copy
-from typing import Any, Dict, List, Optional
-
-from agent.prompt_builder import DEVELOPER_ROLE_MODELS
-from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
-
-
-class ChatCompletionsTransport(ProviderTransport):
-    """Transport for api_mode='chat_completions'.
-
-    The default path for OpenAI-compatible providers.
-    """
-
-    @property
-    def api_mode(self) -> str:
-        return "chat_completions"
-
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
-        """Messages are already in OpenAI format — sanitize Codex leaks only.
-
-        Strips Codex Responses API fields (``codex_reasoning_items`` on the
-        message, ``call_id``/``response_item_id`` on tool_calls) that strict
-        chat-completions providers reject with 400/422.
-        """
-        needs_sanitize = False
-        for msg in messages:
-            if not isinstance(msg, dict):
-                continue
-            if "codex_reasoning_items" in msg:
-                needs_sanitize = True
-                break
-            tool_calls = msg.get("tool_calls")
-            if isinstance(tool_calls, list):
-                for tc in tool_calls:
-                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
-                        needs_sanitize = True
-                        break
-                if needs_sanitize:
-                    break
-
-        if not needs_sanitize:
-            return messages
-
-        sanitized = copy.deepcopy(messages)
-        for msg in sanitized:
-            if not isinstance(msg, dict):
-                continue
-            msg.pop("codex_reasoning_items", None)
-            tool_calls = msg.get("tool_calls")
-            if isinstance(tool_calls, list):
-                for tc in tool_calls:
-                    if isinstance(tc, dict):
-                        tc.pop("call_id", None)
-                        tc.pop("response_item_id", None)
-        return sanitized
-
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-        """Tools are already in OpenAI format — identity."""
-        return tools
-
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build chat.completions.create() kwargs.
-
-        This is the most complex transport method — it handles ~16 providers
-        via params rather than subclasses.
-
-        params:
-            timeout: float — API call timeout
-            max_tokens: int | None — user-configured max tokens
-            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
-            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
-            reasoning_config: dict | None
-            request_overrides: dict | None
-            session_id: str | None
-            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
-            model_lower: str — lowercase model name for pattern matching
-            # Provider detection flags (all optional, default False)
-            is_openrouter: bool
-            is_nous: bool
-            is_qwen_portal: bool
-            is_github_models: bool
-            is_nvidia_nim: bool
-            is_kimi: bool
-            is_custom_provider: bool
-            ollama_num_ctx: int | None
-            # Provider routing
-            provider_preferences: dict | None
-            # Qwen-specific
-            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
-            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
-            # Temperature
-            fixed_temperature: Any — from _fixed_temperature_for_model()
-            omit_temperature: bool
-            # Reasoning
-            supports_reasoning: bool
-            github_reasoning_extra: dict | None
-            # Claude on OpenRouter/Nous max output
-            anthropic_max_output: int | None
-            # Extra
-            extra_body_additions: dict | None — pre-built extra_body entries
-        """
-        # Codex sanitization: drop reasoning_items / call_id / response_item_id
-        sanitized = self.convert_messages(messages)
-
-        # Qwen portal prep AFTER codex sanitization.  If sanitize already
-        # deepcopied, reuse that copy via the in-place variant to avoid a
-        # second deepcopy.
-        is_qwen = params.get("is_qwen_portal", False)
-        if is_qwen:
-            qwen_prep = params.get("qwen_prepare_fn")
-            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
-            if sanitized is messages:
-                if qwen_prep is not None:
-                    sanitized = qwen_prep(sanitized)
-            else:
-                # Already deepcopied — transform in place
-                if qwen_prep_inplace is not None:
-                    qwen_prep_inplace(sanitized)
-                elif qwen_prep is not None:
-                    sanitized = qwen_prep(sanitized)
-
-        # Developer role swap for GPT-5/Codex models
-        model_lower = params.get("model_lower", (model or "").lower())
-        if (
-            sanitized
-            and isinstance(sanitized[0], dict)
-            and sanitized[0].get("role") == "system"
-            and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
-        ):
-            sanitized = list(sanitized)
-            sanitized[0] = {**sanitized[0], "role": "developer"}
-
-        api_kwargs: Dict[str, Any] = {
-            "model": model,
-            "messages": sanitized,
-        }
-
-        timeout = params.get("timeout")
-        if timeout is not None:
-            api_kwargs["timeout"] = timeout
-
-        # Temperature
-        fixed_temp = params.get("fixed_temperature")
-        omit_temp = params.get("omit_temperature", False)
-        if omit_temp:
-            api_kwargs.pop("temperature", None)
-        elif fixed_temp is not None:
-            api_kwargs["temperature"] = fixed_temp
-
-        # Qwen metadata (caller precomputes {sessionId, promptId})
-        qwen_meta = params.get("qwen_session_metadata")
-        if qwen_meta and is_qwen:
-            api_kwargs["metadata"] = qwen_meta
-
-        # Tools
-        if tools:
-            api_kwargs["tools"] = tools
-
-        # max_tokens resolution — priority: ephemeral > user > provider default
-        max_tokens_fn = params.get("max_tokens_param_fn")
-        ephemeral = params.get("ephemeral_max_output_tokens")
-        max_tokens = params.get("max_tokens")
-        anthropic_max_out = params.get("anthropic_max_output")
-        is_nvidia_nim = params.get("is_nvidia_nim", False)
-        is_kimi = params.get("is_kimi", False)
-        reasoning_config = params.get("reasoning_config")
-
-        if ephemeral is not None and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(ephemeral))
-        elif max_tokens is not None and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(max_tokens))
-        elif is_nvidia_nim and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(16384))
-        elif is_qwen and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(65536))
-        elif is_kimi and max_tokens_fn:
-            # Kimi/Moonshot: 32000 matches Kimi CLI's default
-            api_kwargs.update(max_tokens_fn(32000))
-        elif anthropic_max_out is not None:
-            api_kwargs["max_tokens"] = anthropic_max_out
-
-        # Kimi: top-level reasoning_effort (unless thinking disabled)
-        if is_kimi:
-            _kimi_thinking_off = bool(
-                reasoning_config
-                and isinstance(reasoning_config, dict)
-                and reasoning_config.get("enabled") is False
-            )
-            if not _kimi_thinking_off:
-                _kimi_effort = "medium"
-                if reasoning_config and isinstance(reasoning_config, dict):
-                    _e = (reasoning_config.get("effort") or "").strip().lower()
-                    if _e in ("low", "medium", "high"):
-                        _kimi_effort = _e
-                api_kwargs["reasoning_effort"] = _kimi_effort
-
-        # extra_body assembly
-        extra_body: Dict[str, Any] = {}
-
-        is_openrouter = params.get("is_openrouter", False)
-        is_nous = params.get("is_nous", False)
-        is_github_models = params.get("is_github_models", False)
-
-        provider_prefs = params.get("provider_preferences")
-        if provider_prefs and is_openrouter:
-            extra_body["provider"] = provider_prefs
-
-        # Kimi extra_body.thinking
-        if is_kimi:
-            _kimi_thinking_enabled = True
-            if reasoning_config and isinstance(reasoning_config, dict):
-                if reasoning_config.get("enabled") is False:
-                    _kimi_thinking_enabled = False
-            extra_body["thinking"] = {
-                "type": "enabled" if _kimi_thinking_enabled else "disabled",
-            }
-
-        # Reasoning
-        if params.get("supports_reasoning", False):
-            if is_github_models:
-                gh_reasoning = params.get("github_reasoning_extra")
-                if gh_reasoning is not None:
-                    extra_body["reasoning"] = gh_reasoning
-            else:
-                if reasoning_config is not None:
-                    rc = dict(reasoning_config)
-                    if is_nous and rc.get("enabled") is False:
-                        pass  # omit for Nous when disabled
-                    else:
-                        extra_body["reasoning"] = rc
-                else:
-                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-
-        if is_nous:
-            extra_body["tags"] = ["product=hermes-agent"]
-
-        # Ollama num_ctx
-        ollama_ctx = params.get("ollama_num_ctx")
-        if ollama_ctx:
-            options = extra_body.get("options", {})
-            options["num_ctx"] = ollama_ctx
-            extra_body["options"] = options
-
-        # Ollama/custom think=false
-        if params.get("is_custom_provider", False):
-            if reasoning_config and isinstance(reasoning_config, dict):
-                _effort = (reasoning_config.get("effort") or "").strip().lower()
-                _enabled = reasoning_config.get("enabled", True)
-                if _effort == "none" or _enabled is False:
-                    extra_body["think"] = False
-
-        if is_qwen:
-            extra_body["vl_high_resolution_images"] = True
-
-        # Merge any pre-built extra_body additions
-        additions = params.get("extra_body_additions")
-        if additions:
-            extra_body.update(additions)
-
-        if extra_body:
-            api_kwargs["extra_body"] = extra_body
-
-        # Request overrides last (service_tier etc.)
-        overrides = params.get("request_overrides")
-        if overrides:
-            api_kwargs.update(overrides)
-
-        return api_kwargs
-
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize OpenAI ChatCompletion to NormalizedResponse.
-
-        For chat_completions, this is near-identity — the response is already
-        in OpenAI format.  extra_content on tool_calls (Gemini thought_signature)
-        is preserved via ToolCall.provider_data.  reasoning_details (OpenRouter
-        unified format) and reasoning_content (DeepSeek/Moonshot) are also
-        preserved for downstream replay.
-        """
-        choice = response.choices[0]
-        msg = choice.message
-        finish_reason = choice.finish_reason or "stop"
-
-        tool_calls = None
-        if msg.tool_calls:
-            tool_calls = []
-            for tc in msg.tool_calls:
-                # Preserve provider-specific extras on the tool call.
-                # Gemini 3 thinking models attach extra_content with
-                # thought_signature — without replay on the next turn the API
-                # rejects the request with 400.
-                tc_provider_data: Dict[str, Any] = {}
-                extra = getattr(tc, "extra_content", None)
-                if extra is None and hasattr(tc, "model_extra"):
-                    extra = (tc.model_extra or {}).get("extra_content")
-                if extra is not None:
-                    if hasattr(extra, "model_dump"):
-                        try:
-                            extra = extra.model_dump()
-                        except Exception:
-                            pass
-                    tc_provider_data["extra_content"] = extra
-                tool_calls.append(ToolCall(
-                    id=tc.id,
-                    name=tc.function.name,
-                    arguments=tc.function.arguments,
-                    provider_data=tc_provider_data or None,
-                ))
-
-        usage = None
-        if hasattr(response, "usage") and response.usage:
-            u = response.usage
-            usage = Usage(
-                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
-                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
-                total_tokens=getattr(u, "total_tokens", 0) or 0,
-            )
-
-        # Preserve reasoning fields separately.  DeepSeek/Moonshot use
-        # ``reasoning_content``; others use ``reasoning``.  Downstream code
-        # (_extract_reasoning, thinking-prefill retry) reads both distinctly,
-        # so keep them apart in provider_data rather than merging.
-        reasoning = getattr(msg, "reasoning", None)
-        reasoning_content = getattr(msg, "reasoning_content", None)
-
-        provider_data: Dict[str, Any] = {}
-        if reasoning_content:
-            provider_data["reasoning_content"] = reasoning_content
-        rd = getattr(msg, "reasoning_details", None)
-        if rd:
-            provider_data["reasoning_details"] = rd
-
-        return NormalizedResponse(
-            content=msg.content,
-            tool_calls=tool_calls,
-            finish_reason=finish_reason,
-            reasoning=reasoning,
-            usage=usage,
-            provider_data=provider_data or None,
-        )
-
-    def validate_response(self, response: Any) -> bool:
-        """Check that response has valid choices."""
-        if response is None:
-            return False
-        if not hasattr(response, "choices") or response.choices is None:
-            return False
-        if not response.choices:
-            return False
-        return True
-
-    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
-        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
-        usage = getattr(response, "usage", None)
-        if usage is None:
-            return None
-        details = getattr(usage, "prompt_tokens_details", None)
-        if details is None:
-            return None
-        cached = getattr(details, "cached_tokens", 0) or 0
-        written = getattr(details, "cache_write_tokens", 0) or 0
-        if cached or written:
-            return {"cached_tokens": cached, "creation_tokens": written}
-        return None
-
-
-# Auto-register on import
-from agent.transports import register_transport  # noqa: E402
-
-register_transport("chat_completions", ChatCompletionsTransport)
@@ -1,217 +0,0 @@
-"""OpenAI Responses API (Codex) transport.
-
-Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
-This transport owns format conversion and normalization — NOT client lifecycle,
-streaming, or the _run_codex_stream() call path.
-"""
-
-from typing import Any, Dict, List, Optional
-
-from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
-
-
-class ResponsesApiTransport(ProviderTransport):
-    """Transport for api_mode='codex_responses'.
-
-    Wraps the functions extracted into codex_responses_adapter.py (PR 1).
-    """
-
-    @property
-    def api_mode(self) -> str:
-        return "codex_responses"
-
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI chat messages to Responses API input items."""
-        from agent.codex_responses_adapter import _chat_messages_to_responses_input
-        return _chat_messages_to_responses_input(messages)
-
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
-        """Convert OpenAI tool schemas to Responses API function definitions."""
-        from agent.codex_responses_adapter import _responses_tools
-        return _responses_tools(tools)
-
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build Responses API kwargs.
-
-        Calls convert_messages and convert_tools internally.
-
-        params:
-            instructions: str — system prompt (extracted from messages[0] if not given)
-            reasoning_config: dict | None — {effort, enabled}
-            session_id: str | None — used for prompt_cache_key + xAI conv header
-            max_tokens: int | None — max_output_tokens
-            request_overrides: dict | None — extra kwargs merged in
-            provider: str | None — provider name for backend-specific logic
-            base_url: str | None — endpoint URL
-            base_url_hostname: str | None — hostname for backend detection
-            is_github_responses: bool — Copilot/GitHub models backend
-            is_codex_backend: bool — chatgpt.com/backend-api/codex
-            is_xai_responses: bool — xAI/Grok backend
-            github_reasoning_extra: dict | None — Copilot reasoning params
-        """
-        from agent.codex_responses_adapter import (
-            _chat_messages_to_responses_input,
-            _responses_tools,
-        )
-
-        from run_agent import DEFAULT_AGENT_IDENTITY
-
-        instructions = params.get("instructions", "")
-        payload_messages = messages
-        if not instructions:
-            if messages and messages[0].get("role") == "system":
-                instructions = str(messages[0].get("content") or "").strip()
-                payload_messages = messages[1:]
-        if not instructions:
-            instructions = DEFAULT_AGENT_IDENTITY
-
-        is_github_responses = params.get("is_github_responses", False)
-        is_codex_backend = params.get("is_codex_backend", False)
-        is_xai_responses = params.get("is_xai_responses", False)
-
-        # Resolve reasoning effort
-        reasoning_effort = "medium"
-        reasoning_enabled = True
-        reasoning_config = params.get("reasoning_config")
-        if reasoning_config and isinstance(reasoning_config, dict):
-            if reasoning_config.get("enabled") is False:
-                reasoning_enabled = False
-            elif reasoning_config.get("effort"):
-                reasoning_effort = reasoning_config["effort"]
-
-        _effort_clamp = {"minimal": "low"}
-        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
-
-        kwargs = {
-            "model": model,
-            "instructions": instructions,
-            "input": _chat_messages_to_responses_input(payload_messages),
-            "tools": _responses_tools(tools),
-            "tool_choice": "auto",
-            "parallel_tool_calls": True,
-            "store": False,
-        }
-
-        session_id = params.get("session_id")
-        if not is_github_responses and session_id:
-            kwargs["prompt_cache_key"] = session_id
-
-        if reasoning_enabled and is_xai_responses:
-            kwargs["include"] = ["reasoning.encrypted_content"]
-        elif reasoning_enabled:
-            if is_github_responses:
-                github_reasoning = params.get("github_reasoning_extra")
-                if github_reasoning is not None:
-                    kwargs["reasoning"] = github_reasoning
-            else:
-                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
-                kwargs["include"] = ["reasoning.encrypted_content"]
-        elif not is_github_responses and not is_xai_responses:
-            kwargs["include"] = []
-
-        request_overrides = params.get("request_overrides")
-        if request_overrides:
-            kwargs.update(request_overrides)
-
-        max_tokens = params.get("max_tokens")
-        if max_tokens is not None and not is_codex_backend:
-            kwargs["max_output_tokens"] = max_tokens
-
-        if is_xai_responses and session_id:
-            kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
-
-        return kwargs
-
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize Codex Responses API response to NormalizedResponse."""
-        from agent.codex_responses_adapter import (
-            _normalize_codex_response,
-            _extract_responses_message_text,
-            _extract_responses_reasoning_text,
-        )
-
-        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
-        msg, finish_reason = _normalize_codex_response(response)
-
-        tool_calls = None
-        if msg and msg.tool_calls:
-            tool_calls = []
-            for tc in msg.tool_calls:
-                provider_data = {}
-                if hasattr(tc, "call_id") and tc.call_id:
-                    provider_data["call_id"] = tc.call_id
-                if hasattr(tc, "response_item_id") and tc.response_item_id:
-                    provider_data["response_item_id"] = tc.response_item_id
-                tool_calls.append(ToolCall(
-                    id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
-                    name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
-                    arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
-                    provider_data=provider_data or None,
-                ))
-
-        # Extract reasoning items for provider_data
-        provider_data = {}
-        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
-            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
-        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
-            provider_data["reasoning_details"] = msg.reasoning_details
-
-        return NormalizedResponse(
-            content=msg.content if msg else None,
-            tool_calls=tool_calls,
-            finish_reason=finish_reason or "stop",
-            reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
-            usage=None,  # Codex usage is extracted separately in normalize_usage()
-            provider_data=provider_data or None,
-        )
-
-    def validate_response(self, response: Any) -> bool:
-        """Check Codex Responses API response has valid output structure.
-
-        Returns True only if response.output is a non-empty list.
-        Does NOT check output_text fallback — the caller handles that
-        with diagnostic logging for stream backfill recovery.
-        """
-        if response is None:
-            return False
-        output = getattr(response, "output", None)
-        if not isinstance(output, list) or not output:
-            return False
-        return True
-
-    def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
-        """Validate and sanitize Codex API kwargs before the call.
-
-        Normalizes input items, strips unsupported fields, validates structure.
-        """
-        from agent.codex_responses_adapter import _preflight_codex_api_kwargs
-        return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
-
-    def map_finish_reason(self, raw_reason: str) -> str:
-        """Map Codex response.status to OpenAI finish_reason.
-
-        Codex uses response.status ('completed', 'incomplete') +
-        response.incomplete_details.reason for granular mapping.
-        This method handles the simple status string; the caller
-        should check incomplete_details separately for 'max_output_tokens'.
-        """
-        _MAP = {
-            "completed": "stop",
-            "incomplete": "length",
-            "failed": "stop",
-            "cancelled": "stop",
-        }
-        return _MAP.get(raw_reason, "stop")
-
-
-# Auto-register on import
-from agent.transports import register_transport  # noqa: E402
-
-register_transport("codex_responses", ResponsesApiTransport)
@@ -1,142 +0,0 @@
-"""Shared types for normalized provider responses.
-
-These dataclasses define the canonical shape that all provider adapters
-normalize responses to.  The shared surface is intentionally minimal —
-only fields that every downstream consumer reads are top-level.
-Protocol-specific state goes in ``provider_data`` dicts (response-level
-and per-tool-call) so that protocol-aware code paths can access it
-without polluting the shared type.
-"""
-
-from __future__ import annotations
-
-import json
-from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
-
-
-@dataclass
-class ToolCall:
-    """A normalized tool call from any provider.
-
-    ``id`` is the protocol's canonical identifier — what gets used in
-    ``tool_call_id`` / ``tool_use_id`` when constructing tool result
-    messages.  May be ``None`` when the provider omits it; the agent
-    fills it via ``_deterministic_call_id()`` before storing in history.
-
-    ``provider_data`` carries per-tool-call protocol metadata that only
-    protocol-aware code reads:
-
-    * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
-    * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
-    * Others: ``None``
-    """
-
-    id: Optional[str]
-    name: str
-    arguments: str  # JSON string
-    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
-
-    # ── Backward compatibility ──────────────────────────────────
-    # The agent loop reads tc.function.name / tc.function.arguments
-    # throughout run_agent.py (45+ sites).  These properties let
-    # NormalizedResponse pass through without the _nr_to_assistant_message
-    # shim, while keeping ToolCall's canonical fields flat.
-    @property
-    def type(self) -> str:
-        return "function"
-
-    @property
-    def function(self) -> "ToolCall":
-        """Return self so tc.function.name / tc.function.arguments work."""
-        return self
-
-    @property
-    def call_id(self) -> Optional[str]:
-        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
-        return (self.provider_data or {}).get("call_id")
-
-    @property
-    def response_item_id(self) -> Optional[str]:
-        """Codex response_item_id from provider_data."""
-        return (self.provider_data or {}).get("response_item_id")
-
-
-@dataclass
-class Usage:
-    """Token usage from an API response."""
-
-    prompt_tokens: int = 0
-    completion_tokens: int = 0
-    total_tokens: int = 0
-    cached_tokens: int = 0
-
-
-@dataclass
-class NormalizedResponse:
-    """Normalized API response from any provider.
-
-    Shared fields are truly cross-provider — every caller can rely on
-    them without branching on api_mode.  Protocol-specific state goes in
-    ``provider_data`` so that only protocol-aware code paths read it.
-
-    Response-level ``provider_data`` examples:
-
-    * Anthropic: ``{"reasoning_details": [...]}``
-    * Codex: ``{"codex_reasoning_items": [...]}``
-    * Others: ``None``
-    """
-
-    content: Optional[str]
-    tool_calls: Optional[List[ToolCall]]
-    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
-    reasoning: Optional[str] = None
-    usage: Optional[Usage] = None
-    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
-
-    # ── Backward compatibility ──────────────────────────────────
-    # The shim _nr_to_assistant_message() mapped these from provider_data.
-    # These properties let NormalizedResponse pass through directly.
-    @property
-    def reasoning_content(self) -> Optional[str]:
-        pd = self.provider_data or {}
-        return pd.get("reasoning_content")
-
-    @property
-    def reasoning_details(self):
-        pd = self.provider_data or {}
-        return pd.get("reasoning_details")
-
-    @property
-    def codex_reasoning_items(self):
-        pd = self.provider_data or {}
-        return pd.get("codex_reasoning_items")
-
-
-# ---------------------------------------------------------------------------
-# Factory helpers
-# ---------------------------------------------------------------------------
-
-def build_tool_call(
-    id: Optional[str],
-    name: str,
-    arguments: Any,
-    **provider_fields: Any,
-) -> ToolCall:
-    """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
-
-    Any extra keyword arguments are collected into ``provider_data``.
-    """
-    args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
-    pd = dict(provider_fields) if provider_fields else None
-    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
-
-
-def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
-    """Translate a provider-specific stop reason to the normalised set.
-
-    Falls back to ``"stop"`` for unknown or ``None`` reasons.
-    """
-    if reason is None:
-        return "stop"
-    return mapping.get(reason, "stop")
@@ -6,7 +6,6 @@ from decimal import Decimal
 from typing import Any, Dict, Literal, Optional

 from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
-from utils import base_url_host_matches

 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

@@ -394,7 +393,7 @@ def resolve_billing_route(

    if provider_name == "openai-codex":
        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
-    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
+    if provider_name == "openrouter" or "openrouter.ai" in base:
        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
    if provider_name == "anthropic":
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
@@ -533,22 +532,10 @@ def normalize_usage(
        prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
        output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
        details = getattr(response_usage, "prompt_tokens_details", None)
-        # Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
-        # top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
-        # AI Gateway, Cline) expose when routing Claude models — without this
-        # fallback, cache writes are undercounted as 0 and cache reads can be
-        # missed when the proxy only surfaces them at the top level.
-        # Port of cline/cline#10266.
        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
-        if not cache_read_tokens:
-            cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
        cache_write_tokens = _to_int(
            getattr(details, "cache_write_tokens", 0) if details else 0
        )
-        if not cache_write_tokens:
-            cache_write_tokens = _to_int(
-                getattr(response_usage, "cache_creation_input_tokens", 0)
-            )
        input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)

    reasoning_tokens = 0
@@ -20,13 +20,9 @@ Usage:
    python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run --distribution=image_gen
 """

-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
 import json
 import logging
+import os
 import time
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Tuple
@@ -448,7 +444,6 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
            if not reasoning.get("has_any_reasoning", True):
                print(f"   🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
                discarded_no_reasoning += 1
-                completed_in_batch.append(prompt_index)
                continue
            
            # Get and normalize tool stats for consistent schema across all entries
@@ -1130,7 +1125,7 @@ def main(
    num_workers: int = 4,
    resume: bool = False,
    verbose: bool = False,
-    show_distributions: bool = False,
+    list_distributions: bool = False,
    ephemeral_system_prompt: str = None,
    log_prefix_chars: int = 100,
    providers_allowed: str = None,
@@ -1158,7 +1153,7 @@ def main(
        num_workers (int): Number of parallel worker processes (default: 4)
        resume (bool): Resume from checkpoint if run was interrupted (default: False)
        verbose (bool): Enable verbose logging (default: False)
-        show_distributions (bool): List available toolset distributions and exit
+        list_distributions (bool): List available toolset distributions and exit
        ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
        log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 20)
        providers_allowed (str): Comma-separated list of OpenRouter providers to allow (e.g. "anthropic,openai")
@@ -1190,16 +1185,16 @@ def main(
                               --prefill_messages_file=configs/prefill_opus.json
        
        # List available distributions
-        python batch_runner.py --show_distributions
+        python batch_runner.py --list_distributions
    """
    # Handle list distributions
-    if show_distributions:
-        from toolset_distributions import print_distribution_info
-
+    if list_distributions:
+        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
+        
        print("📊 Available Toolset Distributions")
        print("=" * 70)
-
-        all_dists = list_distributions()
+        
+        all_dists = get_all_dists()
        for dist_name in sorted(all_dists.keys()):
            print_distribution_info(dist_name)
        
@@ -770,13 +770,10 @@ code_execution:
 # Subagent Delegation
 # =============================================================================
 # The delegate_task tool spawns child agents with isolated context.
-# Supports single tasks and batch mode (default 3 parallel, configurable).
+# Supports single tasks and batch mode (up to 3 parallel).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
-  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
-  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
-  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
+  default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -920,39 +917,3 @@ display:
 #   # Names and usernames are NOT affected (user-chosen, publicly visible).
 #   # Routing/delivery still uses the original values internally.
 #   redact_pii: false
-
-# =============================================================================
-# Shell-script hooks
-# =============================================================================
-# Register shell scripts as plugin-hook callbacks.  Each entry is executed as
-# a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
-# stdout the script may return JSON that either blocks the tool call or
-# injects context into the next LLM call.
-#
-# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
-#   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
-#   pre_api_request, post_api_request, on_session_start, on_session_end,
-#   on_session_finalize, on_session_reset, subagent_stop
-#
-# First-use consent: each (event, command) pair prompts once on a TTY, then
-# is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
-# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
-# hooks_auto_accept key below.
-#
-# See website/docs/user-guide/features/hooks.md for the full JSON wire
-# protocol and worked examples.
-#
-# hooks:
-#   pre_tool_call:
-#     - matcher: "terminal"
-#       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
-#       timeout: 10
-#   post_tool_call:
-#     - matcher: "write_file|patch"
-#       command: "~/.hermes/agent-hooks/auto-format.sh"
-#   pre_llm_call:
-#     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
-#   subagent_stop:
-#     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
-#
-# hooks_auto_accept: false
@@ -9,7 +9,6 @@ import copy
 import json
 import logging
 import tempfile
-import threading
 import os
 import re
 import uuid
@@ -35,11 +34,6 @@ except ImportError:
 HERMES_DIR = get_hermes_home().resolve()
 CRON_DIR = HERMES_DIR / "cron"
 JOBS_FILE = CRON_DIR / "jobs.json"
-
-# In-process lock protecting load_jobs→modify→save_jobs cycles.
-# Required when tick() runs jobs in parallel threads — without this,
-# concurrent mark_job_run / advance_next_run calls can clobber each other.
-_jobs_file_lock = threading.Lock()
 OUTPUT_DIR = CRON_DIR / "output"
 ONESHOT_GRACE_SECONDS = 120

@@ -600,44 +594,43 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
    ``delivery_error`` is tracked separately from the agent error — a job
    can succeed (agent produced output) but fail delivery (platform down).
    """
-    with _jobs_file_lock:
-        jobs = load_jobs()
-        for i, job in enumerate(jobs):
-            if job["id"] == job_id:
-                now = _hermes_now().isoformat()
-                job["last_run_at"] = now
-                job["last_status"] = "ok" if success else "error"
-                job["last_error"] = error if not success else None
-                # Track delivery failures separately — cleared on successful delivery
-                job["last_delivery_error"] = delivery_error
+    jobs = load_jobs()
+    for i, job in enumerate(jobs):
+        if job["id"] == job_id:
+            now = _hermes_now().isoformat()
+            job["last_run_at"] = now
+            job["last_status"] = "ok" if success else "error"
+            job["last_error"] = error if not success else None
+            # Track delivery failures separately — cleared on successful delivery
+            job["last_delivery_error"] = delivery_error
+            
+            # Increment completed count
+            if job.get("repeat"):
+                job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
                
-                # Increment completed count
-                if job.get("repeat"):
-                    job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
-                    
-                    # Check if we've hit the repeat limit
-                    times = job["repeat"].get("times")
-                    completed = job["repeat"]["completed"]
-                    if times is not None and times > 0 and completed >= times:
-                        # Remove the job (limit reached)
-                        jobs.pop(i)
-                        save_jobs(jobs)
-                        return
-                
-                # Compute next run
-                job["next_run_at"] = compute_next_run(job["schedule"], now)
+                # Check if we've hit the repeat limit
+                times = job["repeat"].get("times")
+                completed = job["repeat"]["completed"]
+                if times is not None and times > 0 and completed >= times:
+                    # Remove the job (limit reached)
+                    jobs.pop(i)
+                    save_jobs(jobs)
+                    return
+            
+            # Compute next run
+            job["next_run_at"] = compute_next_run(job["schedule"], now)

-                # If no next run (one-shot completed), disable
-                if job["next_run_at"] is None:
-                    job["enabled"] = False
-                    job["state"] = "completed"
-                elif job.get("state") != "paused":
-                    job["state"] = "scheduled"
+            # If no next run (one-shot completed), disable
+            if job["next_run_at"] is None:
+                job["enabled"] = False
+                job["state"] = "completed"
+            elif job.get("state") != "paused":
+                job["state"] = "scheduled"

-                save_jobs(jobs)
-                return
+            save_jobs(jobs)
+            return

-        logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
+    logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)


 def advance_next_run(job_id: str) -> bool:
@@ -652,21 +645,20 @@ def advance_next_run(job_id: str) -> bool:

    Returns True if next_run_at was advanced, False otherwise.
    """
-    with _jobs_file_lock:
-        jobs = load_jobs()
-        for job in jobs:
-            if job["id"] == job_id:
-                kind = job.get("schedule", {}).get("kind")
-                if kind not in ("cron", "interval"):
-                    return False
-                now = _hermes_now().isoformat()
-                new_next = compute_next_run(job["schedule"], now)
-                if new_next and new_next != job.get("next_run_at"):
-                    job["next_run_at"] = new_next
-                    save_jobs(jobs)
-                    return True
+    jobs = load_jobs()
+    for job in jobs:
+        if job["id"] == job_id:
+            kind = job.get("schedule", {}).get("kind")
+            if kind not in ("cron", "interval"):
                return False
-        return False
+            now = _hermes_now().isoformat()
+            new_next = compute_next_run(job["schedule"], now)
+            if new_next and new_next != job.get("next_run_at"):
+                job["next_run_at"] = new_next
+                save_jobs(jobs)
+                return True
+            return False
+    return False


 def get_due_jobs() -> List[Dict[str, Any]]:
@@ -252,11 +252,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
                coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)

            future = asyncio.run_coroutine_threadsafe(coro, loop)
-            try:
-                result = future.result(timeout=30)
-            except TimeoutError:
-                future.cancel()
-                raise
+            result = future.result(timeout=30)
            if result and not getattr(result, "success", True):
                logger.warning(
                    "Job '%s': media send failed for %s: %s",
@@ -386,11 +382,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                        runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
                        loop,
                    )
-                    try:
-                        send_result = future.result(timeout=60)
-                    except TimeoutError:
-                        future.cancel()
-                        raise
+                    send_result = future.result(timeout=60)
                    if send_result and not getattr(send_result, "success", True):
                        err = getattr(send_result, "error", "unknown")
                        logger.warning(
@@ -430,6 +422,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
                # fresh thread that has no running loop.
                coro.close()
+                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                    future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
                    result = future.result(timeout=30)
@@ -439,9 +432,8 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                delivery_errors.append(msg)
                continue

-            error = result.get("error") if result else None
-            if error:
-                msg = f"delivery error: {error}"
+            if result and result.get("error"):
+                msg = f"delivery error: {result['error']}"
                logger.error("Job '%s': %s", job["id"], msg)
                delivery_errors.append(msg)
                continue
@@ -755,17 +747,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    # scheduler process — every job this process runs is a cron job.
    os.environ["HERMES_CRON_SESSION"] = "1"

-    # Use ContextVars for per-job session/delivery state so parallel jobs
-    # don't clobber each other's targets (os.environ is process-global).
-    from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
-
-    _ctx_tokens = set_session_vars(
-        platform=origin["platform"] if origin else "",
-        chat_id=str(origin["chat_id"]) if origin else "",
-        chat_name=origin.get("chat_name", "") if origin else "",
-    )
-
    try:
+        # Inject origin context so the agent's send_message tool knows the chat.
+        # Must be INSIDE the try block so the finally cleanup always runs.
+        if origin:
+            os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
+            os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
+            if origin.get("chat_name"):
+                os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
        from dotenv import load_dotenv
@@ -776,10 +765,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        delivery_target = _resolve_delivery_target(job)
        if delivery_target:
-            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
-            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
+            os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
+            os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
            if delivery_target.get("thread_id") is not None:
-                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
+                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -818,13 +807,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        prefill_messages = None
        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
        if prefill_file:
+            import json as _json
            pfpath = Path(prefill_file).expanduser()
            if not pfpath.is_absolute():
                pfpath = _hermes_home / pfpath
            if pfpath.exists():
                try:
                    with open(pfpath, "r", encoding="utf-8") as _pf:
-                        prefill_messages = json.load(_pf)
+                        prefill_messages = _json.load(_pf)
                    if not isinstance(prefill_messages, list):
                        prefill_messages = None
                except Exception as e:
@@ -973,12 +963,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"— last activity: {_last_desc}"
            )

-        # Guard against non-dict returns from run_conversation under error conditions
-        if not isinstance(result, dict):
-            raise RuntimeError(
-                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
-            )
-
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -1028,8 +1012,16 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        return False, output, "", error_msg

    finally:
-        # Clean up ContextVar session/delivery state for this job.
-        clear_session_vars(_ctx_tokens)
+        # Clean up injected env vars so they don't leak to other jobs
+        for key in (
+            "HERMES_SESSION_PLATFORM",
+            "HERMES_SESSION_CHAT_ID",
+            "HERMES_SESSION_CHAT_NAME",
+            "HERMES_CRON_AUTO_DELIVER_PLATFORM",
+            "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
+            "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
+        ):
+            os.environ.pop(key, None)
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -1082,41 +1074,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
        if verbose:
            logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))

-        # Advance next_run_at for all recurring jobs FIRST, under the file lock,
-        # before any execution begins.  This preserves at-most-once semantics.
+        executed = 0
        for job in due_jobs:
-            advance_next_run(job["id"])
-
-        # Resolve max parallel workers: env var > config.yaml > unbounded.
-        # Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
-        _max_workers: Optional[int] = None
-        try:
-            _env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
-            if _env_par:
-                _max_workers = int(_env_par) or None
-        except (ValueError, TypeError):
-            logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
-        if _max_workers is None:
            try:
-                _ucfg = load_config() or {}
-                _cfg_par = (
-                    _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
-                ).get("max_parallel_jobs")
-                if _cfg_par is not None:
-                    _max_workers = int(_cfg_par) or None
-            except Exception:
-                pass
+                # For recurring jobs (cron/interval), advance next_run_at to the
+                # next future occurrence BEFORE execution.  This way, if the
+                # process crashes mid-run, the job won't re-fire on restart.
+                # One-shot jobs are left alone so they can retry on restart.
+                advance_next_run(job["id"])

-        if verbose:
-            logger.info(
-                "Running %d job(s) in parallel (max_workers=%s)",
-                len(due_jobs),
-                _max_workers if _max_workers else "unbounded",
-            )
-
-        def _process_job(job: dict) -> bool:
-            """Run one due job end-to-end: execute, save, deliver, mark."""
-            try:
                success, output, final_response, error = run_job(job)

                output_file = save_job_output(job["id"], output)
@@ -1148,23 +1114,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                    error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"

                mark_job_run(job["id"], success, error, delivery_error=delivery_error)
-                return True
+                executed += 1

            except Exception as e:
                logger.error("Error processing job %s: %s", job['id'], e)
                mark_job_run(job["id"], False, str(e))
-                return False

-        # Run all due jobs concurrently, each in its own ContextVar copy
-        # so session/delivery state stays isolated per-thread.
-        with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
-            _futures = []
-            for job in due_jobs:
-                _ctx = contextvars.copy_context()
-                _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
-            _results = [f.result() for f in _futures]
-
-        return sum(_results)
+        return executed
    finally:
        if fcntl:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
@@ -29,7 +29,7 @@ echo "📝 Logging to: $LOG_FILE"
 # Point to the example dataset in this directory
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

-python scripts/batch_runner.py \
+python batch_runner.py \
  --dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
  --batch_size=5 \
  --run_name="browser_tasks_example" \
@@ -4,7 +4,7 @@
 # Generates tool-calling trajectories for multi-step web research tasks.
 #
 # Usage:
-#   python scripts/batch_runner.py \
+#   python batch_runner.py \
 #     --config datagen-config-examples/web_research.yaml \
 #     --run_name web_research_v1

@@ -58,13 +58,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

-# Ensure the main config file remains accessible to the hermes runtime user
-# even if it was edited on the host after initial ownership setup.
-if [ -f "$HERMES_HOME/config.yaml" ]; then
-    chown hermes:hermes "$HERMES_HOME/config.yaml"
-    chmod 640 "$HERMES_HOME/config.yaml"
-fi
-
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -75,19 +68,4 @@ if [ -d "$INSTALL_DIR/skills" ]; then
    python3 "$INSTALL_DIR/tools/skills_sync.py"
 fi

-# Final exec: two supported invocation patterns.
-#
-#   docker run <image>                 -> exec `hermes` with no args (legacy default)
-#   docker run <image> chat -q "..."   -> exec `hermes chat -q "..."` (legacy wrap)
-#   docker run <image> sleep infinity  -> exec `sleep infinity` directly
-#   docker run <image> bash            -> exec `bash` directly
-#
-# If the first positional arg resolves to an executable on PATH, we assume the
-# caller wants to run it directly (needed by the launcher which runs long-lived
-# `sleep infinity` sandbox containers — see tools/environments/docker.py).
-# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
-# preserving the documented `docker run <image> <subcommand>` behavior.
-if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
-    exec "$@"
-fi
 exec hermes "$@"
@@ -18,10 +18,7 @@ import logging
 import os
 import uuid
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from tools.budget_config import BudgetConfig
+from typing import Any, Dict, List, Optional, Set

 from model_tools import handle_function_call
 from tools.terminal_tool import get_active_env
@@ -53,6 +53,7 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
    try:
        loop = asyncio.get_running_loop()
        # We're in an async context -- need to run in thread
+        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(
                handle_function_call, tool_name, arguments, task_id
@@ -576,14 +576,6 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["free_response_channels"] = platform_cfg["free_response_channels"]
                if "mention_patterns" in platform_cfg:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
-                if "dm_policy" in platform_cfg:
-                    bridged["dm_policy"] = platform_cfg["dm_policy"]
-                if "allow_from" in platform_cfg:
-                    bridged["allow_from"] = platform_cfg["allow_from"]
-                if "group_policy" in platform_cfg:
-                    bridged["group_policy"] = platform_cfg["group_policy"]
-                if "group_allow_from" in platform_cfg:
-                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
@@ -616,8 +608,6 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
-                if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
-                    os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()

            # Discord settings → env vars (env vars take precedence)
            discord_cfg = yaml_cfg.get("discord", {})
@@ -672,7 +662,8 @@ def load_gateway_config() -> GatewayConfig:
                if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
                    os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
                if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
-                    os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
+                    import json as _json
+                    os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
                frc = telegram_cfg.get("free_response_chats")
                if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
                    if isinstance(frc, list):
@@ -709,20 +700,6 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
-                if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
-                    os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
-                af = whatsapp_cfg.get("allow_from")
-                if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
-                    if isinstance(af, list):
-                        af = ",".join(str(v) for v in af)
-                    os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
-                if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
-                    os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
-                gaf = whatsapp_cfg.get("group_allow_from")
-                if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
-                    if isinstance(gaf, list):
-                        gaf = ",".join(str(v) for v in gaf)
-                    os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)

            # DingTalk settings → env vars (env vars take precedence)
            dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -1260,6 +1237,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            if legacy_home:
                qq_home = legacy_home
                qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
+                import logging
                logging.getLogger(__name__).warning(
                    "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
                    "in your .env for consistency with the platform key."
@@ -135,22 +135,9 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

-    def _resolve_handlers(self, event_type: str) -> List[Callable]:
-        """Return all handlers that should fire for ``event_type``.
-
-        Exact matches fire first, followed by wildcard matches (e.g.
-        ``command:*`` matches ``command:reset``).
-        """
-        handlers = list(self._handlers.get(event_type, []))
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-        return handlers
-
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event, discarding return values.
+        Fire all handlers registered for an event.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -164,7 +151,16 @@ class HookRegistry:
        if context is None:
            context = {}

-        for fn in self._resolve_handlers(event_type):
+        # Collect handlers: exact match + wildcard match
+        handlers = list(self._handlers.get(event_type, []))
+
+        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+
+        for fn in handlers:
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -172,32 +168,3 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
-
-    async def emit_collect(
-        self,
-        event_type: str,
-        context: Optional[Dict[str, Any]] = None,
-    ) -> List[Any]:
-        """Fire handlers and return their non-None return values in order.
-
-        Like :meth:`emit` but captures each handler's return value. Used for
-        decision-style hooks (e.g. ``command:<name>`` policies that want to
-        allow/deny/rewrite the command before normal dispatch).
-
-        Exceptions from individual handlers are logged but do not abort the
-        remaining handlers.
-        """
-        if context is None:
-            context = {}
-
-        results: List[Any] = []
-        for fn in self._resolve_handlers(event_type):
-            try:
-                result = fn(event_type, context)
-                if asyncio.iscoroutine(result):
-                    result = await result
-                if result is not None:
-                    results.append(result)
-            except Exception as e:
-                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
-        return results
@@ -32,7 +32,14 @@ import sqlite3
 import time
 import uuid
 from typing import Any, Dict, List, Optional
-from aiohttp import web
+
+try:
+    from aiohttp import web
+    AIOHTTP_AVAILABLE = True
+except ImportError:
+    AIOHTTP_AVAILABLE = False
+    web = None  # type: ignore[assignment]
+
 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
    BasePlatformAdapter,
@@ -110,159 +117,11 @@ def _normalize_chat_content(
        return ""


-# Content part type aliases used by the OpenAI Chat Completions and Responses
-# APIs.  We accept both spellings on input and emit a single canonical internal
-# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the
-# rest of the agent pipeline already understands.
-_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"})
-_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"})
-_FILE_PART_TYPES = frozenset({"file", "input_file"})
+def check_api_server_requirements() -> bool:
+    """Check if API server dependencies are available."""
+    return AIOHTTP_AVAILABLE


-def _normalize_multimodal_content(content: Any) -> Any:
-    """Validate and normalize multimodal content for the API server.
-
-    Returns a plain string when the content is text-only, or a list of
-    ``{"type": "text"|"image_url", ...}`` parts when images are present.
-    The output shape is the native OpenAI Chat Completions vision format,
-    which the agent pipeline accepts verbatim (OpenAI-wire providers) or
-    converts (``_preprocess_anthropic_content`` for Anthropic).
-
-    Raises ``ValueError`` with an OpenAI-style code on invalid input:
-      * ``unsupported_content_type`` — file/input_file/file_id parts, or
-        non-image ``data:`` URLs.
-      * ``invalid_image_url`` — missing URL or unsupported scheme.
-      * ``invalid_content_part`` — malformed text/image objects.
-
-    Callers translate the ValueError into a 400 response.
-    """
-    # Scalar passthrough mirrors ``_normalize_chat_content``.
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
-    if not isinstance(content, list):
-        # Mirror the legacy text-normalizer's fallback so callers that
-        # pre-existed image support still get a string back.
-        return _normalize_chat_content(content)
-
-    items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
-    normalized_parts: List[Dict[str, Any]] = []
-    text_accum_len = 0
-
-    for part in items:
-        if isinstance(part, str):
-            if part:
-                trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH]
-                normalized_parts.append({"type": "text", "text": trimmed})
-                text_accum_len += len(trimmed)
-            continue
-
-        if not isinstance(part, dict):
-            # Ignore unknown scalars for forward compatibility with future
-            # Responses API additions (e.g. ``refusal``).  The same policy
-            # the text normalizer applies.
-            continue
-
-        raw_type = part.get("type")
-        part_type = str(raw_type or "").strip().lower()
-
-        if part_type in _TEXT_PART_TYPES:
-            text = part.get("text")
-            if text is None:
-                continue
-            if not isinstance(text, str):
-                text = str(text)
-            if text:
-                trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH]
-                normalized_parts.append({"type": "text", "text": trimmed})
-                text_accum_len += len(trimmed)
-            continue
-
-        if part_type in _IMAGE_PART_TYPES:
-            detail = part.get("detail")
-            image_ref = part.get("image_url")
-            # OpenAI Responses sends ``input_image`` with a top-level
-            # ``image_url`` string; Chat Completions sends ``image_url`` as
-            # ``{"url": "...", "detail": "..."}``.  Support both.
-            if isinstance(image_ref, dict):
-                url_value = image_ref.get("url")
-                detail = image_ref.get("detail", detail)
-            else:
-                url_value = image_ref
-            if not isinstance(url_value, str) or not url_value.strip():
-                raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.")
-            url_value = url_value.strip()
-            lowered = url_value.lower()
-            if lowered.startswith("data:"):
-                if not lowered.startswith("data:image/") or "," not in url_value:
-                    raise ValueError(
-                        "unsupported_content_type:Only image data URLs are supported. "
-                        "Non-image data payloads are not supported."
-                    )
-            elif not (lowered.startswith("http://") or lowered.startswith("https://")):
-                raise ValueError(
-                    "invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs."
-                )
-            image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}}
-            if detail is not None:
-                if not isinstance(detail, str) or not detail.strip():
-                    raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.")
-                image_part["image_url"]["detail"] = detail.strip()
-            normalized_parts.append(image_part)
-            continue
-
-        if part_type in _FILE_PART_TYPES:
-            raise ValueError(
-                "unsupported_content_type:Inline image inputs are supported, "
-                "but uploaded files and document inputs are not supported on this endpoint."
-            )
-
-        # Unknown part type — reject explicitly so clients get a clear error
-        # instead of a silently dropped turn.
-        raise ValueError(
-            f"unsupported_content_type:Unsupported content part type {raw_type!r}. "
-            "Only text and image_url/input_image parts are supported."
-        )
-
-    if not normalized_parts:
-        return ""
-
-    # Text-only: collapse to a plain string so downstream logging/trajectory
-    # code sees the native shape and prompt caching on text-only turns is
-    # unaffected.
-    if all(p.get("type") == "text" for p in normalized_parts):
-        return "\n".join(p["text"] for p in normalized_parts if p.get("text"))
-
-    return normalized_parts
-
-
-def _content_has_visible_payload(content: Any) -> bool:
-    """True when content has any text or image attachment.  Used to reject empty turns."""
-    if isinstance(content, str):
-        return bool(content.strip())
-    if isinstance(content, list):
-        for part in content:
-            if isinstance(part, dict):
-                ptype = str(part.get("type") or "").strip().lower()
-                if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip():
-                    return True
-                if ptype in _IMAGE_PART_TYPES:
-                    return True
-    return False
-
-
-def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response":
-    """Translate a ``_normalize_multimodal_content`` ValueError into a 400 response."""
-    raw = str(exc)
-    code, _, message = raw.partition(":")
-    if not message:
-        code, message = "invalid_content_part", raw
-    return web.json_response(
-        _openai_error(message, code=code, param=param),
-        status=400,
-    )
-
 class ResponseStore:
    """
    SQLite-backed LRU store for Responses API state.
@@ -310,6 +169,7 @@ class ResponseStore:
        ).fetchone()
        if row is None:
            return None
+        import time
        self._conn.execute(
            "UPDATE responses SET accessed_at = ? WHERE response_id = ?",
            (time.time(), response_id),
@@ -319,6 +179,7 @@ class ResponseStore:

    def put(self, response_id: str, data: Dict[str, Any]) -> None:
        """Store a response, evicting the oldest if at capacity."""
+        import time
        self._conn.execute(
            "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
            (response_id, json.dumps(data, default=str), time.time()),
@@ -378,26 +239,30 @@ _CORS_HEADERS = {
 }


-@web.middleware
-async def cors_middleware(request, handler):
-    """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
-    adapter = request.app.get("api_server_adapter")
-    origin = request.headers.get("Origin", "")
-    cors_headers = None
-    if adapter is not None:
-        if not adapter._origin_allowed(origin):
-            return web.Response(status=403)
-        cors_headers = adapter._cors_headers_for_origin(origin)
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def cors_middleware(request, handler):
+        """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
+        adapter = request.app.get("api_server_adapter")
+        origin = request.headers.get("Origin", "")
+        cors_headers = None
+        if adapter is not None:
+            if not adapter._origin_allowed(origin):
+                return web.Response(status=403)
+            cors_headers = adapter._cors_headers_for_origin(origin)

-    if request.method == "OPTIONS":
-        if cors_headers is None:
-            return web.Response(status=403)
-        return web.Response(status=200, headers=cors_headers)
+        if request.method == "OPTIONS":
+            if cors_headers is None:
+                return web.Response(status=403)
+            return web.Response(status=200, headers=cors_headers)
+
+        response = await handler(request)
+        if cors_headers is not None:
+            response.headers.update(cors_headers)
+        return response
+else:
+    cors_middleware = None  # type: ignore[assignment]

-    response = await handler(request)
-    if cors_headers is not None:
-        response.headers.update(cors_headers)
-    return response

 def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
    """OpenAI-style error envelope."""
@@ -411,18 +276,21 @@ def _openai_error(message: str, err_type: str = "invalid_request_error", param:
    }


-@web.middleware
-async def body_limit_middleware(request, handler):
-    """Reject overly large request bodies early based on Content-Length."""
-    if request.method in ("POST", "PUT", "PATCH"):
-        cl = request.headers.get("Content-Length")
-        if cl is not None:
-            try:
-                if int(cl) > MAX_REQUEST_BYTES:
-                    return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
-            except ValueError:
-                return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
-    return await handler(request)
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def body_limit_middleware(request, handler):
+        """Reject overly large request bodies early based on Content-Length."""
+        if request.method in ("POST", "PUT", "PATCH"):
+            cl = request.headers.get("Content-Length")
+            if cl is not None:
+                try:
+                    if int(cl) > MAX_REQUEST_BYTES:
+                        return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
+                except ValueError:
+                    return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
+        return await handler(request)
+else:
+    body_limit_middleware = None  # type: ignore[assignment]

 _SECURITY_HEADERS = {
    "X-Content-Type-Options": "nosniff",
@@ -430,13 +298,16 @@ _SECURITY_HEADERS = {
 }


-@web.middleware
-async def security_headers_middleware(request, handler):
-    """Add security headers to all responses (including errors)."""
-    response = await handler(request)
-    for k, v in _SECURITY_HEADERS.items():
-        response.headers.setdefault(k, v)
-    return response
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def security_headers_middleware(request, handler):
+        """Add security headers to all responses (including errors)."""
+        response = await handler(request)
+        for k, v in _SECURITY_HEADERS.items():
+            response.headers.setdefault(k, v)
+        return response
+else:
+    security_headers_middleware = None  # type: ignore[assignment]


 class _IdempotencyCache:
@@ -444,12 +315,12 @@ class _IdempotencyCache:
    def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
        from collections import OrderedDict
        self._store = OrderedDict()
-        self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
        self._ttl = ttl_seconds
        self._max = max_items

    def _purge(self):
-        now = time.time()
+        import time as _t
+        now = _t.time()
        expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
        for k in expired:
            self._store.pop(k, None)
@@ -461,27 +332,11 @@ class _IdempotencyCache:
        item = self._store.get(key)
        if item and item["fp"] == fingerprint:
            return item["resp"]
-
-        inflight_key = (key, fingerprint)
-        task = self._inflight.get(inflight_key)
-        if task is None:
-            async def _compute_and_store():
-                resp = await compute_coro()
-                import time as _t
-                self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
-                self._purge()
-                return resp
-
-            task = asyncio.create_task(_compute_and_store())
-            self._inflight[inflight_key] = task
-
-            def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
-                if self._inflight.get(inflight_key) is done_task:
-                    self._inflight.pop(inflight_key, None)
-
-            task.add_done_callback(_clear_inflight)
-
-        return await asyncio.shield(task)
+        resp = await compute_coro()
+        import time as _t
+        self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
+        self._purge()
+        return resp


 _idem_cache = _IdempotencyCache()
@@ -511,30 +366,6 @@ def _derive_chat_session_id(
    return f"api-{digest}"


-_CRON_AVAILABLE = False
-try:
-    from cron.jobs import (
-        list_jobs as _cron_list,
-        get_job as _cron_get,
-        create_job as _cron_create,
-        update_job as _cron_update,
-        remove_job as _cron_remove,
-        pause_job as _cron_pause,
-        resume_job as _cron_resume,
-        trigger_job as _cron_trigger,
-    )
-    _CRON_AVAILABLE = True
-except ImportError:
-    _cron_list = None
-    _cron_get = None
-    _cron_create = None
-    _cron_update = None
-    _cron_remove = None
-    _cron_pause = None
-    _cron_resume = None
-    _cron_trigger = None
-
-
 class APIServerAdapter(BasePlatformAdapter):
    """
    OpenAI-compatible HTTP API server adapter.
@@ -781,7 +612,7 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

-    async def _handle_chat_completions(self, request: "web.Request") -> "web.StreamResponse":
+    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -806,32 +637,26 @@ class APIServerAdapter(BasePlatformAdapter):
        system_prompt = None
        conversation_messages: List[Dict[str, str]] = []

-        for idx, msg in enumerate(messages):
+        for msg in messages:
            role = msg.get("role", "")
-            raw_content = msg.get("content", "")
+            content = _normalize_chat_content(msg.get("content", ""))
            if role == "system":
-                # System messages don't support images (Anthropic rejects, OpenAI
-                # text-model systems don't render them).  Flatten to text.
-                content = _normalize_chat_content(raw_content)
+                # Accumulate system messages
                if system_prompt is None:
                    system_prompt = content
                else:
                    system_prompt = system_prompt + "\n" + content
            elif role in ("user", "assistant"):
-                try:
-                    content = _normalize_multimodal_content(raw_content)
-                except ValueError as exc:
-                    return _multimodal_validation_error(exc, param=f"messages[{idx}].content")
                conversation_messages.append({"role": role, "content": content})

        # Extract the last user message as the primary input
-        user_message: Any = ""
+        user_message = ""
        history = []
        if conversation_messages:
            user_message = conversation_messages[-1].get("content", "")
            history = conversation_messages[:-1]

-        if not _content_has_visible_payload(user_message):
+        if not user_message:
            return web.json_response(
                {"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
                status=400,
@@ -1565,7 +1390,7 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

-    async def _handle_responses(self, request: "web.Request") -> "web.StreamResponse":
+    async def _handle_responses(self, request: "web.Request") -> "web.Response":
        """POST /v1/responses — OpenAI Responses API format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -1599,19 +1424,16 @@ class APIServerAdapter(BasePlatformAdapter):
            # No error if conversation doesn't exist yet — it's a new conversation

        # Normalize input to message list
-        input_messages: List[Dict[str, Any]] = []
+        input_messages: List[Dict[str, str]] = []
        if isinstance(raw_input, str):
            input_messages = [{"role": "user", "content": raw_input}]
        elif isinstance(raw_input, list):
-            for idx, item in enumerate(raw_input):
+            for item in raw_input:
                if isinstance(item, str):
                    input_messages.append({"role": "user", "content": item})
                elif isinstance(item, dict):
                    role = item.get("role", "user")
-                    try:
-                        content = _normalize_multimodal_content(item.get("content", ""))
-                    except ValueError as exc:
-                        return _multimodal_validation_error(exc, param=f"input[{idx}].content")
+                    content = _normalize_chat_content(item.get("content", ""))
                    input_messages.append({"role": role, "content": content})
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1620,7 +1442,7 @@ class APIServerAdapter(BasePlatformAdapter):
        # This lets stateless clients supply their own history instead of
        # relying on server-side response chaining via previous_response_id.
        # Precedence: explicit conversation_history > previous_response_id.
-        conversation_history: List[Dict[str, Any]] = []
+        conversation_history: List[Dict[str, str]] = []
        raw_history = body.get("conversation_history")
        if raw_history:
            if not isinstance(raw_history, list):
@@ -1634,11 +1456,7 @@ class APIServerAdapter(BasePlatformAdapter):
                        _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
                        status=400,
                    )
-                try:
-                    entry_content = _normalize_multimodal_content(entry["content"])
-                except ValueError as exc:
-                    return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content")
-                conversation_history.append({"role": str(entry["role"]), "content": entry_content})
+                conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

@@ -1658,8 +1476,8 @@ class APIServerAdapter(BasePlatformAdapter):
            conversation_history.append(msg)

        # Last input message is the user_message
-        user_message: Any = input_messages[-1].get("content", "") if input_messages else ""
-        if not _content_has_visible_payload(user_message):
+        user_message = input_messages[-1].get("content", "") if input_messages else ""
+        if not user_message:
            return web.json_response(_openai_error("No user message found in input"), status=400)

        # Truncation support
@@ -1864,16 +1682,44 @@ class APIServerAdapter(BasePlatformAdapter):
    # Cron jobs API
    # ------------------------------------------------------------------

+    # Check cron module availability once (not per-request)
+    _CRON_AVAILABLE = False
+    try:
+        from cron.jobs import (
+            list_jobs as _cron_list,
+            get_job as _cron_get,
+            create_job as _cron_create,
+            update_job as _cron_update,
+            remove_job as _cron_remove,
+            pause_job as _cron_pause,
+            resume_job as _cron_resume,
+            trigger_job as _cron_trigger,
+        )
+        # Wrap as staticmethod to prevent descriptor binding — these are plain
+        # module functions, not instance methods.  Without this, self._cron_*()
+        # injects ``self`` as the first positional argument and every call
+        # raises TypeError.
+        _cron_list = staticmethod(_cron_list)
+        _cron_get = staticmethod(_cron_get)
+        _cron_create = staticmethod(_cron_create)
+        _cron_update = staticmethod(_cron_update)
+        _cron_remove = staticmethod(_cron_remove)
+        _cron_pause = staticmethod(_cron_pause)
+        _cron_resume = staticmethod(_cron_resume)
+        _cron_trigger = staticmethod(_cron_trigger)
+        _CRON_AVAILABLE = True
+    except ImportError:
+        pass
+
    _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
    # Allowed fields for update — prevents clients injecting arbitrary keys
    _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
    _MAX_NAME_LENGTH = 200
    _MAX_PROMPT_LENGTH = 5000

-    @staticmethod
-    def _check_jobs_available() -> Optional["web.Response"]:
+    def _check_jobs_available(self) -> Optional["web.Response"]:
        """Return error response if cron module isn't available."""
-        if not _CRON_AVAILABLE:
+        if not self._CRON_AVAILABLE:
            return web.json_response(
                {"error": "Cron module not available"}, status=501,
            )
@@ -1898,7 +1744,7 @@ class APIServerAdapter(BasePlatformAdapter):
            return cron_err
        try:
            include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
-            jobs = _cron_list(include_disabled=include_disabled)
+            jobs = self._cron_list(include_disabled=include_disabled)
            return web.json_response({"jobs": jobs})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1946,7 +1792,7 @@ class APIServerAdapter(BasePlatformAdapter):
            if repeat is not None:
                kwargs["repeat"] = repeat

-            job = _cron_create(**kwargs)
+            job = self._cron_create(**kwargs)
            return web.json_response({"job": job})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -1963,7 +1809,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_get(job_id)
+            job = self._cron_get(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -1996,7 +1842,7 @@ class APIServerAdapter(BasePlatformAdapter):
                return web.json_response(
                    {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
                )
-            job = _cron_update(job_id, sanitized)
+            job = self._cron_update(job_id, sanitized)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2015,7 +1861,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            success = _cron_remove(job_id)
+            success = self._cron_remove(job_id)
            if not success:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"ok": True})
@@ -2034,7 +1880,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_pause(job_id)
+            job = self._cron_pause(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2053,7 +1899,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_resume(job_id)
+            job = self._cron_resume(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2072,7 +1918,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_trigger(job_id)
+            job = self._cron_trigger(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2459,6 +2305,10 @@ class APIServerAdapter(BasePlatformAdapter):

    async def connect(self) -> bool:
        """Start the aiohttp web server."""
+        if not AIOHTTP_AVAILABLE:
+            logger.warning("[%s] aiohttp not installed", self.name)
+            return False
+
        try:
            mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
            self._app = web.Application(middlewares=mws)
@@ -19,8 +19,6 @@ import uuid
 from abc import ABC, abstractmethod
 from urllib.parse import urlsplit

-from utils import normalize_proxy_url
-
 logger = logging.getLogger(__name__)


@@ -161,13 +159,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
-            return normalize_proxy_url(value)
+            return value
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
-            return normalize_proxy_url(value)
-    return normalize_proxy_url(_detect_macos_system_proxy())
+            return value
+    return _detect_macos_system_proxy()


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -187,14 +185,16 @@ def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-        except ImportError:
-            raise ImportError(
-                "aiohttp-socks is required for SOCKS proxy support. "
-                "Install with: pip install hermes-agent[messaging]"
-            ) from None

-        connector = ProxyConnector.from_url(proxy_url, rdns=True)
-        return {"connector": connector}
+            connector = ProxyConnector.from_url(proxy_url, rdns=True)
+            return {"connector": connector}
+        except ImportError:
+            logger.warning(
+                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+                "Run: pip install aiohttp-socks",
+                proxy_url,
+            )
+            return {}
    return {"proxy": proxy_url}


@@ -218,14 +218,16 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-        except ImportError:
-            raise ImportError(
-                "aiohttp-socks is required for SOCKS proxy support. "
-                "Install with: pip install hermes-agent[messaging]"
-            ) from None

-        connector = ProxyConnector.from_url(proxy_url, rdns=True)
-        return {"connector": connector}, {}
+            connector = ProxyConnector.from_url(proxy_url, rdns=True)
+            return {"connector": connector}, {}
+        except ImportError:
+            logger.warning(
+                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+                "Run: pip install aiohttp-socks",
+                proxy_url,
+            )
+            return {}, {}
    return {}, {"proxy": proxy_url}


@@ -389,9 +391,12 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

+    import asyncio
    import httpx
-    _log = logging.getLogger(__name__)
+    import logging as _logging
+    _log = _logging.getLogger(__name__)

+    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -409,6 +414,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                response.raise_for_status()
                return cache_image_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -424,7 +430,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise AssertionError("unreachable: retry loop exhausted")
+    raise last_exc


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -504,9 +510,12 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

+    import asyncio
    import httpx
-    _log = logging.getLogger(__name__)
+    import logging as _logging
+    _log = _logging.getLogger(__name__)

+    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -524,6 +533,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                response.raise_for_status()
                return cache_audio_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -539,40 +549,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise AssertionError("unreachable: retry loop exhausted")
-
-
-# ---------------------------------------------------------------------------
-# Video cache utilities
-#
-# Same pattern as image/audio cache -- videos from platforms are downloaded
-# here so the agent can reference them by local file path.
-# ---------------------------------------------------------------------------
-
-VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")
-
-SUPPORTED_VIDEO_TYPES = {
-    ".mp4": "video/mp4",
-    ".mov": "video/quicktime",
-    ".webm": "video/webm",
-    ".mkv": "video/x-matroska",
-    ".avi": "video/x-msvideo",
-}
-
-
-def get_video_cache_dir() -> Path:
-    """Return the video cache directory, creating it if it doesn't exist."""
-    VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
-    return VIDEO_CACHE_DIR
-
-
-def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
-    """Save raw video bytes to the cache and return the absolute file path."""
-    cache_dir = get_video_cache_dir()
-    filename = f"video_{uuid.uuid4().hex[:12]}{ext}"
-    filepath = cache_dir / filename
-    filepath.write_bytes(data)
-    return str(filepath)
+    raise last_exc


 # ---------------------------------------------------------------------------
@@ -750,10 +727,7 @@ class MessageEvent:
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
-        args = parts[1] if len(parts) > 1 else ""
-        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
-        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
-        return args
+        return parts[1] if len(parts) > 1 else ""


@dataclass 
@@ -1344,7 +1318,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1780,6 +1754,8 @@ class BasePlatformAdapter(ABC):
          HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
          HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
        """
+        import random
+
        mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
        if mode == "off":
            return 0.0
@@ -1829,11 +1805,8 @@ class BasePlatformAdapter(ABC):
        try:
            await self._run_processing_hook("on_processing_start", event)

-            handler = self._message_handler
-            if handler is None:
-                return
-
-            response = await handler(event)
+            # Call the handler (this can take a while with tool calls)
+            response = await self._message_handler(event)
            
            # Send response if any.  A None/empty response is normal when
            # streaming already delivered the text (already_sent=True) or
@@ -14,7 +14,7 @@ import logging
 import os
 import re
 import uuid
-from datetime import datetime, timezone
+from datetime import datetime
 from typing import Any, Dict, List, Optional
 from urllib.parse import quote

@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
 def check_bluebubbles_requirements() -> bool:
    try:
        import aiohttp  # noqa: F401
-        import httpx  # noqa: F401
+        import httpx as _httpx  # noqa: F401
    except ImportError:
        return False
    return True
@@ -377,7 +377,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        payload = {
            "addresses": [address],
            "message": message,
-            "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
+            "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
        }
        try:
            res = await self._api_post("/api/v1/chat/new", payload)
@@ -417,7 +417,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
                )
            payload: Dict[str, Any] = {
                "chatGuid": guid,
-                "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
+                "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
                "message": chunk,
            }
            if reply_to and self._private_api_enabled and self._helper_connected:
@@ -527,7 +527,6 @@ class DiscordAdapter(BasePlatformAdapter):
        # Reply threading mode: "off" (no replies), "first" (reply on first
        # chunk only, default), "all" (reply-reference on every chunk).
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
-        self._slash_commands: bool = self.config.extra.get("slash_commands", True)

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -542,6 +541,7 @@ class DiscordAdapter(BasePlatformAdapter):
            # ctypes.util.find_library fails on macOS with Homebrew-installed libs,
            # so fall back to known Homebrew paths if needed.
            if not opus_path:
+                import sys
                _homebrew_paths = (
                    "/opt/homebrew/lib/libopus.dylib",  # Apple Silicon
                    "/usr/local/lib/libopus.dylib",     # Intel Mac
@@ -745,8 +745,7 @@ class DiscordAdapter(BasePlatformAdapter):
                    )

            # Register slash commands
-            if self._slash_commands:
-                self._register_slash_commands()
+            self._register_slash_commands()

            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -1196,16 +1195,9 @@ class DiscordAdapter(BasePlatformAdapter):
            try:
                import base64

-                try:
-                    from mutagen.oggopus import OggOpus
-                except ImportError:
-                    raise ImportError(
-                        "mutagen is required for Discord voice messages. "
-                        "Install with: pip install hermes-agent[messaging]"
-                    ) from None
-
                duration_secs = 5.0
                try:
+                    from mutagen.oggopus import OggOpus
                    info = OggOpus(audio_path)
                    duration_secs = info.info.length
                except Exception:
@@ -1430,7 +1422,8 @@ class DiscordAdapter(BasePlatformAdapter):
        speaking_user_ids: set = set()
        receiver = self._voice_receivers.get(guild_id)
        if receiver:
-            now = time.monotonic()
+            import time as _time
+            now = _time.monotonic()
            with receiver._lock:
                for ssrc, last_t in receiver._last_packet_time.items():
                    # Consider "speaking" if audio received within last 2 seconds
@@ -1898,7 +1891,7 @@ class DiscordAdapter(BasePlatformAdapter):
            # Fetch full member list (requires members intent)
            try:
                members = guild.members
-                if guild.member_count is not None and len(members) < guild.member_count:
+                if len(members) < guild.member_count:
                    members = [m async for m in guild.fetch_members(limit=None)]
            except Exception as e:
                logger.warning("Failed to fetch members for guild %s: %s", guild.name, e)
@@ -2138,42 +2131,10 @@ class DiscordAdapter(BasePlatformAdapter):
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
-        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
-            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
-            discord_name = _name.lower()[:32]
-            desc = (_description or f"Run /{_name}")[:100]
-            has_args = bool(_args_hint)
-
-            if has_args:
-                def _make_args_handler(__name: str, __hint: str):
-                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
-                    async def _handler(interaction: discord.Interaction, args: str = ""):
-                        await self._run_simple_slash(
-                            interaction, f"/{__name} {args}".strip()
-                        )
-                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
-                    return _handler
-
-                handler = _make_args_handler(_name, _args_hint)
-            else:
-                def _make_simple_handler(__name: str):
-                    async def _handler(interaction: discord.Interaction):
-                        await self._run_simple_slash(interaction, f"/{__name}")
-                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
-                    return _handler
-
-                handler = _make_simple_handler(_name)
-
-            return discord.app_commands.Command(
-                name=discord_name,
-                description=desc,
-                callback=handler,
-            )
-
-        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

+            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2188,10 +2149,38 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                auto_cmd = _build_auto_slash_command(
-                    cmd_def.name,
-                    cmd_def.description,
-                    cmd_def.args_hint,
+                # Skip aliases that overlap with already-registered names
+                # (aliases for explicitly registered commands are handled above).
+                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
+                has_args = bool(cmd_def.args_hint)
+
+                if has_args:
+                    # Command takes optional arguments — create handler with
+                    # an optional ``args`` string parameter.
+                    def _make_args_handler(_name: str, _hint: str):
+                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
+                        async def _handler(interaction: discord.Interaction, args: str = ""):
+                            await self._run_simple_slash(
+                                interaction, f"/{_name} {args}".strip()
+                            )
+                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
+                        return _handler
+
+                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
+                else:
+                    # Parameterless command.
+                    def _make_simple_handler(_name: str):
+                        async def _handler(interaction: discord.Interaction):
+                            await self._run_simple_slash(interaction, f"/{_name}")
+                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
+                        return _handler
+
+                    handler = _make_simple_handler(cmd_def.name)
+
+                auto_cmd = discord.app_commands.Command(
+                    name=discord_name,
+                    description=desc,
+                    callback=handler,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2208,35 +2197,6 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

-        # ── Plugin-registered slash commands ──
-        # Plugins register via PluginContext.register_command(); we mirror
-        # those into Discord's native slash picker so users get the same
-        # autocomplete UX as for built-in commands. No per-platform plugin
-        # API needed — plugin commands are platform-agnostic.
-        try:
-            from hermes_cli.commands import _iter_plugin_command_entries
-
-            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
-                discord_name = plugin_name.lower()[:32]
-                if discord_name in already_registered:
-                    continue
-                auto_cmd = _build_auto_slash_command(
-                    plugin_name,
-                    plugin_desc,
-                    plugin_args_hint,
-                )
-                try:
-                    tree.add_command(auto_cmd)
-                    already_registered.add(discord_name)
-                except Exception:
-                    # Silently skip commands that fail registration (e.g.
-                    # name conflict with a subcommand group).
-                    pass
-        except Exception as e:
-            logger.warning(
-                "Discord auto-register from plugin commands failed: %s", e
-            )
-
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -2511,7 +2471,7 @@ class DiscordAdapter(BasePlatformAdapter):
                if isinstance(skills, str):
                    return [skills]
                if isinstance(skills, list) and skills:
-                    return list(dict.fromkeys(skills))  # ty: ignore[invalid-return-type]  # dedup, preserve order
+                    return list(dict.fromkeys(skills))  # dedup, preserve order
        return None

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
@@ -3002,17 +2962,6 @@ class DiscordAdapter(BasePlatformAdapter):
            parent_channel_id = self._get_parent_channel_id(message.channel)

        is_voice_linked_channel = False
-
-        # Save mention-stripped text before auto-threading since create_thread()
-        # can clobber message.content, breaking /command detection in channels.
-        raw_content = message.content.strip()
-        normalized_content = raw_content
-        mention_prefix = False
-        if self._client.user and self._client.user in message.mentions:
-            mention_prefix = True
-            normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
-            normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
-            message.content = normalized_content
        if not isinstance(message.channel, discord.DMChannel):
            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
@@ -3047,11 +2996,16 @@ class DiscordAdapter(BasePlatformAdapter):

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
-            in_bot_thread = is_thread and thread_id is not None and thread_id in self._threads
+            in_bot_thread = is_thread and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
-                if self._client.user not in message.mentions and not mention_prefix:
+                if self._client.user not in message.mentions:
                    return
+
+            if self._client.user and self._client.user in message.mentions:
+                message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
+                message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
+
        # Auto-thread: when enabled, automatically create a thread for every
        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
@@ -3073,7 +3027,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Determine message type
        msg_type = MessageType.TEXT
-        if normalized_content.startswith("/"):
+        if message.content.startswith("/"):
            msg_type = MessageType.COMMAND
        elif message.attachments:
            # Check attachment types
@@ -3213,9 +3167,7 @@ class DiscordAdapter(BasePlatformAdapter):
                                att.filename, e, exc_info=True,
                            )

-        # Use normalized_content (saved before auto-threading) instead of message.content,
-        # to detect /slash commands in channel messages.
-        event_text = normalized_content
+        event_text = message.content
        if pending_text_injection:
            event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection

@@ -3640,9 +3592,7 @@ if DISCORD_AVAILABLE:
                )
                return

-            if interaction.data is None:
-                return
-            provider_slug = interaction.data["values"][0]  # ty: ignore[invalid-key]
+            provider_slug = interaction.data["values"][0]
            self._selected_provider = provider_slug
            provider = next(
                (p for p in self.providers if p["slug"] == provider_slug), None
@@ -3676,10 +3626,8 @@ if DISCORD_AVAILABLE:
                )
                return

-            if interaction.data is None:
-                return
            self.resolved = True
-            model_id = interaction.data["values"][0]  # ty: ignore[invalid-key]
+            model_id = interaction.data["values"][0]

            try:
                result_text = await self.on_model_selected(
@@ -532,7 +532,6 @@ class EmailAdapter(BasePlatformAdapter):
        image_url: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an image URL as part of an email body."""
        text = caption or ""
@@ -546,7 +545,6 @@ class EmailAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
-        **kwargs,
    ) -> SendResult:
        """Send a file as an email attachment."""
        try:
@@ -14,35 +14,6 @@ Supports:
 - Interactive card button-click events routed as synthetic COMMAND events
 - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
 - Verification token validation as second auth layer (matches openclaw)
-
-Feishu identity model
---------------------
-Feishu uses three user-ID tiers (official docs:
-https://open.feishu.cn/document/home/user-identity-introduction/introduction):
-
-  open_id  (ou_xxx)  — **App-scoped**.  The same person gets a different
-                        open_id under each Feishu app.  Always available in
-                        event payloads without extra permissions.
-  user_id  (u_xxx)   — **Tenant-scoped**.  Stable within a company but
-                        requires the ``contact:user.employee_id:readonly``
-                        scope.  May not be present.
-  union_id (on_xxx)  — **Developer-scoped**.  Same across all apps owned by
-                        one developer/ISV.  Best cross-app stable ID.
-
-For bots specifically:
-
-  app_id              — The application's canonical credential identifier.
-  bot open_id         — Returned by ``/bot/v3/info``.  This is the bot's own
-                        open_id *within its app context* and is what Feishu
-                        puts in ``mentions[].id.open_id`` when someone
-                        @-mentions the bot.  Used for mention gating only.
-
-In single-bot mode (what Hermes currently supports), open_id works as a
-de-facto unique user identifier since there is only one app context.
-
-Session-key participant isolation prefers ``union_id`` (via user_id_alt)
-over ``open_id`` (via user_id) so that sessions stay stable if the same
-user is seen through different apps in the future.
 """

 from __future__ import annotations
@@ -64,7 +35,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Any, Dict, List, Optional
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -102,9 +73,7 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
-    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
-    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -265,8 +234,6 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
-_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
-_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -310,36 +277,12 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


-@dataclass(frozen=True)
-class FeishuMentionRef:
-    name: str = ""
-    open_id: str = ""
-    is_all: bool = False
-    is_self: bool = False
-
-
-@dataclass(frozen=True)
-class _FeishuBotIdentity:
-    open_id: str = ""
-    user_id: str = ""
-    name: str = ""
-
-    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
-        # Precedence: open_id > user_id > name. IDs are authoritative when both
-        # sides have them; the next tier is only considered when either side
-        # lacks the current one.
-        if open_id and self.open_id:
-            return open_id == self.open_id
-        if user_id and self.user_id:
-            return user_id == self.user_id
-        return bool(self.name) and name == self.name
-
-
 @dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
+    mentioned_ids: List[str] = field(default_factory=list)


 @dataclass(frozen=True)
@@ -349,14 +292,14 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentions: List[FeishuMentionRef] = field(default_factory=list)
+    mentioned_ids: List[str] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)


 @dataclass(frozen=True)
 class FeishuAdapterSettings:
-    app_id: str  # Canonical bot/app identifier (credential, not from event payloads)
+    app_id: str
    app_secret: str
    domain_name: str
    connection_mode: str
@@ -364,11 +307,7 @@ class FeishuAdapterSettings:
    verification_token: str
    group_policy: str
    allowed_group_users: frozenset[str]
-    # Bot's own open_id (app-scoped) — returned by /bot/v3/info.  Used only for
-    # @mention matching: Feishu puts this value in mentions[].id.open_id when
-    # a user @-mentions the bot in a group chat.
    bot_open_id: str
-    # Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
    bot_user_id: str
    bot_name: str
    dedup_cache_size: int
@@ -566,17 +505,14 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(
-    payload: Any,
-    *,
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
-) -> FeishuPostParseResult:
+def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
+    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -587,10 +523,7 @@ def parse_feishu_post_payload(
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(
-                _render_post_element(item, image_keys, media_refs, mentions_map)
-                for item in row
-            )
+            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
        )
        if row_text:
            parts.append(row_text)
@@ -599,6 +532,7 @@ def parse_feishu_post_payload(
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
+        mentioned_ids=mentioned_ids,
    )


@@ -650,7 +584,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+    mentioned_ids: List[str],
 ) -> str:
    if isinstance(element, str):
        return element
@@ -668,21 +602,19 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
-        # the real ref in mentions_map for the display name.
-        placeholder = str(element.get("user_id", "")).strip()
-        if placeholder == "@_all":
-            # Feishu SDK sometimes omits @_all from the top-level mentions
-            # payload; record it here so the caller's mention list stays complete.
-            if mentions_map is not None and "@_all" not in mentions_map:
-                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
-            return "@all"
-        ref = (mentions_map or {}).get(placeholder)
-        if ref is not None:
-            display_name = ref.name or ref.open_id or "user"
-        else:
-            display_name = str(element.get("user_name", "")).strip() or "user"
-        return f"@{_escape_markdown_text(display_name)}"
+        mentioned_id = (
+            str(element.get("open_id", "")).strip()
+            or str(element.get("user_id", "")).strip()
+        )
+        if mentioned_id and mentioned_id not in mentioned_ids:
+            mentioned_ids.append(mentioned_id)
+        display_name = (
+            str(element.get("user_name", "")).strip()
+            or str(element.get("name", "")).strip()
+            or str(element.get("text", "")).strip()
+            or mentioned_id
+        )
+        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -720,7 +652,8 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
+        value = element.get(key)
+        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -730,7 +663,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+    mentioned_ids: List[str],
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -738,17 +671,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
+        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
            if part
        )
    return ""
@@ -759,48 +692,31 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(
-    *,
-    message_type: str,
-    raw_content: str,
-    mentions: Optional[Sequence[Any]] = None,
-    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
-) -> FeishuNormalizedMessage:
+def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
-    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
-        text = str(payload.get("text", "") or "")
-        # Feishu SDK sometimes omits @_all from the mentions payload even when
-        # the text literal contains it (confirmed via im.v1.message.get).
-        if "@_all" in text and "@_all" not in mentions_map:
-            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(text, mentions_map),
-            mentions=list(mentions_map.values()),
+            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
        )
    if normalized_type == "post":
-        # The walker writes back to mentions_map if it encounters
-        # <at user_id="@_all">, so reading .values() after parsing is enough.
-        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
+        parsed_post = parse_feishu_post_payload(payload)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentions=list(mentions_map.values()),
+            mentioned_ids=list(parsed_post.mentioned_ids),
            relation_kind="post",
        )
-    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT,
-            mentions_map,
+            or FALLBACK_IMAGE_TEXT
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -808,7 +724,6 @@ def normalize_feishu_message(
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
-            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -820,7 +735,6 @@ def normalize_feishu_message(
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
-            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -1095,20 +1009,8 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(
-    text: str,
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
-) -> str:
-    def _sub(match: "re.Match[str]") -> str:
-        key = match.group(0)
-        ref = (mentions_map or {}).get(key)
-        if ref is None:
-            return " "
-        name = ref.name or ref.open_id or "user"
-        return f"@{name}"
-
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
-    cleaned = cleaned.replace("@_all", "@all")
+def _normalize_feishu_text(text: str) -> str:
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1127,117 +1029,6 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


-# ---------------------------------------------------------------------------
-# Mention helpers
-# ---------------------------------------------------------------------------
-
-
-def _extract_mention_ids(mention: Any) -> tuple[str, str]:
-    # Returns (open_id, user_id). im.v1.message.get hands back id as a string
-    # plus id_type discriminator; event payloads hand back a nested UserId
-    # object carrying both fields.
-    mention_id = getattr(mention, "id", None)
-    if isinstance(mention_id, str):
-        id_type = str(getattr(mention, "id_type", "") or "").lower()
-        if id_type == "open_id":
-            return mention_id, ""
-        if id_type == "user_id":
-            return "", mention_id
-        return "", ""
-    if mention_id is None:
-        return "", ""
-    return (
-        str(getattr(mention_id, "open_id", "") or ""),
-        str(getattr(mention_id, "user_id", "") or ""),
-    )
-
-
-def _build_mentions_map(
-    mentions: Optional[Sequence[Any]],
-    bot: _FeishuBotIdentity,
-) -> Dict[str, FeishuMentionRef]:
-    result: Dict[str, FeishuMentionRef] = {}
-    for mention in mentions or []:
-        key = str(getattr(mention, "key", "") or "")
-        if not key:
-            continue
-        if key == "@_all":
-            result[key] = FeishuMentionRef(is_all=True)
-            continue
-        open_id, user_id = _extract_mention_ids(mention)
-        name = str(getattr(mention, "name", "") or "").strip()
-        result[key] = FeishuMentionRef(
-            name=name,
-            open_id=open_id,
-            is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
-        )
-    return result
-
-
-def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
-    parts: List[str] = []
-    seen: set = set()
-    for ref in mentions:
-        if ref.is_self:
-            continue
-        signature = (ref.is_all, ref.open_id, ref.name)
-        if signature in seen:
-            continue
-        seen.add(signature)
-        if ref.is_all:
-            parts.append("@all")
-        elif ref.open_id:
-            parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
-        else:
-            parts.append(ref.name or "unknown")
-    return f"[Mentioned: {', '.join(parts)}]" if parts else ""
-
-
-def _strip_edge_self_mentions(
-    text: str,
-    mentions: Sequence[FeishuMentionRef],
-) -> str:
-    # Leading: strip consecutive self-mentions unconditionally.
-    # Trailing: strip only when followed by whitespace/terminal punct, so
-    # mid-sentence references ("don't @Bot again") stay intact.
-    # Leading word-boundary prevents @Al from eating @Alice.
-    if not text:
-        return text
-    self_names = [
-        f"@{ref.name or ref.open_id or 'user'}"
-        for ref in mentions
-        if ref.is_self
-    ]
-    if not self_names:
-        return text
-
-    remaining = text.lstrip()
-    while True:
-        for nm in self_names:
-            if not remaining.startswith(nm):
-                continue
-            after = remaining[len(nm):]
-            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
-                continue
-            remaining = after.lstrip()
-            break
-        else:
-            break
-
-    while True:
-        i = len(remaining)
-        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
-            i -= 1
-        body = remaining[:i]
-        tail = remaining[i:]
-        for nm in self_names:
-            if body.endswith(nm):
-                remaining = body[: -len(nm)].rstrip() + tail
-                break
-        else:
-            return remaining
-
-
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -1700,7 +1491,6 @@ class FeishuAdapter(BasePlatformAdapter):
        if not self._client:
            return SendResult(success=False, error="Not connected")

-        content = self.format_message(content)
        try:
            msg_type, payload = self._build_outbound_payload(content)
            body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2680,22 +2470,13 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
-
-        if inbound_type == MessageType.TEXT:
-            text = _strip_edge_self_mentions(text, mentions)
-            if text.startswith("/"):
-                inbound_type = MessageType.COMMAND
-
-        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
+        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
+            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
            return

-        if inbound_type != MessageType.COMMAND:
-            hint = _build_mention_hint(mentions)
-            if hint:
-                text = f"{hint}\n\n{text}" if text else hint
+        if inbound_type == MessageType.TEXT and text.startswith("/"):
+            inbound_type = MessageType.COMMAND

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -3154,20 +2935,14 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(
-        self, message: Any
-    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
+    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
+        """Extract text and cached media from a normalized Feishu message."""
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(
-            message_type=raw_type,
-            raw_content=raw_content,
-            mentions=getattr(message, "mentions", None),
-            bot=self._bot_identity(),
-        )
+        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -3184,7 +2959,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types, list(normalized.mentions)
+        return text, inbound_type, media_urls, media_types

    async def _download_feishu_message_resources(
        self,
@@ -3448,22 +3223,10 @@ class FeishuAdapter(BasePlatformAdapter):
        return "group"

    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
-        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
-
-        Preference order for the primary ``user_id`` field:
-          1. user_id  (tenant-scoped, most stable — requires permission scope)
-          2. open_id  (app-scoped, always available — different per bot app)
-
-        ``user_id_alt`` carries the union_id (developer-scoped, stable across
-        all apps by the same developer).  Session-key generation prefers
-        user_id_alt when present, so participant isolation stays stable even
-        if the primary ID is the app-scoped open_id.
-        """
        open_id = getattr(sender_id, "open_id", None) or None
        user_id = getattr(sender_id, "user_id", None) or None
        union_id = getattr(sender_id, "union_id", None) or None
-        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
-        primary_id = user_id or open_id
+        primary_id = open_id or user_id
        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
        return {
            "user_id": primary_id,
@@ -3545,31 +3308,15 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            parent_mentions = getattr(parent, "mentions", None) if parent else None
-            text = self._extract_text_from_raw_content(
-                msg_type=msg_type,
-                raw_content=raw_content,
-                mentions=parent_mentions,
-            )
+            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(
-        self,
-        *,
-        msg_type: str,
-        raw_content: str,
-        mentions: Optional[Sequence[Any]] = None,
-    ) -> Optional[str]:
-        normalized = normalize_feishu_message(
-            message_type=msg_type,
-            raw_content=raw_content,
-            mentions=mentions,
-            bot=self._bot_identity(),
-        )
+    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
+        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3639,10 +3386,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
-            mentions=getattr(message, "mentions", None),
-            bot=self._bot_identity(),
        )
-        return self._post_mentions_bot(normalized.mentions)
+        if normalized.mentioned_ids:
+            return self._post_mentions_bot(normalized.mentioned_ids)
+        return False

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3662,37 +3409,30 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        # IDs trump names: when both sides have open_id (or both user_id),
-        # match requires equal IDs. Name fallback only when either side
-        # lacks an ID.
+        """Check whether any mention targets the configured or inferred bot identity."""
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
-            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
+            mention_open_id = getattr(mention_id, "open_id", None)
+            mention_user_id = getattr(mention_id, "user_id", None)
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if mention_open_id and self._bot_open_id:
-                if mention_open_id == self._bot_open_id:
-                    return True
-                continue  # IDs differ — not the bot; skip name fallback.
-            if mention_user_id and self._bot_user_id:
-                if mention_user_id == self._bot_user_id:
-                    return True
-                continue
+            if self._bot_open_id and mention_open_id == self._bot_open_id:
+                return True
+            if self._bot_user_id and mention_user_id == self._bot_user_id:
+                return True
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
-        return any(m.is_self for m in mentions)
-
-    def _bot_identity(self) -> _FeishuBotIdentity:
-        return _FeishuBotIdentity(
-            open_id=self._bot_open_id,
-            user_id=self._bot_user_id,
-            name=self._bot_name,
-        )
+    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
+        if not mentioned_ids:
+            return False
+        if self._bot_open_id and self._bot_open_id in mentioned_ids:
+            return True
+        if self._bot_user_id and self._bot_user_id in mentioned_ids:
+            return True
+        return False

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3717,15 +3457,14 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                req = (
-                    BaseRequest.builder()
-                    .http_method(HttpMethod.GET)
-                    .uri("/open-apis/bot/v3/info")
-                    .token_types({AccessTokenType.TENANT})
-                    .build()
+                resp = await asyncio.to_thread(
+                    self._client.request,
+                    method="GET",
+                    url="/open-apis/bot/v3/info",
+                    body=None,
+                    raw_response=True,
                )
-                resp = await asyncio.to_thread(self._client.request, req)
-                content = getattr(getattr(resp, "raw", None), "content", None)
+                content = getattr(resp, "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4473,9 +4212,6 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:

    Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
-
-    Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
-    that Feishu puts in @mention payloads.  It is NOT the app_id.
    """
    if FEISHU_AVAILABLE:
        return _probe_bot_sdk(app_id, app_secret, domain)
@@ -4496,12 +4232,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
+    """Extract bot_name/bot_open_id from a /bot/v3/info payload; None if code != 0."""
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("app_name") or bot.get("bot_name"),
+        "bot_name": bot.get("app_name") or bot.get("bot_name"),  # v3 payloads carry app_name; older ones bot_name
        "bot_open_id": bot.get("open_id"),
    }

@@ -4510,18 +4246,13 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        req = (
-            BaseRequest.builder()
-            .http_method(HttpMethod.GET)
-            .uri("/open-apis/bot/v3/info")
-            .token_types({AccessTokenType.TENANT})
-            .build()
+        resp = client.request(
+            method="GET",
+            url="/open-apis/bot/v3/info",
+            body=None,
+            raw_response=True,
        )
-        resp = client.request(req)
-        content = getattr(getattr(resp, "raw", None), "content", None)
-        if content is None:
-            return None
-        return _parse_bot_response(json.loads(content))
+        return _parse_bot_response(json.loads(resp.content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -2170,8 +2170,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ul_match = re.match(r"^[\s]*[-*+]\s+(.+)$", line)
            if ul_match:
                items = []
-                while i < len(lines) and (m := re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i])):
-                    items.append(m.group(1))
+                while i < len(lines) and re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]):
+                    items.append(re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]).group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ul>{li}</ul>")
@@ -2181,8 +2181,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ol_match = re.match(r"^[\s]*\d+[.)]\s+(.+)$", line)
            if ol_match:
                items = []
-                while i < len(lines) and (m := re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i])):
-                    items.append(m.group(1))
+                while i < len(lines) and re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]):
+                    items.append(re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]).group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ol>{li}</ol>")
@@ -410,6 +410,7 @@ class MattermostAdapter(BasePlatformAdapter):
            logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)

+        import asyncio
        import aiohttp

        last_exc = None
@@ -26,8 +26,9 @@ from .adapter import (  # noqa: F401
 # -- Onboard (QR-code scan-to-configure) -----------------------------------
 from .onboard import (  # noqa: F401
    BindStatus,
+    create_bind_task,
+    poll_bind_result,
    build_connect_url,
-    qr_register,
 )
 from .crypto import decrypt_secret, generate_bind_key  # noqa: F401

@@ -43,8 +44,9 @@ __all__ = [
    "_ssrf_redirect_guard",
    # onboard
    "BindStatus",
+    "create_bind_task",
+    "poll_bind_result",
    "build_connect_url",
-    "qr_register",
    # crypto
    "decrypt_secret",
    "generate_bind_key",
@@ -535,9 +535,6 @@ class QQAdapter(BasePlatformAdapter):
                    quick_disconnect_count = 0
                else:
                    backoff_idx += 1
-                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
-                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
-                        return

            except Exception as exc:
                if not self._running:
@@ -1089,8 +1086,11 @@ class QQAdapter(BasePlatformAdapter):
            return MessageType.VIDEO
        if "image" in first_type or "photo" in first_type:
            return MessageType.PHOTO
+        # Unknown content type with an attachment — don't assume PHOTO
+        # to prevent non-image files from being sent to vision analysis.
        logger.debug(
-            "Unknown media content_type '%s', defaulting to TEXT",
+            "[%s] Unknown media content_type '%s', defaulting to TEXT",
+            self._log_tag,
            first_type,
        )
        return MessageType.TEXT
@@ -1826,12 +1826,14 @@ class QQAdapter(BasePlatformAdapter):
            body["file_name"] = file_name

        # Retry transient upload failures
+        last_exc = None
        for attempt in range(3):
            try:
                return await self._api_request(
                    "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
                )
            except RuntimeError as exc:
+                last_exc = exc
                err_msg = str(exc)
                if any(
                        kw in err_msg
@@ -1840,9 +1842,8 @@ class QQAdapter(BasePlatformAdapter):
                    raise
                if attempt < 2:
                    await asyncio.sleep(1.5 * (attempt + 1))
-                else:
-                    raise
-        raise AssertionError("unreachable: retry loop exhausted")
+
+        raise last_exc  # type: ignore[misc]

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
@@ -1,10 +1,6 @@
 """
 QQBot scan-to-configure (QR code onboard) module.

-Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
-entry-point ``qr_register()`` that handles the full flow (create task →
-display QR code → poll → decrypt credentials).
-
 Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
 generate a QR-code URL and poll for scan completion.  On success the caller
 receives the bot's *app_id*, *client_secret* (decrypted locally), and the
@@ -16,20 +12,18 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
 from __future__ import annotations

 import logging
-import time
 from enum import IntEnum
-from typing import Optional, Tuple
+from typing import Tuple
 from urllib.parse import quote

 from .constants import (
    ONBOARD_API_TIMEOUT,
    ONBOARD_CREATE_PATH,
-    ONBOARD_POLL_INTERVAL,
    ONBOARD_POLL_PATH,
    PORTAL_HOST,
    QR_URL_TEMPLATE,
 )
-from .crypto import decrypt_secret, generate_bind_key
+from .crypto import generate_bind_key
 from .utils import get_api_headers

 logger = logging.getLogger(__name__)
@@ -41,7 +35,7 @@ logger = logging.getLogger(__name__)


 class BindStatus(IntEnum):
-    """Status codes returned by ``_poll_bind_result``."""
+    """Status codes returned by ``poll_bind_result``."""

    NONE = 0
    PENDING = 1
@@ -50,40 +44,18 @@ class BindStatus(IntEnum):


 # ---------------------------------------------------------------------------
-# QR rendering
-# ---------------------------------------------------------------------------
-
-try:
-    import qrcode as _qrcode_mod
-except (ImportError, TypeError):
-    _qrcode_mod = None  # type: ignore[assignment]
-
-
-def _render_qr(url: str) -> bool:
-    """Try to render a QR code in the terminal. Returns True if successful."""
-    if _qrcode_mod is None:
-        return False
-    try:
-        qr = _qrcode_mod.QRCode(
-            error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
-            border=2,
-        )
-        qr.add_data(url)
-        qr.make(fit=True)
-        qr.print_ascii(invert=True)
-        return True
-    except Exception:
-        return False
-
-
-# ---------------------------------------------------------------------------
-# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
+# Public API
 # ---------------------------------------------------------------------------


-def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
+async def create_bind_task(
+    timeout: float = ONBOARD_API_TIMEOUT,
+) -> Tuple[str, str]:
    """Create a bind task and return *(task_id, aes_key_base64)*.

+    The AES key is generated locally and sent to the server so it can
+    encrypt the bot credentials before returning them.
+
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -92,8 +64,8 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
    url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
    key = generate_bind_key()

-    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
-        resp = client.post(url, json={"key": key}, headers=get_api_headers())
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
+        resp = await client.post(url, json={"key": key}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -108,7 +80,7 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
    return task_id, key


-def _poll_bind_result(
+async def poll_bind_result(
    task_id: str,
    timeout: float = ONBOARD_API_TIMEOUT,
 ) -> Tuple[BindStatus, str, str, str]:
@@ -117,6 +89,12 @@ def _poll_bind_result(
    Returns:
        A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.

+        * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
+          :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
+          key from :func:`create_bind_task`.
+        * ``user_openid`` is the OpenID of the person who scanned the code
+          (available when ``status == COMPLETED``).
+
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -124,8 +102,8 @@ def _poll_bind_result(

    url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"

-    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
-        resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
+        resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -144,77 +122,3 @@ def _poll_bind_result(
 def build_connect_url(task_id: str) -> str:
    """Build the QR-code target URL for a given *task_id*."""
    return QR_URL_TEMPLATE.format(task_id=quote(task_id))
-
-
-# ---------------------------------------------------------------------------
-# Public entry-point
-# ---------------------------------------------------------------------------
-
-_MAX_REFRESHES = 3
-
-
-def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
-    """Run the QQBot scan-to-configure QR registration flow.
-
-    Mirrors ``feishu.qr_register()``: handles create → display → poll →
-    decrypt in one call.  Unexpected errors propagate to the caller.
-
-    :returns:
-        ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
-        success, or ``None`` on failure / expiry / cancellation.
-    """
-    deadline = time.monotonic() + timeout_seconds
-
-    for refresh_count in range(_MAX_REFRESHES + 1):
-        # ── Create bind task ──
-        try:
-            task_id, aes_key = _create_bind_task()
-        except Exception as exc:
-            logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
-            return None
-
-        url = build_connect_url(task_id)
-
-        # ── Display QR code + URL ──
-        print()
-        if _render_qr(url):
-            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
-        else:
-            print(f"  Open this URL in QQ on your phone:\n  {url}")
-            print("  Tip: pip install qrcode  to display a scannable QR code here")
-        print()
-
-        # ── Poll loop ──
-        while time.monotonic() < deadline:
-            try:
-                status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
-            except Exception:
-                time.sleep(ONBOARD_POLL_INTERVAL)
-                continue
-
-            if status == BindStatus.COMPLETED:
-                client_secret = decrypt_secret(encrypted_secret, aes_key)
-                print()
-                print(f"  QR scan complete! (App ID: {app_id})")
-                if user_openid:
-                    print(f"  Scanner's OpenID: {user_openid}")
-                return {
-                    "app_id": app_id,
-                    "client_secret": client_secret,
-                    "user_openid": user_openid,
-                }
-
-            if status == BindStatus.EXPIRED:
-                if refresh_count >= _MAX_REFRESHES:
-                    logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
-                    return None
-                print(f"\n  QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
-                break  # next for-loop iteration creates a new task
-
-            time.sleep(ONBOARD_POLL_INTERVAL)
-        else:
-            # deadline reached without completing
-            logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
-            return None
-
-    return None
@@ -38,7 +38,6 @@ from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
-    ProcessingOutcome,
    SendResult,
    SUPPORTED_DOCUMENT_TYPES,
    safe_url_for_log,
@@ -114,11 +113,6 @@ class SlackAdapter(BasePlatformAdapter):
        # Cache for _fetch_thread_context results: cache_key → _ThreadContextCache
        self._thread_context_cache: Dict[str, _ThreadContextCache] = {}
        self._THREAD_CACHE_TTL = 60.0
-        # Track message IDs that should get reaction lifecycle (DMs / @mentions).
-        self._reacting_message_ids: set = set()
-        # Track active assistant thread status indicators so stop_typing can
-        # clear them (chat_id → thread_ts).
-        self._active_status_threads: Dict[str, str] = {}

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -368,7 +362,6 @@ class SlackAdapter(BasePlatformAdapter):
        if not thread_ts:
            return  # Can only set status in a thread context

-        self._active_status_threads[chat_id] = thread_ts
        try:
            await self._get_client(chat_id).assistant_threads_setStatus(
                channel_id=chat_id,
@@ -380,22 +373,6 @@ class SlackAdapter(BasePlatformAdapter):
            # in an assistant-enabled context. Falls back to reactions.
            logger.debug("[Slack] assistant.threads.setStatus failed: %s", e)

-    async def stop_typing(self, chat_id: str) -> None:
-        """Clear the assistant thread status indicator."""
-        if not self._app:
-            return
-        thread_ts = self._active_status_threads.pop(chat_id, None)
-        if not thread_ts:
-            return
-        try:
-            await self._get_client(chat_id).assistant_threads_setStatus(
-                channel_id=chat_id,
-                thread_ts=thread_ts,
-                status="",
-            )
-        except Exception as e:
-            logger.debug("[Slack] assistant.threads.setStatus clear failed: %s", e)
-
    def _dm_top_level_threads_as_sessions(self) -> bool:
        """Whether top-level Slack DMs get per-message session threads.

@@ -607,38 +584,6 @@ class SlackAdapter(BasePlatformAdapter):
            logger.debug("[Slack] reactions.remove failed (%s): %s", emoji, e)
            return False

-    def _reactions_enabled(self) -> bool:
-        """Check if message reactions are enabled via config/env."""
-        return os.getenv("SLACK_REACTIONS", "true").lower() not in ("false", "0", "no")
-
-    async def on_processing_start(self, event: MessageEvent) -> None:
-        """Add an in-progress reaction when message processing begins."""
-        if not self._reactions_enabled():
-            return
-        ts = getattr(event, "message_id", None)
-        if not ts or ts not in self._reacting_message_ids:
-            return
-        channel_id = getattr(event.source, "chat_id", None)
-        if channel_id:
-            await self._add_reaction(channel_id, ts, "eyes")
-
-    async def on_processing_complete(self, event: MessageEvent, outcome: ProcessingOutcome) -> None:
-        """Swap the in-progress reaction for a final success/failure reaction."""
-        if not self._reactions_enabled():
-            return
-        ts = getattr(event, "message_id", None)
-        if not ts or ts not in self._reacting_message_ids:
-            return
-        self._reacting_message_ids.discard(ts)
-        channel_id = getattr(event.source, "chat_id", None)
-        if not channel_id:
-            return
-        await self._remove_reaction(channel_id, ts, "eyes")
-        if outcome == ProcessingOutcome.SUCCESS:
-            await self._add_reaction(channel_id, ts, "white_check_mark")
-        elif outcome == ProcessingOutcome.FAILURE:
-            await self._add_reaction(channel_id, ts, "x")
-
    # ----- User identity resolution -----

    async def _resolve_user_name(self, user_id: str, chat_id: str = "") -> str:
@@ -1268,12 +1213,17 @@ class SlackAdapter(BasePlatformAdapter):
        # Only react when bot is directly addressed (DM or @mention).
        # In listen-all channels (require_mention=false), reacting to every
        # casual message would be noisy.
-        _should_react = (is_dm or is_mentioned) and self._reactions_enabled()
+        _should_react = is_dm or is_mentioned
+
        if _should_react:
-            self._reacting_message_ids.add(ts)
+            await self._add_reaction(channel_id, ts, "eyes")

        await self.handle_message(msg_event)

+        if _should_react:
+            await self._remove_reaction(channel_id, ts, "eyes")
+            await self._add_reaction(channel_id, ts, "white_check_mark")
+
    # ----- Approval button support (Block Kit) -----

    async def send_exec_approval(
@@ -1650,9 +1600,11 @@ class SlackAdapter(BasePlatformAdapter):

    async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str:
        """Download a Slack file using the bot token for auth, with retry."""
+        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
+        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1682,6 +1634,7 @@ class SlackAdapter(BasePlatformAdapter):
                        from gateway.platforms.base import cache_image_from_bytes
                        return cache_image_from_bytes(response.content, ext)
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1690,13 +1643,15 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise AssertionError("unreachable: retry loop exhausted")
+        raise last_exc

    async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
        """Download a Slack file and return raw bytes, with retry."""
+        import asyncio
        import httpx

        bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token
+        last_exc = None

        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            for attempt in range(3):
@@ -1708,6 +1663,7 @@ class SlackAdapter(BasePlatformAdapter):
                    response.raise_for_status()
                    return response.content
                except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                    last_exc = exc
                    if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                        raise
                    if attempt < 2:
@@ -1716,7 +1672,7 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise AssertionError("unreachable: retry loop exhausted")
+        raise last_exc

    # ── Channel mention gating ─────────────────────────────────────────────

@@ -25,10 +25,7 @@ import hmac
 import logging
 import os
 import urllib.parse
-from typing import Any, Dict, Optional, TYPE_CHECKING
-
-if TYPE_CHECKING:
-    import aiohttp
+from typing import Any, Dict, Optional

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
@@ -71,10 +71,8 @@ from gateway.platforms.base import (
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
-    cache_video_from_bytes,
    cache_document_from_bytes,
    resolve_proxy_url,
-    SUPPORTED_VIDEO_TYPES,
    SUPPORTED_DOCUMENT_TYPES,
    utf16_len,
    _prefix_within_utf16_limit,
@@ -496,13 +494,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    "[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
                    self.name, name, chat_id,
                )
-            elif "not a forum" in error_text or "forums_disabled" in error_text:
-                logger.warning(
-                    "[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
-                    "The user must open the DM with this bot in Telegram, tap the bot name "
-                    "at the top, and enable 'Topics' in chat settings before topics can be created.",
-                    self.name, name, chat_id,
-                )
            else:
                logger.warning(
                    "[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -794,28 +785,8 @@ class TelegramAdapter(BasePlatformAdapter):
                # Telegram pushes updates to our HTTP endpoint.  This
                # enables cloud platforms (Fly.io, Railway) to auto-wake
                # suspended machines on inbound HTTP traffic.
-                #
-                # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
-                # python-telegram-bot passes secret_token=None and the
-                # webhook endpoint accepts any HTTP POST — attackers can
-                # inject forged updates as if from Telegram. Refuse to
-                # start rather than silently run in fail-open mode.
-                # See GHSA-3vpc-7q5r-276h.
                webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
-                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
-                if not webhook_secret:
-                    raise RuntimeError(
-                        "TELEGRAM_WEBHOOK_SECRET is required when "
-                        "TELEGRAM_WEBHOOK_URL is set. Without it, the "
-                        "webhook endpoint accepts forged updates from "
-                        "anyone who can reach it — see "
-                        "https://github.com/NousResearch/hermes-agent/"
-                        "security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
-                        "Generate a secret and set it in your .env:\n"
-                        "  export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
-                        "Then register it with Telegram when setting the "
-                        "webhook via setWebhook's secret_token parameter."
-                    )
+                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
                from urllib.parse import urlparse
                webhook_path = urlparse(webhook_url).path or "/telegram"

@@ -1733,6 +1704,7 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")
        
        try:
+            import os
            if not os.path.exists(audio_path):
                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
            
@@ -1781,6 +1753,7 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")

        try:
+            import os
            if not os.path.exists(image_path):
                return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))

@@ -2093,7 +2066,7 @@ class TelegramAdapter(BasePlatformAdapter):
            url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
            return _ph(f'[{display}]({url})')

-        text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)
+        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)

        # 4) Convert markdown headers (## Title) → bold *Title*
        def _convert_header(m):
@@ -2353,16 +2326,10 @@ class TelegramAdapter(BasePlatformAdapter):
        DMs remain unrestricted. Group/supergroup messages are accepted when:
        - the chat is explicitly allowlisted in ``free_response_chats``
        - ``require_mention`` is disabled
+        - the message is a command
        - the message replies to the bot
        - the bot is @mentioned
        - the text/caption matches a configured regex wake-word pattern
-
-        When ``require_mention`` is enabled, slash commands are not given
-        special treatment — they must pass the same mention/reply checks
-        as any other group message.  Users can still trigger commands via
-        the Telegram bot menu (``/command@botname``) or by explicitly
-        mentioning the bot (``@botname /command``), both of which are
-        recognised as mentions by :meth:`_message_mentions_bot`.
        """
        if not self._is_group_chat(message):
            return True
@@ -2377,6 +2344,8 @@ class TelegramAdapter(BasePlatformAdapter):
            return True
        if not self._telegram_require_mention():
            return True
+        if is_command:
+            return True
        if self._is_reply_to_bot(message):
            return True
        if self._message_mentions_bot(message):
@@ -2659,23 +2628,6 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception as e:
                logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)

-        elif msg.video:
-            try:
-                file_obj = await msg.video.get_file()
-                video_bytes = await file_obj.download_as_bytearray()
-                ext = ".mp4"
-                if getattr(file_obj, "file_path", None):
-                    for candidate in SUPPORTED_VIDEO_TYPES:
-                        if file_obj.file_path.lower().endswith(candidate):
-                            ext = candidate
-                            break
-                cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
-                event.media_urls = [cached_path]
-                event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")]
-                logger.info("[Telegram] Cached user video at %s", cached_path)
-            except Exception as e:
-                logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True)
-
        # Download document files to cache for agent processing
        elif msg.document:
            doc = msg.document
@@ -2692,21 +2644,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
                    ext = mime_to_ext.get(doc.mime_type, "")

-                if not ext and doc.mime_type:
-                    video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
-                    ext = video_mime_to_ext.get(doc.mime_type, "")
-
-                if ext in SUPPORTED_VIDEO_TYPES:
-                    file_obj = await doc.get_file()
-                    video_bytes = await file_obj.download_as_bytearray()
-                    cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
-                    event.media_urls = [cached_path]
-                    event.media_types = [SUPPORTED_VIDEO_TYPES[ext]]
-                    event.message_type = MessageType.VIDEO
-                    logger.info("[Telegram] Cached user video document at %s", cached_path)
-                    await self.handle_message(event)
-                    return
-
                # Check if supported
                if ext not in SUPPORTED_DOCUMENT_TYPES:
                    supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
@@ -2820,8 +2757,6 @@ class TelegramAdapter(BasePlatformAdapter):
        )

        sticker = msg.sticker
-        if sticker is None:
-            return
        emoji = sticker.emoji or ""
        set_name = sticker.set_name or ""

@@ -2847,11 +2782,13 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.info("[Telegram] Analyzing sticker at %s", cached_path)

            from tools.vision_tools import vision_analyze_tool
+            import json as _json
+
            result_json = await vision_analyze_tool(
                image_url=cached_path,
                user_prompt=STICKER_VISION_PROMPT,
            )
-            result = json.loads(result_json)
+            result = _json.loads(result_json)

            if result.get("success"):
                description = result.get("analysis", "a sticker")
@@ -151,7 +151,7 @@ def _resolve_system_dns() -> set[str]:
    """Return the IPv4 addresses that the OS resolver gives for api.telegram.org."""
    try:
        results = socket.getaddrinfo(_TELEGRAM_API_HOST, 443, socket.AF_INET)
-        return {str(addr[4][0]) for addr in results}
+        return {addr[4][0] for addr in results}
    except Exception:
        return set()

@@ -508,11 +508,6 @@ class WeComAdapter(BasePlatformAdapter):
        self._remember_chat_req_id(chat_id, self._payload_req_id(payload))

        text, reply_text = self._extract_text(body)
-        # Strip leading @mention in group chats so slash commands like
-        # "@BotName /approve" are correctly recognized as "/approve".
-        # Mirrors what the Telegram adapter does (re.sub @botname).
-        if is_group and text:
-            text = re.sub(r"^@\S+\s*", "", text).strip()
        media_urls, media_types = await self._extract_media(body)
        message_type = self._derive_message_type(body, text, media_types)
        has_reply_context = bool(reply_text and (text or media_urls))
@@ -629,16 +624,13 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            _raw_mixed = body.get("mixed")
-            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
-            _raw_items = mixed.get("msg_item")
-            items = _raw_items if isinstance(_raw_items, list) else []
+            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
+            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
                if str(item.get("msgtype") or "").lower() == "text":
-                    _raw_text = item.get("text")
-                    text_block = _raw_text if isinstance(_raw_text, dict) else {}
+                    text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
                    content = str(text_block.get("content") or "").strip()
                    if content:
                        text_parts.append(content)
@@ -680,10 +672,8 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            _raw_mixed = body.get("mixed")
-            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
-            _raw_items = mixed.get("msg_item")
-            items = _raw_items if isinstance(_raw_items, list) else []
+            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
+            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
@@ -703,8 +693,7 @@ class WeComAdapter(BasePlatformAdapter):
                elif isinstance(appmsg.get("image"), dict):
                    refs.append(("image", appmsg["image"]))

-        raw_quote = body.get("quote")
-        quote = raw_quote if isinstance(raw_quote, dict) else {}
+        quote = body.get("quote") if isinstance(body.get("quote"), dict) else {}
        quote_type = str(quote.get("msgtype") or "").lower()
        if quote_type == "image" and isinstance(quote.get("image"), dict):
            refs.append(("image", quote["image"]))
@@ -1470,134 +1459,3 @@ class WeComAdapter(BasePlatformAdapter):
            "name": chat_id,
            "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
        }
-
-
-# ------------------------------------------------------------------
-# QR code scan flow for obtaining bot credentials
-# ------------------------------------------------------------------
-
-_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
-_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
-_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
-_QR_POLL_INTERVAL = 3  # seconds
-_QR_POLL_TIMEOUT = 300  # 5 minutes
-
-
-def qr_scan_for_bot_info(
-    *,
-    timeout_seconds: int = _QR_POLL_TIMEOUT,
-) -> Optional[Dict[str, str]]:
-    """Run the WeCom QR scan flow to obtain bot_id and secret.
-
-    Fetches a QR code from WeCom, renders it in the terminal, and polls
-    until the user scans it or the timeout expires.
-
-    Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
-    failure or timeout.
-
-    Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
-    used here are not part of WeCom's public developer API — they back the
-    admin-console web UI's bot-creation flow and may change without notice.
-    The same pattern is used by the feishu/dingtalk QR setup wizards.
-    """
-    try:
-        import urllib.request
-        import urllib.parse
-    except ImportError:  # pragma: no cover
-        logger.error("urllib is required for WeCom QR scan")
-        return None
-
-    generate_url = f"{_QR_GENERATE_URL}?source=hermes"
-
-    # ── Step 1: Fetch QR code ──
-    print("  Connecting to WeCom...", end="", flush=True)
-    try:
-        req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
-        with urllib.request.urlopen(req, timeout=15) as resp:
-            raw = json.loads(resp.read().decode("utf-8"))
-    except Exception as exc:
-        logger.error("WeCom QR: failed to fetch QR code: %s", exc)
-        print(f" failed: {exc}")
-        return None
-
-    data = raw.get("data") or {}
-    scode = str(data.get("scode") or "").strip()
-    auth_url = str(data.get("auth_url") or "").strip()
-
-    if not scode or not auth_url:
-        logger.error("WeCom QR: unexpected response format: %s", raw)
-        print(" failed: unexpected response format")
-        return None
-
-    print(" done.")
-
-    # ── Step 2: Render QR code in terminal ──
-    print()
-    qr_rendered = False
-    try:
-        import qrcode as _qrcode
-        qr = _qrcode.QRCode()
-        qr.add_data(auth_url)
-        qr.make(fit=True)
-        qr.print_ascii(invert=True)
-        qr_rendered = True
-    except ImportError:
-        pass
-    except Exception:
-        pass
-
-    page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
-    if qr_rendered:
-        print(f"\n  Scan the QR code above, or open this URL directly:\n  {page_url}")
-    else:
-        print(f"  Open this URL in WeCom on your phone:\n\n  {page_url}\n")
-        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
-    print()
-    print("  Fetching configuration results...", end="", flush=True)
-
-    # ── Step 3: Poll for result ──
-    import time
-    deadline = time.time() + timeout_seconds
-    query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
-    poll_count = 0
-
-    while time.time() < deadline:
-        try:
-            req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
-            with urllib.request.urlopen(req, timeout=10) as resp:
-                result = json.loads(resp.read().decode("utf-8"))
-        except Exception as exc:
-            logger.debug("WeCom QR poll error: %s", exc)
-            time.sleep(_QR_POLL_INTERVAL)
-            continue
-
-        poll_count += 1
-        # Print a dot on every poll so progress is visible within 3s.
-        print(".", end="", flush=True)
-
-        result_data = result.get("data") or {}
-        status = str(result_data.get("status") or "").lower()
-
-        if status == "success":
-            print()  # newline after "Fetching configuration results..." dots
-            bot_info = result_data.get("bot_info") or {}
-            bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
-            secret = str(bot_info.get("secret") or "").strip()
-            if bot_id and secret:
-                return {"bot_id": bot_id, "secret": secret}
-            logger.warning(
-                "WeCom QR: scan reported success but bot_info missing or incomplete: %s",
-                result_data,
-            )
-            print(
-                "  QR scan reported success but no bot credentials were returned.\n"
-                "  This usually means the bot was not actually created on the WeCom side.\n"
-                "  Falling back to manual credential entry."
-            )
-            return None
-
-        time.sleep(_QR_POLL_INTERVAL)
-
-    print()  # newline after dots
-    print(f"  QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
-    return None
@@ -25,10 +25,7 @@ import subprocess

 _IS_WINDOWS = platform.system() == "Windows"
 from pathlib import Path
-from typing import Dict, Optional, Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
-    import aiohttp
+from typing import Dict, Optional, Any

 from hermes_constants import get_hermes_dir

@@ -69,37 +66,6 @@ def _kill_port_process(port: int) -> None:
    except Exception:
        pass

-
-def _terminate_bridge_process(proc, *, force: bool = False) -> None:
-    """Terminate the bridge process using process-tree semantics where possible."""
-    if _IS_WINDOWS:
-        cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
-        if force:
-            cmd.append("/F")
-        try:
-            result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=10,
-            )
-        except FileNotFoundError:
-            if force:
-                proc.kill()
-            else:
-                proc.terminate()
-            return
-
-        if result.returncode != 0:
-            details = (result.stderr or result.stdout or "").strip()
-            raise OSError(details or f"taskkill failed for PID {proc.pid}")
-        return
-
-    import signal
-
-    sig = signal.SIGTERM if not force else signal.SIGKILL
-    os.killpg(os.getpgid(proc.pid), sig)
-
 import sys
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

@@ -152,10 +118,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
    - bridge_script: Path to the Node.js bridge script
    - bridge_port: Port for HTTP communication (default: 3000)
    - session_path: Path to store WhatsApp session data
-    - dm_policy: "open" | "allowlist" | "disabled" — how DMs are handled (default: "open")
-    - allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
-    - group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open")
-    - group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
    """
    
    # WhatsApp message limits — practical UX limit, not protocol max.
@@ -178,10 +140,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
            get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
        ))
        self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
-        self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
-        self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
-        self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
-        self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
        self._mention_patterns = self._compile_mention_patterns()
        self._message_queue: asyncio.Queue = asyncio.Queue()
        self._bridge_log_fh = None
@@ -205,33 +163,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

-    @staticmethod
-    def _coerce_allow_list(raw) -> set[str]:
-        """Parse allow_from / group_allow_from from config or env var."""
-        if raw is None:
-            return set()
-        if isinstance(raw, list):
-            return {str(part).strip() for part in raw if str(part).strip()}
-        return {part.strip() for part in str(raw).split(",") if part.strip()}
-
-    def _is_dm_allowed(self, sender_id: str) -> bool:
-        """Check whether a DM from the given sender should be processed."""
-        if self._dm_policy == "disabled":
-            return False
-        if self._dm_policy == "allowlist":
-            return sender_id in self._allow_from
-        # "open" — all DMs allowed
-        return True
-
-    def _is_group_allowed(self, chat_id: str) -> bool:
-        """Check whether a group chat should be processed."""
-        if self._group_policy == "disabled":
-            return False
-        if self._group_policy == "allowlist":
-            return chat_id in self._group_allow_from
-        # "open" — all groups allowed
-        return True
-
    def _compile_mention_patterns(self):
        patterns = self.config.extra.get("mention_patterns")
        if patterns is None:
@@ -324,18 +255,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
        return cleaned.strip() or text

    def _should_process_message(self, data: Dict[str, Any]) -> bool:
-        is_group = data.get("isGroup", False)
-        if is_group:
-            chat_id = str(data.get("chatId") or "")
-            if not self._is_group_allowed(chat_id):
-                return False
-        else:
-            sender_id = str(data.get("senderId") or data.get("from") or "")
-            if not self._is_dm_allowed(sender_id):
-                return False
-            # DMs that pass the policy gate are always processed
+        if not data.get("isGroup"):
            return True
-        # Group messages: check mention / free-response settings
        chat_id = str(data.get("chatId") or "")
        if chat_id in self._whatsapp_free_response_chats():
            return True
@@ -402,6 +323,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
            
            # Check if bridge is already running and connected
            import aiohttp
+            import asyncio
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(
@@ -570,14 +492,22 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Stop the WhatsApp bridge and clean up any orphaned processes."""
        if self._bridge_process:
            try:
+                # Kill the entire process group so child node processes die too
+                import signal
                try:
-                    _terminate_bridge_process(self._bridge_process, force=False)
+                    if _IS_WINDOWS:
+                        self._bridge_process.terminate()
+                    else:
+                        os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
                except (ProcessLookupError, PermissionError):
                    self._bridge_process.terminate()
                await asyncio.sleep(1)
                if self._bridge_process.poll() is None:
                    try:
-                        _terminate_bridge_process(self._bridge_process, force=True)
+                        if _IS_WINDOWS:
+                            self._bridge_process.kill()
+                        else:
+                            os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
                    except (ProcessLookupError, PermissionError):
                        self._bridge_process.kill()
            except Exception as e:
@@ -843,17 +773,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Send a video natively via bridge — plays inline in WhatsApp."""
        return await self._send_media_to_bridge(chat_id, video_path, "video", caption)

-    async def send_voice(
-        self,
-        chat_id: str,
-        audio_path: str,
-        caption: Optional[str] = None,
-        reply_to: Optional[str] = None,
-        **kwargs,
-    ) -> SendResult:
-        """Send an audio file as a WhatsApp voice message via bridge."""
-        return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption)
-
    async def send_document(
        self,
        chat_id: str,
@@ -30,8 +30,6 @@ from pathlib import Path
 from datetime import datetime
 from typing import Dict, Optional, Any, List

-from agent.account_usage import fetch_account_usage, render_account_usage_lines
-
 # --- Agent cache tuning ---------------------------------------------------
 # Bounds the per-session AIAgent cache to prevent unbounded growth in
 # long-lived gateways (each AIAgent holds LLM clients, tool schemas,
@@ -88,7 +86,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent))

 # Resolve Hermes home directory (respects HERMES_HOME override)
 from hermes_constants import get_hermes_home
-from utils import atomic_yaml_write, base_url_host_matches, is_truthy_value
+from utils import atomic_yaml_write, is_truthy_value
 _hermes_home = get_hermes_home()

 # Load environment variables from ~/.hermes/.env first.
@@ -281,7 +279,6 @@ from gateway.session import (
    build_session_context,
    build_session_context_prompt,
    build_session_key,
-    is_shared_multi_user_session,
 )
 from gateway.delivery import DeliveryRouter
 from gateway.platforms.base import (
@@ -710,26 +707,7 @@ class GatewayRunner:
            self._session_db = SessionDB()
        except Exception as e:
            logger.debug("SQLite session store not available: %s", e)
-
-        # Opportunistic state.db maintenance: prune ended sessions older
-        # than sessions.retention_days + optional VACUUM. Tracks last-run
-        # in state_meta so it only actually executes once per
-        # sessions.min_interval_hours.  Gateway is long-lived so blocking
-        # a few seconds once per day is acceptable; failures are logged
-        # but never raised.
-        if self._session_db is not None:
-            try:
-                from hermes_cli.config import load_config as _load_full_config
-                _sess_cfg = (_load_full_config().get("sessions") or {})
-                if _sess_cfg.get("auto_prune", False):
-                    self._session_db.maybe_auto_prune_and_vacuum(
-                        retention_days=int(_sess_cfg.get("retention_days", 90)),
-                        min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)),
-                        vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)),
-                    )
-            except Exception as exc:
-                logger.debug("state.db auto-maintenance skipped: %s", exc)
-
+        
        # DM pairing store for code-based user authorization
        from gateway.pairing import PairingStore
        self.pairing_store = PairingStore()
@@ -1288,6 +1266,7 @@ class GatewayRunner:
        the prefill_messages_file key in ~/.hermes/config.yaml.
        Relative paths are resolved from ~/.hermes/.
        """
+        import json as _json
        file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "")
        if not file_path:
            try:
@@ -1309,7 +1288,7 @@ class GatewayRunner:
            return []
        try:
            with open(path, "r", encoding="utf-8") as f:
-                data = json.load(f)
+                data = _json.load(f)
            if not isinstance(data, list):
                logger.warning("Prefill messages file must contain a JSON array: %s", path)
                return []
@@ -1688,32 +1667,12 @@ class GatewayRunner:

        notified: set = set()
        for session_key in active:
-            source = None
-            try:
-                if getattr(self, "session_store", None) is not None:
-                    self.session_store._ensure_loaded()
-                    entry = self.session_store._entries.get(session_key)
-                    source = getattr(entry, "origin", None) if entry else None
-            except Exception as e:
-                logger.debug(
-                    "Failed to load session origin for shutdown notification %s: %s",
-                    session_key,
-                    e,
-                )
-
-            if source is not None:
-                platform_str = source.platform.value
-                chat_id = source.chat_id
-                thread_id = source.thread_id
-            else:
-                # Fall back to parsing the session key when no persisted
-                # origin is available (legacy sessions/tests).
-                _parsed = _parse_session_key(session_key)
-                if not _parsed:
-                    continue
-                platform_str = _parsed["platform"]
-                chat_id = _parsed["chat_id"]
-                thread_id = _parsed.get("thread_id")
+            # Parse platform + chat_id from the session key.
+            _parsed = _parse_session_key(session_key)
+            if not _parsed:
+                continue
+            platform_str = _parsed["platform"]
+            chat_id = _parsed["chat_id"]

            # Deduplicate: one notification per chat, even if multiple
            # sessions (different users/threads) share the same chat.
@@ -1729,6 +1688,7 @@ class GatewayRunner:

                # Include thread_id if present so the message lands in the
                # correct forum topic / thread.
+                thread_id = _parsed.get("thread_id")
                metadata = {"thread_id": thread_id} if thread_id else None

                await adapter.send(chat_id, msg, metadata=metadata)
@@ -1981,39 +1941,6 @@ class GatewayRunner:
                "or configure platform allowlists (e.g., TELEGRAM_ALLOWED_USERS=your_id)."
            )
        
-        # Discover Python plugins before shell hooks so plugin block
-        # decisions take precedence in tie cases.  The CLI startup path
-        # does this via an explicit call in hermes_cli/main.py; the
-        # gateway lazily imports run_agent inside per-request handlers,
-        # so the discover_plugins() side-effect in model_tools.py is NOT
-        # guaranteed to have run by the time we reach this point.
-        try:
-            from hermes_cli.plugins import discover_plugins
-            discover_plugins()
-        except Exception:
-            logger.debug(
-                "plugin discovery failed at gateway startup", exc_info=True,
-            )
-
-        # Register declarative shell hooks from cli-config.yaml.  Gateway
-        # has no TTY, so consent has to come from one of the three opt-in
-        # channels (--accept-hooks on launch, HERMES_ACCEPT_HOOKS env var,
-        # or hooks_auto_accept: true in config.yaml).  We pass
-        # accept_hooks=False here and let register_from_config resolve
-        # the effective value from env + config itself — the CLI-side
-        # registration already honored --accept-hooks, and re-reading
-        # hooks_auto_accept here would just duplicate that lookup.
-        # Failures are logged but must never block gateway startup.
-        try:
-            from hermes_cli.config import load_config
-            from agent.shell_hooks import register_from_config
-            register_from_config(load_config(), accept_hooks=False)
-        except Exception:
-            logger.debug(
-                "shell-hook registration failed at gateway startup",
-                exc_info=True,
-            )
-
        # Discover and load event hooks
        self.hooks.discover_and_load()
        
@@ -2687,9 +2614,8 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug("SessionDB close error: %s", _e)

-            from gateway.status import remove_pid_file, release_gateway_runtime_lock
+            from gateway.status import remove_pid_file
            remove_pid_file()
-            release_gateway_runtime_lock()

            # Write a clean-shutdown marker so the next startup knows this
            # wasn't a crash.  suspend_recently_active() only needs to run
@@ -2859,12 +2785,10 @@ class GatewayRunner:
            return MatrixAdapter(config)

        elif platform == Platform.API_SERVER:
-            try:
-                import aiohttp  # noqa: F401
-            except ImportError:
+            from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
+            if not check_api_server_requirements():
                logger.warning("API Server: aiohttp not installed")
                return None
-            from gateway.platforms.api_server import APIServerAdapter
            return APIServerAdapter(config)

        elif platform == Platform.WEBHOOK:
@@ -3299,9 +3223,10 @@ class GatewayRunner:
                    return "Usage: /queue <prompt>"
                adapter = self.adapters.get(source.platform)
                if adapter:
-                    queued_event = MessageEvent(
+                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+                    queued_event = _ME(
                        text=queued_text,
-                        message_type=MessageType.TEXT,
+                        message_type=_MT.TEXT,
                        source=event.source,
                        message_id=event.message_id,
                        channel_prompt=event.channel_prompt,
@@ -3323,9 +3248,10 @@ class GatewayRunner:
                    # Agent hasn't started yet — queue as turn-boundary fallback.
                    adapter = self.adapters.get(source.platform)
                    if adapter:
-                        queued_event = MessageEvent(
+                        from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+                        queued_event = _ME(
                            text=steer_text,
-                            message_type=MessageType.TEXT,
+                            message_type=_MT.TEXT,
                            source=event.source,
                            message_id=event.message_id,
                            channel_prompt=event.channel_prompt,
@@ -3345,9 +3271,10 @@ class GatewayRunner:
                # Running agent is missing or lacks steer() — fall back to queue.
                adapter = self.adapters.get(source.platform)
                if adapter:
-                    queued_event = MessageEvent(
+                    from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT
+                    queued_event = _ME(
                        text=steer_text,
-                        message_type=MessageType.TEXT,
+                        message_type=_MT.TEXT,
                        source=event.source,
                        message_id=event.message_id,
                        channel_prompt=event.channel_prompt,
@@ -3488,72 +3415,22 @@ class GatewayRunner:

        # Check for commands
        command = event.get_command()
-
-        from hermes_cli.commands import (
-            GATEWAY_KNOWN_COMMANDS,
-            is_gateway_known_command,
-            resolve_command as _resolve_cmd,
-        )
-
-        # Resolve aliases to canonical name so dispatch and hook names
-        # don't depend on the exact alias the user typed.
-        _cmd_def = _resolve_cmd(command) if command else None
-        canonical = _cmd_def.name if _cmd_def else command
-
-        # Fire the ``command:<canonical>`` hook for any recognized slash
-        # command — built-in OR plugin-registered. Handlers can return a
-        # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}``
-        # to intercept dispatch before core handling runs. This replaces
-        # the previous fire-and-forget emit(): return values are now
-        # honored, but handlers that return nothing behave exactly as
-        # before (telemetry-style hooks keep working).
-        if command and is_gateway_known_command(canonical):
-            raw_args = event.get_command_args().strip()
-            hook_ctx = {
+        
+        # Emit command:* hook for any recognized slash command.
+        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
+        # in hermes_cli/commands.py — no hardcoded set to maintain here.
+        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
+        if command and command in GATEWAY_KNOWN_COMMANDS:
+            await self.hooks.emit(f"command:{command}", {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
-                "command": canonical,
-                "raw_command": command,
-                "args": raw_args,
-                "raw_args": raw_args,
-            }
-            try:
-                hook_results = await self.hooks.emit_collect(
-                    f"command:{canonical}", hook_ctx
-                )
-            except Exception as _hook_err:
-                logger.debug(
-                    "command:%s hook dispatch failed (non-fatal): %s",
-                    canonical, _hook_err,
-                )
-                hook_results = []
+                "command": command,
+                "args": event.get_command_args().strip(),
+            })

-            for hook_result in hook_results:
-                if not isinstance(hook_result, dict):
-                    continue
-                decision = str(hook_result.get("decision", "")).strip().lower()
-                if not decision or decision == "allow":
-                    continue
-                if decision == "deny":
-                    message = hook_result.get("message")
-                    if isinstance(message, str) and message:
-                        return message
-                    return f"Command `/{command}` was blocked by a hook."
-                if decision == "handled":
-                    message = hook_result.get("message")
-                    return message if isinstance(message, str) and message else None
-                if decision == "rewrite":
-                    new_command = str(
-                        hook_result.get("command_name", "")
-                    ).strip().lstrip("/")
-                    if not new_command:
-                        continue
-                    new_args = str(hook_result.get("raw_args", "")).strip()
-                    event.text = f"/{new_command} {new_args}".strip()
-                    command = event.get_command()
-                    _cmd_def = _resolve_cmd(command) if command else None
-                    canonical = _cmd_def.name if _cmd_def else command
-                    break
+        # Resolve aliases to canonical name so dispatch only checks canonicals.
+        _cmd_def = _resolve_cmd(command) if command else None
+        canonical = _cmd_def.name if _cmd_def else command

        if canonical == "new":
            return await self._handle_reset_command(event)
@@ -3746,8 +3623,9 @@ class GatewayRunner:
                plugin_handler = get_plugin_command_handler(command.replace("_", "-"))
                if plugin_handler:
                    user_args = event.get_command_args().strip()
+                    import asyncio as _aio
                    result = plugin_handler(user_args)
-                    if asyncio.iscoroutine(result):
+                    if _aio.iscoroutine(result):
                        result = await result
                    return str(result) if result else None
            except Exception as e:
@@ -3864,12 +3742,12 @@ class GatewayRunner:
        history = history or []
        message_text = event.text or ""

-        _is_shared_multi_user = is_shared_multi_user_session(
-            source,
-            group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
-            thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
+        _is_shared_thread = (
+            source.chat_type != "dm"
+            and source.thread_id
+            and not getattr(self.config, "thread_sessions_per_user", False)
        )
-        if _is_shared_multi_user and source.user_name:
+        if _is_shared_thread and source.user_name:
            message_text = f"[{source.user_name}] {message_text}"

        if event.media_urls:
@@ -3929,7 +3807,9 @@ class GatewayRunner:
            for i, path in enumerate(event.media_urls):
                mtype = event.media_types[i] if i < len(event.media_types) else ""
                if mtype in ("", "application/octet-stream"):
-                    _ext = os.path.splitext(path)[1].lower()
+                    import os as _os2
+
+                    _ext = _os2.path.splitext(path)[1].lower()
                    if _ext in _TEXT_EXTENSIONS:
                        mtype = "text/plain"
                    else:
@@ -3939,10 +3819,13 @@ class GatewayRunner:
                if not mtype.startswith(("application/", "text/")):
                    continue

-                basename = os.path.basename(path)
+                import os as _os
+                import re as _re
+
+                basename = _os.path.basename(path)
                parts = basename.split("_", 2)
                display_name = parts[2] if len(parts) >= 3 else basename
-                display_name = re.sub(r'[^\w.\- ]', '_', display_name)
+                display_name = _re.sub(r'[^\w.\- ]', '_', display_name)

                if mtype.startswith("text/"):
                    context_note = (
@@ -3959,14 +3842,14 @@ class GatewayRunner:
                message_text = f"{context_note}\n\n{message_text}"

        if getattr(event, "reply_to_text", None) and event.reply_to_message_id:
-            # Always inject the reply-to pointer — even when the quoted text
-            # already appears in history. The prefix isn't deduplication, it's
-            # disambiguation: it tells the agent *which* prior message the user
-            # is referencing. History can contain the same or similar text
-            # multiple times, and without an explicit pointer the agent has to
-            # guess (or answer for both subjects). Token overhead is minimal.
            reply_snippet = event.reply_to_text[:500]
-            message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
+            found_in_history = any(
+                reply_snippet[:200] in (msg.get("content") or "")
+                for msg in history
+                if msg.get("role") in ("assistant", "user", "tool")
+            )
+            if not found_in_history:
+                message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'

        if "@" in message_text:
            try:
@@ -3974,11 +3857,9 @@ class GatewayRunner:
                from agent.model_metadata import get_model_context_length

                _msg_cwd = os.environ.get("TERMINAL_CWD", os.path.expanduser("~"))
-                _msg_runtime = _resolve_runtime_agent_kwargs()
                _msg_ctx_len = get_model_context_length(
                    self._model,
-                    base_url=self._base_url or _msg_runtime.get("base_url") or "",
-                    api_key=_msg_runtime.get("api_key") or "",
+                    base_url=self._base_url or "",
                )
                _ctx_result = await preprocess_context_references_async(
                    message_text,
@@ -4431,10 +4312,9 @@ class GatewayRunner:
        # is speaking, without needing a separate tool call.
        # -----------------------------------------------------------------
        if source.platform == Platform.DISCORD:
-            from gateway.platforms.discord import DiscordAdapter
            adapter = self.adapters.get(Platform.DISCORD)
            guild_id = self._get_guild_id(event)
-            if guild_id and isinstance(adapter, DiscordAdapter):
+            if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
                vc_context = adapter.get_voice_channel_context(guild_id)
                if vc_context:
                    context_prompt += f"\n\n{vc_context}"
@@ -4974,11 +4854,6 @@ class GatewayRunner:
        # the configured default instead of the previously switched model.
        self._session_model_overrides.pop(session_key, None)

-        # Clear session-scoped dangerous-command approvals and /yolo state.
-        # /new is a conversation-boundary operation — approval state from the
-        # previous conversation must not survive the reset.
-        self._clear_session_boundary_security_state(session_key)
-
        # Fire plugin on_session_finalize hook (session boundary)
        try:
            from hermes_cli.plugins import invoke_hook as _invoke_hook
@@ -5246,6 +5121,7 @@ class GatewayRunner:
        # Save the requester's routing info so the new gateway process can
        # notify them once it comes back online.
        try:
+            import json as _json
            notify_data = {
                "platform": event.source.platform.value if event.source.platform else None,
                "chat_id": event.source.chat_id,
@@ -5253,7 +5129,7 @@ class GatewayRunner:
            if event.source.thread_id:
                notify_data["thread_id"] = event.source.thread_id
            (_hermes_home / ".restart_notify.json").write_text(
-                json.dumps(notify_data)
+                _json.dumps(notify_data)
            )
        except Exception as e:
            logger.debug("Failed to write restart notify file: %s", e)
@@ -5264,14 +5140,16 @@ class GatewayRunner:
        # marker persists so the new gateway can still detect a delayed
        # /restart redelivery from Telegram.  Overwritten on every /restart.
        try:
+            import json as _json
+            import time as _time
            dedup_data = {
                "platform": event.source.platform.value if event.source.platform else None,
-                "requested_at": time.time(),
+                "requested_at": _time.time(),
            }
            if event.platform_update_id is not None:
                dedup_data["update_id"] = event.platform_update_id
            (_hermes_home / ".restart_last_processed.json").write_text(
-                json.dumps(dedup_data)
+                _json.dumps(dedup_data)
            )
        except Exception as e:
            logger.debug("Failed to write restart dedup marker: %s", e)
@@ -5319,10 +5197,12 @@ class GatewayRunner:
            return False

        try:
+            import json as _json
+            import time as _time
            marker_path = _hermes_home / ".restart_last_processed.json"
            if not marker_path.exists():
                return False
-            data = json.loads(marker_path.read_text())
+            data = _json.loads(marker_path.read_text())
        except Exception:
            return False

@@ -5336,7 +5216,7 @@ class GatewayRunner:
        # swallow a fresh /restart from the user.
        requested_at = data.get("requested_at")
        if isinstance(requested_at, (int, float)):
-            if time.time() - requested_at > 300:
+            if _time.time() - requested_at > 300:
                return False
        return event.platform_update_id <= recorded_uid

@@ -5487,7 +5367,6 @@ class GatewayRunner:
                try:
                    providers = list_authenticated_providers(
                        current_provider=current_provider,
-                        current_base_url=current_base_url,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                        max_models=50,
@@ -5599,7 +5478,6 @@ class GatewayRunner:
            try:
                providers = list_authenticated_providers(
                    current_provider=current_provider,
-                    current_base_url=current_base_url,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=5,
@@ -5729,7 +5607,7 @@ class GatewayRunner:

        # Cache notice
        cache_enabled = (
-            (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower())
+            ("openrouter" in (result.base_url or "").lower() and "claude" in result.new_model.lower())
            or result.api_mode == "anthropic_messages"
        )
        if cache_enabled:
@@ -5877,7 +5755,7 @@ class GatewayRunner:
        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities)
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
    
-    async def _handle_retry_command(self, event: MessageEvent) -> Optional[str]:
+    async def _handle_retry_command(self, event: MessageEvent) -> str:
        """Handle /retry command - re-send the last user message."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
@@ -6027,10 +5905,9 @@ class GatewayRunner:
                "all": "TTS (voice reply to all messages)",
            }
            # Append voice channel info if connected
-            from gateway.platforms.discord import DiscordAdapter
            adapter = self.adapters.get(event.source.platform)
            guild_id = self._get_guild_id(event)
-            if guild_id and isinstance(adapter, DiscordAdapter):
+            if guild_id and hasattr(adapter, "get_voice_channel_info"):
                info = adapter.get_voice_channel_info(guild_id)
                if info:
                    lines = [
@@ -6061,9 +5938,8 @@ class GatewayRunner:

    async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
        """Join the user's current Discord voice channel."""
-        from gateway.platforms.discord import DiscordAdapter
        adapter = self.adapters.get(event.source.platform)
-        if not isinstance(adapter, DiscordAdapter):
+        if not hasattr(adapter, "join_voice_channel"):
            return "Voice channels are not supported on this platform."

        guild_id = self._get_guild_id(event)
@@ -6078,8 +5954,10 @@ class GatewayRunner:

        # Wire callbacks BEFORE join so voice input arriving immediately
        # after connection is not lost.
-        adapter._voice_input_callback = self._handle_voice_channel_input
-        adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
+        if hasattr(adapter, "_voice_input_callback"):
+            adapter._voice_input_callback = self._handle_voice_channel_input
+        if hasattr(adapter, "_on_voice_disconnect"):
+            adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup

        try:
            success = await adapter.join_voice_channel(voice_channel)
@@ -6096,7 +5974,8 @@ class GatewayRunner:

        if success:
            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
-            adapter._voice_sources[guild_id] = event.source.to_dict()
+            if hasattr(adapter, "_voice_sources"):
+                adapter._voice_sources[guild_id] = event.source.to_dict()
            self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
            self._save_voice_modes()
            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
@@ -6110,14 +5989,13 @@ class GatewayRunner:

    async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
        """Leave the Discord voice channel."""
-        from gateway.platforms.discord import DiscordAdapter
        adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)

-        if not guild_id or not isinstance(adapter, DiscordAdapter):
+        if not guild_id or not hasattr(adapter, "leave_voice_channel"):
            return "Not in a voice channel."

-        if not adapter.is_in_voice_channel(guild_id):
+        if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
            return "Not in a voice channel."

        try:
@@ -6128,7 +6006,8 @@ class GatewayRunner:
        self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "off"
        self._save_voice_modes()
        self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True)
-        adapter._voice_input_callback = None
+        if hasattr(adapter, "_voice_input_callback"):
+            adapter._voice_input_callback = None
        return "Left voice channel."

    def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
@@ -6288,13 +6167,13 @@ class GatewayRunner:
            adapter = self.adapters.get(event.source.platform)

            # If connected to a voice channel, play there instead of sending a file
-            from gateway.platforms.discord import DiscordAdapter
            guild_id = self._get_guild_id(event)
            if (guild_id
-                    and isinstance(adapter, DiscordAdapter)
+                    and hasattr(adapter, "play_in_voice_channel")
+                    and hasattr(adapter, "is_in_voice_channel")
                    and adapter.is_in_voice_channel(guild_id)):
                await adapter.play_in_voice_channel(guild_id, actual_path)
-            elif adapter:
+            elif adapter and hasattr(adapter, "send_voice"):
                send_kwargs: Dict[str, Any] = {
                    "chat_id": event.source.chat_id,
                    "audio_path": actual_path,
@@ -6535,11 +6414,6 @@ class GatewayRunner:
                    session_id=task_id,
                    platform=platform_key,
                    user_id=source.user_id,
-                    user_name=source.user_name,
-                    chat_id=source.chat_id,
-                    chat_name=source.chat_name,
-                    chat_type=source.chat_type,
-                    thread_id=source.thread_id,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
                )
@@ -7226,7 +7100,6 @@ class GatewayRunner:
        new_entry = self.session_store.switch_session(session_key, target_id)
        if not new_entry:
            return "Failed to switch session."
-        self._clear_session_boundary_security_state(session_key)

        # Get the title for confirmation
        title = self._session_db.get_session_title(target_id) or name
@@ -7301,7 +7174,6 @@ class GatewayRunner:
                    tool_calls=msg.get("tool_calls"),
                    tool_call_id=msg.get("tool_call_id"),
                    reasoning=msg.get("reasoning"),
-                    reasoning_content=msg.get("reasoning_content"),
                )
            except Exception:
                pass  # Best-effort copy
@@ -7316,7 +7188,6 @@ class GatewayRunner:
        new_entry = self.session_store.switch_session(session_key, new_session_id)
        if not new_entry:
            return "Branch created but failed to switch to it."
-        self._clear_session_boundary_security_state(session_key)

        # Evict any cached agent for this session
        self._evict_cached_agent(session_key)
@@ -7351,38 +7222,6 @@ class GatewayRunner:
                    if cached:
                        agent = cached[0]

-        # Resolve provider/base_url/api_key for the account-usage fetch.
-        # Prefer the live agent; fall back to persisted billing data on the
-        # SessionDB row so `/usage` still returns account info between turns
-        # when no agent is resident.
-        provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
-        base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
-        api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
-        if not provider and getattr(self, "_session_db", None) is not None:
-            try:
-                _entry_for_billing = self.session_store.get_or_create_session(source)
-                persisted = self._session_db.get_session(_entry_for_billing.session_id) or {}
-            except Exception:
-                persisted = {}
-            provider = provider or persisted.get("billing_provider")
-            base_url = base_url or persisted.get("billing_base_url")
-
-        # Fetch account usage off the event loop so slow provider APIs don't
-        # block the gateway. Failures are non-fatal -- account_lines stays [].
-        account_lines: list[str] = []
-        if provider:
-            try:
-                account_snapshot = await asyncio.to_thread(
-                    fetch_account_usage,
-                    provider,
-                    base_url=base_url,
-                    api_key=api_key,
-                )
-            except Exception:
-                account_snapshot = None
-            if account_snapshot:
-                account_lines = render_account_usage_lines(account_snapshot, markdown=True)
-
        if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
            lines = []

@@ -7440,10 +7279,6 @@ class GatewayRunner:
            if ctx.compression_count:
                lines.append(f"Compressions: {ctx.compression_count}")

-            if account_lines:
-                lines.append("")
-                lines.extend(account_lines)
-
            return "\n".join(lines)

        # No agent at all -- check session history for a rough count
@@ -7453,26 +7288,23 @@ class GatewayRunner:
            from agent.model_metadata import estimate_messages_tokens_rough
            msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
            approx = estimate_messages_tokens_rough(msgs)
-            lines = [
-                "📊 **Session Info**",
-                f"Messages: {len(msgs)}",
-                f"Estimated context: ~{approx:,} tokens",
-                "_(Detailed usage available after the first agent response)_",
-            ]
-            if account_lines:
-                lines.append("")
-                lines.extend(account_lines)
-            return "\n".join(lines)
-        if account_lines:
-            return "\n".join(account_lines)
+            return (
+                f"📊 **Session Info**\n"
+                f"Messages: {len(msgs)}\n"
+                f"Estimated context: ~{approx:,} tokens\n"
+                f"_(Detailed usage available after the first agent response)_"
+            )
        return "No usage data available for this session."

    async def _handle_insights_command(self, event: MessageEvent) -> str:
        """Handle /insights command -- show usage insights and analytics."""
+        import asyncio as _asyncio
+
        args = event.get_command_args().strip()

        # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash)
-        args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)
+        import re as _re
+        args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args)

        days = 30
        source = None
@@ -7501,7 +7333,7 @@ class GatewayRunner:
            from hermes_state import SessionDB
            from agent.insights import InsightsEngine

-            loop = asyncio.get_running_loop()
+            loop = _asyncio.get_running_loop()

            def _run_insights():
                db = SessionDB()
@@ -7707,14 +7539,13 @@ class GatewayRunner:
        from hermes_cli.debug import (
            _capture_dump, collect_debug_report,
            upload_to_pastebin, _schedule_auto_delete,
-            _GATEWAY_PRIVACY_NOTICE, _best_effort_sweep_expired_pastes,
+            _GATEWAY_PRIVACY_NOTICE,
        )

        loop = asyncio.get_running_loop()

        # Run blocking I/O (dump capture, log reads, uploads) in a thread.
        def _collect_and_upload():
-            _best_effort_sweep_expired_pastes()
            dump_text = _capture_dump()
            report = collect_debug_report(log_lines=200, dump_text=dump_text)

@@ -7860,6 +7691,9 @@ class GatewayRunner:
        the messenger.  The user's next message is intercepted by
        ``_handle_message`` and written to ``.update_response``.
        """
+        import json
+        import re as _re
+
        pending_path = _hermes_home / ".update_pending.json"
        claimed_path = _hermes_home / ".update_pending.claimed.json"
        output_path = _hermes_home / ".update_output.txt"
@@ -7904,7 +7738,7 @@ class GatewayRunner:
            return

        def _strip_ansi(text: str) -> str:
-            return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)
+            return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text)

        bytes_sent = 0
        last_stream_time = loop.time()
@@ -8052,6 +7886,9 @@ class GatewayRunner:
        cannot resolve the adapter (e.g. after a gateway restart where the
        platform hasn't reconnected yet).
        """
+        import json
+        import re as _re
+
        pending_path = _hermes_home / ".update_pending.json"
        claimed_path = _hermes_home / ".update_pending.claimed.json"
        output_path = _hermes_home / ".update_output.txt"
@@ -8097,7 +7934,7 @@ class GatewayRunner:

            if adapter and chat_id:
                # Strip ANSI escape codes for clean display
-                output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
+                output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip()
                if output:
                    if len(output) > 3500:
                        output = "…" + output[-3500:]
@@ -8130,12 +7967,14 @@ class GatewayRunner:

    async def _send_restart_notification(self) -> None:
        """Notify the chat that initiated /restart that the gateway is back."""
+        import json as _json
+
        notify_path = _hermes_home / ".restart_notify.json"
        if not notify_path.exists():
            return

        try:
-            data = json.loads(notify_path.read_text())
+            data = _json.loads(notify_path.read_text())
            platform_str = data.get("platform")
            chat_id = data.get("chat_id")
            thread_id = data.get("thread_id")
@@ -8221,6 +8060,7 @@ class GatewayRunner:
            The enriched message string with vision descriptions prepended.
        """
        from tools.vision_tools import vision_analyze_tool
+        import json as _json

        analysis_prompt = (
            "Describe everything visible in this image in thorough detail. "
@@ -8236,7 +8076,7 @@ class GatewayRunner:
                    image_url=path,
                    user_prompt=analysis_prompt,
                )
-                result = json.loads(result_json)
+                result = _json.loads(result_json)
                if result.get("success"):
                    description = result.get("analysis", "")
                    enriched_parts.append(
@@ -8295,6 +8135,7 @@ class GatewayRunner:
            return disabled_note

        from tools.transcription_tools import transcribe_audio
+        import asyncio

        enriched_parts = []
        for path in audio_paths:
@@ -8430,6 +8271,7 @@ class GatewayRunner:
        if not adapter:
            return
        try:
+            from gateway.platforms.base import MessageEvent, MessageType
            synth_event = MessageEvent(
                text=synth_text,
                message_type=MessageType.TEXT,
@@ -8534,6 +8376,7 @@ class GatewayRunner:
                            break
                    if adapter and source.chat_id:
                        try:
+                            from gateway.platforms.base import MessageEvent, MessageType
                            synth_event = MessageEvent(
                                text=synth_text,
                                message_type=MessageType.TEXT,
@@ -8691,29 +8534,6 @@ class GatewayRunner:
        if hasattr(self, "_busy_ack_ts"):
            self._busy_ack_ts.pop(session_key, None)

-    def _clear_session_boundary_security_state(self, session_key: str) -> None:
-        """Clear approval state that must not survive a real conversation switch."""
-        if not session_key:
-            return
-
-        pending_approvals = getattr(self, "_pending_approvals", None)
-        if isinstance(pending_approvals, dict):
-            pending_approvals.pop(session_key, None)
-
-        try:
-            from tools.approval import clear_session as _clear_approval_session
-        except Exception:
-            return
-
-        try:
-            _clear_approval_session(session_key)
-        except Exception as e:
-            logger.debug(
-                "Failed to clear approval state for session boundary %s: %s",
-                session_key,
-                e,
-            )
-
    def _begin_session_run_generation(self, session_key: str) -> int:
        """Claim a fresh run generation token for ``session_key``.

@@ -9078,6 +8898,7 @@ class GatewayRunner:
        if _streaming_enabled:
            try:
                from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
+                from gateway.config import Platform
                _adapter = self.adapters.get(source.platform)
                if _adapter:
                    _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
@@ -9361,7 +9182,8 @@ class GatewayRunner:
                if args:
                    from agent.display import get_tool_preview_max_len
                    _pl = get_tool_preview_max_len()
-                    args_str = json.dumps(args, ensure_ascii=False, default=str)
+                    import json as _json
+                    args_str = _json.dumps(args, ensure_ascii=False, default=str)
                    # When tool_preview_length is 0 (default), don't truncate
                    # in verbose mode — the user explicitly asked for full
                    # detail.  Platform message-length limits handle the rest.
@@ -9427,7 +9249,8 @@ class GatewayRunner:
            # Skip tool progress for platforms that don't support message
            # editing (e.g. iMessage/BlueBubbles) — each progress update
            # would become a separate message bubble, which is noisy.
-            if type(adapter).edit_message is BasePlatformAdapter.edit_message:
+            from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter
+            if type(adapter).edit_message is _BaseAdapter.edit_message:
                while not progress_queue.empty():
                    try:
                        progress_queue.get_nowait()
@@ -9809,11 +9632,6 @@ class GatewayRunner:
                    session_id=session_id,
                    platform=platform_key,
                    user_id=source.user_id,
-                    user_name=source.user_name,
-                    chat_id=source.chat_id,
-                    chat_name=source.chat_name,
-                    chat_type=source.chat_type,
-                    thread_id=source.thread_id,
                    gateway_session_key=session_key,
                    session_db=self._session_db,
                    fallback_model=self._fallback_model,
@@ -10490,7 +10308,6 @@ class GatewayRunner:
                if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
                    _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)

-                assert _agent_timeout is not None  # narrowed by _idle_secs >= _agent_timeout above
                _timeout_mins = int(_agent_timeout // 60) or 1

                # Construct a user-facing message with diagnostic context.
@@ -10609,7 +10426,7 @@ class GatewayRunner:
                pending = None

            if pending_event or pending:
-                logger.debug("Processing pending message: '%s...'", (pending or "")[:40])
+                logger.debug("Processing pending message: '%s...'", pending[:40] if pending else "")

                # Clear the adapter's interrupt event so the next _run_agent call
                # doesn't immediately re-trigger the interrupt before the new agent
@@ -10628,6 +10445,8 @@ class GatewayRunner:
                    adapter = self.adapters.get(source.platform)
                    if adapter and pending_event:
                        merge_pending_message_event(adapter._pending_messages, session_key, pending_event)
+                    elif adapter and hasattr(adapter, 'queue_message'):
+                        adapter.queue_message(session_key, pending)
                    return result_holder[0] or {"final_response": response, "messages": history}

                was_interrupted = result.get("interrupted")
@@ -10709,7 +10528,7 @@ class GatewayRunner:
                        history=updated_history,
                    )
                    if next_message is None:
-                        return result  # ty: ignore[invalid-return-type]
+                        return result
                    next_message_id = getattr(pending_event, "message_id", None)
                    next_channel_prompt = getattr(pending_event, "channel_prompt", None)

@@ -10879,13 +10698,8 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    from gateway.status import (
-        acquire_gateway_runtime_lock,
-        get_running_pid,
-        release_gateway_runtime_lock,
-        remove_pid_file,
-        terminate_pid,
-    )
+    import time as _time
+    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
@@ -10924,7 +10738,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            for _ in range(20):
                try:
                    os.kill(existing_pid, 0)
-                    time.sleep(0.5)
+                    _time.sleep(0.5)
                except (ProcessLookupError, PermissionError):
                    break  # Process is gone
            else:
@@ -10935,16 +10749,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
                )
                try:
                    terminate_pid(existing_pid, force=True)
-                    time.sleep(0.5)
+                    _time.sleep(0.5)
                except (ProcessLookupError, PermissionError, OSError):
                    pass
            remove_pid_file()
-            # remove_pid_file() is a no-op when the PID doesn't match.
-            # Force-unlink to cover the old-process-crashed case.
-            try:
-                (get_hermes_home() / "gateway.pid").unlink(missing_ok=True)
-            except Exception:
-                pass
            # Clean up any takeover marker the old process didn't consume
            # (e.g. SIGKILL'd before its shutdown handler could read it).
            try:
@@ -11083,37 +10891,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    else:
        logger.info("Skipping signal handlers (not running in main thread).")
    
-    # Claim the PID file BEFORE bringing up any platform adapters.
-    # This closes the --replace race window: two concurrent `gateway run
-    # --replace` invocations both pass the termination-wait above, but
-    # only the winner of the O_CREAT|O_EXCL race below will ever open
-    # Telegram polling, Discord gateway sockets, etc. The loser exits
-    # cleanly before touching any external service.
-    import atexit
-    from gateway.status import write_pid_file, remove_pid_file, get_running_pid
-    _current_pid = get_running_pid()
-    if _current_pid is not None and _current_pid != os.getpid():
-        logger.error(
-            "Another gateway instance (PID %d) started during our startup. "
-            "Exiting to avoid double-running.", _current_pid
-        )
-        return False
-    if not acquire_gateway_runtime_lock():
-        logger.error(
-            "Gateway runtime lock is already held by another instance. Exiting."
-        )
-        return False
-    try:
-        write_pid_file()
-    except FileExistsError:
-        release_gateway_runtime_lock()
-        logger.error(
-            "PID file race lost to another gateway instance. Exiting."
-        )
-        return False
-    atexit.register(remove_pid_file)
-    atexit.register(release_gateway_runtime_lock)
-
    # Start the gateway
    success = await runner.start()
    if not success:
@@ -11123,6 +10900,12 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
        return True
    
+    # Write PID file so CLI can detect gateway is running
+    import atexit
+    from gateway.status import write_pid_file, remove_pid_file
+    write_pid_file()
+    atexit.register(remove_pid_file)
+    
    # Start background cron ticker so scheduled jobs fire automatically.
    # Pass the event loop so cron delivery can use live adapters (E2EE support).
    cron_stop = threading.Event()
@@ -80,7 +80,7 @@ class SessionSource:
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
-    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
+    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
    
@@ -152,7 +152,6 @@ class SessionContext:
    source: SessionSource
    connected_platforms: List[Platform]
    home_channels: Dict[Platform, HomeChannel]
-    shared_multi_user_session: bool = False
    
    # Session metadata
    session_key: str = ""
@@ -167,7 +166,6 @@ class SessionContext:
            "home_channels": {
                p.value: hc.to_dict() for p, hc in self.home_channels.items()
            },
-            "shared_multi_user_session": self.shared_multi_user_session,
            "session_key": self.session_key,
            "session_id": self.session_id,
            "created_at": self.created_at.isoformat() if self.created_at else None,
@@ -242,16 +240,18 @@ def build_session_context_prompt(
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")

    # User identity.
-    # In shared multi-user sessions (shared threads OR shared non-thread groups
-    # when group_sessions_per_user=False), multiple users contribute to the same
-    # conversation.  Don't pin a single user name in the system prompt — it
-    # changes per-turn and would bust the prompt cache.  Instead, note that
-    # this is a multi-user session; individual sender names are prefixed on
-    # each user message by the gateway.
-    if context.shared_multi_user_session:
-        session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
+    # In shared thread sessions (non-DM with thread_id), multiple users
+    # contribute to the same conversation.  Don't pin a single user name
+    # in the system prompt — it changes per-turn and would bust the prompt
+    # cache.  Instead, note that this is a multi-user thread; individual
+    # sender names are prefixed on each user message by the gateway.
+    _is_shared_thread = (
+        context.source.chat_type != "dm"
+        and context.source.thread_id
+    )
+    if _is_shared_thread:
        lines.append(
-            f"**Session type:** {session_label} — messages are prefixed "
+            "**Session type:** Multi-user thread — messages are prefixed "
            "with [sender name]. Multiple users may participate."
        )
    elif context.source.user_name:
@@ -467,27 +467,6 @@ class SessionEntry:
        )


-def is_shared_multi_user_session(
-    source: SessionSource,
-    *,
-    group_sessions_per_user: bool = True,
-    thread_sessions_per_user: bool = False,
-) -> bool:
-    """Return True when a non-DM session is shared across participants.
-
-    Mirrors the isolation rules in :func:`build_session_key`:
-      - DMs are never shared.
-      - Threads are shared unless ``thread_sessions_per_user`` is True.
-      - Non-thread group/channel sessions are shared unless
-        ``group_sessions_per_user`` is True (default: True = isolated).
-    """
-    if source.chat_type == "dm":
-        return False
-    if source.thread_id:
-        return not thread_sessions_per_user
-    return not group_sessions_per_user
-
-
 def build_session_key(
    source: SessionSource,
    group_sessions_per_user: bool = True,
@@ -1147,10 +1126,6 @@ class SessionStore:
                    tool_name=message.get("tool_name"),
                    tool_calls=message.get("tool_calls"),
                    tool_call_id=message.get("tool_call_id"),
-                    reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
-                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
-                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
-                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1180,7 +1155,6 @@ class SessionStore:
                        tool_calls=msg.get("tool_calls"),
                        tool_call_id=msg.get("tool_call_id"),
                        reasoning=msg.get("reasoning") if role == "assistant" else None,
-                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                    )
@@ -1264,11 +1238,6 @@ def build_session_context(
        source=source,
        connected_platforms=connected,
        home_channels=home_channels,
-        shared_multi_user_session=is_shared_multi_user_session(
-            source,
-            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
-            thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
-        ),
    )
    
    if session_entry:
@@ -56,12 +56,6 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS
 _SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
 _SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)

-# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
-# don't clobber each other's delivery targets.
-_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET)
-_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
-_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)
-
 _VAR_MAP = {
    "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
    "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
@@ -70,9 +64,6 @@ _VAR_MAP = {
    "HERMES_SESSION_USER_ID": _SESSION_USER_ID,
    "HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
    "HERMES_SESSION_KEY": _SESSION_KEY,
-    "HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
-    "HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
-    "HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
 }


@@ -22,18 +22,11 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

-if sys.platform == "win32":
-    import msvcrt
-else:
-    import fcntl
-
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
-_GATEWAY_LOCK_FILENAME = "gateway.lock"
-_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -42,14 +35,6 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


-def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
-    """Return the path to the runtime gateway lock file."""
-    if pid_path is not None:
-        return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
-    home = get_hermes_home()
-    return home / _GATEWAY_LOCK_FILENAME
-
-
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -136,7 +121,6 @@ def _looks_like_gateway_process(pid: int) -> bool:
        "hermes_cli.main gateway",
        "hermes_cli/main.py gateway",
        "hermes gateway",
-        "hermes-gateway",
        "gateway/run.py",
    )
    return any(pattern in cmdline for pattern in patterns)
@@ -228,160 +212,21 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


-def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
-    return _read_pid_record(lock_path or _get_gateway_lock_path())
-
-
-def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
-    if not record:
-        return None
-    try:
-        return int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        return None
-
-
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
-    """Delete a stale gateway PID file (and its sibling lock metadata).
-
-    Called from ``get_running_pid()`` after the runtime lock has already been
-    confirmed inactive, so the on-disk metadata is known to belong to a dead
-    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
-    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
-    ``--replace`` handoffs), this path force-unlinks both files so the next
-    startup sees a clean slate.
-    """
    if not cleanup_stale:
        return
    try:
-        pid_path.unlink(missing_ok=True)
+        if pid_path == _get_pid_path():
+            remove_pid_file()
+        else:
+            pid_path.unlink(missing_ok=True)
    except Exception:
        pass
-    try:
-        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
-    except Exception:
-        pass
-
-
-def _write_gateway_lock_record(handle) -> None:
-    handle.seek(0)
-    handle.truncate()
-    json.dump(_build_pid_record(), handle)
-    handle.flush()
-    try:
-        os.fsync(handle.fileno())
-    except OSError:
-        pass
-
-
-def _try_acquire_file_lock(handle) -> bool:
-    try:
-        if _IS_WINDOWS:
-            handle.seek(0, os.SEEK_END)
-            if handle.tell() == 0:
-                handle.write("\n")
-                handle.flush()
-            handle.seek(0)
-            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
-        else:
-            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-        return True
-    except (BlockingIOError, OSError):
-        return False
-
-
-def _release_file_lock(handle) -> None:
-    try:
-        if _IS_WINDOWS:
-            handle.seek(0)
-            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
-        else:
-            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
-    except OSError:
-        pass
-
-
-def acquire_gateway_runtime_lock() -> bool:
-    """Claim the cross-process runtime lock for the gateway.
-
-    Unlike the PID file, the lock is owned by the live process itself. If the
-    process dies abruptly, the OS releases the lock automatically.
-    """
-    global _gateway_lock_handle
-    if _gateway_lock_handle is not None:
-        return True
-
-    path = _get_gateway_lock_path()
-    path.parent.mkdir(parents=True, exist_ok=True)
-    handle = open(path, "a+", encoding="utf-8")
-    if not _try_acquire_file_lock(handle):
-        handle.close()
-        return False
-    _write_gateway_lock_record(handle)
-    _gateway_lock_handle = handle
-    return True
-
-
-def release_gateway_runtime_lock() -> None:
-    """Release the gateway runtime lock when owned by this process."""
-    global _gateway_lock_handle
-    handle = _gateway_lock_handle
-    if handle is None:
-        return
-    _gateway_lock_handle = None
-    _release_file_lock(handle)
-    try:
-        handle.close()
-    except OSError:
-        pass
-
-
-def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
-    """Return True when some process currently owns the gateway runtime lock."""
-    global _gateway_lock_handle
-    resolved_lock_path = lock_path or _get_gateway_lock_path()
-    if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
-        return True
-
-    if not resolved_lock_path.exists():
-        return False
-
-    handle = open(resolved_lock_path, "a+", encoding="utf-8")
-    try:
-        if _try_acquire_file_lock(handle):
-            _release_file_lock(handle)
-            return False
-        return True
-    finally:
-        try:
-            handle.close()
-        except OSError:
-            pass


 def write_pid_file() -> None:
-    """Write the current process PID and metadata to the gateway PID file.
-
-    Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
-    invocations race: exactly one process wins and the rest get
-    FileExistsError.
-    """
-    path = _get_pid_path()
-    path.parent.mkdir(parents=True, exist_ok=True)
-    record = json.dumps(_build_pid_record())
-    try:
-        fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
-    except FileExistsError:
-        raise  # Let caller decide: another gateway is racing us
-    try:
-        with os.fdopen(fd, "w", encoding="utf-8") as f:
-            f.write(record)
-    except Exception:
-        try:
-            path.unlink(missing_ok=True)
-        except OSError:
-            pass
-        raise
+    """Write the current process PID and metadata to the gateway PID file."""
+    _write_json_file(_get_pid_path(), _build_pid_record())


 def write_runtime_status(
@@ -496,8 +341,7 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
        if not stale:
            try:
                os.kill(existing_pid, 0)
-            except (ProcessLookupError, PermissionError, OSError):
-                # Windows raises OSError with WinError 87 for invalid pid check
+            except (ProcessLookupError, PermissionError):
                stale = True
            else:
                current_start = _get_process_start_time(existing_pid)
@@ -719,46 +563,35 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
-    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
-    if not lock_active:
+    record = _read_pid_record(resolved_pid_path)
+    if not record:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    primary_record = _read_pid_record(resolved_pid_path)
-    fallback_record = _read_gateway_lock_record(resolved_lock_path)
+    try:
+        pid = int(record["pid"])
+    except (KeyError, TypeError, ValueError):
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-    for record in (primary_record, fallback_record):
-        pid = _pid_from_record(record)
-        if pid is None:
-            continue
+    try:
+        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+    except (ProcessLookupError, PermissionError):
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-        try:
-            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-        except ProcessLookupError:
-            continue
-        except PermissionError:
-            # The process exists but belongs to another user/service scope.
-            # With the runtime lock still held, prefer keeping it visible
-            # rather than deleting the PID file as "stale".
-            if _record_looks_like_gateway(record):
-                return pid
-            continue
-        except OSError:
-            # Windows raises OSError with WinError 87 for an invalid pid
-            # (process is definitely gone). Treat as "process doesn't exist".
-            continue
+    recorded_start = record.get("start_time")
+    current_start = _get_process_start_time(pid)
+    if recorded_start is not None and current_start is not None and current_start != recorded_start:
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-        recorded_start = record.get("start_time")
-        current_start = _get_process_start_time(pid)
-        if recorded_start is not None and current_start is not None and current_start != recorded_start:
-            continue
+    if not _looks_like_gateway_process(pid):
+        if not _record_looks_like_gateway(record):
+            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+            return None

-        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
-            return pid
-
-    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-    return None
+    return pid


 def is_gateway_running(
@@ -72,8 +72,6 @@ DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1"
 DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com"
 DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot"
 DEFAULT_OLLAMA_CLOUD_BASE_URL = "https://ollama.com/v1"
-STEPFUN_STEP_PLAN_INTL_BASE_URL = "https://api.stepfun.ai/step_plan/v1"
-STEPFUN_STEP_PLAN_CN_BASE_URL = "https://api.stepfun.com/step_plan/v1"
 CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann"
 CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token"
 CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120
@@ -170,11 +168,8 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="kimi-coding",
        name="Kimi / Moonshot",
        auth_type="api_key",
-        # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat).
-        # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding
-        # by _resolve_kimi_base_url() below.
        inference_base_url="https://api.moonshot.ai/v1",
-        api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
+        api_key_env_vars=("KIMI_API_KEY",),
        base_url_env_var="KIMI_BASE_URL",
    ),
    "kimi-coding-cn": ProviderConfig(
@@ -184,14 +179,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        inference_base_url="https://api.moonshot.cn/v1",
        api_key_env_vars=("KIMI_CN_API_KEY",),
    ),
-    "stepfun": ProviderConfig(
-        id="stepfun",
-        name="StepFun Step Plan",
-        auth_type="api_key",
-        inference_base_url=STEPFUN_STEP_PLAN_INTL_BASE_URL,
-        api_key_env_vars=("STEPFUN_API_KEY",),
-        base_url_env_var="STEPFUN_BASE_URL",
-    ),
    "arcee": ProviderConfig(
        id="arcee",
        name="Arcee AI",
@@ -214,7 +201,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="api_key",
        inference_base_url="https://api.anthropic.com",
        api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
-        base_url_env_var="ANTHROPIC_BASE_URL",
    ),
    "alibaba": ProviderConfig(
        id="alibaba",
@@ -354,16 +340,10 @@ def get_anthropic_key() -> str:
 # =============================================================================

 # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
-# on api.kimi.com/coding.  Legacy keys from platform.moonshot.ai work on
-# api.moonshot.ai/v1 (the old default).  Auto-detect when user hasn't set
+# on api.kimi.com/coding/v1.  Legacy keys from platform.moonshot.ai work on
+# api.moonshot.ai/v1 (the default).  Auto-detect when user hasn't set
 # KIMI_BASE_URL explicitly.
-#
-# Note: the base URL intentionally has NO /v1 suffix.  The /coding endpoint
-# speaks the Anthropic Messages protocol, and the anthropic SDK appends
-# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages"
-# (the correct target). Using "/coding/v1" here would produce
-# "/coding/v1/v1/messages" (a 404).
-KIMI_CODE_BASE_URL = "https://api.kimi.com/coding"
+KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1"


 def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str:
@@ -768,20 +748,16 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di
    auth_store["active_provider"] = provider_id


-def read_credential_pool() -> Dict[str, Any]:
-    """Return the entire persisted credential pool."""
+def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
+    """Return the persisted credential pool, or one provider slice."""
    auth_store = _load_auth_store()
    pool = auth_store.get("credential_pool")
    if not isinstance(pool, dict):
        pool = {}
-    return dict(pool)
-
-
-def read_provider_credentials(provider_id: str) -> List[Dict[str, Any]]:
-    """Return credential entries for a single provider."""
-    pool = read_credential_pool()
-    entries = pool.get(provider_id)
-    return list(entries) if isinstance(entries, list) else []
+    if provider_id is None:
+        return dict(pool)
+    provider_entries = pool.get(provider_id)
+    return list(provider_entries) if isinstance(provider_entries, list) else []


 def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
@@ -1007,7 +983,6 @@ def resolve_provider(
        "x-ai": "xai", "x.ai": "xai", "grok": "xai",
        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
-        "step": "stepfun", "stepfun-coding-plan": "stepfun",
        "arcee-ai": "arcee", "arceeai": "arcee",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
@@ -3400,7 +3375,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
                )

            from hermes_cli.models import (
-                _PROVIDER_MODELS, get_pricing_for_provider,
+                _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models,
                check_nous_free_tier, partition_nous_models_by_tier,
            )
            model_ids = _PROVIDER_MODELS.get("nous", [])
@@ -3409,6 +3384,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
            unavailable_models: list = []
            if model_ids:
                pricing = get_pricing_for_provider("nous")
+                model_ids = filter_nous_free_models(model_ids, pricing)
                free_tier = check_nous_free_tier()
                if free_tier:
                    model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -152,23 +152,6 @@ def auth_add_command(args) -> None:

    pool = load_pool(provider)

-    # Clear ALL suppressions for this provider — re-adding a credential is
-    # a strong signal the user wants auth re-enabled.  This covers env:*
-    # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
-    # device_code (codex), etc.  One consistent re-engagement pattern.
-    # Matches the Codex device_code re-link pattern that predates this.
-    if not provider.startswith(CUSTOM_POOL_PREFIX):
-        try:
-            from hermes_cli.auth import (
-                _load_auth_store,
-                unsuppress_credential_source,
-            )
-            suppressed = _load_auth_store().get("suppressed_sources", {})
-            for src in list(suppressed.get(provider, []) or []):
-                unsuppress_credential_source(provider, src)
-        except Exception:
-            pass
-
    if requested_type == AUTH_TYPE_API_KEY:
        token = (getattr(args, "api_key", None) or "").strip()
        if not token:
@@ -355,28 +338,71 @@ def auth_remove_command(args) -> None:
        raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
    print(f"Removed {provider} credential #{index} ({removed.label})")

-    # Unified removal dispatch.  Every credential source Hermes reads from
-    # (env vars, external OAuth files, auth.json blocks, custom config)
-    # has a RemovalStep registered in agent.credential_sources.  The step
-    # handles its source-specific cleanup and we centralise suppression +
-    # user-facing output here so every source behaves identically from
-    # the user's perspective.
-    from agent.credential_sources import find_removal_step
-    from hermes_cli.auth import suppress_credential_source
+    # If this was an env-seeded credential, also clear the env var from .env
+    # so it doesn't get re-seeded on the next load_pool() call.
+    if removed.source.startswith("env:"):
+        env_var = removed.source[len("env:"):]
+        if env_var:
+            from hermes_cli.config import remove_env_value
+            cleared = remove_env_value(env_var)
+            if cleared:
+                print(f"Cleared {env_var} from .env")

-    step = find_removal_step(provider, removed.source)
-    if step is None:
-        # Unregistered source — e.g. "manual", which has nothing external
-        # to clean up.  The pool entry is already gone; we're done.
-        return
+    # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
+    # clear the underlying auth store / credential file so it doesn't get
+    # re-seeded on the next load_pool() call.
+    elif provider == "openai-codex" and (
+        removed.source == "device_code" or removed.source.endswith(":device_code")
+    ):
+        # Codex tokens live in TWO places: the Hermes auth store and
+        # ~/.codex/auth.json (the Codex CLI shared file).  On every refresh,
+        # refresh_codex_oauth_pure() writes to both.  So clearing only the
+        # Hermes auth store is not enough — _seed_from_singletons() will
+        # auto-import from ~/.codex/auth.json on the next load_pool() and
+        # the removal is instantly undone.  Mark the source as suppressed
+        # so auto-import is skipped; leave ~/.codex/auth.json untouched so
+        # the Codex CLI itself keeps working.
+        from hermes_cli.auth import (
+            _load_auth_store, _save_auth_store, _auth_store_lock,
+            suppress_credential_source,
+        )
+        with _auth_store_lock():
+            auth_store = _load_auth_store()
+            providers_dict = auth_store.get("providers")
+            if isinstance(providers_dict, dict) and provider in providers_dict:
+                del providers_dict[provider]
+                _save_auth_store(auth_store)
+                print(f"Cleared {provider} OAuth tokens from auth store")
+        suppress_credential_source(provider, "device_code")
+        print("Suppressed openai-codex device_code source — it will not be re-seeded.")
+        print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
+        print("Run `hermes auth add openai-codex` to re-enable if needed.")

-    result = step.remove_fn(provider, removed)
-    for line in result.cleaned:
-        print(line)
-    if result.suppress:
-        suppress_credential_source(provider, removed.source)
-    for line in result.hints:
-        print(line)
+    elif removed.source == "device_code" and provider == "nous":
+        from hermes_cli.auth import (
+            _load_auth_store, _save_auth_store, _auth_store_lock,
+        )
+        with _auth_store_lock():
+            auth_store = _load_auth_store()
+            providers_dict = auth_store.get("providers")
+            if isinstance(providers_dict, dict) and provider in providers_dict:
+                del providers_dict[provider]
+                _save_auth_store(auth_store)
+                print(f"Cleared {provider} OAuth tokens from auth store")
+
+    elif removed.source == "hermes_pkce" and provider == "anthropic":
+        from hermes_constants import get_hermes_home
+        oauth_file = get_hermes_home() / ".anthropic_oauth.json"
+        if oauth_file.exists():
+            oauth_file.unlink()
+            print("Cleared Hermes Anthropic OAuth credentials")
+
+    elif removed.source == "claude_code" and provider == "anthropic":
+        from hermes_cli.auth import suppress_credential_source
+        suppress_credential_source(provider, "claude_code")
+        print("Suppressed claude_code credential — it will not be re-seeded.")
+        print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
+        print("Run `hermes auth add anthropic` to re-enable if needed.")


 def auth_reset_command(args) -> None:
@@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
            state_path = child / state_name
            if state_path.exists():
                kind = "directory" if state_path.is_dir() else "file"
-                rel = state_path.relative_to(source_dir).as_posix()
+                rel = state_path.relative_to(source_dir)
                findings.append((state_path, f"Workspace {kind}: {rel}"))

    return findings
@@ -276,7 +276,7 @@ def _get_ps_exe() -> str | None:
    global _ps_exe
    if _ps_exe is False:
        _ps_exe = _find_powershell()
-    return _ps_exe if isinstance(_ps_exe, str) else None
+    return _ps_exe


 def _windows_has_image() -> bool:
@@ -387,8 +387,6 @@ def _wayland_save(dest: Path) -> bool:

    except FileNotFoundError:
        logger.debug("wl-paste not installed — Wayland clipboard unavailable")
-    except ImportError:
-        raise
    except Exception as e:
        logger.debug("wl-paste clipboard extraction failed: %s", e)
        dest.unlink(missing_ok=True)
@@ -397,17 +395,14 @@ def _wayland_save(dest: Path) -> bool:

 def _convert_to_png(path: Path) -> bool:
    """Convert an image file to PNG in-place (requires Pillow or ImageMagick)."""
+    # Try Pillow first (likely installed in the venv)
    try:
        from PIL import Image
-    except ImportError:
-        raise ImportError(
-            "Pillow is required for clipboard image conversion. "
-            "Install with: pip install hermes-agent[cli]"
-        ) from None
-    try:
        img = Image.open(path)
        img.save(path, "PNG")
        return True
+    except ImportError:
+        pass
    except Exception as e:
        logger.debug("Pillow BMP→PNG conversion failed: %s", e)

@@ -24,6 +24,7 @@ _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
    ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.3-codex", ("gpt-5.2-codex",)),
+    ("gpt-5.3-codex-spark", ("gpt-5.3-codex", "gpt-5.2-codex")),
 ]


@@ -260,26 +260,6 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )


-def is_gateway_known_command(name: str | None) -> bool:
-    """Return True if ``name`` resolves to a gateway-dispatchable slash command.
-
-    This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
-    from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
-    looked up lazily so importing this module never forces plugin
-    discovery. Gateway code uses this to decide whether to emit
-    ``command:<name>`` hooks — plugin commands get the same lifecycle
-    events as built-ins.
-    """
-    if not name:
-        return False
-    if name in GATEWAY_KNOWN_COMMANDS:
-        return True
-    for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
-        if plugin_name == name:
-            return True
-    return False
-
-
 # Commands with explicit Level-2 running-agent handlers in gateway/run.py.
 # Listed here for introspection / tests; semantically a subset of
 # "all resolvable commands" — which is the real bypass set (see
@@ -391,47 +371,12 @@ def gateway_help_lines() -> list[str]:
    return lines


-def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
-    """Yield (name, description, args_hint) tuples for all plugin slash commands.
-
-    Plugin commands are registered via
-    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
-    like ``CommandDef`` entries for gateway surfacing: they appear in the
-    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
-    (via :func:`gateway.platforms.discord._register_slash_commands`) in
-    Discord's native slash command picker.
-
-    Lookup is lazy so importing this module never forces plugin discovery
-    (which can trigger filesystem scans and environment-dependent
-    behavior).
-    """
-    try:
-        from hermes_cli.plugins import get_plugin_commands
-    except Exception:
-        return []
-    try:
-        commands = get_plugin_commands() or {}
-    except Exception:
-        return []
-    entries: list[tuple[str, str, str]] = []
-    for name, meta in commands.items():
-        if not isinstance(name, str) or not isinstance(meta, dict):
-            continue
-        description = str(meta.get("description") or f"Run /{name}")
-        args_hint = str(meta.get("args_hint") or "").strip()
-        entries.append((name, description, args_hint))
-    return entries
-
-
 def telegram_bot_commands() -> list[tuple[str, str]]:
    """Return (command_name, description) pairs for Telegram setMyCommands.

    Telegram command names cannot contain hyphens, so they are replaced with
    underscores.  Aliases are skipped -- Telegram shows one menu entry per
    canonical command.
-
-    Plugin-registered slash commands are included so plugins get native
-    autocomplete in Telegram without touching core code.
    """
    overrides = _resolve_config_gates()
    result: list[tuple[str, str]] = []
@@ -441,10 +386,6 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
        tg_name = _sanitize_telegram_name(cmd.name)
        if tg_name:
            result.append((tg_name, cmd.description))
-    for name, description, _args_hint in _iter_plugin_command_entries():
-        tg_name = _sanitize_telegram_name(name)
-        if tg_name:
-            result.append((tg_name, description))
    return result


@@ -556,8 +497,9 @@ def _collect_gateway_skill_entries(
    # --- Tier 1: Plugin slash commands (never trimmed) ---------------------
    plugin_pairs: list[tuple[str, str]] = []
    try:
-        from hermes_cli.plugins import get_plugin_commands
-        plugin_cmds = get_plugin_commands()
+        from hermes_cli.plugins import get_plugin_manager
+        pm = get_plugin_manager()
+        plugin_cmds = getattr(pm, "_plugin_commands", {})
        for cmd_name in sorted(plugin_cmds):
            name = sanitize_name(cmd_name) if sanitize_name else cmd_name
            if not name:
@@ -809,9 +751,6 @@ def slack_subcommand_map() -> dict[str, str]:

    Maps both canonical names and aliases so /hermes bg do stuff works
    the same as /hermes background do stuff.
-
-    Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
-    routes through the plugin handler.
    """
    overrides = _resolve_config_gates()
    mapping: dict[str, str] = {}
@@ -821,9 +760,6 @@ def slack_subcommand_map() -> dict[str, str]:
        mapping[cmd.name] = f"/{cmd.name}"
        for alias in cmd.aliases:
            mapping[alias] = f"/{alias}"
-    for name, _description, _args_hint in _iter_plugin_command_entries():
-        if name not in mapping:
-            mapping[name] = f"/{name}"
    return mapping


@@ -989,22 +925,12 @@ class SlashCommandCompleter(Completer):
                    display_meta=meta,
                )

-        # If the user typed @file: / @folder: (or just @file / @folder with
-        # no colon yet), delegate to path completions.  Accepting the bare
-        # form lets the picker surface directories as soon as the user has
-        # typed `@folder`, without requiring them to first accept the static
-        # `@folder:` hint and re-trigger completion.
+        # If the user typed @file: or @folder:, delegate to path completions
        for prefix in ("@file:", "@folder:"):
-            bare = prefix[:-1]
-
-            if word == bare or word.startswith(prefix):
-                want_dir = prefix == "@folder:"
-                path_part = '' if word == bare else word[len(prefix):]
+            if word.startswith(prefix):
+                path_part = word[len(prefix):] or "."
                expanded = os.path.expanduser(path_part)
-
-                if not expanded or expanded == ".":
-                    search_dir, match_prefix = ".", ""
-                elif expanded.endswith("/"):
+                if expanded.endswith("/"):
                    search_dir, match_prefix = expanded, ""
                else:
                    search_dir = os.path.dirname(expanded) or "."
@@ -1020,21 +946,15 @@ class SlashCommandCompleter(Completer):
                for entry in sorted(entries):
                    if match_prefix and not entry.lower().startswith(prefix_lower):
                        continue
-                    full_path = os.path.join(search_dir, entry)
-                    is_dir = os.path.isdir(full_path)
-                    # `@folder:` must only surface directories; `@file:` only
-                    # regular files.  Without this filter `@folder:` listed
-                    # every .env / .gitignore in the cwd, defeating the
-                    # explicit prefix and confusing users expecting a
-                    # directory picker.
-                    if want_dir != is_dir:
-                        continue
                    if count >= limit:
                        break
+                    full_path = os.path.join(search_dir, entry)
+                    is_dir = os.path.isdir(full_path)
                    display_path = os.path.relpath(full_path)
                    suffix = "/" if is_dir else ""
+                    kind = "folder" if is_dir else "file"
                    meta = "dir" if is_dir else _file_size_label(full_path)
-                    completion = f"{prefix}{display_path}{suffix}"
+                    completion = f"@{kind}:{display_path}{suffix}"
                    yield Completion(
                        completion,
                        start_position=-len(word),
@@ -13,7 +13,6 @@ This module provides:
 """

 import copy
-import logging
 import os
 import platform
 import re
@@ -25,7 +24,6 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple

-logger = logging.getLogger(__name__)

 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
@@ -387,26 +385,6 @@ DEFAULT_CONFIG = {
        # (terminal and execute_code).  Skill-declared required_environment_variables
        # are passed through automatically; this list is for non-skill use cases.
        "env_passthrough": [],
-        # Extra files to source in the login shell when building the
-        # per-session environment snapshot.  Use this when tools like nvm,
-        # pyenv, asdf, or custom PATH entries are registered by files that
-        # a bash login shell would skip — most commonly ``~/.bashrc``
-        # (bash doesn't source bashrc in non-interactive login mode) or
-        # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
-        # Paths support ``~`` / ``${VAR}``. Missing files are silently
-        # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
-        # snapshot shell is bash (this is the ``auto_source_bashrc``
-        # behaviour — disable with that key if you want strict login-only
-        # semantics).
-        "shell_init_files": [],
-        # When true (default), Hermes sources ``~/.bashrc`` in the login
-        # shell used to build the environment snapshot.  This captures
-        # PATH additions, shell functions, and aliases defined in the
-        # user's bashrc — which a plain ``bash -l -c`` would otherwise
-        # miss because bash skips bashrc in non-interactive login mode.
-        # Turn this off if you have a bashrc that misbehaves when sourced
-        # non-interactively (e.g. one that hard-exits on TTY checks).
-        "auto_source_bashrc": True,
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "docker_forward_env": [],
        # Explicit environment variables to set inside Docker containers.
@@ -613,10 +591,6 @@ DEFAULT_CONFIG = {
    },
    
    # Text-to-speech configuration
-    # Each provider supports an optional `max_text_length:` override for the
-    # per-request input-character cap. Omit it to use the provider's documented
-    # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware,
-    # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000).
    "tts": {
        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local)
        "edge": {
@@ -669,7 +643,6 @@ DEFAULT_CONFIG = {
        "record_key": "ctrl+b",
        "max_recording_seconds": 120,
        "auto_tts": False,
-        "beep_enabled": True,         # Play record start/stop beeps in CLI voice mode
        "silence_threshold": 200,     # RMS below this = silence (0-32767)
        "silence_duration": 3.0,      # Seconds of silence before auto-stop
    },
@@ -712,22 +685,10 @@ DEFAULT_CONFIG = {
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
-        # When delegate_task narrows child toolsets explicitly, preserve any
-        # MCP toolsets the parent already has enabled. On by default so
-        # narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
-        # extras" without silently stripping MCP tools the parent already has.
-        # Set to false for strict intersection.
-        "inherit_mcp_toolsets": True,
        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
                               # independent of the parent's max_iterations)
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
                                 # "low", "minimal", "none" (empty = inherit parent's level)
-        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
-        # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth
-        # and _get_orchestrator_enabled).  Values are clamped to [1, 3] with a
-        # warning log if out of range.
-        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
-        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -740,20 +701,6 @@ DEFAULT_CONFIG = {
    # always goes to ~/.hermes/skills/.
    "skills": {
        "external_dirs": [],   # e.g. ["~/.agents/skills", "/shared/team-skills"]
-        # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
-        # content with the absolute skill directory and the active session id
-        # before the agent sees it.  Lets skill authors reference bundled
-        # scripts without the agent having to join paths.
-        "template_vars": True,
-        # Pre-execute inline shell snippets written as !`cmd` in SKILL.md
-        # body.  Their stdout is inlined into the skill message before the
-        # agent reads it, so skills can inject dynamic context (dates, git
-        # state, detected tool versions, …).  Off by default because any
-        # content from the skill author runs on the host without approval;
-        # only enable for skill sources you trust.
-        "inline_shell": False,
-        # Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
-        "inline_shell_timeout": 10,
    },

    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -824,21 +771,6 @@ DEFAULT_CONFIG = {
    "command_allowlist": [],
    # User-defined quick commands that bypass the agent loop (type: exec only)
    "quick_commands": {},
-
-    # Shell-script hooks — declarative bridge that invokes shell scripts
-    # on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call,
-    # subagent_stop, etc.).  Each entry maps an event name to a list of
-    # {matcher, command, timeout} dicts.  First registration of a new
-    # command prompts the user for consent; subsequent runs reuse the
-    # stored approval from ~/.hermes/shell-hooks-allowlist.json.
-    # See `website/docs/user-guide/features/hooks.md` for schema + examples.
-    "hooks": {},
-
-    # Auto-accept shell-hook registrations without a TTY prompt.  Also
-    # toggleable per-invocation via --accept-hooks or HERMES_ACCEPT_HOOKS=1.
-    # Gateway / cron / non-interactive runs need this (or one of the other
-    # channels) to pick up newly-added hooks.
-    "hooks_auto_accept": False,
    # Custom personalities — add your own entries here
    # Supports string format: {"name": "system prompt"}
    # Or dict format: {"name": {"description": "...", "system_prompt": "...", "tone": "...", "style": "..."}}
@@ -846,7 +778,6 @@ DEFAULT_CONFIG = {

    # Pre-exec security scanning via tirith
    "security": {
-        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
        "redact_secrets": True,
        "tirith_enabled": True,
        "tirith_path": "tirith",
@@ -863,11 +794,6 @@ DEFAULT_CONFIG = {
        # Wrap delivered cron responses with a header (task name) and footer
        # ("The agent cannot see this message").  Set to false for clean output.
        "wrap_response": True,
-        # Maximum number of due jobs to run in parallel per tick.
-        # null/0 = unbounded (limited only by thread count).
-        # 1 = serial (pre-v0.9 behaviour).
-        # Also overridable via HERMES_CRON_MAX_PARALLEL env var.
-        "max_parallel_jobs": None,
    },

    # execute_code settings — controls the tool used for programmatic tool calls.
@@ -900,36 +826,8 @@ DEFAULT_CONFIG = {
        "force_ipv4": False,
    },

-    # Session storage — controls automatic cleanup of ~/.hermes/state.db.
-    # state.db accumulates every session, message, tool call, and FTS5 index
-    # entry forever.  Without auto-pruning, a heavy user (gateway + cron)
-    # reports 384MB+ databases with 68K+ messages, which slows down FTS5
-    # inserts, /resume listing, and insights queries.
-    "sessions": {
-        # When true, prune ended sessions older than retention_days once
-        # per (roughly) min_interval_hours at CLI/gateway/cron startup.
-        # Only touches ended sessions — active sessions are always preserved.
-        # Default false: session history is valuable for search recall, and
-        # silently deleting it could surprise users.  Opt in explicitly.
-        "auto_prune": False,
-        # How many days of ended-session history to keep.  Matches the
-        # default of ``hermes sessions prune``.
-        "retention_days": 90,
-        # VACUUM after a prune that actually deleted rows.  SQLite does not
-        # reclaim disk space on DELETE — freed pages are just reused on
-        # subsequent INSERTs — so without VACUUM the file stays bloated
-        # even after pruning.  VACUUM blocks writes for a few seconds per
-        # 100MB, so it only runs at startup, and only when prune deleted
-        # ≥1 session.
-        "vacuum_after_prune": True,
-        # Minimum hours between auto-maintenance runs (avoids repeating
-        # the sweep on every CLI invocation).  Tracked via state_meta in
-        # state.db itself, so it's shared across all processes.
-        "min_interval_hours": 24,
-    },
-
    # Config schema version - bump this when adding new required fields
-    "_config_version": 22,
+    "_config_version": 20,
 }

 # =============================================================================
@@ -1085,22 +983,6 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
-    "STEPFUN_API_KEY": {
-        "description": "StepFun Step Plan API key",
-        "prompt": "StepFun Step Plan API key",
-        "url": "https://platform.stepfun.com/",
-        "password": True,
-        "category": "provider",
-        "advanced": True,
-    },
-    "STEPFUN_BASE_URL": {
-        "description": "StepFun Step Plan base URL override",
-        "prompt": "StepFun Step Plan base URL (leave empty for default)",
-        "url": None,
-        "password": False,
-        "category": "provider",
-        "advanced": True,
-    },
    "ARCEEAI_API_KEY": {
        "description": "Arcee AI API key",
        "prompt": "Arcee AI API key",
@@ -1904,7 +1786,7 @@ def get_missing_config_fields() -> List[Dict[str, Any]]:
    config = load_config()
    missing = []

-    def _check(defaults: Dict[str, Any], current: Dict[str, Any], prefix: str = ""):
+    def _check(defaults: dict, current: dict, prefix: str = ""):
        for key, default_value in defaults.items():
            if key.startswith('_'):
                continue
@@ -1968,53 +1850,12 @@ def _normalize_custom_provider_entry(
    if not isinstance(entry, dict):
        return None

-    # Accept camelCase aliases commonly used in hand-written configs.
-    _CAMEL_ALIASES: Dict[str, str] = {
-        "apiKey": "api_key",
-        "baseUrl": "base_url",
-        "apiMode": "api_mode",
-        "keyEnv": "key_env",
-        "defaultModel": "default_model",
-        "contextLength": "context_length",
-        "rateLimitDelay": "rate_limit_delay",
-    }
-    _KNOWN_KEYS = {
-        "name", "api", "url", "base_url", "api_key", "key_env",
-        "api_mode", "transport", "model", "default_model", "models",
-        "context_length", "rate_limit_delay",
-    }
-    for camel, snake in _CAMEL_ALIASES.items():
-        if camel in entry and snake not in entry:
-            logger.warning(
-                "providers.%s: camelCase key '%s' auto-mapped to '%s' "
-                "(use snake_case to avoid this warning)",
-                provider_key or "?", camel, snake,
-            )
-            entry[snake] = entry[camel]
-    unknown = set(entry.keys()) - _KNOWN_KEYS - set(_CAMEL_ALIASES.keys())
-    if unknown:
-        logger.warning(
-            "providers.%s: unknown config keys ignored: %s",
-            provider_key or "?", ", ".join(sorted(unknown)),
-        )
-
-    from urllib.parse import urlparse
-
    base_url = ""
-    for url_key in ("base_url", "url", "api"):
+    for url_key in ("api", "url", "base_url"):
        raw_url = entry.get(url_key)
        if isinstance(raw_url, str) and raw_url.strip():
-            candidate = raw_url.strip()
-            parsed = urlparse(candidate)
-            if parsed.scheme and parsed.netloc:
-                base_url = candidate
-                break
-            else:
-                logger.warning(
-                    "providers.%s: '%s' value '%s' is not a valid URL "
-                    "(no scheme or host) — skipped",
-                    provider_key or "?", url_key, candidate,
-                )
+            base_url = raw_url.strip()
+            break
    if not base_url:
        return None

@@ -2055,14 +1896,6 @@ def _normalize_custom_provider_entry(
    models = entry.get("models")
    if isinstance(models, dict) and models:
        normalized["models"] = models
-    elif isinstance(models, list) and models:
-        # Hand-edited configs (and older Hermes versions) write ``models`` as
-        # a plain list of model ids. Preserve them by converting to the dict
-        # shape downstream code expects; otherwise normalize silently drops
-        # the list and /model shows the provider with (0) models.
-        normalized["models"] = {
-            str(m): {} for m in models if isinstance(m, str) and m.strip()
-        }

    context_length = entry.get("context_length")
    if isinstance(context_length, int) and context_length > 0:
@@ -2146,8 +1979,8 @@ def check_config_version() -> Tuple[int, int]:
    Returns (current_version, latest_version).
    """
    config = load_config()
-    current = int(config.get("_config_version", 0))
-    latest = int(DEFAULT_CONFIG.get("_config_version", 1))
+    current = config.get("_config_version", 0)
+    latest = DEFAULT_CONFIG.get("_config_version", 1)
    return current, latest


@@ -2161,7 +1994,6 @@ _KNOWN_ROOT_KEYS = {
    "fallback_providers", "credential_pool_strategies", "toolsets",
    "agent", "terminal", "display", "compression", "delegation",
    "auxiliary", "custom_providers", "context", "memory", "gateway",
-    "sessions",
 }

 # Valid fields inside a custom_providers list entry
@@ -2319,6 +2151,7 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None:
    if not issues:
        return

+    import sys
    lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"]
    for ci in issues:
        marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m"
@@ -2333,6 +2166,7 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non
    These env vars are deprecated — the canonical setting is terminal.cwd
    in config.yaml.  Prints a migration hint to stderr.
    """
+    import os, sys
    messaging_cwd = os.environ.get("MESSAGING_CWD")
    terminal_cwd_env = os.environ.get("TERMINAL_CWD")

@@ -2650,71 +2484,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                    else:
                        print("  ✓ Removed unused compression.summary_* keys")

-    # ── Version 20 → 21: plugins are now opt-in; grandfather existing user plugins ──
-    # The loader now requires plugins to appear in ``plugins.enabled`` before
-    # loading. Existing installs had all discovered plugins loading by default
-    # (minus anything in ``plugins.disabled``). To avoid silently breaking
-    # those setups on upgrade, populate ``plugins.enabled`` with the set of
-    # currently-installed user plugins that aren't already disabled.
-    #
-    # Bundled plugins (shipped in the repo itself) are NOT grandfathered —
-    # they ship off for everyone, including existing users, so any user who
-    # wants one has to opt in explicitly.
-    if current_ver < 21:
-        config = read_raw_config()
-        plugins_cfg = config.get("plugins")
-        if not isinstance(plugins_cfg, dict):
-            plugins_cfg = {}
-        # Only migrate if the enabled allow-list hasn't been set yet.
-        if "enabled" not in plugins_cfg:
-            disabled = plugins_cfg.get("disabled", []) or []
-            if not isinstance(disabled, list):
-                disabled = []
-            disabled_set = set(disabled)
-
-            # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins.
-            grandfathered: List[str] = []
-            try:
-                user_plugins_dir = get_hermes_home() / "plugins"
-                if user_plugins_dir.is_dir():
-                    for child in sorted(user_plugins_dir.iterdir()):
-                        if not child.is_dir():
-                            continue
-                        manifest_file = child / "plugin.yaml"
-                        if not manifest_file.exists():
-                            manifest_file = child / "plugin.yml"
-                        if not manifest_file.exists():
-                            continue
-                        try:
-                            with open(manifest_file) as _mf:
-                                manifest = yaml.safe_load(_mf) or {}
-                        except Exception:
-                            manifest = {}
-                        name = manifest.get("name") or child.name
-                        if name in disabled_set:
-                            continue
-                        grandfathered.append(name)
-            except Exception:
-                grandfathered = []
-
-            plugins_cfg["enabled"] = grandfathered
-            config["plugins"] = plugins_cfg
-            save_config(config)
-            results["config_added"].append(
-                f"plugins.enabled (opt-in allow-list, {len(grandfathered)} grandfathered)"
-            )
-            if not quiet:
-                if grandfathered:
-                    print(
-                        f"  ✓ Plugins now opt-in: grandfathered "
-                        f"{len(grandfathered)} existing plugin(s) into plugins.enabled"
-                    )
-                else:
-                    print(
-                        "  ✓ Plugins now opt-in: no existing plugins to grandfather. "
-                        "Use `hermes plugins enable <name>` to activate."
-                    )
-
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -2867,7 +2636,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
    return results


-def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
+def _deep_merge(base: dict, override: dict) -> dict:
    """Recursively merge *override* into *base*, preserving nested defaults.

    Keys in *override* take precedence. If both values are dicts the merge
@@ -3177,7 +2946,7 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
+    if not fb or not (fb.get("provider") and fb.get("model")):
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -3340,6 +3109,7 @@ def _check_non_ascii_credential(key: str, value: str) -> str:
            bad_chars.append(f"  position {i}: {ch!r} (U+{ord(ch):04X})")
    sanitized = value.encode("ascii", errors="ignore").decode("ascii")

+    import sys
    print(
        f"\n  Warning: {key} contains non-ASCII characters that will break API requests.\n"
        f"  This usually happens when copy-pasting from a PDF, rich-text editor,\n"
@@ -13,7 +13,6 @@ import time
 import urllib.error
 import urllib.parse
 import urllib.request
-from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional

@@ -148,14 +147,6 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
    return (deleted, len(remaining))


-def _best_effort_sweep_expired_pastes() -> None:
-    """Attempt pending-paste cleanup without letting /debug fail offline."""
-    try:
-        _sweep_expired_pastes()
-    except Exception:
-        pass
-
-
 # ---------------------------------------------------------------------------
 # Privacy / delete helpers
 # ---------------------------------------------------------------------------
@@ -323,128 +314,72 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
 # Log file reading
 # ---------------------------------------------------------------------------

-
-@dataclass
-class LogSnapshot:
-    """Single-read snapshot of a log file used by debug-share."""
-
-    path: Optional[Path]
-    tail_text: str
-    full_text: Optional[str]
-
-
-def _primary_log_path(log_name: str) -> Optional[Path]:
-    """Where *log_name* would live if present. Doesn't check existence."""
-    from hermes_cli.logs import LOG_FILES
-
-    filename = LOG_FILES.get(log_name)
-    return (get_hermes_home() / "logs" / filename) if filename else None
-
-
 def _resolve_log_path(log_name: str) -> Optional[Path]:
    """Find the log file for *log_name*, falling back to the .1 rotation.

-    Returns the first non-empty candidate (primary, then .1), or None.
-    Callers distinguish 'empty primary' from 'truly missing' via
-    :func:`_primary_log_path`.
+    Returns the path if found, or None.
    """
-    primary = _primary_log_path(log_name)
-    if primary is None:
+    from hermes_cli.logs import LOG_FILES
+
+    filename = LOG_FILES.get(log_name)
+    if not filename:
        return None

+    log_dir = get_hermes_home() / "logs"
+    primary = log_dir / filename
    if primary.exists() and primary.stat().st_size > 0:
        return primary

-    rotated = primary.parent / f"{primary.name}.1"
+    # Fall back to the most recent rotated file (.1).
+    rotated = log_dir / f"{filename}.1"
    if rotated.exists() and rotated.stat().st_size > 0:
        return rotated

    return None


-def _capture_log_snapshot(
-    log_name: str,
-    *,
-    tail_lines: int,
-    max_bytes: int = _MAX_LOG_BYTES,
-) -> LogSnapshot:
-    """Capture a log once and derive summary/full-log views from it.
+def _read_log_tail(log_name: str, num_lines: int) -> str:
+    """Read the last *num_lines* from a log file, or return a placeholder."""
+    from hermes_cli.logs import _read_last_n_lines

-    The report tail and standalone log upload must come from the same file
-    snapshot. Otherwise a rotation/truncate between reads can make the report
-    look newer than the uploaded ``agent.log`` paste.
+    log_path = _resolve_log_path(log_name)
+    if log_path is None:
+        return "(file not found)"
+
+    try:
+        lines = _read_last_n_lines(log_path, num_lines)
+        return "".join(lines).rstrip("\n")
+    except Exception as exc:
+        return f"(error reading: {exc})"
+
+
+def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
+    """Read a log file for standalone upload.
+
+    Returns the file content (last *max_bytes* if truncated), or None if the
+    file doesn't exist or is empty.
    """
    log_path = _resolve_log_path(log_name)
    if log_path is None:
-        primary = _primary_log_path(log_name)
-        tail = "(file empty)" if primary and primary.exists() else "(file not found)"
-        return LogSnapshot(path=None, tail_text=tail, full_text=None)
+        return None

    try:
        size = log_path.stat().st_size
        if size == 0:
-            # race: file was truncated between _resolve_log_path and stat
-            return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)
+            return None

+        if size <= max_bytes:
+            return log_path.read_text(encoding="utf-8", errors="replace")
+
+        # File is larger than max_bytes — read the tail.
        with open(log_path, "rb") as f:
-            if size <= max_bytes:
-                raw = f.read()
-                truncated = False
-            else:
-                # Read from the end until we have enough bytes for the
-                # standalone upload and enough newline context to render the
-                # summary tail from the same snapshot.
-                chunk_size = 8192
-                pos = size
-                chunks: list[bytes] = []
-                total = 0
-                newline_count = 0
-
-                while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
-                    read_size = min(chunk_size, pos)
-                    pos -= read_size
-                    f.seek(pos)
-                    chunk = f.read(read_size)
-                    chunks.insert(0, chunk)
-                    total += len(chunk)
-                    newline_count += chunk.count(b"\n")
-                    chunk_size = min(chunk_size * 2, 65536)
-
-                raw = b"".join(chunks)
-                truncated = pos > 0
-
-        full_raw = raw
-        if truncated and len(full_raw) > max_bytes:
-            cut = len(full_raw) - max_bytes
-            # Check whether the cut lands exactly on a line boundary.  If the
-            # byte just before the cut position is a newline the first retained
-            # byte starts a complete line and we should keep it.  Only drop a
-            # partial first line when we're genuinely mid-line.
-            on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
-            full_raw = full_raw[cut:]
-            if not on_boundary and b"\n" in full_raw:
-                full_raw = full_raw.split(b"\n", 1)[1]
-
-        all_text = raw.decode("utf-8", errors="replace")
-        tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
-
-        full_text = full_raw.decode("utf-8", errors="replace")
-        if truncated:
-            full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
-
-        return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
-    except Exception as exc:
-        return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
-
-
-def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
-    """Capture all logs used by debug-share exactly once."""
-    errors_lines = min(log_lines, 100)
-    return {
-        "agent": _capture_log_snapshot("agent", tail_lines=log_lines),
-        "errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
-        "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
-    }
+            f.seek(size - max_bytes)
+            # Skip partial line at the seek point.
+            f.readline()
+            content = f.read().decode("utf-8", errors="replace")
+        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
+    except Exception:
+        return None


 # ---------------------------------------------------------------------------
@@ -470,12 +405,7 @@ def _capture_dump() -> str:
    return capture.getvalue()


-def collect_debug_report(
-    *,
-    log_lines: int = 200,
-    dump_text: str = "",
-    log_snapshots: Optional[dict[str, LogSnapshot]] = None,
-) -> str:
+def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
    """Build the summary debug report: system dump + log tails.

    Parameters
@@ -494,22 +424,19 @@ def collect_debug_report(
        dump_text = _capture_dump()
    buf.write(dump_text)

-    if log_snapshots is None:
-        log_snapshots = _capture_default_log_snapshots(log_lines)
-
    # ── Recent log tails (summary only) ──────────────────────────────────
    buf.write("\n\n")
    buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
-    buf.write(log_snapshots["agent"].tail_text)
+    buf.write(_read_log_tail("agent", log_lines))
    buf.write("\n\n")

    errors_lines = min(log_lines, 100)
    buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
-    buf.write(log_snapshots["errors"].tail_text)
+    buf.write(_read_log_tail("errors", errors_lines))
    buf.write("\n\n")

    buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
-    buf.write(log_snapshots["gateway"].tail_text)
+    buf.write(_read_log_tail("gateway", errors_lines))
    buf.write("\n")

    return buf.getvalue()
@@ -521,8 +448,6 @@ def collect_debug_report(

 def run_debug_share(args):
    """Collect debug report + full logs, upload each, print URLs."""
-    _best_effort_sweep_expired_pastes()
-
    log_lines = getattr(args, "lines", 200)
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)
@@ -534,15 +459,10 @@ def run_debug_share(args):

    # Capture dump once — prepended to every paste for context.
    dump_text = _capture_dump()
-    log_snapshots = _capture_default_log_snapshots(log_lines)

-    report = collect_debug_report(
-        log_lines=log_lines,
-        dump_text=dump_text,
-        log_snapshots=log_snapshots,
-    )
-    agent_log = log_snapshots["agent"].full_text
-    gateway_log = log_snapshots["gateway"].full_text
+    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
+    agent_log = _read_full_log("agent")
+    gateway_log = _read_full_log("gateway")

    # Prepend dump header to each full log so every paste is self-contained.
    if agent_log:
@@ -18,7 +18,7 @@ import os
 import sys
 import time
 import logging
-from typing import Any, Callable, Optional, Tuple
+from typing import Optional, Tuple

 import requests

@@ -108,7 +108,7 @@ def wait_for_registration_success(
    device_code: str,
    interval: int = 3,
    expires_in: int = 7200,
-    on_waiting: Optional[Callable[..., Any]] = None,
+    on_waiting: Optional[callable] = None,
 ) -> Tuple[str, str]:
    """Block until the registration succeeds or times out.

@@ -30,7 +30,6 @@ load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

 from hermes_cli.colors import Colors, color
 from hermes_constants import OPENROUTER_MODELS_URL
-from utils import base_url_host_matches


 _PROVIDER_ENV_HINTS = (
@@ -912,7 +911,6 @@ def run_doctor(args):
    _apikey_providers = [
        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
-        ("StepFun Step Plan",   ("STEPFUN_API_KEY",),                           "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
@@ -944,22 +942,18 @@ def run_doctor(args):
            try:
                import httpx
                _base = os.getenv(_base_env, "") if _base_env else ""
-                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1
-                # (OpenAI-compat surface, which exposes /models for health check).
+                # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com
                if not _base and _key.startswith("sk-kimi-"):
                    _base = "https://api.kimi.com/coding/v1"
-                # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding
-                # with no /v1) don't support /models.  Rewrite to the OpenAI-compat
-                # /v1 surface for health checks.
+                # Anthropic-compat endpoints (/anthropic) don't support /models.
+                # Rewrite to the OpenAI-compat /v1 surface for health checks.
                if _base and _base.rstrip("/").endswith("/anthropic"):
                    from agent.auxiliary_client import _to_openai_base_url
                    _base = _to_openai_base_url(_base)
-                if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
-                    _base = _base.rstrip("/") + "/v1"
                _url = (_base.rstrip("/") + "/models") if _base else _default_url
                _headers = {"Authorization": f"Bearer {_key}"}
-                if base_url_host_matches(_base, "api.kimi.com"):
-                    _headers["User-Agent"] = "claude-code/0.1.0"
+                if "api.kimi.com" in _url.lower():
+                    _headers["User-Agent"] = "KimiCLI/1.30.0"
                _resp = httpx.get(
                    _url,
                    headers=_headers,
@@ -3,7 +3,6 @@
 from __future__ import annotations

 import os
-import sys
 from pathlib import Path

 from dotenv import load_dotenv
@@ -15,26 +14,6 @@ from dotenv import load_dotenv
 # pure ASCII (they become HTTP header values).
 _CREDENTIAL_SUFFIXES = ("_API_KEY", "_TOKEN", "_SECRET", "_KEY")

-# Names we've already warned about during this process, so repeated
-# load_hermes_dotenv() calls (user env + project env, gateway hot-reload,
-# tests) don't spam the same warning multiple times.
-_WARNED_KEYS: set[str] = set()
-
-
-def _format_offending_chars(value: str, limit: int = 3) -> str:
-    """Return a compact 'U+XXXX ('c'), ...' summary of non-ASCII codepoints."""
-    seen: list[str] = []
-    for ch in value:
-        if ord(ch) > 127:
-            label = f"U+{ord(ch):04X}"
-            if ch.isprintable():
-                label += f" ({ch!r})"
-            if label not in seen:
-                seen.append(label)
-            if len(seen) >= limit:
-                break
-    return ", ".join(seen)
-

 def _sanitize_loaded_credentials() -> None:
    """Strip non-ASCII characters from credential env vars in os.environ.
@@ -42,42 +21,14 @@ def _sanitize_loaded_credentials() -> None:
    Called after dotenv loads so the rest of the codebase never sees
    non-ASCII API keys.  Only touches env vars whose names end with
    known credential suffixes (``_API_KEY``, ``_TOKEN``, etc.).
-
-    Emits a one-line warning to stderr when characters are stripped.
-    Silent stripping would mask copy-paste corruption (Unicode lookalike
-    glyphs from PDFs / rich-text editors, ZWSP from web pages) as opaque
-    provider-side "invalid API key" errors (see #6843).
    """
    for key, value in list(os.environ.items()):
        if not any(key.endswith(suffix) for suffix in _CREDENTIAL_SUFFIXES):
            continue
        try:
            value.encode("ascii")
-            continue
        except UnicodeEncodeError:
-            pass
-        cleaned = value.encode("ascii", errors="ignore").decode("ascii")
-        os.environ[key] = cleaned
-        if key in _WARNED_KEYS:
-            continue
-        _WARNED_KEYS.add(key)
-        stripped = len(value) - len(cleaned)
-        detail = _format_offending_chars(value) or "non-printable"
-        print(
-            f"  Warning: {key} contained {stripped} non-ASCII character"
-            f"{'s' if stripped != 1 else ''} ({detail}) — stripped so the "
-            f"key can be sent as an HTTP header.",
-            file=sys.stderr,
-        )
-        print(
-            "  This usually means the key was copy-pasted from a PDF, "
-            "rich-text editor, or web page that substituted lookalike\n"
-            "  Unicode glyphs for ASCII letters. If authentication fails "
-            "(e.g. \"API key not valid\"), re-copy the key from the\n"
-            "  provider's dashboard and run `hermes setup` (or edit the "
-            ".env file in a plain-text editor).",
-            file=sys.stderr,
-        )
+            os.environ[key] = value.encode("ascii", errors="ignore").decode("ascii")


 def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
@@ -160,8 +111,6 @@ def load_hermes_dotenv(
    # Fix corrupted .env files before python-dotenv parses them (#8908).
    if user_env.exists():
        _sanitize_env_file_if_needed(user_env)
-    if project_env_path and project_env_path.exists():
-        _sanitize_env_file_if_needed(project_env_path)

    if user_env.exists():
        _load_dotenv_with_fallback(user_env, override=True)
@@ -333,147 +333,6 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
    return selected_system, result.stdout.strip() == "active"


-def _read_systemd_unit_properties(
-    system: bool = False,
-    properties: tuple[str, ...] = (
-        "ActiveState",
-        "SubState",
-        "Result",
-        "ExecMainStatus",
-    ),
-) -> dict[str, str]:
-    """Return selected ``systemctl show`` properties for the gateway unit."""
-    selected_system = _select_systemd_scope(system)
-    try:
-        result = _run_systemctl(
-            [
-                "show",
-                get_service_name(),
-                "--no-pager",
-                "--property",
-                ",".join(properties),
-            ],
-            system=selected_system,
-            capture_output=True,
-            text=True,
-            timeout=10,
-        )
-    except (RuntimeError, subprocess.TimeoutExpired, OSError):
-        return {}
-
-    if result.returncode != 0:
-        return {}
-
-    parsed: dict[str, str] = {}
-    for line in result.stdout.splitlines():
-        if "=" not in line:
-            continue
-        key, value = line.split("=", 1)
-        parsed[key] = value.strip()
-    return parsed
-
-
-def _wait_for_systemd_service_restart(
-    *,
-    system: bool = False,
-    previous_pid: int | None = None,
-    timeout: float = 60.0,
-) -> bool:
-    """Wait for the gateway service to become active after a restart handoff."""
-    import time
-
-    svc = get_service_name()
-    scope_label = _service_scope_label(system).capitalize()
-    deadline = time.time() + timeout
-
-    while time.time() < deadline:
-        props = _read_systemd_unit_properties(system=system)
-        active_state = props.get("ActiveState", "")
-        sub_state = props.get("SubState", "")
-        new_pid = None
-        try:
-            from gateway.status import get_running_pid
-
-            new_pid = get_running_pid()
-        except Exception:
-            new_pid = None
-
-        if active_state == "active":
-            if new_pid and (previous_pid is None or new_pid != previous_pid):
-                print(f"✓ {scope_label} service restarted (PID {new_pid})")
-                return True
-            if previous_pid is None:
-                print(f"✓ {scope_label} service restarted")
-                return True
-
-        if active_state == "activating" and sub_state == "auto-restart":
-            time.sleep(1)
-            continue
-
-        time.sleep(2)
-
-    print(
-        f"⚠ {scope_label} service did not become active within {int(timeout)}s.\n"
-        f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
-        f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
-    )
-    return False
-
-
-def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
-    """Recover a planned service restart that is stuck in systemd state."""
-    props = _read_systemd_unit_properties(system=system)
-    if not props:
-        return False
-
-    try:
-        from gateway.status import read_runtime_status
-    except Exception:
-        return False
-
-    runtime_state = read_runtime_status() or {}
-    if not runtime_state.get("restart_requested"):
-        return False
-
-    active_state = props.get("ActiveState", "")
-    sub_state = props.get("SubState", "")
-    exec_main_status = props.get("ExecMainStatus", "")
-    result = props.get("Result", "")
-
-    if active_state == "activating" and sub_state == "auto-restart":
-        print("⏳ Service restart already pending — waiting for systemd relaunch...")
-        return _wait_for_systemd_service_restart(
-            system=system,
-            previous_pid=previous_pid,
-        )
-
-    if active_state == "failed" and (
-        exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
-        or result == "exit-code"
-    ):
-        svc = get_service_name()
-        scope_label = _service_scope_label(system).capitalize()
-        print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
-        _run_systemctl(
-            ["reset-failed", svc],
-            system=system,
-            check=False,
-            timeout=30,
-        )
-        _run_systemctl(
-            ["start", svc],
-            system=system,
-            check=False,
-            timeout=90,
-        )
-        return _wait_for_systemd_service_restart(
-            system=system,
-            previous_pid=previous_pid,
-        )
-
-    return False
-
-
 def _probe_launchd_service_running() -> bool:
    if not get_launchd_plist_path().exists():
        return False
@@ -611,8 +470,7 @@ def stop_profile_gateway() -> bool:
        except (ProcessLookupError, PermissionError):
            break

-    if get_running_pid() is None:
-        remove_pid_file()
+    remove_pid_file()
    return True


@@ -761,21 +619,6 @@ def get_systemd_unit_path(system: bool = False) -> Path:
    return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"


-class UserSystemdUnavailableError(RuntimeError):
-    """Raised when ``systemctl --user`` cannot reach the user D-Bus session.
-
-    Typically hit on fresh RHEL/Debian SSH sessions where linger is disabled
-    and no user@.service is running, so ``/run/user/$UID/bus`` never exists.
-    Carries a user-facing remediation message in ``args[0]``.
-    """
-
-
-def _user_dbus_socket_path() -> Path:
-    """Return the expected per-user D-Bus socket path (regardless of existence)."""
-    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
-    return Path(xdg) / "bus"
-
-
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -798,126 +641,6 @@ def _ensure_user_systemd_env() -> None:
            os.environ["DBUS_SESSION_BUS_ADDRESS"] = f"unix:path={bus_path}"


-def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
-    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
-
-    Linger-enabled user@.service can take a second or two to spawn the socket
-    after ``loginctl enable-linger`` runs.  Returns True once the socket exists.
-    """
-    import time
-
-    deadline = time.monotonic() + timeout
-    while time.monotonic() < deadline:
-        if _user_dbus_socket_path().exists():
-            _ensure_user_systemd_env()
-            return True
-        time.sleep(0.2)
-    return _user_dbus_socket_path().exists()
-
-
-def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
-    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
-
-    No-op when the bus socket is already there (the common case on desktops
-    and linger-enabled servers).  On fresh SSH sessions where the socket is
-    missing:
-
-    * If linger is already enabled, wait briefly for user@.service to spawn
-      the socket.
-    * If linger is disabled and ``auto_enable_linger`` is True, try
-      ``loginctl enable-linger $USER`` (works as non-root when polkit permits
-      it, otherwise needs sudo).
-    * If the socket is still missing afterwards, raise
-      :class:`UserSystemdUnavailableError` with a precise remediation message.
-
-    Callers should treat the exception as a terminal condition for user-scope
-    systemd operations and surface the message to the user.
-    """
-    _ensure_user_systemd_env()
-    bus_path = _user_dbus_socket_path()
-    if bus_path.exists():
-        return
-
-    import getpass
-
-    username = getpass.getuser()
-    linger_enabled, linger_detail = get_systemd_linger_status()
-
-    if linger_enabled is True:
-        if _wait_for_user_dbus_socket(timeout=3.0):
-            return
-        # Linger is on but socket still missing — unusual; fall through to error.
-        _raise_user_systemd_unavailable(
-            username,
-            reason="User D-Bus socket is missing even though linger is enabled.",
-            fix_hint=(
-                f"  systemctl start user@{os.getuid()}.service\n"
-                "  (may require sudo; try again after the command succeeds)"
-            ),
-        )
-
-    if auto_enable_linger and shutil.which("loginctl"):
-        try:
-            result = subprocess.run(
-                ["loginctl", "enable-linger", username],
-                capture_output=True,
-                text=True,
-                check=False,
-                timeout=30,
-            )
-        except Exception as exc:
-            _raise_user_systemd_unavailable(
-                username,
-                reason=f"loginctl enable-linger failed ({exc}).",
-                fix_hint=f"  sudo loginctl enable-linger {username}",
-            )
-        else:
-            if result.returncode == 0:
-                if _wait_for_user_dbus_socket(timeout=5.0):
-                    print(f"✓ Enabled linger for {username} — user D-Bus now available")
-                    return
-                # enable-linger succeeded but the socket never appeared.
-                _raise_user_systemd_unavailable(
-                    username,
-                    reason="Linger was enabled, but the user D-Bus socket did not appear.",
-                    fix_hint=(
-                        "  Log out and log back in, then re-run the command.\n"
-                        f"  Or reboot and run: systemctl --user start {get_service_name()}"
-                    ),
-                )
-            detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
-            _raise_user_systemd_unavailable(
-                username,
-                reason=f"loginctl enable-linger was denied: {detail}",
-                fix_hint=f"  sudo loginctl enable-linger {username}",
-            )
-
-    _raise_user_systemd_unavailable(
-        username,
-        reason=(
-            "User D-Bus session is not available "
-            f"({linger_detail or 'linger disabled'})."
-        ),
-        fix_hint=f"  sudo loginctl enable-linger {username}",
-    )
-
-
-def _raise_user_systemd_unavailable(username: str, *, reason: str, fix_hint: str) -> None:
-    """Build a user-facing error message and raise UserSystemdUnavailableError."""
-    msg = (
-        f"{reason}\n"
-        "  systemctl --user cannot reach the user D-Bus session in this shell.\n"
-        "\n"
-        "  To fix:\n"
-        f"{fix_hint}\n"
-        "\n"
-        "  Alternative: run the gateway in the foreground (stays up until\n"
-        "  you exit / close the terminal):\n"
-        "    hermes gateway run"
-    )
-    raise UserSystemdUnavailableError(msg)
-
-
 def _systemctl_cmd(system: bool = False) -> list[str]:
    if not system:
        _ensure_user_systemd_env()
@@ -1271,6 +994,8 @@ def get_systemd_linger_status() -> tuple[bool | None, str]:
    if not is_linux():
        return None, "not supported on this platform"

+    import shutil
+
    if not shutil.which("loginctl"):
        return None, "loginctl not found"

@@ -1622,6 +1347,7 @@ def _ensure_linger_enabled() -> None:
        return

    import getpass
+    import shutil

    username = getpass.getuser()
    linger_file = Path(f"/var/lib/systemd/linger/{username}")
@@ -1758,11 +1484,6 @@ def systemd_start(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("start")
-    else:
-        # Fail fast with actionable guidance if the user D-Bus session is not
-        # reachable (common on fresh RHEL/Debian SSH sessions without linger).
-        # Raises UserSystemdUnavailableError with a remediation message.
-        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")
@@ -1782,16 +1503,19 @@ def systemd_restart(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("restart")
-    else:
-        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    from gateway.status import get_running_pid

    pid = get_running_pid()
    if pid is not None and _request_gateway_self_restart(pid):
+        # SIGUSR1 sent — the gateway will drain active agents, exit with
+        # code 75, and systemd will restart it after RestartSec (30s).
+        # Wait for the old process to die and the new one to become active
+        # so the CLI doesn't return while the service is still restarting.
        import time
        scope_label = _service_scope_label(system).capitalize()
        svc = get_service_name()
+        scope_cmd = _systemctl_cmd(system)

        # Phase 1: wait for old process to exit (drain + shutdown)
        print(f"⏳ {scope_label} service draining active work...")
@@ -1805,41 +1529,48 @@ def systemd_restart(system: bool = False):
        else:
            print(f"⚠ Old process (PID {pid}) still alive after 90s")

-        # The gateway exits with code 75 for a planned service restart.
-        # systemd can sit in the RestartSec window or even wedge itself into a
-        # failed/rate-limited state if the operator asks for another restart in
-        # the middle of that handoff. Clear any stale failed state and kick the
-        # unit immediately so `hermes gateway restart` behaves idempotently.
-        _run_systemctl(
-            ["reset-failed", svc],
-            system=system,
-            check=False,
-            timeout=30,
-        )
-        _run_systemctl(
-            ["start", svc],
-            system=system,
-            check=False,
-            timeout=90,
-        )
-        _wait_for_systemd_service_restart(system=system, previous_pid=pid)
-        return
+        # Phase 2: wait for systemd to start the new process
+        print(f"⏳ Waiting for {svc} to restart...")
+        deadline = time.time() + 60
+        while time.time() < deadline:
+            try:
+                result = subprocess.run(
+                    scope_cmd + ["is-active", svc],
+                    capture_output=True, text=True, timeout=5,
+                )
+                if result.stdout.strip() == "active":
+                    # Verify it's a NEW process, not the old one somehow
+                    new_pid = get_running_pid()
+                    if new_pid and new_pid != pid:
+                        print(f"✓ {scope_label} service restarted (PID {new_pid})")
+                        return
+            except (subprocess.TimeoutExpired, FileNotFoundError):
+                pass
+            time.sleep(2)

-    if _recover_pending_systemd_restart(system=system, previous_pid=pid):
+        # Timed out — check final state
+        try:
+            result = subprocess.run(
+                scope_cmd + ["is-active", svc],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.stdout.strip() == "active":
+                print(f"✓ {scope_label} service restarted")
+                return
+        except Exception:
+            pass
+        print(
+            f"⚠ {scope_label} service did not become active within 60s.\n"
+            f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
+            f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
+        )
        return
-
-    _run_systemctl(
-        ["reset-failed", get_service_name()],
-        system=system,
-        check=False,
-        timeout=30,
-    )
    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")



-def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
+def systemd_status(deep: bool = False, system: bool = False):
    system = _select_systemd_scope(system)
    unit_path = get_systemd_unit_path(system=system)
    scope_flag = " --system" if system else ""
@@ -1862,12 +1593,8 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

-    status_cmd = ["status", get_service_name(), "--no-pager"]
-    if full:
-        status_cmd.append("-l")
-
    _run_systemctl(
-        status_cmd,
+        ["status", get_service_name(), "--no-pager"],
        system=system,
        capture_output=False,
        timeout=10,
@@ -1900,19 +1627,6 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
        for line in runtime_lines:
            print(f"  {line}")

-    unit_props = _read_systemd_unit_properties(system=system)
-    active_state = unit_props.get("ActiveState", "")
-    sub_state = unit_props.get("SubState", "")
-    exec_main_status = unit_props.get("ExecMainStatus", "")
-    result_code = unit_props.get("Result", "")
-    if active_state == "activating" and sub_state == "auto-restart":
-        print("  ⏳ Restart pending: systemd is waiting to relaunch the gateway")
-    elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
-        print("  ⚠ Planned restart is stuck in systemd failed state (exit 75)")
-        print(f"  Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
-    elif active_state == "failed" and result_code:
-        print(f"  ⚠ Systemd unit result: {result_code}")
-
    if system:
        print("✓ System service starts at boot without requiring systemd linger")
    elif deep:
@@ -1928,10 +1642,7 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
    if deep:
        print()
        print("Recent logs:")
-        log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
-        if full:
-            log_cmd.append("-l")
-        subprocess.run(log_cmd, timeout=10)
+        subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)


 # =============================================================================
@@ -1945,6 +1656,7 @@ def get_launchd_label() -> str:


 def _launchd_domain() -> str:
+    import os
    return f"gui/{os.getuid()}"


@@ -2931,125 +2643,8 @@ def _setup_dingtalk():


 def _setup_wecom():
-    """Interactive setup for WeCom — scan QR code or manual credential input."""
-    print()
-    print(color("  ─── 💬 WeCom (Enterprise WeChat) Setup ───", Colors.CYAN))
-
-    existing_bot_id = get_env_value("WECOM_BOT_ID")
-    existing_secret = get_env_value("WECOM_SECRET")
-    if existing_bot_id and existing_secret:
-        print()
-        print_success("WeCom is already configured.")
-        if not prompt_yes_no("  Reconfigure WeCom?", False):
-            return
-
-    # ── Choose setup method ──
-    print()
-    method_choices = [
-        "Scan QR code to obtain Bot ID and Secret automatically (recommended)",
-        "Enter existing Bot ID and Secret manually",
-    ]
-    method_idx = prompt_choice("  How would you like to set up WeCom?", method_choices, 0)
-
-    bot_id = None
-    secret = None
-
-    if method_idx == 0:
-        # ── QR scan flow ──
-        try:
-            from gateway.platforms.wecom import qr_scan_for_bot_info
-        except Exception as exc:
-            print_error(f"  WeCom QR scan import failed: {exc}")
-            qr_scan_for_bot_info = None
-
-        if qr_scan_for_bot_info is not None:
-            try:
-                credentials = qr_scan_for_bot_info()
-            except KeyboardInterrupt:
-                print()
-                print_warning("  WeCom setup cancelled.")
-                return
-            except Exception as exc:
-                print_warning(f"  QR scan failed: {exc}")
-                credentials = None
-            if credentials:
-                bot_id = credentials.get("bot_id", "")
-                secret = credentials.get("secret", "")
-                print_success("  ✔ QR scan successful! Bot ID and Secret obtained.")
-
-        if not bot_id or not secret:
-            print_info("  QR scan did not complete. Continuing with manual input.")
-            bot_id = None
-            secret = None
-
-    # ── Manual credential input ──
-    if not bot_id or not secret:
-        print()
-        print_info("  1. Go to WeCom Application → Workspace → Smart Robot -> Create smart robots")
-        print_info("  2. Select API Mode")
-        print_info("  3. Copy the Bot ID and Secret from the bot's credentials info")
-        print_info("  4. The bot connects via WebSocket — no public endpoint needed")
-        print()
-        bot_id = prompt("  Bot ID", password=False)
-        if not bot_id:
-            print_warning("  Skipped — WeCom won't work without a Bot ID.")
-            return
-        secret = prompt("  Secret", password=True)
-        if not secret:
-            print_warning("  Skipped — WeCom won't work without a Secret.")
-            return
-
-    # ── Save core credentials ──
-    save_env_value("WECOM_BOT_ID", bot_id)
-    save_env_value("WECOM_SECRET", secret)
-
-    # ── Allowed users (deny-by-default security) ──
-    print()
-    print_info("  The gateway DENIES all users by default for security.")
-    print_info("  Enter user IDs to create an allowlist, or leave empty.")
-    allowed = prompt("  Allowed user IDs (comma-separated, or empty)", password=False)
-    if allowed:
-        cleaned = allowed.replace(" ", "")
-        save_env_value("WECOM_ALLOWED_USERS", cleaned)
-        print_success("  Saved — only these users can interact with the bot.")
-    else:
-        print()
-        access_choices = [
-            "Enable open access (anyone can message the bot)",
-            "Use DM pairing (unknown users request access, you approve with 'hermes pairing approve')",
-            "Disable direct messages",
-            "Skip for now (bot will deny all users until configured)",
-        ]
-        access_idx = prompt_choice("  How should unauthorized users be handled?", access_choices, 1)
-        if access_idx == 0:
-            save_env_value("WECOM_DM_POLICY", "open")
-            save_env_value("GATEWAY_ALLOW_ALL_USERS", "true")
-            print_warning("  Open access enabled — anyone can use your bot!")
-        elif access_idx == 1:
-            save_env_value("WECOM_DM_POLICY", "pairing")
-            print_success("  DM pairing mode — users will receive a code to request access.")
-            print_info("  Approve with: hermes pairing approve <platform> <code>")
-        elif access_idx == 2:
-            save_env_value("WECOM_DM_POLICY", "disabled")
-            print_warning("  Direct messages disabled.")
-        else:
-            print_info("  Skipped — configure later with 'hermes gateway setup'")
-
-    # ── Home channel (optional) ──
-    print()
-    print_info("  Chat ID for scheduled results and notifications.")
-    home = prompt("  Home chat ID (optional, for cron/notifications)", password=False)
-    if home:
-        save_env_value("WECOM_HOME_CHANNEL", home)
-        print_success(f"  Home channel set to {home}")
-
-    print()
-    print_success("💬 WeCom configured!")
-
-
-def _setup_wecom_callback():
-    """Configure WeCom Callback (self-built app) via the standard platform setup."""
-    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom_callback")
+    """Configure WeCom (Enterprise WeChat) via the standard platform setup."""
+    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom")
    _setup_standard_platform(wecom_platform)


@@ -3430,8 +3025,7 @@ def _setup_qqbot():
    if method_idx == 0:
        # ── QR scan-to-configure ──
        try:
-            from gateway.platforms.qqbot import qr_register
-            credentials = qr_register()
+            credentials = _qqbot_qr_flow()
        except KeyboardInterrupt:
            print()
            print_warning("  QQ Bot setup cancelled.")
@@ -3513,6 +3107,106 @@ def _setup_qqbot():
    print_info(f"  App ID: {credentials['app_id']}")


+def _qqbot_render_qr(url: str) -> bool:
+    """Print *url* as an ASCII QR code; return False when it cannot be drawn."""
+    try:
+        import qrcode
+        code = qrcode.QRCode(border=1, error_correction=qrcode.constants.ERROR_CORRECT_L)
+        code.add_data(url)
+        code.make(fit=True)
+        code.print_ascii(invert=True)
+    except Exception:  # best-effort: e.g. the optional qrcode package is absent
+        return False
+    return True
+
+
+def _qqbot_qr_flow():
+    """Run the QR-code scan-to-configure flow.
+
+    Polls the bind task until the scan completes, refreshing an expired QR
+    code up to MAX_REFRESHES times.  Returns a dict with app_id, client_secret,
+    user_openid on success, or None on failure; KeyboardInterrupt propagates.
+    """
+    try:
+        from gateway.platforms.qqbot import (
+            create_bind_task, poll_bind_result, build_connect_url,
+            decrypt_secret, BindStatus,
+        )
+        from gateway.platforms.qqbot.constants import ONBOARD_POLL_INTERVAL
+    except Exception as exc:
+        print_error(f"  QQBot onboard import failed: {exc}")
+        return None
+
+    import asyncio
+    import time
+
+    MAX_REFRESHES = 3
+    MAX_POLL_ERRORS = 20  # consecutive failed polls tolerated before giving up
+
+    for refresh_count in range(MAX_REFRESHES + 1):
+        loop = asyncio.new_event_loop()
+        try:  # the ``finally`` below is the only place the loop is closed
+            # ── Create bind task ──
+            try:
+                task_id, aes_key = loop.run_until_complete(create_bind_task())
+            except Exception as e:
+                print_warning(f"  Failed to create bind task: {e}")
+                return None
+            url = build_connect_url(task_id)
+
+            # ── Display QR code + URL ──
+            print()
+            if _qqbot_render_qr(url):
+                print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
+            else:
+                print(f"  Open this URL in QQ on your phone:\n  {url}")
+                print_info("  Tip: pip install qrcode  to show a scannable QR code here")
+
+            # ── Poll loop (silent — keep QR visible at bottom) ──
+            poll_errors = 0
+            while True:
+                try:
+                    status, app_id, encrypted_secret, user_openid = loop.run_until_complete(
+                        poll_bind_result(task_id)
+                    )
+                except Exception as e:
+                    # Retry transient errors quietly, but never spin forever.
+                    poll_errors += 1
+                    if poll_errors >= MAX_POLL_ERRORS:
+                        print()
+                        print_warning(f"  Polling failed {poll_errors} times in a row: {e}")
+                        return None
+                    time.sleep(ONBOARD_POLL_INTERVAL)
+                    continue
+                poll_errors = 0
+
+                if status == BindStatus.COMPLETED:
+                    client_secret = decrypt_secret(encrypted_secret, aes_key)
+                    print()
+                    print_success(f"  QR scan complete! (App ID: {app_id})")
+                    if user_openid:
+                        print_info(f"  Scanner's OpenID: {user_openid}")
+                    return {
+                        "app_id": app_id,
+                        "client_secret": client_secret,
+                        "user_openid": user_openid,
+                    }
+
+                if status == BindStatus.EXPIRED:
+                    if refresh_count >= MAX_REFRESHES:
+                        print()
+                        print_warning(f"  QR code expired {MAX_REFRESHES} times — giving up.")
+                        return None
+                    print()
+                    print_warning(f"  QR code expired, refreshing... ({refresh_count + 1}/{MAX_REFRESHES})")
+                    break  # outer loop creates a new task
+                time.sleep(ONBOARD_POLL_INTERVAL)
+        finally:
+            loop.close()
+
+    return None
+
+
 def _setup_signal():
    """Interactive setup for Signal messenger."""
    import shutil
@@ -3664,10 +3358,6 @@ def gateway_setup():
                    systemd_start()
                elif is_macos():
                    launchd_start()
-            except UserSystemdUnavailableError as e:
-                print_error("  Failed to start — user systemd not reachable:")
-                for line in str(e).splitlines():
-                    print(f"  {line}")
            except subprocess.CalledProcessError as e:
                print_error(f"  Failed to start: {e}")
    else:
@@ -3704,8 +3394,6 @@ def gateway_setup():
            _setup_feishu()
        elif platform["key"] == "qqbot":
            _setup_qqbot()
-        elif platform["key"] == "wecom":
-            _setup_wecom()
        else:
            _setup_standard_platform(platform)

@@ -3732,10 +3420,6 @@ def gateway_setup():
                    else:
                        stop_profile_gateway()
                        print_info("Start manually: hermes gateway")
-                except UserSystemdUnavailableError as e:
-                    print_error("  Restart failed — user systemd not reachable:")
-                    for line in str(e).splitlines():
-                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -3745,10 +3429,6 @@ def gateway_setup():
                        systemd_start()
                    elif is_macos():
                        launchd_start()
-                except UserSystemdUnavailableError as e:
-                    print_error("  Start failed — user systemd not reachable:")
-                    for line in str(e).splitlines():
-                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Start failed: {e}")
        else:
@@ -3772,10 +3452,6 @@ def gateway_setup():
                                    systemd_start(system=installed_scope == "system")
                                else:
                                    launchd_start()
-                            except UserSystemdUnavailableError as e:
-                                print_error("  Start failed — user systemd not reachable:")
-                                for line in str(e).splitlines():
-                                    print(f"  {line}")
                            except subprocess.CalledProcessError as e:
                                print_error(f"  Start failed: {e}")
                    except subprocess.CalledProcessError as e:
@@ -3813,18 +3489,6 @@ def gateway_setup():

 def gateway_command(args):
    """Handle gateway subcommands."""
-    try:
-        return _gateway_command_inner(args)
-    except UserSystemdUnavailableError as e:
-        # Clean, actionable message instead of a traceback when the user D-Bus
-        # session is unreachable (fresh SSH shell, no linger, container, etc.).
-        print_error("User systemd not reachable:")
-        for line in str(e).splitlines():
-            print(f"  {line}")
-        sys.exit(1)
-
-
-def _gateway_command_inner(args):
    subcmd = getattr(args, 'gateway_command', None)
    
    # Default to run if no subcommand
@@ -4088,13 +3752,12 @@ def _gateway_command_inner(args):
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
-        full = getattr(args, 'full', False)
        system = getattr(args, 'system', False)
        snapshot = get_gateway_runtime_snapshot(system=system)
        
        # Check for service first
        if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-            systemd_status(deep, system=system, full=full)
+            systemd_status(deep, system=system)
            _print_gateway_process_mismatch(snapshot)
        elif is_macos() and get_launchd_plist_path().exists():
            launchd_status(deep)
@@ -1,385 +0,0 @@
-"""hermes hooks — inspect and manage shell-script hooks.
-
-Usage::
-
-    hermes hooks list
-    hermes hooks test <event> [--for-tool X] [--payload-file F]
-    hermes hooks revoke <command>
-    hermes hooks doctor
-
-Consent records live under ``~/.hermes/shell-hooks-allowlist.json`` and
-hook definitions come from the ``hooks:`` block in ``~/.hermes/config.yaml``
-(the same config read by the CLI / gateway at startup).
-
-This module is a thin CLI shell over :mod:`agent.shell_hooks`; every
-shared concern (payload serialisation, response parsing, allowlist
-format) lives there.
-"""
-
-from __future__ import annotations
-
-import json
-import os
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-
-def hooks_command(args) -> None:
-    """Entry point for ``hermes hooks`` — dispatches to the requested action."""
-    sub = getattr(args, "hooks_action", None)
-
-    if not sub:
-        print("Usage: hermes hooks {list|test|revoke|doctor}")
-        print("Run 'hermes hooks --help' for details.")
-        return
-
-    if sub in ("list", "ls"):
-        _cmd_list(args)
-    elif sub == "test":
-        _cmd_test(args)
-    elif sub in ("revoke", "remove", "rm"):
-        _cmd_revoke(args)
-    elif sub == "doctor":
-        _cmd_doctor(args)
-    else:
-        print(f"Unknown hooks subcommand: {sub}")
-
-
-# ---------------------------------------------------------------------------
-# list
-# ---------------------------------------------------------------------------
-
-def _cmd_list(_args) -> None:
-    from hermes_cli.config import load_config
-    from agent import shell_hooks
-
-    specs = shell_hooks.iter_configured_hooks(load_config())
-
-    if not specs:
-        print("No shell hooks configured in ~/.hermes/config.yaml.")
-        print("See `hermes hooks --help` or")
-        print("    website/docs/user-guide/features/hooks.md")
-        print("for the config schema and worked examples.")
-        return
-
-    by_event: Dict[str, List] = {}
-    for spec in specs:
-        by_event.setdefault(spec.event, []).append(spec)
-
-    allowlist = shell_hooks.load_allowlist()
-    approved = {
-        (e.get("event"), e.get("command"))
-        for e in allowlist.get("approvals", [])
-        if isinstance(e, dict)
-    }
-
-    print(f"Configured shell hooks ({len(specs)} total):\n")
-
-    for event in sorted(by_event.keys()):
-        print(f"  [{event}]")
-        for spec in by_event[event]:
-            is_approved = (spec.event, spec.command) in approved
-            status = "✓ allowed" if is_approved else "✗ not allowlisted"
-            matcher_part = f" matcher={spec.matcher!r}" if spec.matcher else ""
-            print(
-                f"    - {spec.command}{matcher_part} "
-                f"(timeout={spec.timeout}s, {status})"
-            )
-
-            if is_approved:
-                entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
-                if entry and entry.get("approved_at"):
-                    print(f"      approved_at: {entry['approved_at']}")
-                    mtime_now = shell_hooks.script_mtime_iso(spec.command)
-                    mtime_at = entry.get("script_mtime_at_approval")
-                    if mtime_now and mtime_at and mtime_now > mtime_at:
-                        print(
-                            f"      ⚠ script modified since approval "
-                            f"(was {mtime_at}, now {mtime_now}) — "
-                            f"run `hermes hooks doctor` to re-validate"
-                        )
-        print()
-
-
-# ---------------------------------------------------------------------------
-# test
-# ---------------------------------------------------------------------------
-
-# Synthetic kwargs matching the real invoke_hook() call sites — these are
-# passed verbatim to agent.shell_hooks.run_once(), which routes them through
-# the same _serialize_payload() that production firings use.  That way the
-# stdin a script sees under `hermes hooks test` and `hermes hooks doctor`
-# is identical in shape to what it will see at runtime.
-_DEFAULT_PAYLOADS = {
-    "pre_tool_call": {
-        "tool_name": "terminal",
-        "args": {"command": "echo hello"},
-        "session_id": "test-session",
-        "task_id": "test-task",
-        "tool_call_id": "test-call",
-    },
-    "post_tool_call": {
-        "tool_name": "terminal",
-        "args": {"command": "echo hello"},
-        "session_id": "test-session",
-        "task_id": "test-task",
-        "tool_call_id": "test-call",
-        "result": '{"output": "hello"}',
-    },
-    "pre_llm_call": {
-        "session_id": "test-session",
-        "user_message": "What is the weather?",
-        "conversation_history": [],
-        "is_first_turn": True,
-        "model": "gpt-4",
-        "platform": "cli",
-    },
-    "post_llm_call": {
-        "session_id": "test-session",
-        "model": "gpt-4",
-        "platform": "cli",
-    },
-    "on_session_start": {"session_id": "test-session"},
-    "on_session_end": {"session_id": "test-session"},
-    "on_session_finalize": {"session_id": "test-session"},
-    "on_session_reset": {"session_id": "test-session"},
-    "pre_api_request": {
-        "session_id": "test-session",
-        "task_id": "test-task",
-        "platform": "cli",
-        "model": "claude-sonnet-4-6",
-        "provider": "anthropic",
-        "base_url": "https://api.anthropic.com",
-        "api_mode": "anthropic_messages",
-        "api_call_count": 1,
-        "message_count": 4,
-        "tool_count": 12,
-        "approx_input_tokens": 2048,
-        "request_char_count": 8192,
-        "max_tokens": 4096,
-    },
-    "post_api_request": {
-        "session_id": "test-session",
-        "task_id": "test-task",
-        "platform": "cli",
-        "model": "claude-sonnet-4-6",
-        "provider": "anthropic",
-        "base_url": "https://api.anthropic.com",
-        "api_mode": "anthropic_messages",
-        "api_call_count": 1,
-        "api_duration": 1.234,
-        "finish_reason": "stop",
-        "message_count": 4,
-        "response_model": "claude-sonnet-4-6",
-        "usage": {"input_tokens": 2048, "output_tokens": 512},
-        "assistant_content_chars": 1200,
-        "assistant_tool_call_count": 0,
-    },
-    "subagent_stop": {
-        "parent_session_id": "parent-sess",
-        "child_role": None,
-        "child_summary": "Synthetic summary for hooks test",
-        "child_status": "completed",
-        "duration_ms": 1234,
-    },
-}
-
-
-def _cmd_test(args) -> None:
-    from hermes_cli.config import load_config
-    from hermes_cli.plugins import VALID_HOOKS
-    from agent import shell_hooks
-
-    event = args.event
-    if event not in VALID_HOOKS:
-        print(f"Unknown event: {event!r}")
-        print(f"Valid events: {', '.join(sorted(VALID_HOOKS))}")
-        return
-
-    # Synthetic kwargs in the same shape invoke_hook() would pass.  Merged
-    # with --for-tool (overrides tool_name) and --payload-file (extra kwargs).
-    payload = dict(_DEFAULT_PAYLOADS.get(event, {"session_id": "test-session"}))
-
-    if getattr(args, "for_tool", None):
-        payload["tool_name"] = args.for_tool
-
-    if getattr(args, "payload_file", None):
-        try:
-            custom = json.loads(Path(args.payload_file).read_text())
-            if isinstance(custom, dict):
-                payload.update(custom)
-            else:
-                print(f"Warning: {args.payload_file} is not a JSON object; ignoring")
-        except Exception as exc:
-            print(f"Error reading payload file: {exc}")
-            return
-
-    specs = shell_hooks.iter_configured_hooks(load_config())
-    specs = [s for s in specs if s.event == event]
-
-    if getattr(args, "for_tool", None):
-        specs = [
-            s for s in specs
-            if s.event not in ("pre_tool_call", "post_tool_call")
-            or s.matches_tool(args.for_tool)
-        ]
-
-    if not specs:
-        print(f"No shell hooks configured for event: {event}")
-        if getattr(args, "for_tool", None):
-            print(f"(with matcher filter --for-tool={args.for_tool})")
-        return
-
-    print(f"Firing {len(specs)} hook(s) for event '{event}':\n")
-    for spec in specs:
-        print(f"  → {spec.command}")
-        result = shell_hooks.run_once(spec, payload)
-        _print_run_result(result)
-        print()
-
-
-def _print_run_result(result: Dict[str, Any]) -> None:
-    if result.get("error"):
-        print(f"      ✗ error: {result['error']}")
-        return
-    if result.get("timed_out"):
-        print(f"      ✗ timed out after {result['elapsed_seconds']}s")
-        return
-
-    rc = result.get("returncode")
-    elapsed = result.get("elapsed_seconds", 0)
-    print(f"      exit={rc}  elapsed={elapsed}s")
-
-    stdout = (result.get("stdout") or "").strip()
-    stderr = (result.get("stderr") or "").strip()
-    if stdout:
-        print(f"      stdout: {_truncate(stdout, 400)}")
-    if stderr:
-        print(f"      stderr: {_truncate(stderr, 400)}")
-
-    parsed = result.get("parsed")
-    if parsed:
-        print(f"      parsed (Hermes wire shape): {json.dumps(parsed)}")
-    else:
-        print("      parsed: <none — hook contributed nothing to the dispatcher>")
-
-
-def _truncate(s: str, n: int) -> str:
-    return s if len(s) <= n else s[: n - 3] + "..."
-
-
-# ---------------------------------------------------------------------------
-# revoke
-# ---------------------------------------------------------------------------
-
-def _cmd_revoke(args) -> None:
-    from agent import shell_hooks
-
-    removed = shell_hooks.revoke(args.command)
-    if removed == 0:
-        print(f"No allowlist entry found for command: {args.command}")
-        return
-    print(f"Removed {removed} allowlist entry/entries for: {args.command}")
-    print(
-        "Note: currently running CLI / gateway processes keep their "
-        "already-registered callbacks until they restart."
-    )
-
-
-# ---------------------------------------------------------------------------
-# doctor
-# ---------------------------------------------------------------------------
-
-def _cmd_doctor(_args) -> None:
-    from hermes_cli.config import load_config
-    from agent import shell_hooks
-
-    specs = shell_hooks.iter_configured_hooks(load_config())
-
-    if not specs:
-        print("No shell hooks configured — nothing to check.")
-        return
-
-    print(f"Checking {len(specs)} configured shell hook(s)...\n")
-
-    problems = 0
-    for spec in specs:
-        print(f"  [{spec.event}] {spec.command}")
-        problems += _doctor_one(spec, shell_hooks)
-        print()
-
-    if problems:
-        print(f"{problems} issue(s) found.  Fix before relying on these hooks.")
-    else:
-        print("All shell hooks look healthy.")
-
-
-def _doctor_one(spec, shell_hooks) -> int:
-    problems = 0
-
-    # 1. Script exists and is executable
-    if shell_hooks.script_is_executable(spec.command):
-        print("      ✓ script exists and is executable")
-    else:
-        problems += 1
-        print("      ✗ script missing or not executable "
-              "(chmod +x the file, or fix the path)")
-
-    # 2. Allowlist status
-    entry = shell_hooks.allowlist_entry_for(spec.event, spec.command)
-    if entry:
-        print(f"      ✓ allowlisted (approved {entry.get('approved_at', '?')})")
-    else:
-        problems += 1
-        print("      ✗ not allowlisted — hook will NOT fire at runtime "
-              "(run with --accept-hooks once, or confirm at the TTY prompt)")
-
-    # 3. Mtime drift
-    if entry and entry.get("script_mtime_at_approval"):
-        mtime_now = shell_hooks.script_mtime_iso(spec.command)
-        mtime_at = entry["script_mtime_at_approval"]
-        if mtime_now and mtime_at and mtime_now > mtime_at:
-            problems += 1
-            print(f"      ⚠ script modified since approval "
-                  f"(was {mtime_at}, now {mtime_now}) — review changes, "
-                  f"then `hermes hooks revoke` + re-approve to refresh")
-        elif mtime_now and mtime_at and mtime_now == mtime_at:
-            print("      ✓ script unchanged since approval")
-
-    # 4. Produces valid JSON for a synthetic payload — only when the entry
-    # is already allowlisted.  Otherwise `hermes hooks doctor` would execute
-    # every script listed in a freshly-pulled config before the user has
-    # reviewed them, which directly contradicts the documented workflow
-    # ("spot newly-added hooks *before they register*").
-    if not entry:
-        print("      ℹ skipped JSON smoke test — not allowlisted yet. "
-              "Approve the hook first (via TTY prompt or --accept-hooks), "
-              "then re-run `hermes hooks doctor`.")
-    elif shell_hooks.script_is_executable(spec.command):
-        payload = _DEFAULT_PAYLOADS.get(spec.event, {"extra": {}})
-        result = shell_hooks.run_once(spec, payload)
-        if result.get("timed_out"):
-            problems += 1
-            print(f"      ✗ timed out after {result['elapsed_seconds']}s "
-                  f"on synthetic payload (timeout={spec.timeout}s)")
-        elif result.get("error"):
-            problems += 1
-            print(f"      ✗ execution error: {result['error']}")
-        else:
-            rc = result.get("returncode")
-            elapsed = result.get("elapsed_seconds", 0)
-            stdout = (result.get("stdout") or "").strip()
-            if stdout:
-                try:
-                    json.loads(stdout)
-                    print(f"      ✓ produced valid JSON on synthetic payload "
-                          f"(exit={rc}, {elapsed}s)")
-                except json.JSONDecodeError:
-                    problems += 1
-                    print(f"      ✗ stdout was not valid JSON (exit={rc}, "
-                          f"{elapsed}s): {_truncate(stdout, 120)}")
-            else:
-                print(f"      ✓ ran clean with empty stdout "
-                      f"(exit={rc}, {elapsed}s) — hook is observer-only")
-
-    return problems
@@ -51,19 +51,6 @@ import sys
 from pathlib import Path
 from typing import Optional

-def _add_accept_hooks_flag(parser) -> None:
-    """Attach the ``--accept-hooks`` flag.  Shared across every agent
-    subparser so the flag works regardless of CLI position."""
-    parser.add_argument(
-        "--accept-hooks",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help=(
-            "Auto-approve unseen shell hooks without a TTY prompt "
-            "(equivalent to HERMES_ACCEPT_HOOKS=1 / hooks_auto_accept: true)."
-        ),
-    )
-

 def _require_tty(command_name: str) -> None:
    """Exit with a clear error if stdin is not a terminal.
@@ -193,7 +180,7 @@ import time as _time
 from datetime import datetime

 from hermes_cli import __version__, __release_date__
-from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL
+from hermes_constants import OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)

@@ -618,6 +605,7 @@ def _exec_in_container(container_info: dict, cli_args: list):
        container_info: dict with backend, container_name, exec_user, hermes_bin
        cli_args: the original CLI arguments (everything after 'hermes')
    """
+    import shutil

    backend = container_info["backend"]
    container_name = container_info["container_name"]
@@ -1015,17 +1003,6 @@ def _launch_tui(resume_session_id: Optional[str] = None, tui_dev: bool = False):
    )
    env.setdefault("HERMES_PYTHON", sys.executable)
    env.setdefault("HERMES_CWD", os.getcwd())
-    # Guarantee an 8GB V8 heap + exposed GC for the TUI. Default node cap is
-    # ~1.5–4GB depending on version and can fatal-OOM on long sessions with
-    # large transcripts / reasoning blobs. Token-level merge: respect any
-    # user-supplied --max-old-space-size (they may have set it higher) and
-    # avoid duplicating --expose-gc.
-    _tokens = env.get("NODE_OPTIONS", "").split()
-    if not any(t.startswith("--max-old-space-size=") for t in _tokens):
-        _tokens.append("--max-old-space-size=8192")
-    if "--expose-gc" not in _tokens:
-        _tokens.append("--expose-gc")
-    env["NODE_OPTIONS"] = " ".join(_tokens)
    if resume_session_id:
        env["HERMES_TUI_RESUME"] = resume_session_id

@@ -1131,20 +1108,6 @@ def cmd_chat(args):
    if getattr(args, "yolo", False):
        os.environ["HERMES_YOLO_MODE"] = "1"

-    # --ignore-user-config: make load_cli_config() / load_config() skip the
-    # user's ~/.hermes/config.yaml and return built-in defaults. Set BEFORE
-    # importing cli (which runs `CLI_CONFIG = load_cli_config()` at module
-    # import time). Credentials in .env are still loaded — this flag only
-    # ignores behavioral/config settings.
-    if getattr(args, "ignore_user_config", False):
-        os.environ["HERMES_IGNORE_USER_CONFIG"] = "1"
-
-    # --ignore-rules: skip auto-injection of AGENTS.md/SOUL.md/.cursorrules
-    # (rules), memory entries, and any preloaded skills coming from user config.
-    # Maps to AIAgent(skip_context_files=True, skip_memory=True).
-    if getattr(args, "ignore_rules", False):
-        os.environ["HERMES_IGNORE_RULES"] = "1"
-
    # --source: tag session source for filtering (e.g. 'tool' for third-party integrations)
    if getattr(args, "source", None):
        os.environ["HERMES_SESSION_SOURCE"] = args.source
@@ -1173,8 +1136,6 @@ def cmd_chat(args):
        "checkpoints": getattr(args, "checkpoints", False),
        "pass_session_id": getattr(args, "pass_session_id", False),
        "max_turns": getattr(args, "max_turns", None),
-        "ignore_rules": getattr(args, "ignore_rules", False),
-        "ignore_user_config": getattr(args, "ignore_user_config", False),
    }
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
@@ -1196,6 +1157,8 @@ def cmd_gateway(args):
 def cmd_whatsapp(args):
    """Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
    _require_tty("whatsapp")
+    import subprocess
+    from pathlib import Path
    from hermes_cli.config import get_env_value, save_env_value

    print()
@@ -1304,27 +1267,16 @@ def cmd_whatsapp(args):
        return

    if not (bridge_dir / "node_modules").exists():
-        print("\n→ Installing WhatsApp bridge dependencies (this can take a few minutes)...")
-        npm = shutil.which("npm")
-        if not npm:
-            print("  ✗ npm not found on PATH — install Node.js first")
-            return
-        try:
-            result = subprocess.run(
-                [npm, "install", "--no-fund", "--no-audit", "--progress=false"],
-                cwd=str(bridge_dir),
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.PIPE,
-                text=True,
-            )
-        except KeyboardInterrupt:
-            print("\n  ✗ Install cancelled")
-            return
+        print("\n→ Installing WhatsApp bridge dependencies...")
+        result = subprocess.run(
+            ["npm", "install"],
+            cwd=str(bridge_dir),
+            capture_output=True,
+            text=True,
+            timeout=120,
+        )
        if result.returncode != 0:
-            err = (result.stderr or "").strip()
-            preview = "\n".join(err.splitlines()[-30:]) if err else "(no output)"
-            print("  ✗ npm install failed:")
-            print(preview)
+            print(f"  ✗ npm install failed: {result.stderr}")
            return
        print("  ✓ Dependencies installed")
    else:
@@ -1343,6 +1295,8 @@ def cmd_whatsapp(args):
        except (EOFError, KeyboardInterrupt):
            response = "n"
        if response.lower() in ("y", "yes"):
+            import shutil
+
            shutil.rmtree(session_dir, ignore_errors=True)
            session_dir.mkdir(parents=True, exist_ok=True)
            print("  ✓ Session cleared")
@@ -1438,6 +1392,8 @@ def select_provider_and_model(args=None):

    # Read effective provider the same way the CLI does at startup:
    # config.yaml model.provider > env var > auto-detect
+    import os
+
    config_provider = None
    model_cfg = config.get("model")
    if isinstance(model_cfg, dict):
@@ -1548,8 +1504,6 @@ def select_provider_and_model(args=None):
    # Step 2: Provider-specific setup + model selection
    if selected_provider == "openrouter":
        _model_flow_openrouter(config, current_model)
-    elif selected_provider == "ai-gateway":
-        _model_flow_ai_gateway(config, current_model)
    elif selected_provider == "nous":
        _model_flow_nous(config, current_model, args=args)
    elif selected_provider == "openai-codex":
@@ -1582,8 +1536,6 @@ def select_provider_and_model(args=None):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
-    elif selected_provider == "stepfun":
-        _model_flow_stepfun(config, current_model)
    elif selected_provider == "bedrock":
        _model_flow_bedrock(config, current_model)
    elif selected_provider in (
@@ -1597,6 +1549,7 @@ def select_provider_and_model(args=None):
        "kilocode",
        "opencode-zen",
        "opencode-go",
+        "ai-gateway",
        "alibaba",
        "huggingface",
        "xiaomi",
@@ -2068,63 +2021,6 @@ def _model_flow_openrouter(config, current_model=""):
        print("No change.")


-def _model_flow_ai_gateway(config, current_model=""):
-    """Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
-    from hermes_cli.auth import (
-        _prompt_model_selection,
-        _save_model_choice,
-        deactivate_provider,
-    )
-    from hermes_cli.config import get_env_value, save_env_value
-
-    api_key = get_env_value("AI_GATEWAY_API_KEY")
-    if not api_key:
-        print("No Vercel AI Gateway API key configured.")
-        print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
-        print("Add a payment method to get $5 in free credits.")
-        print()
-        try:
-            import getpass
-
-            key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip()
-        except (KeyboardInterrupt, EOFError):
-            print()
-            return
-        if not key:
-            print("Cancelled.")
-            return
-        save_env_value("AI_GATEWAY_API_KEY", key)
-        print("API key saved.")
-        print()
-
-    from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider
-
-    models_list = ai_gateway_model_ids(force_refresh=True)
-    pricing = get_pricing_for_provider("ai-gateway", force_refresh=True)
-
-    selected = _prompt_model_selection(
-        models_list, current_model=current_model, pricing=pricing
-    )
-    if selected:
-        _save_model_choice(selected)
-
-        from hermes_cli.config import load_config, save_config
-
-        cfg = load_config()
-        model = cfg.get("model")
-        if not isinstance(model, dict):
-            model = {"default": model} if model else {}
-            cfg["model"] = model
-        model["provider"] = "ai-gateway"
-        model["base_url"] = AI_GATEWAY_BASE_URL
-        model["api_mode"] = "chat_completions"
-        save_config(cfg)
-        deactivate_provider()
-        print(f"Default model set to: {selected} (via Vercel AI Gateway)")
-    else:
-        print("No change.")
-
-
 def _model_flow_nous(config, current_model="", args=None):
    """Nous Portal provider: ensure logged in, then pick model."""
    from hermes_cli.auth import (
@@ -2145,6 +2041,7 @@ def _model_flow_nous(config, current_model="", args=None):
        save_env_value,
    )
    from hermes_cli.nous_subscription import prompt_enable_tool_gateway
+    import argparse

    state = get_provider_auth_state("nous")
    if not state or not state.get("access_token"):
@@ -2183,6 +2080,7 @@ def _model_flow_nous(config, current_model="", args=None):
    from hermes_cli.models import (
        _PROVIDER_MODELS,
        get_pricing_for_provider,
+        filter_nous_free_models,
        check_nous_free_tier,
        partition_nous_models_by_tier,
    )
@@ -2225,8 +2123,10 @@ def _model_flow_nous(config, current_model="", args=None):
    # Check if user is on free tier
    free_tier = check_nous_free_tier()

-    # For free users: partition models into selectable/unavailable based on
-    # whether they are free per the Portal-reported pricing.
+    # For both tiers: apply the allowlist filter first (removes non-allowlisted
+    # free models and allowlist models that aren't actually free).
+    # Then for free users: partition remaining models into selectable/unavailable.
+    model_ids = filter_nous_free_models(model_ids, pricing)
    unavailable_models: list[str] = []
    if free_tier:
        model_ids, unavailable_models = partition_nous_models_by_tier(
@@ -2309,6 +2209,7 @@ def _model_flow_openai_codex(config, current_model=""):
        DEFAULT_CODEX_BASE_URL,
    )
    from hermes_cli.codex_models import get_codex_model_ids
+    import argparse

    status = get_codex_auth_status()
    if not status.get("logged_in"):
@@ -3439,9 +3340,8 @@ def _model_flow_kimi(config, current_model=""):

    # Step 3: Model selection — show appropriate models for the endpoint
    if is_coding_plan:
-        # Coding Plan models (kimi-k2.6 first)
+        # Coding Plan models (kimi-k2.5 first)
        model_list = [
-            "kimi-k2.6",
            "kimi-k2.5",
            "kimi-for-coding",
            "kimi-k2-thinking",
@@ -3480,140 +3380,6 @@ def _model_flow_kimi(config, current_model=""):
        print("No change.")


-def _infer_stepfun_region(base_url: str) -> str:
-    """Infer the current StepFun region from the configured endpoint."""
-    normalized = (base_url or "").strip().lower()
-    if "api.stepfun.com" in normalized:
-        return "china"
-    return "international"
-
-
-def _stepfun_base_url_for_region(region: str) -> str:
-    from hermes_cli.auth import (
-        STEPFUN_STEP_PLAN_CN_BASE_URL,
-        STEPFUN_STEP_PLAN_INTL_BASE_URL,
-    )
-
-    return (
-        STEPFUN_STEP_PLAN_CN_BASE_URL
-        if region == "china"
-        else STEPFUN_STEP_PLAN_INTL_BASE_URL
-    )
-
-
-def _model_flow_stepfun(config, current_model=""):
-    """StepFun Step Plan flow with region-specific endpoints."""
-    from hermes_cli.auth import (
-        PROVIDER_REGISTRY,
-        _prompt_model_selection,
-        _save_model_choice,
-        deactivate_provider,
-    )
-    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
-    from hermes_cli.models import fetch_api_models
-
-    provider_id = "stepfun"
-    pconfig = PROVIDER_REGISTRY[provider_id]
-    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
-    base_url_env = pconfig.base_url_env_var or ""
-
-    existing_key = ""
-    for ev in pconfig.api_key_env_vars:
-        existing_key = get_env_value(ev) or os.getenv(ev, "")
-        if existing_key:
-            break
-
-    if not existing_key:
-        print(f"No {pconfig.name} API key configured.")
-        if key_env:
-            try:
-                import getpass
-                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
-            except (KeyboardInterrupt, EOFError):
-                print()
-                return
-            if not new_key:
-                print("Cancelled.")
-                return
-            save_env_value(key_env, new_key)
-            existing_key = new_key
-            print("API key saved.")
-            print()
-    else:
-        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
-        print()
-
-    current_base = ""
-    if base_url_env:
-        current_base = get_env_value(base_url_env) or os.getenv(base_url_env, "")
-    if not current_base:
-        model_cfg = config.get("model")
-        if isinstance(model_cfg, dict):
-            current_base = str(model_cfg.get("base_url") or "").strip()
-    current_region = _infer_stepfun_region(current_base or pconfig.inference_base_url)
-
-    region_choices = [
-        ("international", f"International ({_stepfun_base_url_for_region('international')})"),
-        ("china", f"China ({_stepfun_base_url_for_region('china')})"),
-    ]
-    ordered_regions = []
-    for region_key, label in region_choices:
-        if region_key == current_region:
-            ordered_regions.insert(0, (region_key, f"{label}  ← currently active"))
-        else:
-            ordered_regions.append((region_key, label))
-    ordered_regions.append(("cancel", "Cancel"))
-
-    region_idx = _prompt_provider_choice([label for _, label in ordered_regions])
-    if region_idx is None or ordered_regions[region_idx][0] == "cancel":
-        print("No change.")
-        return
-
-    selected_region = ordered_regions[region_idx][0]
-    effective_base = _stepfun_base_url_for_region(selected_region)
-    if base_url_env:
-        save_env_value(base_url_env, effective_base)
-
-    live_models = fetch_api_models(existing_key, effective_base)
-    if live_models:
-        model_list = live_models
-        print(f"  Found {len(model_list)} model(s) from {pconfig.name} API")
-    else:
-        model_list = _PROVIDER_MODELS.get(provider_id, [])
-        if model_list:
-            print(
-                f"  Could not auto-detect models from {pconfig.name} API — "
-                "showing Step Plan fallback catalog."
-            )
-
-    if model_list:
-        selected = _prompt_model_selection(model_list, current_model=current_model)
-    else:
-        try:
-            selected = input("Model name: ").strip()
-        except (KeyboardInterrupt, EOFError):
-            selected = None
-
-    if selected:
-        _save_model_choice(selected)
-
-        cfg = load_config()
-        model = cfg.get("model")
-        if not isinstance(model, dict):
-            model = {"default": model} if model else {}
-            cfg["model"] = model
-        model["provider"] = provider_id
-        model["base_url"] = effective_base
-        model.pop("api_mode", None)
-        save_config(cfg)
-        deactivate_provider()
-
-        config["model"] = dict(model)
-        print(f"Default model set to: {selected} (via {pconfig.name})")
-    else:
-        print("No change.")
-
-
 def _model_flow_bedrock_api_key(config, region, current_model=""):
    """Bedrock API Key mode — uses the OpenAI-compatible bedrock-mantle endpoint.

@@ -4314,12 +4080,6 @@ def cmd_webhook(args):
    webhook_command(args)


-def cmd_hooks(args):
-    """Shell-hook inspection and management."""
-    from hermes_cli.hooks import hooks_command
-    hooks_command(args)
-
-
 def cmd_doctor(args):
    """Check configuration and dependencies."""
    from hermes_cli.doctor import run_doctor
@@ -4429,7 +4189,9 @@ def _clear_bytecode_cache(root: Path) -> int:
        ]
        if os.path.basename(dirpath) == "__pycache__":
            try:
-                shutil.rmtree(dirpath)
+                import shutil as _shutil
+
+                _shutil.rmtree(dirpath)
                removed += 1
            except OSError:
                pass
@@ -4468,6 +4230,8 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0)
    tmp.replace(prompt_path)

    # Poll for response
+    import time as _time
+
    deadline = _time.monotonic() + timeout
    while _time.monotonic() < deadline:
        if response_path.exists():
@@ -4499,6 +4263,7 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool:
    """
    if not (web_dir / "package.json").exists():
        return True
+    import shutil

    npm = shutil.which("npm")
    if not npm:
@@ -4535,6 +4300,7 @@ def _update_via_zip(args):
    Used on Windows when git file I/O is broken (antivirus, NTFS filter
    drivers causing 'Invalid argument' errors on file creation).
    """
+    import shutil
    import tempfile
    import zipfile
    from urllib.request import urlretrieve
@@ -4611,6 +4377,7 @@ def _update_via_zip(args):
    # breaks on this machine, keep base deps and reinstall the remaining extras
    # individually so update does not silently strip working capabilities.
    print("→ Updating Python dependencies...")
+    import subprocess

    uv_bin = shutil.which("uv")
    if uv_bin:
@@ -5361,11 +5128,9 @@ def _install_hangup_protection(gateway_mode: bool = False):
    # (2) Mirror output to update.log and wrap stdio for broken-pipe
    # tolerance.  Any failure here is non-fatal; we just skip the wrap.
    try:
-        # Late-bound import so tests can monkeypatch
-        # hermes_cli.config.get_hermes_home to simulate setup failure.
-        from hermes_cli.config import get_hermes_home as _get_hermes_home
+        from hermes_cli.config import get_hermes_home

-        logs_dir = _get_hermes_home() / "logs"
+        logs_dir = get_hermes_home() / "logs"
        logs_dir.mkdir(parents=True, exist_ok=True)
        log_path = logs_dir / "update.log"
        log_file = open(log_path, "a", buffering=1, encoding="utf-8")
@@ -5940,6 +5705,8 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                    # Verify the service actually survived the
                                    # restart.  systemctl restart returns 0 even
                                    # if the new process crashes immediately.
+                                    import time as _time
+
                                    _time.sleep(3)
                                    verify = subprocess.run(
                                        scope_cmd + ["is-active", svc_name],
@@ -6592,17 +6359,6 @@ For more help on a command:
        default=False,
        help="Run in an isolated git worktree (for parallel agents)",
    )
-    parser.add_argument(
-        "--accept-hooks",
-        action="store_true",
-        default=False,
-        help=(
-            "Auto-approve any unseen shell hooks declared in config.yaml "
-            "without a TTY prompt.  Equivalent to HERMES_ACCEPT_HOOKS=1 or "
-            "hooks_auto_accept: true in config.yaml.  Use on CI / headless "
-            "runs that can't prompt."
-        ),
-    )
    parser.add_argument(
        "--skills",
        "-s",
@@ -6622,18 +6378,6 @@ For more help on a command:
        default=False,
        help="Include the session ID in the agent's system prompt",
    )
-    parser.add_argument(
-        "--ignore-user-config",
-        action="store_true",
-        default=False,
-        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
-    )
-    parser.add_argument(
-        "--ignore-rules",
-        action="store_true",
-        default=False,
-        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
-    )
    parser.add_argument(
        "--tui",
        action="store_true",
@@ -6694,7 +6438,6 @@ For more help on a command:
            "zai",
            "kimi-coding",
            "kimi-coding-cn",
-            "stepfun",
            "minimax",
            "minimax-cn",
            "kilocode",
@@ -6738,16 +6481,6 @@ For more help on a command:
        default=argparse.SUPPRESS,
        help="Run in an isolated git worktree (for parallel agents on the same repo)",
    )
-    chat_parser.add_argument(
-        "--accept-hooks",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help=(
-            "Auto-approve any unseen shell hooks declared in config.yaml "
-            "without a TTY prompt (see also HERMES_ACCEPT_HOOKS env var and "
-            "hooks_auto_accept: in config.yaml)."
-        ),
-    )
    chat_parser.add_argument(
        "--checkpoints",
        action="store_true",
@@ -6773,18 +6506,6 @@ For more help on a command:
        default=argparse.SUPPRESS,
        help="Include the session ID in the agent's system prompt",
    )
-    chat_parser.add_argument(
-        "--ignore-user-config",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
-    )
-    chat_parser.add_argument(
-        "--ignore-rules",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
-    )
    chat_parser.add_argument(
        "--source",
        default=None,
@@ -6879,8 +6600,6 @@ For more help on a command:
        action="store_true",
        help="Replace any existing gateway instance (useful for systemd)",
    )
-    _add_accept_hooks_flag(gateway_run)
-    _add_accept_hooks_flag(gateway_parser)

    # gateway start
    gateway_start = gateway_subparsers.add_parser(
@@ -6928,12 +6647,6 @@ For more help on a command:
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
    gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
-    gateway_status.add_argument(
-        "-l",
-        "--full",
-        action="store_true",
-        help="Show full, untruncated service/log output where supported",
-    )
    gateway_status.add_argument(
        "--system",
        action="store_true",
@@ -7251,7 +6964,6 @@ For more help on a command:
        "run", help="Run a job on the next scheduler tick"
    )
    cron_run.add_argument("job_id", help="Job ID to trigger")
-    _add_accept_hooks_flag(cron_run)

    cron_remove = cron_subparsers.add_parser(
        "remove", aliases=["rm", "delete"], help="Remove a scheduled job"
@@ -7262,9 +6974,8 @@ For more help on a command:
    cron_subparsers.add_parser("status", help="Check if cron scheduler is running")

    # cron tick (mostly for debugging)
-    cron_tick = cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
-    _add_accept_hooks_flag(cron_tick)
-    _add_accept_hooks_flag(cron_parser)
+    cron_subparsers.add_parser("tick", help="Run due jobs once and exit")
+
    cron_parser.set_defaults(func=cmd_cron)

    # =========================================================================
@@ -7331,67 +7042,6 @@ For more help on a command:

    webhook_parser.set_defaults(func=cmd_webhook)

-    # =========================================================================
-    # hooks command — shell-hook inspection and management
-    # =========================================================================
-    hooks_parser = subparsers.add_parser(
-        "hooks",
-        help="Inspect and manage shell-script hooks",
-        description=(
-            "Inspect shell-script hooks declared in ~/.hermes/config.yaml, "
-            "test them against synthetic payloads, and manage the first-use "
-            "consent allowlist at ~/.hermes/shell-hooks-allowlist.json."
-        ),
-    )
-    hooks_subparsers = hooks_parser.add_subparsers(dest="hooks_action")
-
-    hooks_subparsers.add_parser(
-        "list", aliases=["ls"],
-        help="List configured hooks with matcher, timeout, and consent status",
-    )
-
-    _hk_test = hooks_subparsers.add_parser(
-        "test",
-        help="Fire every hook matching <event> against a synthetic payload",
-    )
-    _hk_test.add_argument(
-        "event",
-        help="Hook event name (e.g. pre_tool_call, pre_llm_call, subagent_stop)",
-    )
-    _hk_test.add_argument(
-        "--for-tool", dest="for_tool", default=None,
-        help=(
-            "Only fire hooks whose matcher matches this tool name "
-            "(used for pre_tool_call / post_tool_call)"
-        ),
-    )
-    _hk_test.add_argument(
-        "--payload-file", dest="payload_file", default=None,
-        help=(
-            "Path to a JSON file whose contents are merged into the "
-            "synthetic payload before execution"
-        ),
-    )
-
-    _hk_revoke = hooks_subparsers.add_parser(
-        "revoke", aliases=["remove", "rm"],
-        help="Remove a command's allowlist entries (takes effect on next restart)",
-    )
-    _hk_revoke.add_argument(
-        "command",
-        help="The exact command string to revoke (as declared in config.yaml)",
-    )
-
-    hooks_subparsers.add_parser(
-        "doctor",
-        help=(
-            "Check each configured hook: exec bit, allowlist, mtime drift, "
-            "JSON validity, and synthetic run timing"
-        ),
-    )
-
-    hooks_parser.set_defaults(func=cmd_hooks)
-
    # =========================================================================
    # doctor command
    # =========================================================================
@@ -7799,17 +7449,6 @@ Examples:
        action="store_true",
        help="Remove existing plugin and reinstall",
    )
-    _install_enable_group = plugins_install.add_mutually_exclusive_group()
-    _install_enable_group.add_argument(
-        "--enable",
-        action="store_true",
-        help="Auto-enable the plugin after install (skip confirmation prompt)",
-    )
-    _install_enable_group.add_argument(
-        "--no-enable",
-        action="store_true",
-        help="Install disabled (skip confirmation prompt); enable later with `hermes plugins enable <name>`",
-    )

    plugins_update = plugins_subparsers.add_parser(
        "update", help="Pull latest changes for an installed plugin"
@@ -7857,7 +7496,9 @@ Examples:
            )
            cmd_info["setup_fn"](plugin_parser)
    except Exception as _exc:
-        logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)
+        import logging as _log
+
+        _log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc)

    # =========================================================================
    # memory command
@@ -8063,7 +7704,6 @@ Examples:
        action="store_true",
        help="Enable verbose logging on stderr",
    )
-    _add_accept_hooks_flag(mcp_serve_p)

    mcp_add_p = mcp_sub.add_parser(
        "add", help="Add an MCP server (discovery-first install)"
@@ -8102,8 +7742,6 @@ Examples:
    )
    mcp_login_p.add_argument("name", help="Server name to re-authenticate")

-    _add_accept_hooks_flag(mcp_parser)
-
    def cmd_mcp(args):
        from hermes_cli.mcp_config import mcp_command

@@ -8242,6 +7880,7 @@ Examples:
                    return
                line = _json.dumps(data, ensure_ascii=False) + "\n"
                if args.output == "-":
+                    import sys

                    sys.stdout.write(line)
                else:
@@ -8251,6 +7890,7 @@ Examples:
            else:
                sessions = db.export_all(source=args.source)
                if args.output == "-":
+                    import sys

                    for s in sessions:
                        sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n")
@@ -8321,6 +7961,8 @@ Examples:

            # Launch hermes --resume <id> by replacing the current process
            print(f"Resuming session: {selected_id}")
+            import shutil
+
            hermes_bin = shutil.which("hermes")
            if hermes_bin:
                os.execvp(hermes_bin, ["hermes", "--resume", selected_id])
@@ -8511,7 +8153,6 @@ Examples:
        help="Run Hermes Agent as an ACP (Agent Client Protocol) server",
        description="Start Hermes Agent in ACP mode for editor integration (VS Code, Zed, JetBrains)",
    )
-    _add_accept_hooks_flag(acp_parser)

    def cmd_acp(args):
        """Launch Hermes Agent as an ACP server."""
@@ -8785,42 +8426,6 @@ Examples:
        cmd_version(args)
        return

-    # Discover Python plugins and register shell hooks once, before any
-    # command that can fire lifecycle hooks.  Both are idempotent; gated
-    # so introspection/management commands (hermes hooks list, cron
-    # list, gateway status, mcp add, ...) don't pay discovery cost or
-    # trigger consent prompts for hooks the user is still inspecting.
-    # Groups with mixed admin/CRUD vs. agent-running entries narrow via
-    # the nested subcommand (dest varies by parser).
-    _AGENT_COMMANDS = {None, "chat", "acp", "rl"}
-    _AGENT_SUBCOMMANDS = {
-        "cron":    ("cron_command",    {"run", "tick"}),
-        "gateway": ("gateway_command", {"run"}),
-        "mcp":     ("mcp_action",      {"serve"}),
-    }
-    _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
-    if (
-        args.command in _AGENT_COMMANDS
-        or (_sub_attr and getattr(args, _sub_attr, None) in _sub_set)
-    ):
-        _accept_hooks = bool(getattr(args, "accept_hooks", False))
-        try:
-            from hermes_cli.plugins import discover_plugins
-            discover_plugins()
-        except Exception:
-            logger.debug(
-                "plugin discovery failed at CLI startup", exc_info=True,
-            )
-        try:
-            from hermes_cli.config import load_config
-            from agent.shell_hooks import register_from_config
-            register_from_config(load_config(), accept_hooks=_accept_hooks)
-        except Exception:
-            logger.debug(
-                "shell-hook registration failed at CLI startup",
-                exc_info=True,
-            )
-
    # Handle top-level --resume / --continue as shortcut to chat
    if (args.resume or args.continue_last) and args.command is None:
        args.command = "chat"
@@ -143,7 +143,7 @@ MODEL_ALIASES: dict[str, ModelIdentity] = {
    # Z.AI / GLM
    "glm":       ModelIdentity("z-ai", "glm"),

-    # Step Plan (StepFun)
+    # StepFun
    "step":      ModelIdentity("stepfun", "step"),

    # Xiaomi
@@ -678,7 +678,6 @@ def switch_model(
        _da = DIRECT_ALIASES.get(resolved_alias)
        if _da is not None and _da.base_url:
            base_url = _da.base_url
-            api_mode = ""  # clear so determine_api_mode re-detects from URL
            if not api_key:
                api_key = "no-key-required"

@@ -782,7 +781,6 @@ def switch_model(

 def list_authenticated_providers(
    current_provider: str = "",
-    current_base_url: str = "",
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
@@ -811,10 +809,7 @@ def list_authenticated_providers(
        get_provider_info as _mdev_pinfo,
    )
    from hermes_cli.auth import PROVIDER_REGISTRY
-    from hermes_cli.models import (
-        OPENROUTER_MODELS, _PROVIDER_MODELS,
-        _MODELS_DEV_PREFERRED, _merge_with_models_dev,
-    )
+    from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS

    results: List[dict] = []
    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
@@ -848,10 +843,6 @@ def list_authenticated_providers(
        # source of truth.  models.dev can have wrong mappings (e.g.
        # minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
        pconfig = PROVIDER_REGISTRY.get(hermes_id)
-        # Skip non-API-key auth providers here — they are handled in
-        # section 2 (HERMES_OVERLAYS) with proper auth store checking.
-        if pconfig and pconfig.auth_type != "api_key":
-            continue
        if pconfig and pconfig.api_key_env_vars:
            env_vars = list(pconfig.api_key_env_vars)
        else:
@@ -864,13 +855,8 @@ def list_authenticated_providers(
        if not has_creds:
            continue

-        # Use curated list, falling back to models.dev if no curated list.
-        # For preferred providers, merge models.dev entries into the curated
-        # catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
-        # show up in the picker without requiring a Hermes release.
+        # Use curated list, falling back to models.dev if no curated list
        model_ids = curated.get(hermes_id, [])
-        if hermes_id in _MODELS_DEV_PREFERRED:
-            model_ids = _merge_with_models_dev(hermes_id, model_ids)
        total = len(model_ids)
        top = model_ids[:max_models]

@@ -974,9 +960,6 @@ def list_authenticated_providers(

        # Use curated list — look up by Hermes slug, fall back to overlay key
        model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
-        # Merge with models.dev for preferred providers (same rationale as above).
-        if hermes_slug in _MODELS_DEV_PREFERRED:
-            model_ids = _merge_with_models_dev(hermes_slug, model_ids)
        total = len(model_ids)
        top = model_ids[:max_models]

@@ -1112,7 +1095,6 @@ def list_authenticated_providers(
                "api_url": api_url,
            })
            seen_slugs.add(ep_name.lower())
-            seen_slugs.add(custom_provider_slug(display_name).lower())
            _pair = (
                str(display_name).strip().lower(),
                str(api_url).strip().rstrip("/").lower(),
@@ -1122,113 +1104,66 @@ def list_authenticated_providers(

    # --- 4. Saved custom providers from config ---
    # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
-    # are grouped into a single picker row, so e.g. four Ollama entries
-    # pointing at ``http://localhost:11434/v1`` with per-model display names
-    # ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
-    # "Ollama" row with four models inside instead of four near-duplicates
-    # that differ only by suffix. Entries with distinct endpoints still
-    # produce separate rows.
-    #
-    # When the grouped endpoint matches ``current_base_url`` the group's
-    # slug becomes ``current_provider`` so that selecting a model from the
-    # picker flows back through the runtime provider that already holds
-    # valid credentials — no re-resolution needed.
+    # provider. Entries sharing the same provider name are grouped into a
+    # single picker row so that e.g. four Ollama Cloud entries
+    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
+    # "Ollama Cloud" row with four models inside instead of four
+    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
+    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
    if custom_providers and isinstance(custom_providers, list):
        from collections import OrderedDict

-        # Key by (base_url, api_key) instead of slug: names frequently
-        # differ per model ("Ollama — X") while the endpoint stays the
-        # same. Slug-based grouping left them as separate rows.
-        groups: "OrderedDict[tuple, dict]" = OrderedDict()
+        groups: "OrderedDict[str, dict]" = OrderedDict()
        for entry in custom_providers:
            if not isinstance(entry, dict):
                continue

-            raw_name = (entry.get("name") or "").strip()
+            display_name = (entry.get("name") or "").strip()
            api_url = (
                entry.get("base_url", "")
                or entry.get("url", "")
                or entry.get("api", "")
                or ""
-            ).strip().rstrip("/")
-            if not raw_name or not api_url:
+            ).strip()
+            if not display_name or not api_url:
                continue
-            api_key = (entry.get("api_key") or "").strip()

-            group_key = (api_url, api_key)
-            if group_key not in groups:
-                # Strip per-model suffix so "Ollama — GLM 5.1" becomes
-                # "Ollama" for the grouped row. Em dash is the convention
-                # Hermes's own writer uses; a hyphen variant is accepted
-                # for hand-edited configs.
-                display_name = raw_name
-                for sep in ("—", " - "):
-                    if sep in display_name:
-                        display_name = display_name.split(sep)[0].strip()
-                        break
-                if not display_name:
-                    display_name = raw_name
-                # If this endpoint matches the currently active one, use
-                # ``current_provider`` as the slug so picker-driven switches
-                # route through the live credential pipeline.
-                if (
-                    current_base_url
-                    and api_url == current_base_url.strip().rstrip("/")
-                ):
-                    slug = current_provider or custom_provider_slug(display_name)
-                else:
-                    slug = custom_provider_slug(display_name)
-                groups[group_key] = {
-                    "slug": slug,
+            slug = custom_provider_slug(display_name)
+            if slug not in groups:
+                groups[slug] = {
                    "name": display_name,
                    "api_url": api_url,
                    "models": [],
                }
-
            # The singular ``model:`` field only holds the currently
            # active model. Hermes's own writer (main.py::_save_custom_provider)
            # stores every configured model as a dict under ``models:``;
            # downstream readers (agent/models_dev.py, gateway/run.py,
            # run_agent.py, hermes_cli/config.py) already consume that dict.
+            # The /model picker previously ignored it, so multi-model
+            # custom providers appeared to have only the active model.
            default_model = (entry.get("model") or "").strip()
-            if default_model and default_model not in groups[group_key]["models"]:
-                groups[group_key]["models"].append(default_model)
+            if default_model and default_model not in groups[slug]["models"]:
+                groups[slug]["models"].append(default_model)

            cfg_models = entry.get("models", {})
            if isinstance(cfg_models, dict):
                for m in cfg_models:
-                    if m and m not in groups[group_key]["models"]:
-                        groups[group_key]["models"].append(m)
+                    if m and m not in groups[slug]["models"]:
+                        groups[slug]["models"].append(m)
            elif isinstance(cfg_models, list):
                for m in cfg_models:
-                    if m and m not in groups[group_key]["models"]:
-                        groups[group_key]["models"].append(m)
+                    if m and m not in groups[slug]["models"]:
+                        groups[slug]["models"].append(m)

-        _section4_emitted_slugs: set = set()
-        for grp in groups.values():
-            slug = grp["slug"]
-            # If the slug is already claimed by a built-in / overlay /
-            # user-provider row (sections 1-3), skip this custom group
-            # to avoid shadowing a real provider.
-            if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
+        for slug, grp in groups.items():
+            if slug.lower() in seen_slugs:
                continue
-            # If a prior section-4 group already used this slug (two custom
-            # endpoints with the same cleaned name — e.g. two OpenAI-
-            # compatible gateways named identically with different keys),
-            # append a counter so both rows stay visible in the picker.
-            if slug.lower() in _section4_emitted_slugs:
-                base_slug = slug
-                n = 2
-                while f"{base_slug}-{n}".lower() in seen_slugs:
-                    n += 1
-                slug = f"{base_slug}-{n}"
-                grp["slug"] = slug
            # Skip if section 3 already emitted this endpoint under its
-            # ``providers:`` dict key — matches on (display_name, base_url).
-            # Prevents two picker rows labelled identically when callers
-            # pass both ``user_providers`` and a compatibility-merged
-            # ``custom_providers`` list.
+            # ``providers:`` dict key — matches on (display_name, base_url),
+            # the tuple section 4 groups by.  Prevents two picker rows
+            # labelled identically when callers pass both ``user_providers``
+            # and a compatibility-merged ``custom_providers`` list.
            _pair_key = (
                str(grp["name"]).strip().lower(),
                str(grp["api_url"]).strip().rstrip("/").lower(),
@@ -1246,7 +1181,6 @@ def list_authenticated_providers(
                "api_url": grp["api_url"],
            })
            seen_slugs.add(slug.lower())
-            _section4_emitted_slugs.add(slug.lower())

    # Sort: current provider first, then by model count descending
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
@@ -16,12 +16,6 @@ from difflib import get_close_matches
 from pathlib import Path
 from typing import Any, NamedTuple, Optional

-from hermes_cli import __version__ as _HERMES_VERSION
-
-# Identify ourselves so endpoints fronted by Cloudflare's Browser Integrity
-# Check (error 1010) don't reject the default ``Python-urllib/*`` signature.
-_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"
-
 COPILOT_BASE_URL = "https://api.githubcopilot.com"
 COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
 COPILOT_EDITOR_VERSION = "vscode/1.104.1"
@@ -32,7 +26,7 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # Fallback OpenRouter snapshot used when the live catalog is unavailable.
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
-    ("moonshotai/kimi-k2.6",            "recommended"),
+    ("moonshotai/kimi-k2.5",            "recommended"),
    ("anthropic/claude-opus-4.7",       ""),
    ("anthropic/claude-opus-4.6",       ""),
    ("anthropic/claude-sonnet-4.6",     ""),
@@ -42,8 +36,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openrouter/elephant-alpha",       "free"),
    ("openai/gpt-5.4",                  ""),
    ("openai/gpt-5.4-mini",             ""),
-    ("xiaomi/mimo-v2.5-pro",             ""),
-    ("xiaomi/mimo-v2.5",                 ""),
+    ("xiaomi/mimo-v2-pro",               ""),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -54,7 +47,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("stepfun/step-3.5-flash",          ""),
    ("minimax/minimax-m2.7",            ""),
    ("minimax/minimax-m2.5",            ""),
-    ("minimax/minimax-m2.5:free",       "free"),
    ("z-ai/glm-5.1",                    ""),
    ("z-ai/glm-5v-turbo",               ""),
    ("z-ai/glm-5-turbo",                ""),
@@ -70,31 +62,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
 _openrouter_catalog_cache: list[tuple[str, str]] | None = None


-# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
-# OSS / open-weight models prioritized first, then closed-source by family.
-# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen,
-# zai/ and xai/ without hyphens).
-VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
-    ("moonshotai/kimi-k2.6",                 "recommended"),
-    ("alibaba/qwen3.6-plus",                 ""),
-    ("zai/glm-5.1",                          ""),
-    ("minimax/minimax-m2.7",                 ""),
-    ("anthropic/claude-sonnet-4.6",          ""),
-    ("anthropic/claude-opus-4.7",            ""),
-    ("anthropic/claude-opus-4.6",            ""),
-    ("anthropic/claude-haiku-4.5",           ""),
-    ("openai/gpt-5.4",                       ""),
-    ("openai/gpt-5.4-mini",                  ""),
-    ("openai/gpt-5.3-codex",                 ""),
-    ("google/gemini-3.1-pro-preview",        ""),
-    ("google/gemini-3-flash",                ""),
-    ("google/gemini-3.1-flash-lite-preview", ""),
-    ("xai/grok-4.20-reasoning",              ""),
-]
-
-_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None
-
-
 def _codex_curated_models() -> list[str]:
    """Derive the openai-codex curated list from codex_models.py.

@@ -108,9 +75,8 @@ def _codex_curated_models() -> list[str]:

 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
-        "moonshotai/kimi-k2.6",
-        "xiaomi/mimo-v2.5-pro",
-        "xiaomi/mimo-v2.5",
+        "moonshotai/kimi-k2.5",
+        "xiaomi/mimo-v2-pro",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
@@ -128,15 +94,17 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "stepfun/step-3.5-flash",
        "minimax/minimax-m2.7",
        "minimax/minimax-m2.5",
-        "minimax/minimax-m2.5:free",
        "z-ai/glm-5.1",
        "z-ai/glm-5v-turbo",
        "z-ai/glm-5-turbo",
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
+        "nvidia/nemotron-3-super-120b-a12b:free",
+        "arcee-ai/trinity-large-preview:free",
        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
+        "openrouter/elephant-alpha",
    ],
    "openai-codex": _codex_curated_models(),
    "copilot-acp": [
@@ -191,13 +159,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        # (map to OpenRouter defaults — users get familiar picks on NIM)
        "qwen/qwen3.5-397b-a17b",
        "deepseek-ai/deepseek-v3.2",
-        "moonshotai/kimi-k2.6",
+        "moonshotai/kimi-k2.5",
        "minimaxai/minimax-m2.5",
        "z-ai/glm5",
        "openai/gpt-oss-120b",
    ],
    "kimi-coding": [
-        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-for-coding",
        "kimi-k2-thinking",
@@ -206,18 +173,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-0905-preview",
    ],
    "kimi-coding-cn": [
-        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
-    "stepfun": [
-        "step-3.5-flash",
-        "step-3.5-flash-2603",
-    ],
    "moonshot": [
-        "kimi-k2.6",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-turbo-preview",
@@ -264,6 +225,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gpt-5.4-pro",
        "gpt-5.4",
        "gpt-5.3-codex",
+        "gpt-5.3-codex-spark",
        "gpt-5.2",
        "gpt-5.2-codex",
        "gpt-5.1",
@@ -297,7 +259,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "big-pickle",
    ],
    "opencode-go": [
-        "kimi-k2.6",
        "kimi-k2.5",
        "glm-5.1",
        "glm-5",
@@ -305,8 +266,20 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "minimax-m2.7",
        "minimax-m2.5",
-        "qwen3.6-plus",
-        "qwen3.5-plus",
+    ],
+    "ai-gateway": [
+        "anthropic/claude-opus-4.6",
+        "anthropic/claude-sonnet-4.6",
+        "anthropic/claude-sonnet-4.5",
+        "anthropic/claude-haiku-4.5",
+        "openai/gpt-5",
+        "openai/gpt-4.1",
+        "openai/gpt-4.1-mini",
+        "google/gemini-3-pro-preview",
+        "google/gemini-3-flash",
+        "google/gemini-2.5-pro",
+        "google/gemini-2.5-flash",
+        "deepseek/deepseek-v3.2",
    ],
    "kilocode": [
        "anthropic/claude-opus-4.6",
@@ -340,7 +313,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "zai-org/GLM-5",
        "XiaomiMiMo/MiMo-V2-Flash",
        "moonshotai/Kimi-K2-Thinking",
-        "moonshotai/Kimi-K2.6",
    ],
    # AWS Bedrock — static fallback list used when dynamic discovery is
    # unavailable (no boto3, no credentials, or API error).  The agent
@@ -360,18 +332,18 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    ],
 }

-# Vercel AI Gateway: derive the bare-model-id catalog from the curated
-# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions)
-# and the static fallback catalog (bare ids) stay in sync from a single
-# source of truth.
-_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
-
 # ---------------------------------------------------------------------------
-# Nous Portal free-model helper
+# Nous Portal free-model filtering
 # ---------------------------------------------------------------------------
-# The Nous Portal models endpoint is the source of truth for which models
-# are currently offered (free or paid). We trust whatever it returns and
-# surface it to users as-is — no local allowlist filtering.
+# Models that are ALLOWED to appear when priced as free on Nous Portal.
+# Any other free model is hidden — prevents promotional/temporary free models
+# from cluttering the selection when users are paying subscribers.
+# Models in this list are ALSO filtered out if they are NOT free (i.e. they
+# should only appear in the menu when they are genuinely free).
+_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({
+    "xiaomi/mimo-v2-pro",
+    "xiaomi/mimo-v2-omni",
+})


 def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
@@ -385,6 +357,35 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool:
        return False


+def filter_nous_free_models(
+    model_ids: list[str],
+    pricing: dict[str, dict[str, str]],
+) -> list[str]:
+    """Filter the Nous Portal model list according to free-model policy.
+
+    Rules:
+      • Paid models that are NOT in the allowlist → keep (normal case).
+      • Free models that are NOT in the allowlist → drop.
+      • Allowlist models that ARE free → keep.
+      • Allowlist models that are NOT free → drop.
+    """
+    if not pricing:
+        return model_ids  # no pricing data — can't filter, show everything (returns the input list itself, not a copy)
+
+    result: list[str] = []
+    for mid in model_ids:
+        free = _is_model_free(mid, pricing)
+        if mid in _NOUS_ALLOWED_FREE_MODELS:
+            # Allowlist model: only show when it's actually free
+            if free:
+                result.append(mid)
+        else:
+            # Regular model: keep only when it's NOT free
+            if not free:
+                result.append(mid)
+    return result
+
+
 # ---------------------------------------------------------------------------
 # Nous Portal account tier detection
 # ---------------------------------------------------------------------------
@@ -448,7 +449,8 @@ def partition_nous_models_by_tier(
 ) -> tuple[list[str], list[str]]:
    """Split Nous models into (selectable, unavailable) based on user tier.

-    For paid-tier users: all models are selectable, none unavailable.
+    For paid-tier users: all models are selectable, none unavailable
+    (free-model filtering is handled separately by ``filter_nous_free_models``).

    For free-tier users: only free models are selectable; paid models
    are returned as unavailable (shown grayed out in the menu).
@@ -487,6 +489,8 @@ def check_nous_free_tier() -> bool:
    Returns False (assume paid) on any error — never blocks paying users.
    """
    global _free_tier_cache
+    import time
+
    now = time.monotonic()
    if _free_tier_cache is not None:
        cached_result, cached_at = _free_tier_cache
@@ -518,157 +522,6 @@ def check_nous_free_tier() -> bool:
        return False  # default to paid on error — don't block users


-# ---------------------------------------------------------------------------
-# Nous Portal recommended models
-#
-# The Portal publishes a curated list of suggested models (separated into
-# paid and free tiers) plus dedicated recommendations for compaction (text
-# summarisation / auxiliary) and vision tasks. We fetch it once per process
-# with a TTL cache so callers can ask "what's the best aux model right now?"
-# without hitting the network on every lookup.
-#
-# Shape of the response (fields we care about):
-#   {
-#     "paidRecommendedModels":     [ {modelName, ...}, ... ],
-#     "freeRecommendedModels":     [ {modelName, ...}, ... ],
-#     "paidRecommendedCompactionModel":  {modelName, ...} | null,
-#     "paidRecommendedVisionModel":      {modelName, ...} | null,
-#     "freeRecommendedCompactionModel":  {modelName, ...} | null,
-#     "freeRecommendedVisionModel":      {modelName, ...} | null,
-#   }
-# ---------------------------------------------------------------------------
-
-NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models"
-_NOUS_RECOMMENDED_CACHE_TTL: int = 600  # seconds (10 minutes)
-# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide.
-_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {}
-
-
-def fetch_nous_recommended_models(
-    portal_base_url: str = "",
-    timeout: float = 5.0,
-    *,
-    force_refresh: bool = False,
-) -> dict[str, Any]:
-    """Fetch the Nous Portal's curated recommended-models payload.
-
-    Hits ``<portal>/api/nous/recommended-models``. The endpoint is public —
-    no auth is required. Results are cached per portal URL for
-    ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to
-    bypass the cache.
-
-    Returns the parsed JSON dict on success, or ``{}`` on any failure
-    (network, parse, non-2xx). Callers must treat missing/null fields as
-    "no recommendation" and fall back to their own default.
-    """
-    base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/")
-    now = time.monotonic()
-    cached = _nous_recommended_cache.get(base)
-    if not force_refresh and cached is not None:
-        payload, cached_at = cached
-        if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL:
-            return payload
-
-    url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}"
-    try:
-        req = urllib.request.Request(
-            url,
-            headers={"Accept": "application/json"},
-        )
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            data = json.loads(resp.read().decode())
-        if not isinstance(data, dict):
-            data = {}
-    except Exception:
-        data = {}
-
-    _nous_recommended_cache[base] = (data, now)
-    return data
-
-
-def _resolve_nous_portal_url() -> str:
-    """Best-effort lookup of the Portal base URL the user is authed against."""
-    try:
-        from hermes_cli.auth import (
-            DEFAULT_NOUS_PORTAL_URL,
-            get_provider_auth_state,
-        )
-        state = get_provider_auth_state("nous") or {}
-        portal = str(state.get("portal_base_url") or "").strip()
-        if portal:
-            return portal.rstrip("/")
-        return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/")
-    except Exception:
-        return "https://portal.nousresearch.com"
-
-
-def _extract_model_name(entry: Any) -> Optional[str]:
-    """Pull the ``modelName`` field from a recommended-model entry, else None."""
-    if not isinstance(entry, dict):
-        return None
-    model_name = entry.get("modelName")
-    if isinstance(model_name, str) and model_name.strip():
-        return model_name.strip()
-    return None
-
-
-def get_nous_recommended_aux_model(
-    *,
-    vision: bool = False,
-    free_tier: Optional[bool] = None,
-    portal_base_url: str = "",
-    force_refresh: bool = False,
-) -> Optional[str]:
-    """Return the Portal's recommended model name for an auxiliary task.
-
-    Picks the best field from the Portal's recommended-models payload:
-
-    * ``vision=True``  → ``paidRecommendedVisionModel``  (paid tier) or
-                         ``freeRecommendedVisionModel``  (free tier)
-    * ``vision=False`` → ``paidRecommendedCompactionModel`` or
-                         ``freeRecommendedCompactionModel``
-
-    When ``free_tier`` is ``None`` (default) the user's tier is auto-detected
-    via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the
-    detection — useful for tests or when the caller already knows the tier.
-
-    For paid-tier users we prefer the paid recommendation but gracefully fall
-    back to the free recommendation if the Portal returned ``null`` for the
-    paid field (common during the staged rollout of new paid models).
-
-    Returns ``None`` when every candidate is missing, null, or the fetch
-    fails — callers should fall back to their own default (currently
-    ``google/gemini-3-flash-preview``).
-    """
-    base = portal_base_url or _resolve_nous_portal_url()
-    payload = fetch_nous_recommended_models(base, force_refresh=force_refresh)
-    if not payload:
-        return None
-
-    if free_tier is None:
-        try:
-            free_tier = check_nous_free_tier()
-        except Exception:
-            # On any detection error, assume paid — paid users see both fields
-            # anyway so this is a safe default that maximises model quality.
-            free_tier = False
-
-    if vision:
-        paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel"
-    else:
-        paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel"
-
-    # Preference order:
-    #   free tier  → free only
-    #   paid tier  → paid, then free (if paid field is null)
-    candidates = [free_key] if free_tier else [paid_key, free_key]
-    for key in candidates:
-        name = _extract_model_name(payload.get(key))
-        if name:
-            return name
-    return None
-
-
 # ---------------------------------------------------------------------------
 # Canonical provider list — single source of truth for provider identity.
 # Every code path that lists, displays, or iterates providers derives from
@@ -689,7 +542,6 @@ class ProviderEntry(NamedTuple):
 CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
-    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
@@ -705,7 +557,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("zai",            "Z.AI / GLM",               "Z.AI / GLM (Zhipu AI direct API)"),
    ProviderEntry("kimi-coding",    "Kimi / Kimi Coding Plan",  "Kimi Coding Plan (api.kimi.com) & Moonshot API"),
    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
-    ProviderEntry("stepfun",        "StepFun Step Plan",       "StepFun Step Plan (agent/coding models via Step Plan API)"),
    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
@@ -714,6 +565,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
+    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, pay-per-use)"),
    ProviderEntry("bedrock",        "AWS Bedrock",              "AWS Bedrock (Claude, Nova, Llama, DeepSeek — IAM or API key)"),
 ]

@@ -740,8 +592,6 @@ _PROVIDER_ALIASES = {
    "moonshot": "kimi-coding",
    "kimi-cn": "kimi-coding-cn",
    "moonshot-cn": "kimi-coding-cn",
-    "step": "stepfun",
-    "stepfun-coding-plan": "stepfun",
    "arcee-ai": "arcee",
    "arceeai": "arcee",
    "minimax-china": "minimax-cn",
@@ -811,31 +661,6 @@ def _openrouter_model_is_free(pricing: Any) -> bool:
        return False


-def _openrouter_model_supports_tools(item: Any) -> bool:
-    """Return True when the model's ``supported_parameters`` advertise tool calling.
-
-    hermes-agent is tool-calling-first — every provider path assumes the model
-    can invoke tools. Models that don't advertise ``tools`` in their
-    ``supported_parameters`` (e.g. image-only or completion-only models) cannot
-    be driven by the agent loop and would fail at the first tool call.
-
-    **Permissive when the field is missing.** Some OpenRouter-compatible gateways
-    (Nous Portal, private mirrors, older catalog snapshots) don't populate
-    ``supported_parameters`` at all. Treat that as "unknown capability → allow"
-    so the picker doesn't silently empty for those users. Only hide models
-    whose ``supported_parameters`` is an explicit list that omits ``tools``.
-
-    Ported from Kilo-Org/kilocode#9068.
-    """
-    if not isinstance(item, dict):
-        return True
-    params = item.get("supported_parameters")
-    if not isinstance(params, list):
-        # Field absent / malformed / None — be permissive.
-        return True
-    return "tools" in params
-
-
 def fetch_openrouter_models(
    timeout: float = 8.0,
    *,
@@ -878,11 +703,6 @@ def fetch_openrouter_models(
        live_item = live_by_id.get(preferred_id)
        if live_item is None:
            continue
-        # Hide models that don't advertise tool-calling support — hermes-agent
-        # requires it and surfacing them leads to immediate runtime failures
-        # when the user selects them. Ported from Kilo-Org/kilocode#9068.
-        if not _openrouter_model_supports_tools(live_item):
-            continue
        desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
        curated.append((preferred_id, desc))

@@ -900,93 +720,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
    return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]


-def _ai_gateway_model_is_free(pricing: Any) -> bool:
-    """Return True if an AI Gateway model has $0 input AND output pricing."""
-    if not isinstance(pricing, dict):
-        return False
-    try:
-        return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0
-    except (TypeError, ValueError):
-        return False
-
-
-def fetch_ai_gateway_models(
-    timeout: float = 8.0,
-    *,
-    force_refresh: bool = False,
-) -> list[tuple[str, str]]:
-    """Return the curated AI Gateway picker list, refreshed from the live catalog when possible."""
-    global _ai_gateway_catalog_cache
-
-    if _ai_gateway_catalog_cache is not None and not force_refresh:
-        return list(_ai_gateway_catalog_cache)
-
-    from hermes_constants import AI_GATEWAY_BASE_URL
-
-    fallback = list(VERCEL_AI_GATEWAY_MODELS)
-    preferred_ids = [mid for mid, _ in fallback]
-
-    try:
-        req = urllib.request.Request(
-            f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
-            headers={"Accept": "application/json"},
-        )
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            payload = json.loads(resp.read().decode())
-    except Exception:
-        return list(_ai_gateway_catalog_cache or fallback)
-
-    live_items = payload.get("data", [])
-    if not isinstance(live_items, list):
-        return list(_ai_gateway_catalog_cache or fallback)
-
-    live_by_id: dict[str, dict[str, Any]] = {}
-    for item in live_items:
-        if not isinstance(item, dict):
-            continue
-        mid = str(item.get("id") or "").strip()
-        if not mid:
-            continue
-        live_by_id[mid] = item
-
-    curated: list[tuple[str, str]] = []
-    for preferred_id in preferred_ids:
-        live_item = live_by_id.get(preferred_id)
-        if live_item is None:
-            continue
-        desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
-        curated.append((preferred_id, desc))
-
-    if not curated:
-        return list(_ai_gateway_catalog_cache or fallback)
-
-    # If the live catalog offers a free Moonshot model, auto-promote it to
-    # position #1 as "recommended" — dynamic discovery without a PR.
-    free_moonshot = next(
-        (
-            mid
-            for mid, item in live_by_id.items()
-            if mid.startswith("moonshotai/")
-            and _ai_gateway_model_is_free(item.get("pricing"))
-        ),
-        None,
-    )
-    if free_moonshot:
-        curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
-        curated.insert(0, (free_moonshot, "recommended"))
-    else:
-        first_id, _ = curated[0]
-        curated[0] = (first_id, "recommended")
-
-    _ai_gateway_catalog_cache = curated
-    return list(curated)
-
-
-def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
-    """Return just the AI Gateway model-id strings."""
-    return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)]
-
-


 # ---------------------------------------------------------------------------
@@ -1131,56 +864,6 @@ def fetch_models_with_pricing(
    return result


-def fetch_ai_gateway_pricing(
-    timeout: float = 8.0,
-    *,
-    force_refresh: bool = False,
-) -> dict[str, dict[str, str]]:
-    """Fetch Vercel AI Gateway /v1/models and return hermes-shaped pricing.
-
-    Vercel uses ``input`` / ``output`` field names; hermes's picker expects
-    ``prompt`` / ``completion``. This translates. Cache read/write field names
-    already match.
-    """
-    from hermes_constants import AI_GATEWAY_BASE_URL
-
-    cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
-    if not force_refresh and cache_key in _pricing_cache:
-        return _pricing_cache[cache_key]
-
-    try:
-        req = urllib.request.Request(
-            f"{cache_key}/models",
-            headers={"Accept": "application/json"},
-        )
-        with urllib.request.urlopen(req, timeout=timeout) as resp:
-            payload = json.loads(resp.read().decode())
-    except Exception:
-        _pricing_cache[cache_key] = {}
-        return {}
-
-    result: dict[str, dict[str, str]] = {}
-    for item in payload.get("data", []):
-        if not isinstance(item, dict):
-            continue
-        mid = item.get("id")
-        pricing = item.get("pricing")
-        if not (mid and isinstance(pricing, dict)):
-            continue
-        entry: dict[str, str] = {
-            "prompt": str(pricing.get("input", "")),
-            "completion": str(pricing.get("output", "")),
-        }
-        if pricing.get("input_cache_read"):
-            entry["input_cache_read"] = str(pricing["input_cache_read"])
-        if pricing.get("input_cache_write"):
-            entry["input_cache_write"] = str(pricing["input_cache_write"])
-        result[mid] = entry
-
-    _pricing_cache[cache_key] = result
-    return result
-
-
 def _resolve_openrouter_api_key() -> str:
    """Best-effort OpenRouter API key for pricing fetch."""
    return os.getenv("OPENROUTER_API_KEY", "").strip()
@@ -1199,7 +882,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:


 def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
-    """Return live pricing for providers that support it (openrouter, nous, ai-gateway)."""
+    """Return live pricing for providers that support it (openrouter, nous)."""
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
        return fetch_models_with_pricing(
@@ -1207,8 +890,6 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
            base_url="https://openrouter.ai/api",
            force_refresh=force_refresh,
        )
-    if normalized == "ai-gateway":
-        return fetch_ai_gateway_pricing(force_refresh=force_refresh)
    if normalized == "nous":
        api_key, base_url = _resolve_nous_pricing_credentials()
        if base_url:
@@ -1413,6 +1094,7 @@ def detect_provider_for_model(
            from hermes_cli.auth import PROVIDER_REGISTRY
            pconfig = PROVIDER_REGISTRY.get(direct_match)
            if pconfig:
+                import os
                for env_var in pconfig.api_key_env_vars:
                    if os.getenv(env_var, "").strip():
                        has_creds = True
@@ -1589,84 +1271,11 @@ def _resolve_copilot_catalog_api_key() -> str:
        return ""


-# Providers where models.dev is treated as authoritative: curated static
-# lists are kept only as an offline fallback and to capture custom additions
-# the registry doesn't publish yet. Adding a provider here causes its
-# curated list to be merged with fresh models.dev entries (fresh first, any
-# curated-only names appended) for both the CLI and the gateway /model picker.
-#
-# DELIBERATELY EXCLUDED:
-#   - "openrouter": curated list is already a hand-picked agentic subset of
-#     OpenRouter's 400+ catalog. Blindly merging would dump everything.
-#   - "nous": curated list and Portal /models endpoint are the source of
-#     truth for the subscription tier.
-# Also excluded: providers that already have dedicated live-endpoint
-# branches below (copilot, anthropic, ai-gateway, ollama-cloud, custom,
-# stepfun, openai-codex) — those paths handle freshness themselves.
-_MODELS_DEV_PREFERRED: frozenset[str] = frozenset({
-    "opencode-go",
-    "opencode-zen",
-    "deepseek",
-    "kilocode",
-    "fireworks",
-    "mistral",
-    "togetherai",
-    "cohere",
-    "perplexity",
-    "groq",
-    "nvidia",
-    "huggingface",
-    "zai",
-    "gemini",
-    "google",
-})
-
-
-def _merge_with_models_dev(provider: str, curated: list[str]) -> list[str]:
-    """Merge curated list with fresh models.dev entries for a preferred provider.
-
-    Returns models.dev entries first (in models.dev order), then any
-    curated-only entries appended. Preserves case for curated fallbacks
-    (e.g. ``MiniMax-M2.7``) while trusting models.dev for newer variants.
-
-    If models.dev is unreachable or returns nothing, the curated list is
-    returned unchanged — this is the offline/CI fallback path.
-    """
-    try:
-        from agent.models_dev import list_agentic_models
-        mdev = list_agentic_models(provider)
-    except Exception:
-        mdev = []
-
-    if not mdev:
-        return list(curated)
-
-    # Case-insensitive dedup while preserving order and curated casing.
-    seen_lower: set[str] = set()
-    merged: list[str] = []
-    for mid in mdev:
-        key = str(mid).lower()
-        if key in seen_lower:
-            continue
-        seen_lower.add(key)
-        merged.append(mid)
-    for mid in curated:
-        key = str(mid).lower()
-        if key in seen_lower:
-            continue
-        seen_lower.add(key)
-        merged.append(mid)
-    return merged
-
-
 def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
    """Return the best known model catalog for a provider.

    Tries live API endpoints for providers that support them (Codex, Nous),
-    falling back to static lists. For providers in ``_MODELS_DEV_PREFERRED``
-    (opencode-go/zen, xiaomi, deepseek, smaller inference providers, etc.),
-    models.dev entries are merged on top of curated so new models released
-    on the platform appear in ``/model`` without a Hermes release.
+    falling back to static lists.
    """
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
@@ -1695,19 +1304,6 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
                    return live
        except Exception:
            pass
-    if normalized == "stepfun":
-        try:
-            from hermes_cli.auth import resolve_api_key_provider_credentials
-
-            creds = resolve_api_key_provider_credentials("stepfun")
-            api_key = str(creds.get("api_key") or "").strip()
-            base_url = str(creds.get("base_url") or "").strip()
-            if api_key and base_url:
-                live = fetch_api_models(api_key, base_url)
-                if live:
-                    return live
-        except Exception:
-            pass
    if normalized == "anthropic":
        live = _fetch_anthropic_models()
        if live:
@@ -1732,10 +1328,7 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
-    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
-    if normalized in _MODELS_DEV_PREFERRED:
-        return _merge_with_models_dev(normalized, curated_static)
-    return curated_static
+    return list(_PROVIDER_MODELS.get(normalized, []))


 def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
@@ -2176,7 +1769,7 @@ def probe_api_models(
        candidates.append((alternate_base, True))

    tried: list[str] = []
-    headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT}
+    headers: dict[str, str] = {}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    if normalized.startswith(COPILOT_BASE_URL):
@@ -2638,70 +2231,13 @@ def validate_requested_model(
        except Exception:
            pass  # Fall through to generic warning

-    # Static-catalog fallback: when the /models probe was unreachable,
-    # validate against the curated list from provider_model_ids() — same
-    # pattern as the openai-codex and minimax branches above.  This fixes
-    # /model switches in the gateway for providers like opencode-go and
-    # opencode-zen whose /models endpoint returns 404 against the HTML
-    # marketing site.  Without this block, validate_requested_model would
-    # reject every model on such providers, switch_model() would return
-    # success=False, and the gateway would never write to
-    # _session_model_overrides.
    provider_label = _PROVIDER_LABELS.get(normalized, normalized)
-    try:
-        catalog_models = provider_model_ids(normalized)
-    except Exception:
-        catalog_models = []
-
-    if catalog_models:
-        catalog_lower = {m.lower(): m for m in catalog_models}
-        if requested_for_lookup.lower() in catalog_lower:
-            return {
-                "accepted": True,
-                "persist": True,
-                "recognized": True,
-                "message": None,
-            }
-        catalog_lower_list = list(catalog_lower.keys())
-        auto = get_close_matches(
-            requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9
-        )
-        if auto:
-            corrected = catalog_lower[auto[0]]
-            return {
-                "accepted": True,
-                "persist": True,
-                "recognized": True,
-                "corrected_model": corrected,
-                "message": f"Auto-corrected `{requested}` → `{corrected}`",
-            }
-        suggestions = get_close_matches(
-            requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5
-        )
-        suggestion_text = ""
-        if suggestions:
-            suggestion_text = "\n  Similar models: " + ", ".join(
-                f"`{catalog_lower[s]}`" for s in suggestions
-            )
-        return {
-            "accepted": True,
-            "persist": True,
-            "recognized": False,
-            "message": (
-                f"Note: `{requested}` was not found in the {provider_label} curated catalog "
-                f"and the /models endpoint was unreachable.{suggestion_text}"
-                f"\n  The model may still work if it exists on the provider."
-            ),
-        }
-
-    # No catalog available — accept with a warning, matching the comment's
-    # stated intent ("Accept and persist, but warn").
    return {
-        "accepted": True,
-        "persist": True,
+        "accepted": False,
+        "persist": False,
        "recognized": False,
        "message": (
-            f"Note: could not reach the {provider_label} API to validate `{requested}`. "
+            f"Could not reach the {provider_label} API to validate `{requested}`. "
            f"If the service isn't down, this model may not be valid."
        ),
    }
@@ -10,7 +10,6 @@ from hermes_cli.auth import get_nous_auth_status
 from hermes_cli.config import get_env_value, load_config
 from tools.managed_tool_gateway import is_managed_tool_gateway_ready
 from tools.tool_backend_helpers import (
-    fal_key_is_configured,
    has_direct_modal_credentials,
    managed_nous_tools_enabled,
    normalize_browser_cloud_provider,
@@ -272,7 +271,7 @@ def get_nous_subscription_features(
    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
    direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
-    direct_fal = fal_key_is_configured()
+    direct_fal = bool(get_env_value("FAL_KEY"))
    direct_openai_tts = bool(resolve_openai_audio_api_key())
    direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
    direct_camofox = bool(get_env_value("CAMOFOX_URL"))
@@ -521,7 +520,7 @@ def apply_nous_managed_defaults(
        browser_cfg["cloud_provider"] = "browser-use"
        changed.add("browser")

-    if "image_gen" in selected_toolsets and not fal_key_is_configured():
+    if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
        changed.add("image_gen")

    return changed
@@ -549,7 +548,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
            or get_env_value("TAVILY_API_KEY")
            or get_env_value("EXA_API_KEY")
        ),
-        "image_gen": fal_key_is_configured(),
+        "image_gen": bool(get_env_value("FAL_KEY")),
        "tts": bool(
            resolve_openai_audio_api_key()
            or get_env_value("ELEVENLABS_API_KEY")
@@ -587,6 +586,7 @@ def get_gateway_eligible_tools(
        return [], [], []

    if config is None:
+        from hermes_cli.config import load_config
        config = load_config() or {}

    # Quick provider check without the heavy get_nous_subscription_features call
@@ -44,7 +44,7 @@ def _cmd_list(store):
        for p in pending:
            print(
                f"  {p['platform']:<12} {p['code']:<10} {p['user_id']:<20} "
-                f"{(p.get('user_name') or ''):<20} {p['age_minutes']}m ago"
+                f"{p.get('user_name', ''):<20} {p['age_minutes']}m ago"
            )
    else:
        print("\n  No pending pairing requests.")
@@ -54,7 +54,7 @@ def _cmd_list(store):
        print(f"  {'Platform':<12} {'User ID':<20} {'Name':<20}")
        print(f"  {'--------':<12} {'-------':<20} {'----':<20}")
        for a in approved:
-            print(f"  {a['platform']:<12} {a['user_id']:<20} {(a.get('user_name') or ''):<20}")
+            print(f"  {a['platform']:<12} {a['user_id']:<20} {a.get('user_name', ''):<20}")
    else:
        print("\n  No approved users.")

@@ -69,7 +69,7 @@ def _cmd_approve(store, platform: str, code: str):
    result = store.approve_code(platform, code)
    if result:
        uid = result["user_id"]
-        name = result.get("user_name") or ""
+        name = result.get("user_name", "")
        display = f"{name} ({uid})" if name else uid
        print(f"\n  Approved! User {display} on {platform} can now use the bot~")
        print("  They'll be recognized automatically on their next message.\n")
@@ -2,20 +2,14 @@
 Hermes Plugin System
 ====================

-Discovers, loads, and manages plugins from four sources:
+Discovers, loads, and manages plugins from three sources:

-1. **Bundled plugins** – ``<repo>/plugins/<name>/`` (shipped with hermes-agent;
-   ``memory/`` and ``context_engine/`` subdirs are excluded — they have their
-   own discovery paths)
-2. **User plugins**   – ``~/.hermes/plugins/<name>/``
-3. **Project plugins** – ``./.hermes/plugins/<name>/`` (opt-in via
+1. **User plugins**   – ``~/.hermes/plugins/<name>/``
+2. **Project plugins** – ``./.hermes/plugins/<name>/`` (opt-in via
   ``HERMES_ENABLE_PROJECT_PLUGINS``)
-4. **Pip plugins**     – packages that expose the ``hermes_agent.plugins``
+3. **Pip plugins**     – packages that expose the ``hermes_agent.plugins``
   entry-point group.

-Later sources override earlier ones on name collision, so a user or project
-plugin with the same name as a bundled plugin replaces it.
-
 Each directory plugin must contain a ``plugin.yaml`` manifest **and** an
 ``__init__.py`` with a ``register(ctx)`` function.

@@ -70,7 +64,6 @@ VALID_HOOKS: Set[str] = {
    "on_session_end",
    "on_session_finalize",
    "on_session_reset",
-    "subagent_stop",
 }

 ENTRY_POINTS_GROUP = "hermes_agent.plugins"
@@ -84,12 +77,7 @@ def _env_enabled(name: str) -> bool:


 def _get_disabled_plugins() -> set:
-    """Read the disabled plugins list from config.yaml.
-
-    Kept for backward compat and explicit deny-list semantics. A plugin
-    name in this set will never load, even if it appears in
-    ``plugins.enabled``.
-    """
+    """Read the disabled plugins list from config.yaml."""
    try:
        from hermes_cli.config import load_config
        config = load_config()
@@ -99,43 +87,10 @@ def _get_disabled_plugins() -> set:
        return set()


-def _get_enabled_plugins() -> Optional[set]:
-    """Read the enabled-plugins allow-list from config.yaml.
-
-    Plugins are opt-in by default — only plugins whose name appears in
-    this set are loaded. Returns:
-
-    * ``None`` — the key is missing or malformed. Callers should treat
-      this as "nothing enabled yet" (the opt-in default); the first
-      ``migrate_config`` run populates the key with a grandfathered set
-      of currently-installed user plugins so existing setups don't
-      break on upgrade.
-    * ``set()`` — an empty list was explicitly set; nothing loads.
-    * ``set(...)`` — the concrete allow-list.
-    """
-    try:
-        from hermes_cli.config import load_config
-        config = load_config()
-        plugins_cfg = config.get("plugins")
-        if not isinstance(plugins_cfg, dict):
-            return None
-        if "enabled" not in plugins_cfg:
-            return None
-        enabled = plugins_cfg.get("enabled")
-        if not isinstance(enabled, list):
-            return None
-        return set(enabled)
-    except Exception:
-        return None
-
-
 # ---------------------------------------------------------------------------
 # Data classes
 # ---------------------------------------------------------------------------

-_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"}
-
-
@dataclass
 class PluginManifest:
    """Parsed representation of a plugin.yaml manifest."""
@@ -149,23 +104,6 @@ class PluginManifest:
    provides_hooks: List[str] = field(default_factory=list)
    source: str = ""        # "user", "project", or "entrypoint"
    path: Optional[str] = None
-    # Plugin kind — see plugins.py module docstring for semantics.
-    # ``standalone`` (default): hooks/tools of its own; opt-in via
-    #                           ``plugins.enabled``.
-    # ``backend``: pluggable backend for an existing core tool (e.g.
-    #              image_gen). Built-in (bundled) backends auto-load;
-    #              user-installed still gated by ``plugins.enabled``.
-    # ``exclusive``: category with exactly one active provider (memory).
-    #              Selection via ``<category>.provider`` config key; the
-    #              category's own discovery system handles loading and the
-    #              general scanner skips these.
-    kind: str = "standalone"
-    # Registry key — path-derived, used by ``plugins.enabled``/``disabled``
-    # lookups and by ``hermes plugins list``. For a flat plugin at
-    # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested
-    # category plugin at ``plugins/image_gen/openai/`` the key is
-    # ``image_gen/openai``. When empty, falls back to ``name``.
-    key: str = ""


@dataclass
@@ -283,7 +221,6 @@ class PluginContext:
        name: str,
        handler: Callable,
        description: str = "",
-        args_hint: str = "",
    ) -> None:
        """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.

@@ -294,13 +231,6 @@ class PluginContext:
        terminal commands), this registers in-session slash commands that users
        invoke during a conversation.

-        ``args_hint`` is an optional short string (e.g. ``"<file>"`` or
-        ``"dias:7 formato:json"``) used by gateway adapters to surface the
-        command with an argument field — for example Discord's native slash
-        command picker. Plugin commands without ``args_hint`` register as
-        parameterless in Discord and still accept trailing text when invoked
-        as free-form chat.
-
        Names conflicting with built-in commands are rejected with a warning.
        """
        clean = name.lower().strip().lstrip("/").replace(" ", "-")
@@ -328,7 +258,6 @@ class PluginContext:
            "handler": handler,
            "description": description or "Plugin command",
            "plugin": self.manifest.name,
-            "args_hint": (args_hint or "").strip(),
        }
        logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)

@@ -395,33 +324,6 @@ class PluginContext:
            self.manifest.name, engine.name,
        )

-    # -- image gen provider registration ------------------------------------
-
-    def register_image_gen_provider(self, provider) -> None:
-        """Register an image generation backend.
-
-        ``provider`` must be an instance of
-        :class:`agent.image_gen_provider.ImageGenProvider`. The
-        ``provider.name`` attribute is what ``image_gen.provider`` in
-        ``config.yaml`` matches against when routing ``image_generate``
-        tool calls.
-        """
-        from agent.image_gen_provider import ImageGenProvider
-        from agent.image_gen_registry import register_provider
-
-        if not isinstance(provider, ImageGenProvider):
-            logger.warning(
-                "Plugin '%s' tried to register an image_gen provider that does "
-                "not inherit from ImageGenProvider. Ignoring.",
-                self.manifest.name,
-            )
-            return
-        register_provider(provider)
-        logger.info(
-            "Plugin '%s' registered image_gen provider: %s",
-            self.manifest.name, provider.name,
-        )
-
    # -- hook registration --------------------------------------------------

    def register_hook(self, hook_name: str, callback: Callable) -> None:
@@ -512,124 +414,34 @@ class PluginManager:
    # Public
    # -----------------------------------------------------------------------

-    def discover_and_load(self, force: bool = False) -> None:
-        """Scan all plugin sources and load each plugin found.
-
-        When ``force`` is true, clear cached discovery state first so config
-        changes or newly-added bundled backends become visible in long-lived
-        sessions without requiring a full agent restart.
-        """
-        if self._discovered and not force:
+    def discover_and_load(self) -> None:
+        """Scan all plugin sources and load each plugin found."""
+        if self._discovered:
            return
-        if force:
-            self._plugins.clear()
-            self._hooks.clear()
-            self._plugin_tool_names.clear()
-            self._cli_commands.clear()
-            self._plugin_commands.clear()
-            self._plugin_skills.clear()
-            self._context_engine = None
        self._discovered = True

        manifests: List[PluginManifest] = []

-        # 1. Bundled plugins (<repo>/plugins/<name>/)
-        #
-        # Repo-shipped plugins live next to hermes_cli/. Two layouts are
-        # supported (see ``_scan_directory`` for details):
-        #
-        #   - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone)
-        #   - category: ``plugins/image_gen/openai/plugin.yaml`` (backend)
-        #
-        # ``memory/`` and ``context_engine/`` are skipped at the top level —
-        # they have their own discovery systems. Porting those to the
-        # category-namespace ``kind: exclusive`` model is a future PR.
-        repo_plugins = Path(__file__).resolve().parent.parent / "plugins"
-        manifests.extend(
-            self._scan_directory(
-                repo_plugins,
-                source="bundled",
-                skip_names={"memory", "context_engine"},
-            )
-        )
-
-        # 2. User plugins (~/.hermes/plugins/)
+        # 1. User plugins (~/.hermes/plugins/)
        user_dir = get_hermes_home() / "plugins"
        manifests.extend(self._scan_directory(user_dir, source="user"))

-        # 3. Project plugins (./.hermes/plugins/)
+        # 2. Project plugins (./.hermes/plugins/)
        if _env_enabled("HERMES_ENABLE_PROJECT_PLUGINS"):
            project_dir = Path.cwd() / ".hermes" / "plugins"
            manifests.extend(self._scan_directory(project_dir, source="project"))

-        # 4. Pip / entry-point plugins
+        # 3. Pip / entry-point plugins
        manifests.extend(self._scan_entry_points())

-        # Load each manifest (skip user-disabled plugins).
-        # Later sources override earlier ones on key collision — user
-        # plugins take precedence over bundled, project plugins take
-        # precedence over user. Dedup here so we only load the final
-        # winner. Keys are path-derived (``image_gen/openai``,
-        # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai``
-        # don't collide even when both manifests say ``name: openai``.
+        # Load each manifest (skip user-disabled plugins)
        disabled = _get_disabled_plugins()
-        enabled = _get_enabled_plugins()  # None = opt-in default (nothing enabled)
-        winners: Dict[str, PluginManifest] = {}
        for manifest in manifests:
-            winners[manifest.key or manifest.name] = manifest
-        for manifest in winners.values():
-            lookup_key = manifest.key or manifest.name
-
-            # Explicit disable always wins (matches on key or on legacy
-            # bare name for back-compat with existing user configs).
-            if lookup_key in disabled or manifest.name in disabled:
+            if manifest.name in disabled:
                loaded = LoadedPlugin(manifest=manifest, enabled=False)
                loaded.error = "disabled via config"
-                self._plugins[lookup_key] = loaded
-                logger.debug("Skipping disabled plugin '%s'", lookup_key)
-                continue
-
-            # Exclusive plugins (memory providers) have their own
-            # discovery/activation path. The general loader records the
-            # manifest for introspection but does not load the module.
-            if manifest.kind == "exclusive":
-                loaded = LoadedPlugin(manifest=manifest, enabled=False)
-                loaded.error = (
-                    "exclusive plugin — activate via <category>.provider config"
-                )
-                self._plugins[lookup_key] = loaded
-                logger.debug(
-                    "Skipping '%s' (exclusive, handled by category discovery)",
-                    lookup_key,
-                )
-                continue
-
-            # Built-in backends auto-load — they ship with hermes and must
-            # just work. Selection among them (e.g. which image_gen backend
-            # services calls) is driven by ``<category>.provider`` config,
-            # enforced by the tool wrapper.
-            if manifest.kind == "backend" and manifest.source == "bundled":
-                self._load_plugin(manifest)
-                continue
-
-            # Everything else (standalone, user-installed backends,
-            # entry-point plugins) is opt-in via plugins.enabled.
-            # Accept both the path-derived key and the legacy bare name
-            # so existing configs keep working.
-            is_enabled = (
-                enabled is not None
-                and (lookup_key in enabled or manifest.name in enabled)
-            )
-            if not is_enabled:
-                loaded = LoadedPlugin(manifest=manifest, enabled=False)
-                loaded.error = (
-                    "not enabled in config (run `hermes plugins enable {}` to activate)"
-                    .format(lookup_key)
-                )
-                self._plugins[lookup_key] = loaded
-                logger.debug(
-                    "Skipping '%s' (not in plugins.enabled)", lookup_key
-                )
+                self._plugins[manifest.name] = loaded
+                logger.debug("Skipping disabled plugin '%s'", manifest.name)
                continue
            self._load_plugin(manifest)

@@ -644,46 +456,8 @@ class PluginManager:
    # Directory scanning
    # -----------------------------------------------------------------------

-    def _scan_directory(
-        self,
-        path: Path,
-        source: str,
-        skip_names: Optional[Set[str]] = None,
-    ) -> List[PluginManifest]:
-        """Read ``plugin.yaml`` manifests from subdirectories of *path*.
-
-        Supports two layouts, mixed freely:
-
-        * **Flat** — ``<root>/<plugin-name>/plugin.yaml``. Key is
-          ``<plugin-name>`` (e.g. ``disk-cleanup``).
-        * **Category** — ``<root>/<category>/<plugin-name>/plugin.yaml``,
-          where the ``<category>`` directory itself has no ``plugin.yaml``.
-          Key is ``<category>/<plugin-name>`` (e.g. ``image_gen/openai``).
-          Depth is capped at two segments.
-
-        *skip_names* is an optional allow-list of names to ignore at the
-        top level (kept for back-compat; the current call sites no longer
-        pass it now that categories are first-class).
-        """
-        return self._scan_directory_level(
-            path, source, skip_names=skip_names, prefix="", depth=0
-        )
-
-    def _scan_directory_level(
-        self,
-        path: Path,
-        source: str,
-        *,
-        skip_names: Optional[Set[str]],
-        prefix: str,
-        depth: int,
-    ) -> List[PluginManifest]:
-        """Recursive implementation of :meth:`_scan_directory`.
-
-        ``prefix`` is the category path already accumulated ("" at root,
-        "image_gen" one level in). ``depth`` is the recursion depth; we
-        cap at 2 so ``<root>/a/b/c/`` is ignored.
-        """
+    def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]:
+        """Read ``plugin.yaml`` manifests from subdirectories of *path*."""
        manifests: List[PluginManifest] = []
        if not path.is_dir():
            return manifests
@@ -691,112 +465,35 @@ class PluginManager:
        for child in sorted(path.iterdir()):
            if not child.is_dir():
                continue
-            if depth == 0 and skip_names and child.name in skip_names:
-                continue
            manifest_file = child / "plugin.yaml"
            if not manifest_file.exists():
                manifest_file = child / "plugin.yml"
-
-            if manifest_file.exists():
-                manifest = self._parse_manifest(
-                    manifest_file, child, source, prefix
-                )
-                if manifest is not None:
-                    manifests.append(manifest)
+            if not manifest_file.exists():
+                logger.debug("Skipping %s (no plugin.yaml)", child)
                continue

-            # No manifest at this level. If we're still within the depth
-            # cap, treat this directory as a category namespace and recurse
-            # one level in looking for children with manifests.
-            if depth >= 1:
-                logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child)
-                continue
-
-            sub_prefix = f"{prefix}/{child.name}" if prefix else child.name
-            manifests.extend(
-                self._scan_directory_level(
-                    child,
-                    source,
-                    skip_names=None,
-                    prefix=sub_prefix,
-                    depth=depth + 1,
+            try:
+                if yaml is None:
+                    logger.warning("PyYAML not installed – cannot load %s", manifest_file)
+                    continue
+                data = yaml.safe_load(manifest_file.read_text()) or {}
+                manifest = PluginManifest(
+                    name=data.get("name", child.name),
+                    version=str(data.get("version", "")),
+                    description=data.get("description", ""),
+                    author=data.get("author", ""),
+                    requires_env=data.get("requires_env", []),
+                    provides_tools=data.get("provides_tools", []),
+                    provides_hooks=data.get("provides_hooks", []),
+                    source=source,
+                    path=str(child),
                )
-            )
+                manifests.append(manifest)
+            except Exception as exc:
+                logger.warning("Failed to parse %s: %s", manifest_file, exc)

        return manifests

-    def _parse_manifest(
-        self,
-        manifest_file: Path,
-        plugin_dir: Path,
-        source: str,
-        prefix: str,
-    ) -> Optional[PluginManifest]:
-        """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`.
-
-        Returns ``None`` on parse failure (logs a warning).
-        """
-        try:
-            if yaml is None:
-                logger.warning("PyYAML not installed – cannot load %s", manifest_file)
-                return None
-            data = yaml.safe_load(manifest_file.read_text()) or {}
-
-            name = data.get("name", plugin_dir.name)
-            key = f"{prefix}/{plugin_dir.name}" if prefix else name
-
-            raw_kind = data.get("kind", "standalone")
-            if not isinstance(raw_kind, str):
-                raw_kind = "standalone"
-            kind = raw_kind.strip().lower()
-            if kind not in _VALID_PLUGIN_KINDS:
-                logger.warning(
-                    "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'",
-                    key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)),
-                )
-                kind = "standalone"
-
-            # Auto-coerce user-installed memory providers to kind="exclusive"
-            # so they're routed to plugins/memory discovery instead of being
-            # loaded by the general PluginManager (which has no
-            # register_memory_provider on PluginContext). Mirrors the
-            # heuristic in plugins/memory/__init__.py:_is_memory_provider_dir.
-            # Bundled memory providers are already skipped via skip_names.
-            if kind == "standalone" and "kind" not in data:
-                init_file = plugin_dir / "__init__.py"
-                if init_file.exists():
-                    try:
-                        source_text = init_file.read_text(errors="replace")[:8192]
-                        if (
-                            "register_memory_provider" in source_text
-                            or "MemoryProvider" in source_text
-                        ):
-                            kind = "exclusive"
-                            logger.debug(
-                                "Plugin %s: detected memory provider, "
-                                "treating as kind='exclusive'",
-                                key,
-                            )
-                    except Exception:
-                        pass
-
-            return PluginManifest(
-                name=name,
-                version=str(data.get("version", "")),
-                description=data.get("description", ""),
-                author=data.get("author", ""),
-                requires_env=data.get("requires_env", []),
-                provides_tools=data.get("provides_tools", []),
-                provides_hooks=data.get("provides_hooks", []),
-                source=source,
-                path=str(plugin_dir),
-                kind=kind,
-                key=key,
-            )
-        except Exception as exc:
-            logger.warning("Failed to parse %s: %s", manifest_file, exc)
-            return None
-
    # -----------------------------------------------------------------------
    # Entry-point scanning
    # -----------------------------------------------------------------------
@@ -819,7 +516,6 @@ class PluginManager:
                    name=ep.name,
                    source="entrypoint",
                    path=ep.value,
-                    key=ep.name,
                )
                manifests.append(manifest)
        except Exception as exc:
@@ -836,7 +532,7 @@ class PluginManager:
        loaded = LoadedPlugin(manifest=manifest)

        try:
-            if manifest.source in ("user", "project", "bundled"):
+            if manifest.source in ("user", "project"):
                module = self._load_directory_module(manifest)
            else:
                module = self._load_entrypoint_module(manifest)
@@ -881,16 +577,10 @@ class PluginManager:
            loaded.error = str(exc)
            logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)

-        self._plugins[manifest.key or manifest.name] = loaded
+        self._plugins[manifest.name] = loaded

    def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType:
-        """Import a directory-based plugin as ``hermes_plugins.<slug>``.
-
-        The module slug is derived from ``manifest.key`` so category-namespaced
-        plugins (``image_gen/openai``) import as
-        ``hermes_plugins.image_gen__openai`` without colliding with any
-        future ``tts/openai``.
-        """
+        """Import a directory-based plugin as ``hermes_plugins.<name>``."""
        plugin_dir = Path(manifest.path)  # type: ignore[arg-type]
        init_file = plugin_dir / "__init__.py"
        if not init_file.exists():
@@ -903,9 +593,7 @@ class PluginManager:
            ns_pkg.__package__ = _NS_PARENT
            sys.modules[_NS_PARENT] = ns_pkg

-        key = manifest.key or manifest.name
-        slug = key.replace("/", "__").replace("-", "_")
-        module_name = f"{_NS_PARENT}.{slug}"
+        module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}"
        spec = importlib.util.spec_from_file_location(
            module_name,
            init_file,
@@ -986,12 +674,10 @@ class PluginManager:
    def list_plugins(self) -> List[Dict[str, Any]]:
        """Return a list of info dicts for all discovered plugins."""
        result: List[Dict[str, Any]] = []
-        for key, loaded in sorted(self._plugins.items()):
+        for name, loaded in sorted(self._plugins.items()):
            result.append(
                {
-                    "name": loaded.manifest.name,
-                    "key": loaded.manifest.key or loaded.manifest.name,
-                    "kind": loaded.manifest.kind,
+                    "name": name,
                    "version": loaded.manifest.version,
                    "description": loaded.manifest.description,
                    "source": loaded.manifest.source,
@@ -1042,13 +728,9 @@ def get_plugin_manager() -> PluginManager:
    return _plugin_manager


-def discover_plugins(force: bool = False) -> None:
-    """Discover and load all plugins.
-
-    Default behavior is idempotent. Pass ``force=True`` to rescan plugin
-    manifests and reload state in the current process.
-    """
-    get_plugin_manager().discover_and_load(force=force)
+def discover_plugins() -> None:
+    """Discover and load all plugins (idempotent)."""
+    get_plugin_manager().discover_and_load()


 def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
@@ -1099,34 +781,23 @@ def get_pre_tool_call_block_message(
    return None


-def _ensure_plugins_discovered(force: bool = False) -> PluginManager:
-    """Return the global manager after ensuring plugin discovery has run.
-
-    Pass ``force=True`` to rescan in the current process.
-    """
-    manager = get_plugin_manager()
-    manager.discover_and_load(force=force)
-    return manager
-
-
 def get_plugin_context_engine():
    """Return the plugin-registered context engine, or None."""
-    return _ensure_plugins_discovered()._context_engine
+    return get_plugin_manager()._context_engine


 def get_plugin_command_handler(name: str) -> Optional[Callable]:
    """Return the handler for a plugin-registered slash command, or ``None``."""
-    entry = _ensure_plugins_discovered()._plugin_commands.get(name)
+    entry = get_plugin_manager()._plugin_commands.get(name)
    return entry["handler"] if entry else None


 def get_plugin_commands() -> Dict[str, dict]:
    """Return the full plugin commands dict (name → {handler, description, plugin}).

-    Triggers idempotent plugin discovery so callers can use plugin commands
-    before any explicit discover_plugins() call.
+    Safe to call before discovery — returns an empty dict if no plugins loaded.
    """
-    return _ensure_plugins_discovered()._plugin_commands
+    return get_plugin_manager()._plugin_commands


 def get_plugin_toolsets() -> List[tuple]:
@@ -15,7 +15,6 @@ import shutil
 import subprocess
 import sys
 from pathlib import Path
-from typing import Optional

 from hermes_constants import get_hermes_home

@@ -282,16 +281,8 @@ def _require_installed_plugin(name: str, plugins_dir: Path, console) -> Path:
 # ---------------------------------------------------------------------------


-def cmd_install(
-    identifier: str,
-    force: bool = False,
-    enable: Optional[bool] = None,
-) -> None:
-    """Install a plugin from a Git URL or owner/repo shorthand.
-
-    After install, prompt "Enable now? [y/N]" unless *enable* is provided
-    (True = auto-enable without prompting, False = install disabled).
-    """
+def cmd_install(identifier: str, force: bool = False) -> None:
+    """Install a plugin from a Git URL or owner/repo shorthand."""
    import tempfile
    from rich.console import Console

@@ -400,40 +391,6 @@ def cmd_install(

    _display_after_install(target, identifier)

-    # Determine the canonical plugin name for enable-list bookkeeping.
-    installed_name = installed_manifest.get("name") or target.name
-
-    # Decide whether to enable: explicit flag > interactive prompt > default off
-    should_enable = enable
-    if should_enable is None:
-        # Interactive prompt unless stdin isn't a TTY (scripted install).
-        if sys.stdin.isatty() and sys.stdout.isatty():
-            try:
-                answer = input(
-                    f"  Enable '{installed_name}' now? [y/N]: "
-                ).strip().lower()
-                should_enable = answer in ("y", "yes")
-            except (EOFError, KeyboardInterrupt):
-                should_enable = False
-        else:
-            should_enable = False
-
-    if should_enable:
-        enabled = _get_enabled_set()
-        disabled = _get_disabled_set()
-        enabled.add(installed_name)
-        disabled.discard(installed_name)
-        _save_enabled_set(enabled)
-        _save_disabled_set(disabled)
-        console.print(
-            f"[green]✓[/green] Plugin [bold]{installed_name}[/bold] enabled."
-        )
-    else:
-        console.print(
-            f"[dim]Plugin installed but not enabled. "
-            f"Run `hermes plugins enable {installed_name}` to activate.[/dim]"
-        )
-
    console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]")
    console.print("[dim]  hermes gateway restart[/dim]")
    console.print()
@@ -511,11 +468,7 @@ def cmd_remove(name: str) -> None:


 def _get_disabled_set() -> set:
-    """Read the disabled plugins set from config.yaml.
-
-    An explicit deny-list. A plugin name here never loads, even if also
-    listed in ``plugins.enabled``.
-    """
+    """Read the disabled plugins set from config.yaml."""
    try:
        from hermes_cli.config import load_config
        config = load_config()
@@ -535,196 +488,103 @@ def _save_disabled_set(disabled: set) -> None:
    save_config(config)


-def _get_enabled_set() -> set:
-    """Read the enabled plugins allow-list from config.yaml.
-
-    Plugins are opt-in: only names here are loaded. Returns ``set()`` if
-    the key is missing (same behaviour as "nothing enabled yet").
-    """
-    try:
-        from hermes_cli.config import load_config
-        config = load_config()
-        plugins_cfg = config.get("plugins", {})
-        if not isinstance(plugins_cfg, dict):
-            return set()
-        enabled = plugins_cfg.get("enabled", [])
-        return set(enabled) if isinstance(enabled, list) else set()
-    except Exception:
-        return set()
-
-
-def _save_enabled_set(enabled: set) -> None:
-    """Write the enabled plugins list to config.yaml."""
-    from hermes_cli.config import load_config, save_config
-    config = load_config()
-    if "plugins" not in config:
-        config["plugins"] = {}
-    config["plugins"]["enabled"] = sorted(enabled)
-    save_config(config)
-
-
 def cmd_enable(name: str) -> None:
-    """Add a plugin to the enabled allow-list (and remove it from disabled)."""
+    """Enable a previously disabled plugin."""
    from rich.console import Console

    console = Console()
-    # Discover the plugin — check installed (user) AND bundled.
-    if not _plugin_exists(name):
-        console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
+    plugins_dir = _plugins_dir()
+
+    # Verify the plugin exists
+    target = plugins_dir / name
+    if not target.is_dir():
+        console.print(f"[red]Plugin '{name}' is not installed.[/red]")
        sys.exit(1)

-    enabled = _get_enabled_set()
    disabled = _get_disabled_set()
-
-    if name in enabled and name not in disabled:
+    if name not in disabled:
        console.print(f"[dim]Plugin '{name}' is already enabled.[/dim]")
        return

-    enabled.add(name)
    disabled.discard(name)
-    _save_enabled_set(enabled)
    _save_disabled_set(disabled)
-    console.print(
-        f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. "
-        "Takes effect on next session."
-    )
+    console.print(f"[green]✓[/green] Plugin [bold]{name}[/bold] enabled. Takes effect on next session.")


 def cmd_disable(name: str) -> None:
-    """Remove a plugin from the enabled allow-list (and add to disabled)."""
+    """Disable a plugin without removing it."""
    from rich.console import Console

    console = Console()
-    if not _plugin_exists(name):
-        console.print(f"[red]Plugin '{name}' is not installed or bundled.[/red]")
+    plugins_dir = _plugins_dir()
+
+    # Verify the plugin exists
+    target = plugins_dir / name
+    if not target.is_dir():
+        console.print(f"[red]Plugin '{name}' is not installed.[/red]")
        sys.exit(1)

-    enabled = _get_enabled_set()
    disabled = _get_disabled_set()
-
-    if name not in enabled and name in disabled:
+    if name in disabled:
        console.print(f"[dim]Plugin '{name}' is already disabled.[/dim]")
        return

-    enabled.discard(name)
    disabled.add(name)
-    _save_enabled_set(enabled)
    _save_disabled_set(disabled)
-    console.print(
-        f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. "
-        "Takes effect on next session."
-    )
+    console.print(f"[yellow]\u2298[/yellow] Plugin [bold]{name}[/bold] disabled. Takes effect on next session.")


-def _plugin_exists(name: str) -> bool:
-    """Return True if a plugin with *name* is installed (user) or bundled."""
-    # Installed: directory name or manifest name match in user plugins dir
-    user_dir = _plugins_dir()
-    if user_dir.is_dir():
-        if (user_dir / name).is_dir():
-            return True
-        for child in user_dir.iterdir():
-            if not child.is_dir():
-                continue
-            manifest = _read_manifest(child)
-            if manifest.get("name") == name:
-                return True
-    # Bundled: <repo>/plugins/<name>/
-    from pathlib import Path as _P
-    import hermes_cli
-    repo_plugins = _P(hermes_cli.__file__).resolve().parent.parent / "plugins"
-    if repo_plugins.is_dir():
-        candidate = repo_plugins / name
-        if candidate.is_dir() and (
-            (candidate / "plugin.yaml").exists()
-            or (candidate / "plugin.yml").exists()
-        ):
-            return True
-    return False
+def cmd_list() -> None:
+    """List installed plugins."""
+    from rich.console import Console
+    from rich.table import Table

-
-def _discover_all_plugins() -> list:
-    """Return a list of (name, version, description, source, dir_path) for
-    every plugin the loader can see — user + bundled + project.
-
-    Matches the ordering/dedup of ``PluginManager.discover_and_load``:
-    bundled first, then user, then project; user overrides bundled on
-    name collision.
-    """
    try:
        import yaml
    except ImportError:
        yaml = None

-    seen: dict = {}  # name -> (name, version, description, source, path)
-
-    # Bundled (<repo>/plugins/<name>/), excluding memory/ and context_engine/
-    import hermes_cli
-    repo_plugins = Path(hermes_cli.__file__).resolve().parent.parent / "plugins"
-    for base, source in ((repo_plugins, "bundled"), (_plugins_dir(), "user")):
-        if not base.is_dir():
-            continue
-        for d in sorted(base.iterdir()):
-            if not d.is_dir():
-                continue
-            if source == "bundled" and d.name in ("memory", "context_engine"):
-                continue
-            manifest_file = d / "plugin.yaml"
-            if not manifest_file.exists():
-                manifest_file = d / "plugin.yml"
-            if not manifest_file.exists():
-                continue
-            name = d.name
-            version = ""
-            description = ""
-            if yaml:
-                try:
-                    with open(manifest_file) as f:
-                        manifest = yaml.safe_load(f) or {}
-                    name = manifest.get("name", d.name)
-                    version = manifest.get("version", "")
-                    description = manifest.get("description", "")
-                except Exception:
-                    pass
-            # User plugins override bundled on name collision.
-            if name in seen and source == "bundled":
-                continue
-            src_label = source
-            if source == "user" and (d / ".git").exists():
-                src_label = "git"
-            seen[name] = (name, version, description, src_label, d)
-    return list(seen.values())
-
-
-def cmd_list() -> None:
-    """List all plugins (bundled + user) with enabled/disabled state."""
-    from rich.console import Console
-    from rich.table import Table
-
    console = Console()
-    entries = _discover_all_plugins()
-    if not entries:
+    plugins_dir = _plugins_dir()
+
+    dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir())
+    if not dirs:
        console.print("[dim]No plugins installed.[/dim]")
        console.print("[dim]Install with:[/dim] hermes plugins install owner/repo")
        return

-    enabled = _get_enabled_set()
    disabled = _get_disabled_set()

-    table = Table(title="Plugins", show_lines=False)
+    table = Table(title="Installed Plugins", show_lines=False)
    table.add_column("Name", style="bold")
    table.add_column("Status")
    table.add_column("Version", style="dim")
    table.add_column("Description")
    table.add_column("Source", style="dim")

-    for name, version, description, source, _dir in entries:
-        if name in disabled:
-            status = "[red]disabled[/red]"
-        elif name in enabled:
-            status = "[green]enabled[/green]"
-        else:
-            status = "[yellow]not enabled[/yellow]"
+    for d in dirs:
+        manifest_file = d / "plugin.yaml"
+        name = d.name
+        version = ""
+        description = ""
+        source = "local"
+
+        if manifest_file.exists() and yaml:
+            try:
+                with open(manifest_file) as f:
+                    manifest = yaml.safe_load(f) or {}
+                name = manifest.get("name", d.name)
+                version = manifest.get("version", "")
+                description = manifest.get("description", "")
+            except Exception:
+                pass
+
+        # Check if it's a git repo (installed via hermes plugins install)
+        if (d / ".git").exists():
+            source = "git"
+
+        is_disabled = name in disabled or d.name in disabled
+        status = "[red]disabled[/red]" if is_disabled else "[green]enabled[/green]"
        table.add_row(name, status, str(version), description, source)

    console.print()
@@ -732,7 +592,6 @@ def cmd_list() -> None:
    console.print()
    console.print("[dim]Interactive toggle:[/dim] hermes plugins")
    console.print("[dim]Enable/disable:[/dim] hermes plugins enable/disable <name>")
-    console.print("[dim]Plugins are opt-in by default — only 'enabled' plugins load.[/dim]")


 # ---------------------------------------------------------------------------
@@ -883,25 +742,41 @@ def cmd_toggle() -> None:
    """Interactive composite UI — general plugins + provider plugin categories."""
    from rich.console import Console

-    console = Console()
+    try:
+        import yaml
+    except ImportError:
+        yaml = None

-    # -- General plugins discovery (bundled + user) --
-    entries = _discover_all_plugins()
-    enabled_set = _get_enabled_set()
-    disabled_set = _get_disabled_set()
+    console = Console()
+    plugins_dir = _plugins_dir()
+
+    # -- General plugins discovery --
+    dirs = sorted(d for d in plugins_dir.iterdir() if d.is_dir())
+    disabled = _get_disabled_set()

    plugin_names = []
    plugin_labels = []
    plugin_selected = set()

-    for i, (name, _version, description, source, _d) in enumerate(entries):
-        label = f"{name} \u2014 {description}" if description else name
-        if source == "bundled":
-            label = f"{label} [bundled]"
+    for i, d in enumerate(dirs):
+        manifest_file = d / "plugin.yaml"
+        name = d.name
+        description = ""
+
+        if manifest_file.exists() and yaml:
+            try:
+                with open(manifest_file) as f:
+                    manifest = yaml.safe_load(f) or {}
+                name = manifest.get("name", d.name)
+                description = manifest.get("description", "")
+            except Exception:
+                pass
+
        plugin_names.append(name)
+        label = f"{name} \u2014 {description}" if description else name
        plugin_labels.append(label)
-        # Selected (enabled) when in enabled-set AND not in disabled-set
-        if name in enabled_set and name not in disabled_set:
+
+        if name not in disabled and d.name not in disabled:
            plugin_selected.add(i)

    # -- Provider categories --
@@ -929,10 +804,10 @@ def cmd_toggle() -> None:
    try:
        import curses
        _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
-                          disabled_set, categories, console)
+                          disabled, categories, console)
    except ImportError:
        _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
-                                disabled_set, categories, console)
+                                disabled, categories, console)


 def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
@@ -1145,29 +1020,18 @@ def _run_composite_ui(curses, plugin_names, plugin_labels, plugin_selected,
    curses.wrapper(_draw)
    flush_stdin()

-    # Persist general plugin changes. The new allow-list is the set of
-    # plugin names that were checked; anything not checked is explicitly
-    # disabled (written to disabled-list) so it remains off even if the
-    # plugin code does something clever like auto-enable in the future.
-    new_enabled: set = set()
-    new_disabled: set = set(disabled)  # preserve existing disabled state for unseen plugins
+    # Persist general plugin changes
+    new_disabled = set()
    for i, name in enumerate(plugin_names):
-        if i in chosen:
-            new_enabled.add(name)
-            new_disabled.discard(name)
-        else:
+        if i not in chosen:
            new_disabled.add(name)

-    prev_enabled = _get_enabled_set()
-    enabled_changed = new_enabled != prev_enabled
-    disabled_changed = new_disabled != disabled
-
-    if enabled_changed or disabled_changed:
-        _save_enabled_set(new_enabled)
+    if new_disabled != disabled:
        _save_disabled_set(new_disabled)
+        enabled_count = len(plugin_names) - len(new_disabled)
        console.print(
-            f"\n[green]\u2713[/green] General plugins: {len(new_enabled)} enabled, "
-            f"{len(plugin_names) - len(new_enabled)} disabled."
+            f"\n[green]\u2713[/green] General plugins: {enabled_count} enabled, "
+            f"{len(new_disabled)} disabled."
        )
    elif n_plugins > 0:
        console.print("\n[dim]General plugins unchanged.[/dim]")
@@ -1214,17 +1078,11 @@ def _run_composite_fallback(plugin_names, plugin_labels, plugin_selected,
                return
            print()

-        new_enabled: set = set()
-        new_disabled: set = set(disabled)
+        new_disabled = set()
        for i, name in enumerate(plugin_names):
-            if i in chosen:
-                new_enabled.add(name)
-                new_disabled.discard(name)
-            else:
+            if i not in chosen:
                new_disabled.add(name)
-        prev_enabled = _get_enabled_set()
-        if new_enabled != prev_enabled or new_disabled != disabled:
-            _save_enabled_set(new_enabled)
+        if new_disabled != disabled:
            _save_disabled_set(new_disabled)

    # Provider categories
@@ -1250,17 +1108,7 @@ def plugins_command(args) -> None:
    action = getattr(args, "plugins_action", None)

    if action == "install":
-        # Map argparse tri-state: --enable=True, --no-enable=False, neither=None (prompt)
-        enable_arg = None
-        if getattr(args, "enable", False):
-            enable_arg = True
-        elif getattr(args, "no_enable", False):
-            enable_arg = False
-        cmd_install(
-            args.identifier,
-            force=getattr(args, "force", False),
-            enable=enable_arg,
-        )
+        cmd_install(args.identifier, force=getattr(args, "force", False))
    elif action == "update":
        cmd_update(args.name)
    elif action in ("remove", "rm", "uninstall"):
@@ -863,15 +863,19 @@ def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
                pass


-def _inspect_profile_archive_roots(archive: Path) -> set[str]:
-    """Return the archive's top-level directory names.
+def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
+    """Import a profile from a tar.gz archive.

-    Profile imports expect exactly one root directory. Inspecting the archive
-    before extraction lets us stage the import safely instead of mutating a
-    live profile tree first and reconciling names later.
+    If *name* is not given, infers it from the archive's top-level directory.
+    Returns the imported profile directory.
    """
    import tarfile

+    archive = Path(archive_path)
+    if not archive.exists():
+        raise FileNotFoundError(f"Archive not found: {archive}")
+
+    # Peek at the archive to find the top-level directory name
    with tarfile.open(archive, "r:gz") as tf:
        top_dirs = {
            parts[0]
@@ -885,33 +889,13 @@ def _inspect_profile_archive_roots(archive: Path) -> set[str]:
                for member in tf.getmembers()
                if member.isdir()
            }
-    return top_dirs

-
-def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
-    """Import a profile from a tar.gz archive.
-
-    If *name* is not given, infers it from the archive's top-level directory.
-    Returns the imported profile directory.
-    """
-    import tempfile
-
-    archive = Path(archive_path)
-    if not archive.exists():
-        raise FileNotFoundError(f"Archive not found: {archive}")
-
-    top_dirs = _inspect_profile_archive_roots(archive)
-    archive_root = top_dirs.pop() if len(top_dirs) == 1 else None
-    inferred_name = name or archive_root
+    inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
    if not inferred_name:
        raise ValueError(
            "Cannot determine profile name from archive. "
            "Specify it explicitly: hermes profile import <archive> --name <name>"
        )
-    if archive_root is None:
-        raise ValueError(
-            "Profile archive must contain exactly one top-level directory."
-        )

    # Archives exported from the default profile have "default/" as top-level
    # dir.  Importing as "default" would target ~/.hermes itself — disallow
@@ -930,22 +914,12 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)

-    with tempfile.TemporaryDirectory(prefix="hermes_profile_import_") as tmpdir:
-        staging_root = Path(tmpdir)
-        _safe_extract_profile_archive(archive, staging_root)
+    _safe_extract_profile_archive(archive, profiles_root)

-        extracted = staging_root / archive_root
-        if not extracted.is_dir():
-            raise ValueError(
-                f"Profile archive root is missing or invalid: {archive_root}"
-            )
-
-        final_source = extracted
-        if archive_root != inferred_name:
-            final_source = staging_root / inferred_name
-            extracted.rename(final_source)
-
-        shutil.move(str(final_source), str(profile_dir))
+    # If the archive extracted under a different name, rename
+    extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
+    if extracted != profile_dir and extracted.exists():
+        extracted.rename(profile_dir)

    return profile_dir

@@ -23,8 +23,6 @@ import logging
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple

-from utils import base_url_host_matches, base_url_hostname
-
 logger = logging.getLogger(__name__)


@@ -94,12 +92,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="KIMI_BASE_URL",
    ),
-    "stepfun": HermesOverlay(
-        transport="openai_chat",
-        extra_env_vars=("STEPFUN_API_KEY",),
-        base_url_override="https://api.stepfun.ai/step_plan/v1",
-        base_url_env_var="STEPFUN_BASE_URL",
-    ),
    "minimax": HermesOverlay(
        transport="anthropic_messages",
        base_url_env_var="MINIMAX_BASE_URL",
@@ -216,10 +208,6 @@ ALIASES: Dict[str, str] = {
    "kimi-coding-cn": "kimi-for-coding",
    "moonshot": "kimi-for-coding",

-    # stepfun
-    "step": "stepfun",
-    "stepfun-coding-plan": "stepfun",
-
    # minimax-cn
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
@@ -304,7 +292,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "nous": "Nous Portal",
    "openai-codex": "OpenAI Codex",
    "copilot-acp": "GitHub Copilot ACP",
-    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
@@ -438,16 +425,6 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
    """
    pdef = get_provider(provider)
    if pdef is not None:
-        # Even for known providers, check URL heuristics for special endpoints
-        # (e.g. kimi /coding endpoint needs anthropic_messages even on 'custom')
-        if base_url:
-            url_lower = base_url.rstrip("/").lower()
-            if "api.kimi.com/coding" in url_lower:
-                return "anthropic_messages"
-            if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
-                return "anthropic_messages"
-            if "api.openai.com" in url_lower:
-                return "codex_responses"
        return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")

    # Direct provider checks for providers not in HERMES_OVERLAYS
@@ -457,14 +434,11 @@ def determine_api_mode(provider: str, base_url: str = "") -> str:
    # URL-based heuristics for custom / unknown providers
    if base_url:
        url_lower = base_url.rstrip("/").lower()
-        hostname = base_url_hostname(base_url)
-        if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com":
+        if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
            return "anthropic_messages"
-        if hostname == "api.kimi.com" and "/coding" in url_lower:
-            return "anthropic_messages"
-        if hostname == "api.openai.com":
+        if "api.openai.com" in url_lower:
            return "codex_responses"
-        if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"):
+        if "bedrock-runtime" in url_lower and "amazonaws.com" in url_lower:
            return "bedrock_converse"

    return "chat_completions"
@@ -29,7 +29,6 @@ from hermes_cli.auth import (
 )
 from hermes_cli.config import get_compatible_custom_providers, load_config
 from hermes_constants import OPENROUTER_BASE_URL
-from utils import base_url_host_matches, base_url_hostname


 def _normalize_custom_provider_name(value: str) -> str:
@@ -46,20 +45,14 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
      protocol under a ``/anthropic`` suffix — treat those as
      ``anthropic_messages`` transport instead of the default
      ``chat_completions``.
-    - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the
-      Anthropic Messages protocol (the /coding route accepts Claude
-      Code's native request shape).
    """
    normalized = (base_url or "").strip().lower().rstrip("/")
-    hostname = base_url_hostname(base_url)
-    if hostname == "api.x.ai":
+    if "api.x.ai" in normalized:
        return "codex_responses"
-    if hostname == "api.openai.com":
+    if "api.openai.com" in normalized and "openrouter" not in normalized:
        return "codex_responses"
    if normalized.endswith("/anthropic"):
        return "anthropic_messages"
-    if hostname == "api.kimi.com" and "/coding" in normalized:
-        return "anthropic_messages"
    return None


@@ -210,8 +203,7 @@ def _resolve_runtime_from_pool_entry(
            api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
-            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
-            # codex_responses).
+            # api.openai.com → codex_responses, api.x.ai → codex_responses).
            detected = _detect_api_mode_for_url(base_url)
            if detected:
                api_mode = detected
@@ -488,7 +480,7 @@ def _resolve_openrouter_runtime(
    # When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer
    # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
    # provider (issues #420, #560).
-    _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
+    _is_openrouter_url = "openrouter.ai" in base_url
    if _is_openrouter_url:
        api_key_candidates = [
            explicit_api_key,
@@ -498,12 +490,8 @@ def _resolve_openrouter_runtime(
    else:
        # Custom endpoint: use api_key from config when using config base_url (#1760).
        # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
-        # the canonical env var for ollama.com authentication. Match on
-        # HOST, not substring — a custom base_url whose path contains
-        # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
-        # hostname is a look-alike (ollama.com.attacker.test) must not
-        # receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
-        _is_ollama_url = base_url_host_matches(base_url, "ollama.com")
+        # the canonical env var for ollama.com authentication.
+        _is_ollama_url = "ollama.com" in base_url.lower()
        api_key_candidates = [
            explicit_api_key,
            (cfg_api_key if use_config_base_url else ""),
@@ -666,8 +654,7 @@ def _resolve_explicit_runtime(
            if configured_mode:
                api_mode = configured_mode
            else:
-                # Auto-detect from URL (Anthropic /anthropic suffix,
-                # api.openai.com → Responses, Kimi /coding, etc.).
+                # Auto-detect Anthropic-compatible endpoints (/anthropic suffix).
                detected = _detect_api_mode_for_url(base_url)
                if detected:
                    api_mode = detected
@@ -917,7 +904,8 @@ def resolve_runtime_provider(
                code="no_aws_credentials",
            )
        # Read bedrock-specific config from config.yaml
-        _bedrock_cfg = load_config().get("bedrock", {})
+        from hermes_cli.config import load_config as _load_bedrock_config
+        _bedrock_cfg = _load_bedrock_config().get("bedrock", {})
        # Region priority: config.yaml bedrock.region → env var → us-east-1
        region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
        auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
--- a/Show More
+++ b/Show More