refactor: extract codex_responses logic into dedicated adapter

Move 10 Responses API format-conversion and normalization functions from run_agent.py into agent/codex_responses_adapter.py. All functions are now stateless module-level functions with zero self references. The AIAgent methods remain as thin one-line wrappers that delegate to the adapter, so all callers (tests, gateway, CLI) are unchanged. Functions extracted: - _deterministic_call_id: deterministic tool call ID generation - _split_responses_tool_id: composite ID splitting - _derive_responses_function_call_id: call_ to fc_ prefix conversion - _responses_tools: chat completions tool schema → Responses format - _chat_messages_to_responses_input: message format conversion - _preflight_codex_input_items: input item normalization - _preflight_codex_api_kwargs: API kwargs validation/cleaning - _extract_responses_message_text: text extraction from response items - _extract_responses_reasoning_text: reasoning extraction - _normalize_codex_response: full response normalization This brings codex_responses in line with anthropic_adapter.py and bedrock_adapter.py which already have their own adapter files. run_agent.py: 12410 → 11845 lines (-565 net)
2026-04-20 16:32:43 +05:30
1061 changed files with 16272 additions and 175258 deletions
@@ -14,6 +14,3 @@ node_modules
 .env

 *.md
-
-# Runtime data (bind-mounted at /opt/data; must not leak into build context)
-data/
@@ -1,8 +0,0 @@
-name: 'Setup Nix'
-description: 'Install Nix with DeterminateSystems and enable magic-nix-cache'
-
-runs:
-  using: composite
-  steps:
-    - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22
-    - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13
@@ -53,9 +53,6 @@ jobs:
      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

-      - name: Regenerate per-skill docs pages + catalogs
-        run: python3 website/scripts/generate-skill-docs.py
-
      - name: Build skills index (if not already present)
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -36,9 +36,6 @@ jobs:
      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

-      - name: Regenerate per-skill docs pages + catalogs
-        run: python3 website/scripts/generate-skill-docs.py
-
      - name: Lint docs diagrams
        run: npm run lint:diagrams
        working-directory: website
@@ -1,68 +0,0 @@
-name: Nix Lockfile Check
-
-on:
-  pull_request:
-  workflow_dispatch:
-
-permissions:
-  contents: read
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-check-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  check:
-    runs-on: ubuntu-latest
-    timeout-minutes: 20
-    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Resolve head SHA
-        id: sha
-        shell: bash
-        run: |
-          FULL="${{ github.event.pull_request.head.sha || github.sha }}"
-          echo "full=$FULL" >> "$GITHUB_OUTPUT"
-          echo "short=${FULL:0:7}" >> "$GITHUB_OUTPUT"
-
-      - name: Check lockfile hashes
-        id: check
-        continue-on-error: true
-        env:
-          LINK_SHA: ${{ steps.sha.outputs.full }}
-        run: nix run .#fix-lockfiles -- --check
-
-      - name: Post sticky PR comment (stale)
-        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          message: |
-            ### ⚠️ npm lockfile hash out of date
-
-            Checked against commit [`${{ steps.sha.outputs.short }}`](${{ github.server_url }}/${{ github.repository }}/commit/${{ steps.sha.outputs.full }}) (PR head at check time).
-
-            The `hash = "sha256-..."` line in these nix files no longer matches the committed `package-lock.json`:
-
-            ${{ steps.check.outputs.report }}
-
-            #### Apply the fix
-
-            - [ ] **Apply lockfile fix** — tick to push a commit with the correct hashes to this PR branch
-            - Or [run the Nix Lockfile Fix workflow](${{ github.server_url }}/${{ github.repository }}/actions/workflows/nix-lockfile-fix.yml) manually (pass PR `#${{ github.event.pull_request.number }}`)
-            - Or locally: `nix run .#fix-lockfiles -- --apply` and commit the diff
-
-      - name: Clear sticky PR comment (resolved)
-        if: steps.check.outputs.stale == 'false' && github.event_name == 'pull_request'
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          delete: true
-
-      - name: Fail if stale
-        if: steps.check.outputs.stale == 'true'
-        run: exit 1
@@ -1,149 +0,0 @@
-name: Nix Lockfile Fix
-
-on:
-  workflow_dispatch:
-    inputs:
-      pr_number:
-        description: 'PR number to fix (leave empty to run on the selected branch)'
-        required: false
-        type: string
-  issue_comment:
-    types: [edited]
-
-permissions:
-  contents: write
-  pull-requests: write
-
-concurrency:
-  group: nix-lockfile-fix-${{ github.event.issue.number || github.event.inputs.pr_number || github.ref }}
-  cancel-in-progress: false
-
-jobs:
-  fix:
-    # Run on manual dispatch OR when a task-list checkbox in the sticky
-    # lockfile-check comment flips from `[ ]` to `[x]`.
-    if: |
-      github.event_name == 'workflow_dispatch' ||
-      (github.event_name == 'issue_comment'
-       && github.event.issue.pull_request != null
-       && contains(github.event.comment.body, '[x] **Apply lockfile fix**')
-       && !contains(github.event.changes.body.from, '[x] **Apply lockfile fix**'))
-    runs-on: ubuntu-latest
-    timeout-minutes: 25
-    steps:
-      - name: Authorize & resolve PR
-        id: resolve
-        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea  # v7.0.1
-        with:
-          script: |
-            // 1. Verify the actor has write access — applies to both checkbox
-            //    clicks and manual dispatch.
-            const { data: perm } =
-              await github.rest.repos.getCollaboratorPermissionLevel({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                username: context.actor,
-              });
-            if (!['admin', 'write', 'maintain'].includes(perm.permission)) {
-              core.setFailed(
-                `${context.actor} lacks write access (has: ${perm.permission})`
-              );
-              return;
-            }
-
-            // 2. Resolve which ref to check out.
-            let prNumber = '';
-            if (context.eventName === 'issue_comment') {
-              prNumber = String(context.payload.issue.number);
-            } else if (context.eventName === 'workflow_dispatch') {
-              prNumber = context.payload.inputs.pr_number || '';
-            }
-
-            if (!prNumber) {
-              core.setOutput('ref', context.ref.replace(/^refs\/heads\//, ''));
-              core.setOutput('repo', context.repo.repo);
-              core.setOutput('owner', context.repo.owner);
-              core.setOutput('pr', '');
-              return;
-            }
-
-            const { data: pr } = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              pull_number: Number(prNumber),
-            });
-            core.setOutput('ref', pr.head.ref);
-            core.setOutput('repo', pr.head.repo.name);
-            core.setOutput('owner', pr.head.repo.owner.login);
-            core.setOutput('pr', String(pr.number));
-
-      # Wipe the sticky lockfile-check comment to a "running" state as soon
-      # as the job is authorized, so the user sees their click was picked up
-      # before the ~minute of nix build work.
-      - name: Mark sticky as running
-        if: steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### 🔄 Applying lockfile fix…
-
-            Triggered by @${{ github.actor }} — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-        with:
-          repository: ${{ steps.resolve.outputs.owner }}/${{ steps.resolve.outputs.repo }}
-          ref: ${{ steps.resolve.outputs.ref }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          fetch-depth: 0
-
-      - uses: ./.github/actions/nix-setup
-
-      - name: Apply lockfile hashes
-        id: apply
-        run: nix run .#fix-lockfiles -- --apply
-
-      - name: Commit & push
-        if: steps.apply.outputs.changed == 'true'
-        shell: bash
-        run: |
-          set -euo pipefail
-          git config user.name 'github-actions[bot]'
-          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
-          git add nix/tui.nix nix/web.nix
-          git commit -m "fix(nix): refresh npm lockfile hashes"
-          git push
-
-      - name: Update sticky (applied)
-        if: steps.apply.outputs.changed == 'true' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile fix applied
-
-            Pushed a commit refreshing the npm lockfile hashes — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (already current)
-        if: steps.apply.outputs.changed == 'false' && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ✅ Lockfile hashes already current
-
-            Nothing to commit — [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).
-
-      - name: Update sticky (failed)
-        if: failure() && steps.resolve.outputs.pr != ''
-        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
-        with:
-          header: nix-lockfile-check
-          number: ${{ steps.resolve.outputs.pr }}
-          message: |
-            ### ❌ Lockfile fix failed
-
-            See the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for logs.
@@ -4,6 +4,15 @@ on:
  push:
    branches: [main]
  pull_request:
+    paths:
+      - 'flake.nix'
+      - 'flake.lock'
+      - 'nix/**'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'hermes_cli/**'
+      - 'run_agent.py'
+      - 'acp_adapter/**'

 permissions:
  contents: read
@@ -20,8 +29,9 @@ jobs:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
-      - uses: ./.github/actions/nix-setup
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+      - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25  # v22
+      - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39  # v13
      - name: Check flake
        if: runner.os == 'Linux'
        run: nix flake check --print-build-logs
@@ -1,4 +1,3 @@
-.DS_Store
 /venv/
 /_pycache/
 *.pyc*
@@ -5,61 +5,78 @@ Instructions for AI coding assistants and developers working on the hermes-agent
 ## Development Environment

 ```bash
-# Prefer .venv; fall back to venv if that's what your checkout has.
-source .venv/bin/activate   # or: source venv/bin/activate
+source venv/bin/activate  # ALWAYS activate before running Python
 ```

-`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
-`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
-main checkout).
-
 ## Project Structure

-File counts shift constantly — don't treat the tree below as exhaustive.
-The canonical source is the filesystem. The notes call out the load-bearing
-entry points you'll actually edit.
-
 ```
 hermes-agent/
-├── run_agent.py          # AIAgent class — core conversation loop (~12k LOC)
+├── run_agent.py          # AIAgent class — core conversation loop
 ├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
 ├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
-├── cli.py                # HermesCLI class — interactive CLI orchestrator (~11k LOC)
+├── cli.py                # HermesCLI class — interactive CLI orchestrator
 ├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
-├── hermes_constants.py   # get_hermes_home(), display_hermes_home() — profile-aware paths
-├── hermes_logging.py     # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
-├── batch_runner.py       # Parallel batch processing
-├── agent/                # Agent internals (provider adapters, memory, caching, compression, etc.)
-├── hermes_cli/           # CLI subcommands, setup wizard, plugins loader, skin engine
-├── tools/                # Tool implementations — auto-discovered via tools/registry.py
+├── agent/                # Agent internals
+│   ├── prompt_builder.py     # System prompt assembly
+│   ├── context_compressor.py # Auto context compression
+│   ├── prompt_caching.py     # Anthropic prompt caching
+│   ├── auxiliary_client.py   # Auxiliary LLM client (vision, summarization)
+│   ├── model_metadata.py     # Model context lengths, token estimation
+│   ├── models_dev.py         # models.dev registry integration (provider-aware context)
+│   ├── display.py            # KawaiiSpinner, tool preview formatting
+│   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
+│   └── trajectory.py         # Trajectory saving helpers
+├── hermes_cli/           # CLI subcommands and setup
+│   ├── main.py           # Entry point — all `hermes` subcommands
+│   ├── config.py         # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
+│   ├── commands.py       # Slash command definitions + SlashCommandCompleter
+│   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
+│   ├── setup.py          # Interactive setup wizard
+│   ├── skin_engine.py    # Skin/theme engine — CLI visual customization
+│   ├── skills_config.py  # `hermes skills` — enable/disable skills per platform
+│   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
+│   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
+│   ├── models.py         # Model catalog, provider model lists
+│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
+│   └── auth.py           # Provider credential resolution
+├── tools/                # Tool implementations (one file per tool)
+│   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
+│   ├── approval.py       # Dangerous command detection
+│   ├── terminal_tool.py  # Terminal orchestration
+│   ├── process_registry.py # Background process management
+│   ├── file_tools.py     # File read/write/search/patch
+│   ├── web_tools.py      # Web search/extract (Parallel + Firecrawl)
+│   ├── browser_tool.py   # Browserbase browser automation
+│   ├── code_execution_tool.py # execute_code sandbox
+│   ├── delegate_tool.py  # Subagent delegation
+│   ├── mcp_tool.py       # MCP client (~1050 lines)
 │   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
-├── gateway/              # Messaging gateway — run.py + session.py + platforms/
-│   ├── platforms/        # Adapter per platform (telegram, discord, slack, whatsapp,
-│   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
-│   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
-│   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
-│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
-├── plugins/              # Plugin system (see "Plugins" section below)
-│   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
-│   ├── context_engine/   # Context-engine plugins
-│   └── <others>/         # Dashboard, image-gen, disk-cleanup, examples, ...
-├── optional-skills/      # Heavier/niche skills shipped but NOT active by default
-├── skills/               # Built-in skills bundled with the repo
+├── gateway/              # Messaging platform gateway
+│   ├── run.py            # Main loop, slash commands, message dispatch
+│   ├── session.py        # SessionStore — conversation persistence
+│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
-│   └── src/              # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
+│   ├── src/entry.tsx        # TTY gate + render()
+│   ├── src/app.tsx          # Main state machine and UI
+│   ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
+│   ├── src/app/             # Decomposed app logic (event handler, slash handler, stores, hooks)
+│   ├── src/components/      # Ink components (branding, markdown, prompts, pickers, etc.)
+│   ├── src/hooks/           # useCompletion, useInputHistory, useQueue, useVirtualHistory
+│   └── src/lib/             # Pure helpers (history, osc52, text, rpc, messages)
 ├── tui_gateway/          # Python JSON-RPC backend for the TUI
+│   ├── entry.py             # stdio entrypoint
+│   ├── server.py            # RPC handlers and session logic
+│   ├── render.py            # Optional rich/ANSI bridge
+│   └── slash_worker.py      # Persistent HermesCLI subprocess for slash commands
 ├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
-├── cron/                 # Scheduler — jobs.py, scheduler.py
+├── cron/                 # Scheduler (jobs.py, scheduler.py)
 ├── environments/         # RL training environments (Atropos)
-├── scripts/              # run_tests.sh, release.py, auxiliary scripts
-├── website/              # Docusaurus docs site
-└── tests/                # Pytest suite (~15k tests across ~700 files as of Apr 2026)
+├── tests/                # Pytest suite (~3000 tests)
+└── batch_runner.py       # Parallel batch processing
 ```

-**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
-**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
-`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
-Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
+**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)

 ## File Dependency Chain

@@ -77,30 +94,20 @@ run_agent.py, cli.py, batch_runner.py, environments/

 ## AIAgent Class (run_agent.py)

-The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
-session context, budget, credential pool, etc.). The signature below is the
-minimum subset you'll usually touch — read `run_agent.py` for the full list.
-
 ```python
 class AIAgent:
    def __init__(self,
-        base_url: str = None,
-        api_key: str = None,
-        provider: str = None,
-        api_mode: str = None,              # "chat_completions" | "codex_responses" | ...
-        model: str = "",                   # empty → resolved from config/provider later
-        max_iterations: int = 90,          # tool-calling iterations (shared with subagents)
+        model: str = "anthropic/claude-opus-4.6",
+        max_iterations: int = 90,
        enabled_toolsets: list = None,
        disabled_toolsets: list = None,
        quiet_mode: bool = False,
        save_trajectories: bool = False,
-        platform: str = None,              # "cli", "telegram", etc.
+        platform: str = None,           # "cli", "telegram", etc.
        session_id: str = None,
        skip_context_files: bool = False,
        skip_memory: bool = False,
-        credential_pool=None,
-        # ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
-        # checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
+        # ... plus provider, api_mode, callbacks, routing params
    ): ...

    def chat(self, message: str) -> str:
@@ -113,13 +120,10 @@ class AIAgent:

 ### Agent Loop

-The core loop is inside `run_conversation()` — entirely synchronous, with
-interrupt checks, budget tracking, and a one-turn grace call:
+The core loop is inside `run_conversation()` — entirely synchronous:

 ```python
-while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
-        or self._budget_grace_call:
-    if self._interrupt_requested: break
+while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
    response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
    if response.tool_calls:
        for tool_call in response.tool_calls:
@@ -130,8 +134,7 @@ while (api_call_count < self.max_iterations and self.iteration_budget.remaining
        return response.content
 ```

-Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
-Reasoning content is stored in `assistant_msg["reasoning"]`.
+Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.

 ---

@@ -240,19 +243,6 @@ npm run fmt       # prettier
 npm test          # vitest
 ```

-### TUI in the Dashboard (`hermes dashboard` → `/chat`)
-
-The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
-
- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
-
-**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
-
-**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
-
 ---

 ## Adding New Tools
@@ -290,7 +280,7 @@ The registry handles schema collection, dispatch, availability checking, and err

 **State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.

-**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
+**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.

 ---

@@ -298,13 +288,9 @@ The registry handles schema collection, dispatch, availability checking, and err

 ### config.yaml options:
 1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
-2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
-   ONLY if you need to actively migrate/transform existing user config
-   (renaming keys, changing structure). Adding a new key to an existing
-   section is handled automatically by the deep-merge and does NOT require
-   a version bump.
+2. Bump `_config_version` (currently 5) to trigger migration for existing users

-### .env variables (SECRETS ONLY — API keys, tokens, passwords):
+### .env variables:
 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
 "NEW_API_KEY": {
@@ -316,29 +302,13 @@ The registry handles schema collection, dispatch, availability checking, and err
 },
 ```

-Non-secret settings (timeouts, thresholds, feature flags, paths, display
-preferences) belong in `config.yaml`, not `.env`. If internal code needs an
-env var mirror for backward compatibility, bridge it from `config.yaml` to
-the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
-
-### Config loaders (three paths — know which one you're in):
+### Config loaders (two separate systems):

 | Loader | Used by | Location |
 |--------|---------|----------|
-| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
-| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
-| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
-
-If you add a new key and the CLI sees it but the gateway doesn't (or vice
-versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
-
-### Working directory:
- **CLI** — uses the process's current directory (`os.getcwd()`).
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
-  to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
-  removed** — the config loader prints a deprecation warning if it's set in
-  `.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
-  `terminal.cwd` in `config.yaml`.
+| `load_cli_config()` | CLI mode | `cli.py` |
+| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
+| Direct YAML load | Gateway | `gateway/run.py` |

 ---

@@ -431,95 +401,7 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.

 ---

-## Plugins
-
-Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
-repo-shipped plugins can be discovered alongside user-installed ones in
-`~/.hermes/plugins/` and pip-installed entry points.
-
-### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
-
-`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
-and pip entry points. Each plugin exposes a `register(ctx)` function that
-can:
-
- Register Python-callback lifecycle hooks:
-  `pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
-  `on_session_start`, `on_session_end`
- Register new tools via `ctx.register_tool(...)`
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
-  plugin's argparse tree is wired into `hermes` at startup so
-  `hermes <pluginname> <subcmd>` works with no change to `main.py`
-
-Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
-(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
-as a side effect of importing `model_tools.py`. Code paths that read plugin
-state without importing `model_tools.py` first must call `discover_plugins()`
-explicitly (it's idempotent).
-
-### Memory-provider plugins (`plugins/memory/<name>/`)
-
-Separate discovery system for pluggable memory backends. Current built-in
-providers include **honcho, mem0, supermemory, byterover, hindsight,
-holographic, openviking, retaindb**.
-
-Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
-and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
-`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
-`post_setup(hermes_home, config)` for setup-wizard integration.
-
-**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
-defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
-it at argparse setup time and wires it into `hermes <plugin>`. The
-framework only exposes CLI commands for the **currently active** memory
-provider (read from `memory.provider` in config.yaml), so disabled
-providers don't clutter `hermes --help`.
-
-**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
-(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
-If a plugin needs a capability the framework doesn't expose, expand the
-generic plugin surface (new hook, new ctx method) — never hardcode
-plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
-honcho argparse from `main.py` for exactly this reason.
-
-### Dashboard / context-engine / image-gen plugin directories
-
-`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
-etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
-Context engines plug into `agent/context_engine.py`; image-gen providers
-into `agent/image_gen_provider.py`.
-
---
-
-## Skills
-
-Two parallel surfaces:
-
- **`skills/`** — built-in skills shipped and loadable by default.
-  Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
-  NOT active by default. Installed explicitly via
-  `hermes skills install official/<category>/<skill>`. Adapter lives in
-  `tools/skills_hub.py` (`OptionalSkillSource`). Categories include
-  `autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
-  `devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
-  `research`, `security`, `web-development`.
-
-When reviewing skill PRs, check which directory they target — heavy-dep or
-niche skills belong in `optional-skills/`.
-
-### SKILL.md frontmatter
-
-Standard fields: `name`, `description`, `version`, `platforms`
-(OS-gating list: `[macos]`, `[linux, macos]`, ...),
-`metadata.hermes.tags`, `metadata.hermes.category`,
-`metadata.hermes.config` (config.yaml settings the skill needs — stored
-under `skills.config.<key>`, prompted during setup, injected at load time).
-
---
-
 ## Important Policies
-
 ### Prompt Caching Must Not Break

 Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
@@ -529,10 +411,9 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i

 Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.

-Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
-must be **cache-aware**: default to deferred invalidation (change takes
-effect next session), with an opt-in `--now` flag for immediate
-invalidation. See `/skills install --now` for the canonical pattern.
+### Working Directory Behavior
+- **CLI**: Uses current directory (`.` → `os.getcwd()`)
+- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)

 ### Background Process Notifications (Gateway)

@@ -554,7 +435,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
 `HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).

 The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
-`HERMES_HOME` before any module imports. All `get_hermes_home()` references
+`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
 automatically scope to the active profile.

 ### Rules for profile-safe code
@@ -611,12 +492,8 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
 for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
 has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.

-### DO NOT introduce new `simple_term_menu` usage
-Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
-the preferred UI is curses (stdlib) because `simple_term_menu` has
-ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
-interactive menus must use `hermes_cli/curses_ui.py` — see
-`hermes_cli/tools_config.py` for the canonical pattern.
+### DO NOT use `simple_term_menu` for interactive menus
+Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.

 ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
 Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
@@ -627,30 +504,6 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
 ### DO NOT hardcode cross-tool references in schema descriptions
 Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.

-### The gateway has TWO message guards — both must bypass approval/control commands
-When an agent is running, messages pass through two sequential guards:
-(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
-`_pending_messages` when `session_key in self._active_sessions`, and
-(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
-`/queue`, `/status`, `/approve`, `/deny` before they reach
-`running_agent.interrupt()`. Any new command that must reach the runner
-while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
-guards and be dispatched inline, not via `_process_message_background()`
-(which races session lifecycle).
-
-### Squash merges from stale branches silently revert recent fixes
-Before squash-merging a PR, ensure the branch is up to date with `main`
-(`git fetch origin main && git reset --hard origin/main` in the worktree,
-then re-apply the PR's commits). A stale branch's version of an unrelated
-file will silently overwrite recent fixes on main when squashed. Verify
-with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
-red flag.
-
-### Don't wire in dead code without E2E validation
-Unused code that was never shipped was dead for a reason. Before wiring an
-unused module into a live code path, E2E test the real resolution chain
-with actual imports (not mocks) against a temp `HERMES_HOME`.
-
 ### Tests must not write to `~/.hermes/`
 The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.

@@ -706,59 +559,10 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
 pytest directly), at minimum activate the venv and pass `-n 4`:

 ```bash
-source .venv/bin/activate   # or: source venv/bin/activate
+source venv/bin/activate
 python -m pytest tests/ -q -n 4
 ```

 Worker count above 4 will surface test-ordering flakes that CI never sees.

 Always run the full suite before pushing changes.
-
-### Don't write change-detector tests
-
-A test is a **change-detector** if it fails whenever data that is **expected
-to change** gets updated — model catalogs, config version numbers,
-enumeration counts, hardcoded lists of provider models. These tests add no
-behavioral coverage; they just guarantee that routine source updates break
-CI and cost engineering time to "fix."
-
-**Do not write:**
-
-```python
-# catalog snapshot — breaks every model release
-assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
-assert "MiniMax-M2.7" in models
-
-# config version literal — breaks every schema bump
-assert DEFAULT_CONFIG["_config_version"] == 21
-
-# enumeration count — breaks every time a skill/provider is added
-assert len(_PROVIDER_MODELS["huggingface"]) == 8
-```
-
-**Do write:**
-
-```python
-# behavior: does the catalog plumbing work at all?
-assert "gemini" in _PROVIDER_MODELS
-assert len(_PROVIDER_MODELS["gemini"]) >= 1
-
-# behavior: does migration bump the user's version to current latest?
-assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
-
-# invariant: no plan-only model leaks into the legacy list
-assert not (set(moonshot_models) & coding_plan_only_models)
-
-# invariant: every model in the catalog has a context-length entry
-for m in _PROVIDER_MODELS["huggingface"]:
-    assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
-```
-
-The rule: if the test reads like a snapshot of current data, delete it. If
-it reads like a contract about how two pieces of data must relate, keep it.
-When a PR adds a new provider/model and you want a test, make the test
-assert the relationship (e.g. "catalog entries all have context lengths"),
-not the specific names.
-
-Reviewers should reject new change-detector tests; authors should convert
-them into invariants before re-requesting review.
@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
 We value contributions in this order:

 1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
-2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
+2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
 3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
 4. **Performance and robustness** — retry logic, error handling, graceful degradation.
 5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite

 | Requirement | Notes |
 |-------------|-------|
-| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
+| **Git** | With `--recurse-submodules` support |
 | **Python 3.11+** | uv will install it if missing |
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
-| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
+| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |

 ### Clone and install

@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
 touch ~/.hermes/.env

 # Add at minimum an LLM provider key:
-echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
+echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
 ```

 ### Run
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl

 ## Cross-Platform Compatibility

-Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
+Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:

 ### Critical rules

@@ -597,7 +597,7 @@ refactor/description   # Code restructuring

 1. **Run tests**: `pytest tests/ -v`
 2. **Test manually**: Run `hermes` and exercise the code path you changed
-3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
+3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.

 ### PR description
@@ -10,11 +10,9 @@ ENV PYTHONUNBUFFERED=1
 ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright

 # Install system dependencies in one layer, clear APT cache
-# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
-# that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -29,10 +27,12 @@ WORKDIR /opt/hermes
 # Copy only package manifests first so npm install + Playwright are cached
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
+COPY scripts/whatsapp-bridge/package.json scripts/whatsapp-bridge/package-lock.json scripts/whatsapp-bridge/
 COPY web/package.json web/package-lock.json web/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
+    (cd scripts/whatsapp-bridge && npm install --prefer-offline --no-audit) && \
    (cd web && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

@@ -43,21 +43,14 @@ COPY --chown=hermes:hermes . .
 # Build web dashboard (Vite outputs to hermes_cli/web_dist/)
 RUN cd web && npm run build

-# ---------- Permissions ----------
-# Make install dir world-readable so any HERMES_UID can read it at runtime.
-# The venv needs to be traversable too.
-USER root
-RUN chmod -R a+rX /opt/hermes
-# Start as root so the entrypoint can usermod/groupmod + gosu.
-# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
-
 # ---------- Python virtualenv ----------
+RUN chown hermes:hermes /opt/hermes
+USER hermes
 RUN uv venv && \
    uv pip install --no-cache-dir -e ".[all]"

 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
-ENV PATH="/opt/data/.local/bin:${PATH}"
 VOLUME [ "/opt/data" ]
-ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
+ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
 | Set a personality | `/personality [name]` | `/personality [name]` |
 | Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
 | Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
-| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
+| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
 | Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
 | Platform-specific status | `/platforms` | `/status`, `/sethome` |

@@ -157,10 +157,14 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
-scripts/run_tests.sh
+python -m pytest tests/ -q
 ```

-> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
+> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
+> ```bash
+> git submodule update --init tinker-atropos
+> uv pip install -e "./tinker-atropos"
+> ```

 ---

@@ -169,6 +173,7 @@ scripts/run_tests.sh
 - 💬 [Discord](https://discord.gg/NousResearch)
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
+- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---
@@ -1,453 +0,0 @@
-# Hermes Agent v0.11.0 (v2026.4.23)
-
-**Release Date:** April 23, 2026
-**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
-
-> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
-
-This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
-
---
-
-## ✨ Highlights
-
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
-
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
-
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
-
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
-
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
-
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
-
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
-
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
-
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
-
- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
-
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
-
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
-
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
-
---
-
-## 🏗️ Core Agent & Architecture
-
-### Transport Layer (NEW)
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
-
-### Provider & Model Support
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
-
-### Agent Loop & Conversation
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
-
-### Session & Memory
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
-
---
-
-## 🖥️ New Ink-based TUI
-
-A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
-
-### TUI Foundations
- New TUI based on Ink + Python JSON-RPC backend
- Prettier + ESLint + vitest tooling for `ui-tui/`
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
- Persistent `_SlashWorker` subprocess for slash command dispatch
-
-### UX & Features
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
- Sticky composer that freezes during scroll
- OSC-52 clipboard support for copy across SSH sessions
- Virtualized history rendering for performance
- Slash command autocomplete via `complete.slash` RPC
- Path autocomplete via `complete.path` RPC
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
-
-### Structural Refactors
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
-
---
-
-## 📱 Messaging Platforms (Gateway)
-
-### New Platforms
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
-
-### Telegram
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
- Fix: Markdown special char escaping in `send_exec_approval`
- Fix: parentheses in URLs during MarkdownV2 link conversion
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
- Many platform hint / streaming / session-key fixes
-
-### Discord
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
-
-### Feishu
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
-
-### DingTalk
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
-
-### WhatsApp
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
-
-### WeCom / Weixin
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
-
-### Signal
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
-
-### Slack
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
-
-### BlueBubbles (iMessage)
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
-
-### Gateway Core
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
- Fix: unlink stale PID + lock files on cleanup
- Fix: force-unlink stale PID file after `--replace` takeover
-
---
-
-## 🔧 Tool System
-
-### Plugin Surface (major expansion)
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
-
-### Browser
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
- Camofox hardening + connection stability across the window
-
-### Execute Code
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
-
-### Image Generation
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
-
-### TTS / STT / Voice
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
-
-### Webhook / Cron
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
-
-### Delegate
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
-
-### File / Patch
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
-
-### API Server
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
-
-### Docker / Podman
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
-
-### MCP
- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
-
---
-
-## 🧩 Skills Ecosystem
-
-### Skill System
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
-
-### New Skills
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
-
---
-
-## 📊 Web Dashboard
-
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
-
---
-
-## 🖱️ CLI & User Experience
-
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
-
---
-
-## 🔒 Security & Reliability
-
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
-
---
-
-## 🐛 Notable Bug Fixes
-
-The `fix:` category in this window covers 482 PRs. Highlights:
-
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
- Doctor checks for Kimi China credentials fixed
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
- Rapid Telegram follow-ups no longer get cut off
-
---
-
-## 🧪 Testing & CI
-
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
-
---
-
-## 📚 Documentation
-
- Atropos + wandb links in user guide
- ACP / VS Code / Zed / JetBrains integration docs refresh
- Webhook subscription docs updated for direct-delivery mode
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
- Transport layer developer guide added
- Website removed Discussions link from README
-
---
-
-## 👥 Contributors
-
-### Core
- **@teknium1** (Teknium)
-
-### Top Community Contributors (by merged PR count)
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
- **@ethernet8023** — 3 PRs
- **@benbarclay** — 3 PRs
- **@Aslaaen** — 2 PRs
-
-### Also contributing
-@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
-
-### All Contributors (alphabetical)
-
-@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
-@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
-@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
-@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
-@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
-@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
-@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
-@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
-@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
-@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
-@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
-@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
-@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
-@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
-@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
-@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
-@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
-@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
-@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
-@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
-@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
-@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
-@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
-@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
-@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
-@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
-@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
-@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
-@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
-@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
-@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
-@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
-@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
-
-Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
-
-
---
-
-**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
@@ -63,9 +63,6 @@ def make_approval_callback(
            logger.warning("Permission request timed out or failed: %s", exc)
            return "deny"

-        if response is None:
-            return "deny"
-
        outcome = response.outcome
        if isinstance(outcome, AllowedOutcome):
            option_id = outcome.option_id
@@ -4,7 +4,6 @@ from __future__ import annotations

 import asyncio
 import logging
-import os
 from collections import defaultdict, deque
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Deque, Optional
@@ -52,7 +51,7 @@ try:
 except ImportError:
    from acp.schema import AuthMethod as AuthMethodAgent  # type: ignore[attr-defined]

-from acp_adapter.auth import detect_provider
+from acp_adapter.auth import detect_provider, has_provider
 from acp_adapter.events import (
    make_message_cb,
    make_step_cb,
@@ -60,7 +59,7 @@ from acp_adapter.events import (
    make_tool_progress_cb,
 )
 from acp_adapter.permissions import make_approval_callback
-from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets
+from acp_adapter.session import SessionManager, SessionState

 logger = logging.getLogger(__name__)

@@ -72,11 +71,6 @@ except Exception:
 # Thread pool for running AIAgent (synchronous) in parallel.
 _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")

-# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
-# does not expose a client-side limit, so this is a fixed cap that clients
-# paginate against using `cursor` / `next_cursor`.
-_LIST_SESSIONS_PAGE_SIZE = 50
-

 def _extract_text(
    prompt: list[
@@ -287,11 +281,7 @@ class HermesACPAgent(acp.Agent):
        try:
            from model_tools import get_tool_definitions

-            enabled_toolsets = _expand_acp_enabled_toolsets(
-                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
-                mcp_server_names=[server.name for server in mcp_servers],
-            )
-            state.agent.enabled_toolsets = enabled_toolsets
+            enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
            disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
            state.agent.tools = get_tool_definitions(
                enabled_toolsets=enabled_toolsets,
@@ -361,18 +351,9 @@ class HermesACPAgent(acp.Agent):
        )

    async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
-        # Only accept authenticate() calls whose method_id matches the
-        # provider we advertised in initialize(). Without this check,
-        # authenticate() would acknowledge any method_id as long as the
-        # server has provider credentials configured — harmless under
-        # Hermes' threat model (ACP is stdio-only, local-trust), but poor
-        # API hygiene and confusing if ACP ever grows multi-method auth.
-        provider = detect_provider()
-        if not provider:
-            return None
-        if not isinstance(method_id, str) or method_id.strip().lower() != provider:
-            return None
-        return AuthenticateResponse()
+        if has_provider():
+            return AuthenticateResponse()
+        return None

    # ---- Session management -------------------------------------------------

@@ -456,28 +437,7 @@ class HermesACPAgent(acp.Agent):
        cwd: str | None = None,
        **kwargs: Any,
    ) -> ListSessionsResponse:
-        """List ACP sessions with optional ``cwd`` filtering and cursor pagination.
-
-        ``cwd`` is passed through to ``SessionManager.list_sessions`` which already
-        normalizes and filters by working directory. ``cursor`` is a ``session_id``
-        previously returned as ``next_cursor``; results resume after that entry.
-        Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
-        results remain, ``next_cursor`` is set to the last returned ``session_id``.
-        """
        infos = self.session_manager.list_sessions(cwd=cwd)
-
-        if cursor:
-            for idx, s in enumerate(infos):
-                if s["session_id"] == cursor:
-                    infos = infos[idx + 1:]
-                    break
-            else:
-                # Unknown cursor -> empty page (do not fall back to full list).
-                infos = []
-
-        has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
-        infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
-
        sessions = []
        for s in infos:
            updated_at = s.get("updated_at")
@@ -491,9 +451,7 @@ class HermesACPAgent(acp.Agent):
                    updated_at=updated_at,
                )
            )
-
-        next_cursor = sessions[-1].session_id if has_more and sessions else None
-        return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
+        return ListSessionsResponse(sessions=sessions)

    # ---- Prompt (core) ------------------------------------------------------

@@ -559,32 +517,15 @@ class HermesACPAgent(acp.Agent):
        agent.step_callback = step_cb
        agent.message_callback = message_cb

-        # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
-        # Set it INSIDE _run_agent so the TLS write happens in the executor
-        # thread — setting it here would write to the event-loop thread's TLS,
-        # not the executor's. Also set HERMES_INTERACTIVE so approval.py
-        # takes the CLI-interactive path (which calls the registered
-        # callback via prompt_dangerous_approval) instead of the
-        # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
-        # ACP's conn.request_permission maps cleanly to the interactive
-        # callback shape — not the gateway-queue HERMES_EXEC_ASK path,
-        # which requires a notify_cb registered in _gateway_notify_cbs.
-        previous_approval_cb = None
-        previous_interactive = None
+        if approval_cb:
+            try:
+                from tools import terminal_tool as _terminal_tool
+                previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
+                _terminal_tool.set_approval_callback(approval_cb)
+            except Exception:
+                logger.debug("Could not set ACP approval callback", exc_info=True)

        def _run_agent() -> dict:
-            nonlocal previous_approval_cb, previous_interactive
-            if approval_cb:
-                try:
-                    from tools import terminal_tool as _terminal_tool
-                    previous_approval_cb = _terminal_tool._get_approval_callback()
-                    _terminal_tool.set_approval_callback(approval_cb)
-                except Exception:
-                    logger.debug("Could not set ACP approval callback", exc_info=True)
-            # Signal to tools.approval that we have an interactive callback
-            # and the non-interactive auto-approve path must not fire.
-            previous_interactive = os.environ.get("HERMES_INTERACTIVE")
-            os.environ["HERMES_INTERACTIVE"] = "1"
            try:
                result = agent.run_conversation(
                    user_message=user_text,
@@ -596,11 +537,6 @@ class HermesACPAgent(acp.Agent):
                logger.exception("Agent error in session %s", session_id)
                return {"final_response": f"Error: {e}", "messages": state.history}
            finally:
-                # Restore HERMES_INTERACTIVE.
-                if previous_interactive is None:
-                    os.environ.pop("HERMES_INTERACTIVE", None)
-                else:
-                    os.environ["HERMES_INTERACTIVE"] = previous_interactive
                if approval_cb:
                    try:
                        from tools import terminal_tool as _terminal_tool
@@ -677,8 +613,8 @@ class HermesACPAgent(acp.Agent):
            await self._conn.session_update(
                session_id=session_id,
                update=AvailableCommandsUpdate(
-                    session_update="available_commands_update",
-                    available_commands=self._available_commands(),
+                    sessionUpdate="available_commands_update",
+                    availableCommands=self._available_commands(),
                ),
            )
        except Exception:
@@ -758,9 +694,7 @@ class HermesACPAgent(acp.Agent):
    def _cmd_tools(self, args: str, state: SessionState) -> str:
        try:
            from model_tools import get_tool_definitions
-            toolsets = _expand_acp_enabled_toolsets(
-                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
-            )
+            toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
            if not tools:
                return "No tools available."
@@ -106,24 +106,6 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
        logger.debug("Failed to register ACP task cwd override", exc_info=True)


-def _expand_acp_enabled_toolsets(
-    toolsets: List[str] | None = None,
-    mcp_server_names: List[str] | None = None,
-) -> List[str]:
-    """Return ACP toolsets plus explicit MCP server toolsets for this session."""
-    expanded: List[str] = []
-    for name in list(toolsets or ["hermes-acp"]):
-        if name and name not in expanded:
-            expanded.append(name)
-
-    for server_name in list(mcp_server_names or []):
-        toolset_name = f"mcp-{server_name}"
-        if server_name and toolset_name not in expanded:
-            expanded.append(toolset_name)
-
-    return expanded
-
-
 def _clear_task_cwd(task_id: str) -> None:
    """Remove task-specific cwd overrides for an ACP session."""
    if not task_id:
@@ -555,18 +537,9 @@ class SessionManager:
        elif isinstance(model_cfg, str) and model_cfg.strip():
            default_model = model_cfg.strip()

-        configured_mcp_servers = [
-            name
-            for name, cfg in (config.get("mcp_servers") or {}).items()
-            if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
-        ]
-
        kwargs = {
            "platform": "acp",
-            "enabled_toolsets": _expand_acp_enabled_toolsets(
-                ["hermes-acp"],
-                mcp_server_names=configured_mcp_servers,
-            ),
+            "enabled_toolsets": ["hermes-acp"],
            "quiet_mode": True,
            "session_id": session_id,
            "model": model or default_model,
@@ -1,326 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from datetime import datetime, timezone
-from typing import Any, Optional
-
-import httpx
-
-from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
-from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
-from hermes_cli.runtime_provider import resolve_runtime_provider
-
-
-def _utc_now() -> datetime:
-    return datetime.now(timezone.utc)
-
-
-@dataclass(frozen=True)
-class AccountUsageWindow:
-    label: str
-    used_percent: Optional[float] = None
-    reset_at: Optional[datetime] = None
-    detail: Optional[str] = None
-
-
-@dataclass(frozen=True)
-class AccountUsageSnapshot:
-    provider: str
-    source: str
-    fetched_at: datetime
-    title: str = "Account limits"
-    plan: Optional[str] = None
-    windows: tuple[AccountUsageWindow, ...] = ()
-    details: tuple[str, ...] = ()
-    unavailable_reason: Optional[str] = None
-
-    @property
-    def available(self) -> bool:
-        return bool(self.windows or self.details) and not self.unavailable_reason
-
-
-def _title_case_slug(value: Optional[str]) -> Optional[str]:
-    cleaned = str(value or "").strip()
-    if not cleaned:
-        return None
-    return cleaned.replace("_", " ").replace("-", " ").title()
-
-
-def _parse_dt(value: Any) -> Optional[datetime]:
-    if value in (None, ""):
-        return None
-    if isinstance(value, (int, float)):
-        return datetime.fromtimestamp(float(value), tz=timezone.utc)
-    if isinstance(value, str):
-        text = value.strip()
-        if not text:
-            return None
-        if text.endswith("Z"):
-            text = text[:-1] + "+00:00"
-        try:
-            dt = datetime.fromisoformat(text)
-            return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
-        except ValueError:
-            return None
-    return None
-
-
-def _format_reset(dt: Optional[datetime]) -> str:
-    if not dt:
-        return "unknown"
-    local_dt = dt.astimezone()
-    delta = dt - _utc_now()
-    total_seconds = int(delta.total_seconds())
-    if total_seconds <= 0:
-        return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
-    hours, rem = divmod(total_seconds, 3600)
-    minutes = rem // 60
-    if hours >= 24:
-        days, hours = divmod(hours, 24)
-        rel = f"in {days}d {hours}h"
-    elif hours > 0:
-        rel = f"in {hours}h {minutes}m"
-    else:
-        rel = f"in {minutes}m"
-    return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
-
-
-def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
-    if not snapshot:
-        return []
-    header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
-    lines = [header]
-    if snapshot.plan:
-        lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
-    else:
-        lines.append(f"Provider: {snapshot.provider}")
-    for window in snapshot.windows:
-        if window.used_percent is None:
-            base = f"{window.label}: unavailable"
-        else:
-            remaining = max(0, round(100 - float(window.used_percent)))
-            used = max(0, round(float(window.used_percent)))
-            base = f"{window.label}: {remaining}% remaining ({used}% used)"
-        if window.reset_at:
-            base += f" • resets {_format_reset(window.reset_at)}"
-        elif window.detail:
-            base += f" • {window.detail}"
-        lines.append(base)
-    for detail in snapshot.details:
-        lines.append(detail)
-    if snapshot.unavailable_reason:
-        lines.append(f"Unavailable: {snapshot.unavailable_reason}")
-    return lines
-
-
-def _resolve_codex_usage_url(base_url: str) -> str:
-    normalized = (base_url or "").strip().rstrip("/")
-    if not normalized:
-        normalized = "https://chatgpt.com/backend-api/codex"
-    if normalized.endswith("/codex"):
-        normalized = normalized[: -len("/codex")]
-    if "/backend-api" in normalized:
-        return normalized + "/wham/usage"
-    return normalized + "/api/codex/usage"
-
-
-def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
-    creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
-    token_data = _read_codex_tokens()
-    tokens = token_data.get("tokens") or {}
-    account_id = str(tokens.get("account_id", "") or "").strip() or None
-    headers = {
-        "Authorization": f"Bearer {creds['api_key']}",
-        "Accept": "application/json",
-        "User-Agent": "codex-cli",
-    }
-    if account_id:
-        headers["ChatGPT-Account-Id"] = account_id
-    with httpx.Client(timeout=15.0) as client:
-        response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
-        response.raise_for_status()
-    payload = response.json() or {}
-    rate_limit = payload.get("rate_limit") or {}
-    windows: list[AccountUsageWindow] = []
-    for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
-        window = rate_limit.get(key) or {}
-        used = window.get("used_percent")
-        if used is None:
-            continue
-        windows.append(
-            AccountUsageWindow(
-                label=label,
-                used_percent=float(used),
-                reset_at=_parse_dt(window.get("reset_at")),
-            )
-        )
-    details: list[str] = []
-    credits = payload.get("credits") or {}
-    if credits.get("has_credits"):
-        balance = credits.get("balance")
-        if isinstance(balance, (int, float)):
-            details.append(f"Credits balance: ${float(balance):.2f}")
-        elif credits.get("unlimited"):
-            details.append("Credits balance: unlimited")
-    return AccountUsageSnapshot(
-        provider="openai-codex",
-        source="usage_api",
-        fetched_at=_utc_now(),
-        plan=_title_case_slug(payload.get("plan_type")),
-        windows=tuple(windows),
-        details=tuple(details),
-    )
-
-
-def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
-    token = (resolve_anthropic_token() or "").strip()
-    if not token:
-        return None
-    if not _is_oauth_token(token):
-        return AccountUsageSnapshot(
-            provider="anthropic",
-            source="oauth_usage_api",
-            fetched_at=_utc_now(),
-            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
-        )
-    headers = {
-        "Authorization": f"Bearer {token}",
-        "Accept": "application/json",
-        "Content-Type": "application/json",
-        "anthropic-beta": "oauth-2025-04-20",
-        "User-Agent": "claude-code/2.1.0",
-    }
-    with httpx.Client(timeout=15.0) as client:
-        response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
-        response.raise_for_status()
-    payload = response.json() or {}
-    windows: list[AccountUsageWindow] = []
-    mapping = (
-        ("five_hour", "Current session"),
-        ("seven_day", "Current week"),
-        ("seven_day_opus", "Opus week"),
-        ("seven_day_sonnet", "Sonnet week"),
-    )
-    for key, label in mapping:
-        window = payload.get(key) or {}
-        util = window.get("utilization")
-        if util is None:
-            continue
-        used = float(util) * 100 if float(util) <= 1 else float(util)
-        windows.append(
-            AccountUsageWindow(
-                label=label,
-                used_percent=used,
-                reset_at=_parse_dt(window.get("resets_at")),
-            )
-        )
-    details: list[str] = []
-    extra = payload.get("extra_usage") or {}
-    if extra.get("is_enabled"):
-        used_credits = extra.get("used_credits")
-        monthly_limit = extra.get("monthly_limit")
-        currency = extra.get("currency") or "USD"
-        if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
-            details.append(
-                f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
-            )
-    return AccountUsageSnapshot(
-        provider="anthropic",
-        source="oauth_usage_api",
-        fetched_at=_utc_now(),
-        windows=tuple(windows),
-        details=tuple(details),
-    )
-
-
-def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
-    runtime = resolve_runtime_provider(
-        requested="openrouter",
-        explicit_base_url=base_url,
-        explicit_api_key=api_key,
-    )
-    token = str(runtime.get("api_key", "") or "").strip()
-    if not token:
-        return None
-    normalized = str(runtime.get("base_url", "") or "").rstrip("/")
-    credits_url = f"{normalized}/credits"
-    key_url = f"{normalized}/key"
-    headers = {
-        "Authorization": f"Bearer {token}",
-        "Accept": "application/json",
-    }
-    with httpx.Client(timeout=10.0) as client:
-        credits_resp = client.get(credits_url, headers=headers)
-        credits_resp.raise_for_status()
-        credits = (credits_resp.json() or {}).get("data") or {}
-        try:
-            key_resp = client.get(key_url, headers=headers)
-            key_resp.raise_for_status()
-            key_data = (key_resp.json() or {}).get("data") or {}
-        except Exception:
-            key_data = {}
-    total_credits = float(credits.get("total_credits") or 0.0)
-    total_usage = float(credits.get("total_usage") or 0.0)
-    details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
-    windows: list[AccountUsageWindow] = []
-    limit = key_data.get("limit")
-    limit_remaining = key_data.get("limit_remaining")
-    limit_reset = str(key_data.get("limit_reset") or "").strip()
-    usage = key_data.get("usage")
-    if (
-        isinstance(limit, (int, float))
-        and float(limit) > 0
-        and isinstance(limit_remaining, (int, float))
-        and 0 <= float(limit_remaining) <= float(limit)
-    ):
-        limit_value = float(limit)
-        remaining_value = float(limit_remaining)
-        used_percent = ((limit_value - remaining_value) / limit_value) * 100
-        detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
-        if limit_reset:
-            detail_parts.append(f"resets {limit_reset}")
-        windows.append(
-            AccountUsageWindow(
-                label="API key quota",
-                used_percent=used_percent,
-                detail=" • ".join(detail_parts),
-            )
-        )
-    if isinstance(usage, (int, float)):
-        usage_parts = [f"API key usage: ${float(usage):.2f} total"]
-        for value, label in (
-            (key_data.get("usage_daily"), "today"),
-            (key_data.get("usage_weekly"), "this week"),
-            (key_data.get("usage_monthly"), "this month"),
-        ):
-            if isinstance(value, (int, float)) and float(value) > 0:
-                usage_parts.append(f"${float(value):.2f} {label}")
-        details.append(" • ".join(usage_parts))
-    return AccountUsageSnapshot(
-        provider="openrouter",
-        source="credits_api",
-        fetched_at=_utc_now(),
-        windows=tuple(windows),
-        details=tuple(details),
-    )
-
-
-def fetch_account_usage(
-    provider: Optional[str],
-    *,
-    base_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-) -> Optional[AccountUsageSnapshot]:
-    normalized = str(provider or "").strip().lower()
-    if normalized in {"", "auto", "custom"}:
-        return None
-    try:
-        if normalized == "openai-codex":
-            return _fetch_codex_account_usage()
-        if normalized == "anthropic":
-            return _fetch_anthropic_account_usage()
-        if normalized == "openrouter":
-            return _fetch_openrouter_account_usage(base_url, api_key)
-    except Exception:
-        return None
-    return None
@@ -14,13 +14,11 @@ import copy
 import json
 import logging
 import os
-import platform
-import subprocess
 from pathlib import Path

 from hermes_constants import get_hermes_home
+from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
-from utils import normalize_proxy_env_vars

 try:
    import anthropic as _anthropic_sdk
@@ -118,63 +116,6 @@ def _get_anthropic_max_output(model: str) -> int:
    return best_val


-def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
-    """Return ``value`` floored to a positive int, or ``None`` if it is not a
-    finite positive number. Ported from openclaw/openclaw#66664.
-
-    Anthropic's Messages API rejects ``max_tokens`` values that are 0,
-    negative, non-integer, or non-finite with HTTP 400. Python's ``or``
-    idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
-    negative ints and fractional floats (``-1``, ``0.5``) through to the
-    API, producing a user-visible failure instead of a local error.
-    """
-    # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
-    # silently become 1 and ``False`` doesn't become 0.
-    if isinstance(value, bool):
-        return None
-    if not isinstance(value, (int, float)):
-        return None
-    try:
-        import math
-        if not math.isfinite(value):
-            return None
-    except Exception:
-        return None
-    floored = int(value)  # truncates toward zero for floats
-    return floored if floored > 0 else None
-
-
-def _resolve_anthropic_messages_max_tokens(
-    requested,
-    model: str,
-    context_length: Optional[int] = None,
-) -> int:
-    """Resolve the ``max_tokens`` budget for an Anthropic Messages call.
-
-    Prefers ``requested`` when it is a positive finite number; otherwise
-    falls back to the model's output ceiling. Raises ``ValueError`` if no
-    positive budget can be resolved (should not happen with current model
-    table defaults, but guards against a future regression where
-    ``_get_anthropic_max_output`` could return ``0``).
-
-    Separately, callers apply a context-window clamp — this resolver does
-    not, to keep the positive-value contract independent of endpoint
-    specifics.
-
-    Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
-    """
-    resolved = _resolve_positive_anthropic_max_tokens(requested)
-    if resolved is not None:
-        return resolved
-    fallback = _get_anthropic_max_output(model)
-    if fallback > 0:
-        return fallback
-    raise ValueError(
-        f"Anthropic Messages adapter requires a positive max_tokens value for "
-        f"model {model!r}; got {requested!r} and no model default resolved."
-    )
-
-
 def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6+ models that support adaptive thinking."""
    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -279,9 +220,8 @@ def _is_oauth_token(key: str) -> bool:
    Positively identifies Anthropic OAuth tokens by their key format:
    - ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
    - ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
-    - ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)

-    Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
+    Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
    and correctly return False.
    """
    if not key:
@@ -295,9 +235,6 @@ def _is_oauth_token(key: str) -> bool:
    # JWTs from Anthropic OAuth flow
    if key.startswith("eyJ"):
        return True
-    # Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
-    if key.startswith("cc-"):
-        return True
    return False


@@ -328,14 +265,6 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    return True  # Any other endpoint is a third-party proxy


-def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
-    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
-    normalized = _normalize_base_url_text(base_url)
-    if not normalized:
-        return False
-    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
-
-
 def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

@@ -379,9 +308,6 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
-
-    normalize_proxy_env_vars()
-
    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
@@ -390,30 +316,12 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
    }
    if normalized_base_url:
-        # Azure Anthropic endpoints require an ``api-version`` query parameter.
-        # Pass it via default_query so the SDK appends it to every request URL
-        # without corrupting the base_url (appending it directly produces
-        # malformed paths like /anthropic?api-version=.../v1/messages).
-        _is_azure_endpoint = "azure.com" in normalized_base_url.lower()
-        if _is_azure_endpoint and "api-version" not in normalized_base_url:
-            kwargs["base_url"] = normalized_base_url.rstrip("/")
-            kwargs["default_query"] = {"api-version": "2025-04-15"}
-        else:
-            kwargs["base_url"] = normalized_base_url
+        kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

-    if _is_kimi_coding_endpoint(base_url):
-        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
-        # to be recognized as a valid Coding Agent. Without it, returns 403.
-        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
-        kwargs["api_key"] = api_key
-        kwargs["default_headers"] = {
-            "User-Agent": "claude-code/0.1.0",
-            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
-        }
-    elif _requires_bearer_auth(normalized_base_url):
+    if _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer *** for regular API keys. Route those endpoints
+        # Authorization: Bearer even for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -476,72 +384,8 @@ def build_anthropic_bedrock_client(region: str):
    )


-def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
-    """Read Claude Code OAuth credentials from the macOS Keychain.
-
-    Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
-    service name "Claude Code-credentials" rather than (or in addition to)
-    the JSON file at ~/.claude/.credentials.json.
-
-    The password field contains a JSON string with the same claudeAiOauth
-    structure as the JSON file.
-
-    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
-    """
-    import platform
-    import subprocess
-
-    if platform.system() != "Darwin":
-        return None
-
-    try:
-        # Read the "Claude Code-credentials" generic password entry
-        result = subprocess.run(
-            ["security", "find-generic-password",
-             "-s", "Claude Code-credentials",
-             "-w"],
-            capture_output=True,
-            text=True,
-            timeout=5,
-        )
-    except (OSError, subprocess.TimeoutExpired):
-        logger.debug("Keychain: security command not available or timed out")
-        return None
-
-    if result.returncode != 0:
-        logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
-        return None
-
-    raw = result.stdout.strip()
-    if not raw:
-        return None
-
-    try:
-        data = json.loads(raw)
-    except json.JSONDecodeError:
-        logger.debug("Keychain: credentials payload is not valid JSON")
-        return None
-
-    oauth_data = data.get("claudeAiOauth")
-    if oauth_data and isinstance(oauth_data, dict):
-        access_token = oauth_data.get("accessToken", "")
-        if access_token:
-            return {
-                "accessToken": access_token,
-                "refreshToken": oauth_data.get("refreshToken", ""),
-                "expiresAt": oauth_data.get("expiresAt", 0),
-                "source": "macos_keychain",
-            }
-
-    return None
-
-
 def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
-    """Read refreshable Claude Code OAuth credentials.
-
-    Checks two sources in order:
-      1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
-      2. ~/.claude/.credentials.json file
+    """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.

    This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
    subscription flow is OAuth/setup-token based with refreshable credentials,
@@ -550,12 +394,6 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
-    # Try macOS Keychain first (covers Claude Code >=2.1.114)
-    kc_creds = _read_claude_code_credentials_from_keychain()
-    if kc_creds:
-        return kc_creds
-
-    # Fall back to JSON file
    cred_path = Path.home() / ".claude" / ".credentials.json"
    if cred_path.exists():
        try:
@@ -726,9 +564,7 @@ def _write_claude_code_credentials(
        existing["claudeAiOauth"] = oauth_data

        cred_path.parent.mkdir(parents=True, exist_ok=True)
-        _tmp_cred = cred_path.with_suffix(".tmp")
-        _tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
-        _tmp_cred.replace(cred_path)
+        cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
        # Restrict permissions (credentials file)
        cred_path.chmod(0o600)
    except (OSError, IOError) as e:
@@ -995,26 +831,6 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
 # ---------------------------------------------------------------------------


-def _is_bedrock_model_id(model: str) -> bool:
-    """Detect AWS Bedrock model IDs that use dots as namespace separators.
-
-    Bedrock model IDs come in two forms:
-    - Bare:    ``anthropic.claude-opus-4-7``
-    - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
-
-    In both cases the dots separate namespace components, not version
-    numbers, and must be preserved verbatim for the Bedrock API.
-    """
-    lower = model.lower()
-    # Regional inference-profile prefixes
-    if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
-        return True
-    # Bare Bedrock model IDs: provider.model-family
-    if lower.startswith("anthropic."):
-        return True
-    return False
-
-
 def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """Normalize a model name for the Anthropic API.

@@ -1022,19 +838,11 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    - Converts dots to hyphens in version numbers (OpenRouter uses dots,
      Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
      preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
-    - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
-      regional inference profiles (``us.anthropic.claude-*``) whose dots
-      are namespace separators, not version separators.
    """
    lower = model.lower()
    if lower.startswith("anthropic/"):
        model = model[len("anthropic/"):]
    if not preserve_dots:
-        # Bedrock model IDs use dots as namespace separators
-        # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
-        # These must not be converted to hyphens.  See issue #12295.
-        if _is_bedrock_model_id(model):
-            return model
        # OpenRouter uses dots for version separators (claude-opus-4.6),
        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
        model = model.replace(".", "-")
@@ -1254,31 +1062,6 @@ def convert_messages_to_anthropic(
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
-            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
-            # tool-call messages to carry reasoning_content when thinking is
-            # enabled server-side.  Preserve it as a thinking block so Kimi
-            # can validate the message history.  See hermes-agent#13848.
-            #
-            # Accept empty string "" — _copy_reasoning_content_for_api()
-            # injects "" as a tier-3 fallback for Kimi tool-call messages
-            # that had no reasoning.  Kimi requires the field to exist, even
-            # if empty.
-            #
-            # Prepend (not append): Anthropic protocol requires thinking
-            # blocks before text and tool_use blocks.
-            #
-            # Guard: only add when reasoning_details didn't already contribute
-            # thinking blocks.  On native Anthropic, reasoning_details produces
-            # signed thinking blocks — adding another unsigned one from
-            # reasoning_content would create a duplicate (same text) that gets
-            # downgraded to a spurious text block on the last assistant message.
-            reasoning_content = m.get("reasoning_content")
-            _already_has_thinking = any(
-                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
-                for b in blocks
-            )
-            if isinstance(reasoning_content, str) and not _already_has_thinking:
-                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
@@ -1434,7 +1217,6 @@ def convert_messages_to_anthropic(
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
-    _is_kimi = _is_kimi_coding_endpoint(base_url)

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
@@ -1446,25 +1228,7 @@ def convert_messages_to_anthropic(
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

-        if _is_kimi:
-            # Kimi's /coding endpoint enables thinking server-side and
-            # requires unsigned thinking blocks on replayed assistant
-            # tool-call messages.  Strip signed Anthropic blocks (Kimi
-            # can't validate signatures) but preserve the unsigned ones
-            # we synthesised from reasoning_content above.
-            new_content = []
-            for b in m["content"]:
-                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
-                    new_content.append(b)
-                    continue
-                if b.get("signature") or b.get("data"):
-                    # Anthropic-signed block — Kimi can't validate, strip
-                    continue
-                # Unsigned thinking (synthesised from reasoning_content) —
-                # keep it: Kimi needs it for message-history validation.
-                new_content.append(b)
-            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
-        elif _is_third_party or idx != last_assistant_idx:
+        if _is_third_party or idx != last_assistant_idx:
            # Third-party endpoint: strip ALL thinking blocks from every
            # assistant message — signatures are Anthropic-proprietary.
            # Direct Anthropic: strip from non-latest assistant messages only.
@@ -1562,12 +1326,7 @@ def build_anthropic_kwargs(

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
-    # Use the resolver helper so non-positive values (negative ints,
-    # fractional floats, NaN, non-numeric) fail locally with a clear error
-    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
-    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
-        max_tokens, model, context_length=context_length
-    )
+    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
@@ -1646,25 +1405,11 @@ def build_anthropic_kwargs(
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
-    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
-    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
-    # validates the message history and requires every prior assistant
-    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
-    # Anthropic path never populates that field, and
-    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
-    # on third-party endpoints — so the request fails with HTTP 400
-    # "thinking is enabled but reasoning_content is missing in assistant
-    # tool call message at index N".  Kimi's reasoning is driven server-side
-    # on the /coding route, so skip Anthropic's thinking parameter entirely
-    # for that host.  (Kimi on chat_completions enables thinking via
-    # extra_body in the ChatCompletionsTransport — see #13503.)
-    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
-    _is_kimi_coding = _is_kimi_coding_endpoint(base_url)
-    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
+    if reasoning_config and isinstance(reasoning_config, dict):
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
@@ -1689,9 +1434,9 @@ def build_anthropic_kwargs(

    # ── Strip sampling params on 4.7+ ─────────────────────────────────
    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
-    # Callers (auxiliary_client, etc.) may set these for older models;
-    # drop them here as a safety net so upstream 4.6 → 4.7 migrations
-    # don't require coordinated edits everywhere.
+    # Callers (auxiliary_client, flush_memories, etc.) may set these for
+    # older models; drop them here as a safety net so upstream 4.6 → 4.7
+    # migrations don't require coordinated edits everywhere.
    if _forbids_sampling_params(model):
        for _sampling_key in ("temperature", "top_p", "top_k"):
            kwargs.pop(_sampling_key, None)
@@ -1713,3 +1458,70 @@ def build_anthropic_kwargs(
    return kwargs


+def normalize_anthropic_response(
+    response,
+    strip_tool_prefix: bool = False,
+) -> Tuple[SimpleNamespace, str]:
+    """Normalize Anthropic response to match the shape expected by AIAgent.
+
+    Returns (assistant_message, finish_reason) where assistant_message has
+    .content, .tool_calls, and .reasoning attributes.
+
+    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
+    added to tool names for OAuth Claude Code compatibility.
+    """
+    text_parts = []
+    reasoning_parts = []
+    reasoning_details = []
+    tool_calls = []
+
+    for block in response.content:
+        if block.type == "text":
+            text_parts.append(block.text)
+        elif block.type == "thinking":
+            reasoning_parts.append(block.thinking)
+            block_dict = _to_plain_data(block)
+            if isinstance(block_dict, dict):
+                reasoning_details.append(block_dict)
+        elif block.type == "tool_use":
+            name = block.name
+            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
+                name = name[len(_MCP_TOOL_PREFIX):]
+            tool_calls.append(
+                SimpleNamespace(
+                    id=block.id,
+                    type="function",
+                    function=SimpleNamespace(
+                        name=name,
+                        arguments=json.dumps(block.input),
+                    ),
+                )
+            )
+
+    # Map Anthropic stop_reason to OpenAI finish_reason.
+    # Newer stop reasons added in Claude 4.5+ / 4.7:
+    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
+    #   - model_context_window_exceeded: hit context limit (not max_tokens)
+    # Both need distinct handling upstream — a refusal should surface to the
+    # user with a clear message, and a context-window overflow should trigger
+    # compression/truncation rather than be treated as normal end-of-turn.
+    stop_reason_map = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+    finish_reason = stop_reason_map.get(response.stop_reason, "stop")
+
+    return (
+        SimpleNamespace(
+            content="\n".join(text_parts) if text_parts else None,
+            tool_calls=tool_calls or None,
+            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
+            reasoning_content=None,
+            reasoning_details=reasoning_details or None,
+        ),
+        finish_reason,
+    )
@@ -87,114 +87,6 @@ def reset_client_cache():
    _bedrock_control_client_cache.clear()


-def invalidate_runtime_client(region: str) -> bool:
-    """Evict the cached ``bedrock-runtime`` client for a single region.
-
-    Per-region counterpart to :func:`reset_client_cache`. Used by the converse
-    call wrappers to discard clients whose underlying HTTP connection has
-    gone stale, so the next call allocates a fresh client (with a fresh
-    connection pool) instead of reusing a dead socket.
-
-    Returns True if a cached entry was evicted, False if the region was not
-    cached.
-    """
-    existed = region in _bedrock_runtime_client_cache
-    _bedrock_runtime_client_cache.pop(region, None)
-    return existed
-
-
-# ---------------------------------------------------------------------------
-# Stale-connection detection
-# ---------------------------------------------------------------------------
-#
-# boto3 caches its HTTPS connection pool inside the client object. When a
-# pooled connection is killed out from under us (NAT timeout, VPN flap,
-# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
-# one of a handful of low-level exceptions — most commonly
-# ``botocore.exceptions.ConnectionClosedError`` or
-# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
-# ``assert`` in a couple of paths (connection pool state checks, chunked
-# response readers) which bubbles up as a bare ``AssertionError`` with an
-# empty ``str(exc)``.
-#
-# In all of these cases the client is the problem, not the request: retrying
-# with the same cached client reproduces the failure until the process
-# restarts. The fix is to evict the region's cached client so the next
-# attempt builds a new one.
-
-_STALE_LIB_MODULE_PREFIXES = (
-    "urllib3.",
-    "botocore.",
-    "boto3.",
-)
-
-
-def _traceback_frames_modules(exc: BaseException):
-    """Yield ``__name__``-style module strings for each frame in exc's traceback."""
-    tb = getattr(exc, "__traceback__", None)
-    while tb is not None:
-        frame = tb.tb_frame
-        module = frame.f_globals.get("__name__", "")
-        yield module or ""
-        tb = tb.tb_next
-
-
-def is_stale_connection_error(exc: BaseException) -> bool:
-    """Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
-
-    Matches:
-      * ``botocore.exceptions.ConnectionError`` and subclasses
-        (``ConnectionClosedError``, ``EndpointConnectionError``,
-        ``ReadTimeoutError``, ``ConnectTimeoutError``).
-      * ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` /
-        ``ConnectionError`` (best-effort import — urllib3 is a transitive
-        dependency of botocore so it is always available in practice).
-      * Bare ``AssertionError`` raised from a frame inside urllib3, botocore,
-        or boto3. These are internal-invariant failures (typically triggered
-        by corrupted connection-pool state after a dropped socket) and are
-        recoverable by swapping the client.
-
-    Non-library ``AssertionError``s (from application code or tests) are
-    intentionally not matched — only library-internal asserts signal stale
-    connection state.
-    """
-    # botocore: the canonical signal — HTTPClientError is the umbrella for
-    # ConnectionClosedError, ReadTimeoutError, EndpointConnectionError,
-    # ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers
-    # the same family via a different branch of the hierarchy.
-    try:
-        from botocore.exceptions import (
-            ConnectionError as BotoConnectionError,
-            HTTPClientError,
-        )
-        botocore_errors: tuple = (BotoConnectionError, HTTPClientError)
-    except ImportError:  # pragma: no cover — botocore always present with boto3
-        botocore_errors = ()
-    if botocore_errors and isinstance(exc, botocore_errors):
-        return True
-
-    # urllib3: low-level transport failures
-    try:
-        from urllib3.exceptions import (
-            ProtocolError,
-            NewConnectionError,
-            ConnectionError as Urllib3ConnectionError,
-        )
-        urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
-    except ImportError:  # pragma: no cover
-        urllib3_errors = ()
-    if urllib3_errors and isinstance(exc, urllib3_errors):
-        return True
-
-    # Library-internal AssertionError (urllib3 / botocore / boto3)
-    if isinstance(exc, AssertionError):
-        for module in _traceback_frames_modules(exc):
-            if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES):
-                return True
-
-    return False
-
-
 # ---------------------------------------------------------------------------
 # AWS credential detection
 # ---------------------------------------------------------------------------
@@ -895,17 +787,7 @@ def call_converse(
        guardrail_config=guardrail_config,
    )

-    try:
-        response = client.converse(**kwargs)
-    except Exception as exc:
-        if is_stale_connection_error(exc):
-            logger.warning(
-                "bedrock: stale-connection error on converse(region=%s, model=%s): "
-                "%s — evicting cached client so the next call reconnects.",
-                region, model, type(exc).__name__,
-            )
-            invalidate_runtime_client(region)
-        raise
+    response = client.converse(**kwargs)
    return normalize_converse_response(response)


@@ -937,17 +819,7 @@ def call_converse_stream(
        guardrail_config=guardrail_config,
    )

-    try:
-        response = client.converse_stream(**kwargs)
-    except Exception as exc:
-        if is_stale_connection_error(exc):
-            logger.warning(
-                "bedrock: stale-connection error on converse_stream(region=%s, "
-                "model=%s): %s — evicting cached client so the next call reconnects.",
-                region, model, type(exc).__name__,
-            )
-            invalidate_runtime_client(region)
-        raise
+    response = client.converse_stream(**kwargs)
    return normalize_converse_stream_events(response)


@@ -22,124 +22,6 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY

 logger = logging.getLogger(__name__)

-
-# Matches Codex/Harmony tool-call serialization that occasionally leaks into
-# assistant-message content when the model fails to emit a structured
-# ``function_call`` item.  Accepts the common forms:
-#
-#   to=functions.exec_command
-#   assistant to=functions.exec_command
-#   <|channel|>commentary to=functions.exec_command
-#
-# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
-# Harmony channel prefix varies by degeneration mode.  Case-insensitive to
-# cover lowercase/uppercase ``assistant`` variants.
-_TOOL_CALL_LEAK_PATTERN = re.compile(
-    r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
-    re.IGNORECASE,
-)
-
-
-# ---------------------------------------------------------------------------
-# Multimodal content helpers
-# ---------------------------------------------------------------------------
-
-def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
-    """Convert chat-style multimodal content to Responses API input parts.
-
-    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
-    Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
-
-    The ``role`` parameter controls the text content type:
-    - ``"user"`` (default) → ``"input_text"``
-    - ``"assistant"`` → ``"output_text"``
-
-    The Responses API rejects ``input_text`` inside assistant messages and
-    ``output_text`` inside user messages, so callers MUST pass the correct
-    role for the message being converted.
-
-    Returns an empty list when ``content`` is not a list or contains no
-    recognized parts — callers fall back to the string path.
-    """
-    text_type = "output_text" if role == "assistant" else "input_text"
-    if not isinstance(content, list):
-        return []
-    converted: List[Dict[str, Any]] = []
-    for part in content:
-        if isinstance(part, str):
-            if part:
-                converted.append({"type": text_type, "text": part})
-            continue
-        if not isinstance(part, dict):
-            continue
-        ptype = str(part.get("type") or "").strip().lower()
-        if ptype in {"text", "input_text", "output_text"}:
-            text = part.get("text")
-            if isinstance(text, str) and text:
-                converted.append({"type": text_type, "text": text})
-            continue
-        if ptype in {"image_url", "input_image"}:
-            image_ref = part.get("image_url")
-            detail = part.get("detail")
-            if isinstance(image_ref, dict):
-                url = image_ref.get("url")
-                detail = image_ref.get("detail", detail)
-            else:
-                url = image_ref
-            if not isinstance(url, str) or not url:
-                continue
-            image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
-            if isinstance(detail, str) and detail.strip():
-                image_part["detail"] = detail.strip()
-            converted.append(image_part)
-    return converted
-
-
-def _summarize_user_message_for_log(content: Any) -> str:
-    """Return a short text summary of a user message for logging/trajectory.
-
-    Multimodal messages arrive as a list of ``{type:"text"|"image_url", ...}``
-    parts from the API server.  Logging, spinner previews, and trajectory
-    files all want a plain string — this helper extracts the first chunk of
-    text and notes any attached images.  Returns an empty string for empty
-    lists and ``str(content)`` for unexpected scalar types.
-    """
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        text_bits: List[str] = []
-        image_count = 0
-        for part in content:
-            if isinstance(part, str):
-                if part:
-                    text_bits.append(part)
-                continue
-            if not isinstance(part, dict):
-                continue
-            ptype = str(part.get("type") or "").strip().lower()
-            if ptype in {"text", "input_text", "output_text"}:
-                text = part.get("text")
-                if isinstance(text, str) and text:
-                    text_bits.append(text)
-            elif ptype in {"image_url", "input_image"}:
-                image_count += 1
-        summary = " ".join(text_bits).strip()
-        if image_count:
-            note = f"[{image_count} image{'s' if image_count != 1 else ''}]"
-            summary = f"{note} {summary}" if summary else note
-        return summary
-    try:
-        return str(content)
-    except Exception:
-        return ""
-
-
-# ---------------------------------------------------------------------------
-# ID helpers
-# ---------------------------------------------------------------------------
-
 def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str:
    """Generate a deterministic call_id from tool call content.

@@ -198,17 +80,14 @@ def _derive_responses_function_call_id(
    return f"fc_{digest}"


-# ---------------------------------------------------------------------------
-# Schema conversion
-# ---------------------------------------------------------------------------
-
 def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]:
    """Convert chat-completions tool schemas to Responses function-tool schemas."""
-    if not tools:
+    source_tools = tools
+    if not source_tools:
        return None

    converted: List[Dict[str, Any]] = []
-    for item in tools:
+    for item in source_tools:
        fn = item.get("function", {}) if isinstance(item, dict) else {}
        name = fn.get("name")
        if not isinstance(name, str) or not name.strip():
@@ -223,27 +102,6 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
    return converted or None


-# ---------------------------------------------------------------------------
-# Message format conversion
-# ---------------------------------------------------------------------------
-
-_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
-
-
-def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
-    """Normalize a Responses assistant message status for replay.
-
-    The API accepts completed/incomplete/in_progress on replayed assistant
-    output messages.  Preserve those exactly (modulo case/hyphen spelling) so
-    incomplete Codex continuation turns don't get falsely marked completed.
-    """
-    if isinstance(value, str):
-        status = value.strip().lower().replace("-", "_").replace(" ", "_")
-        if status in _RESPONSE_MESSAGE_STATUSES:
-            return status
-    return default
-
-
 def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert internal chat-style messages to Responses input items."""
    items: List[Dict[str, Any]] = []
@@ -258,15 +116,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di

        if role in {"user", "assistant"}:
            content = msg.get("content", "")
-            if isinstance(content, list):
-                content_parts = _chat_content_to_responses_parts(content, role=role)
-                text_type = "output_text" if role == "assistant" else "input_text"
-                content_text = "".join(
-                    p.get("text", "") for p in content_parts if p.get("type") == text_type
-                )
-            else:
-                content_parts = []
-                content_text = str(content) if content is not None else ""
+            content_text = str(content) if content is not None else ""

            if role == "assistant":
                # Replay encrypted reasoning items from previous turns
@@ -289,59 +139,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                                seen_item_ids.add(item_id)
                            has_codex_reasoning = True

-                # Replay exact assistant message items (with id/phase) from
-                # previous turns so the API can maintain prefix-cache hits.
-                # OpenAI docs: "preserve and resend phase on all assistant
-                # messages — dropping it can degrade performance."
-                codex_message_items = msg.get("codex_message_items")
-                replayed_message_items = 0
-                if isinstance(codex_message_items, list):
-                    for raw_item in codex_message_items:
-                        if not isinstance(raw_item, dict):
-                            continue
-                        if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
-                            continue
-                        raw_content_parts = raw_item.get("content")
-                        if not isinstance(raw_content_parts, list):
-                            continue
-
-                        normalized_content_parts = []
-                        for part in raw_content_parts:
-                            if not isinstance(part, dict):
-                                continue
-                            part_type = str(part.get("type") or "").strip()
-                            if part_type not in {"output_text", "text"}:
-                                continue
-                            text = part.get("text", "")
-                            if text is None:
-                                text = ""
-                            if not isinstance(text, str):
-                                text = str(text)
-                            normalized_content_parts.append({"type": "output_text", "text": text})
-
-                        if not normalized_content_parts:
-                            continue
-
-                        replay_item = {
-                            "type": "message",
-                            "role": "assistant",
-                            "status": _normalize_responses_message_status(raw_item.get("status")),
-                            "content": normalized_content_parts,
-                        }
-                        item_id = raw_item.get("id")
-                        if isinstance(item_id, str) and item_id.strip():
-                            replay_item["id"] = item_id.strip()
-                        phase = raw_item.get("phase")
-                        if isinstance(phase, str) and phase.strip():
-                            replay_item["phase"] = phase.strip()
-                        items.append(replay_item)
-                        replayed_message_items += 1
-
-                if replayed_message_items > 0:
-                    pass
-                elif content_parts:
-                    items.append({"role": "assistant", "content": content_parts})
-                elif content_text.strip():
+                if content_text.strip():
                    items.append({"role": "assistant", "content": content_text})
                elif has_codex_reasoning:
                    # The Responses API requires a following item after each
@@ -394,12 +192,7 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                        })
                continue

-            # Non-assistant (user) role: emit multimodal parts when present,
-            # otherwise fall back to the text payload.
-            if content_parts:
-                items.append({"role": role, "content": content_parts})
-            else:
-                items.append({"role": role, "content": content_text})
+            items.append({"role": role, "content": content_text})
            continue

        if role == "tool":
@@ -419,10 +212,6 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
    return items


-# ---------------------------------------------------------------------------
-# Input preflight / validation
-# ---------------------------------------------------------------------------
-
 def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
    if not isinstance(raw_items, list):
        raise ValueError("Codex Responses input must be a list of input items.")
@@ -499,95 +288,11 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                normalized.append(reasoning_item)
            continue

-        if item_type == "message":
-            role = item.get("role")
-            if role != "assistant":
-                raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
-            content = item.get("content")
-            if not isinstance(content, list):
-                raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
-            normalized_content = []
-            for part_idx, part in enumerate(content):
-                if not isinstance(part, dict):
-                    raise ValueError(
-                        f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
-                    )
-                part_type = part.get("type")
-                if part_type not in {"output_text", "text"}:
-                    raise ValueError(
-                        f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
-                    )
-                text = part.get("text", "")
-                if text is None:
-                    text = ""
-                if not isinstance(text, str):
-                    text = str(text)
-                normalized_content.append({"type": "output_text", "text": text})
-            if not normalized_content:
-                raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
-            normalized_item: Dict[str, Any] = {
-                "type": "message",
-                "role": "assistant",
-                "status": _normalize_responses_message_status(item.get("status")),
-                "content": normalized_content,
-            }
-            item_id = item.get("id")
-            if isinstance(item_id, str) and item_id.strip():
-                normalized_item["id"] = item_id.strip()
-            phase = item.get("phase")
-            if isinstance(phase, str) and phase.strip():
-                normalized_item["phase"] = phase.strip()
-            normalized.append(normalized_item)
-            continue
-
        role = item.get("role")
        if role in {"user", "assistant"}:
            content = item.get("content", "")
            if content is None:
                content = ""
-            if isinstance(content, list):
-                # Multimodal content from ``_chat_messages_to_responses_input``
-                # is already in Responses format (``input_text`` / ``output_text``
-                # / ``input_image``).  Validate each part and pass through.
-                # Use the correct text type for the role — ``output_text`` for
-                # assistant messages, ``input_text`` for user messages.
-                text_type = "output_text" if role == "assistant" else "input_text"
-                validated: List[Dict[str, Any]] = []
-                for part_idx, part in enumerate(content):
-                    if isinstance(part, str):
-                        if part:
-                            validated.append({"type": text_type, "text": part})
-                        continue
-                    if not isinstance(part, dict):
-                        raise ValueError(
-                            f"Codex Responses input[{idx}].content[{part_idx}] must be an object or string."
-                        )
-                    ptype = str(part.get("type") or "").strip().lower()
-                    if ptype in {"input_text", "text", "output_text"}:
-                        text = part.get("text", "")
-                        if not isinstance(text, str):
-                            text = str(text or "")
-                        validated.append({"type": text_type, "text": text})
-                    elif ptype in {"input_image", "image_url"}:
-                        image_ref = part.get("image_url", "")
-                        detail = part.get("detail")
-                        if isinstance(image_ref, dict):
-                            url = image_ref.get("url", "")
-                            detail = image_ref.get("detail", detail)
-                        else:
-                            url = image_ref
-                        if not isinstance(url, str):
-                            url = str(url or "")
-                        image_part: Dict[str, Any] = {"type": "input_image", "image_url": url}
-                        if isinstance(detail, str) and detail.strip():
-                            image_part["detail"] = detail.strip()
-                        validated.append(image_part)
-                    else:
-                        raise ValueError(
-                            f"Codex Responses input[{idx}].content[{part_idx}] has unsupported type {part.get('type')!r}."
-                        )
-                normalized.append({"role": role, "content": validated})
-                continue
            if not isinstance(content, str):
                content = str(content)

@@ -744,10 +449,6 @@ def _preflight_codex_api_kwargs(
    return normalized


-# ---------------------------------------------------------------------------
-# Response extraction helpers
-# ---------------------------------------------------------------------------
-
 def _extract_responses_message_text(item: Any) -> str:
    """Extract assistant text from a Responses message output item."""
    content = getattr(item, "content", None)
@@ -782,10 +483,6 @@ def _extract_responses_reasoning_text(item: Any) -> str:
    return ""


-# ---------------------------------------------------------------------------
-# Full response normalization
-# ---------------------------------------------------------------------------
-
 def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    """Normalize a Responses API object to an assistant_message-like object."""
    output = getattr(response, "output", None)
@@ -824,7 +521,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    content_parts: List[str] = []
    reasoning_parts: List[str] = []
    reasoning_items_raw: List[Dict[str, Any]] = []
-    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
    saw_commentary_phase = False
@@ -843,7 +539,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
-            normalized_phase = None
            if isinstance(item_phase, str):
                normalized_phase = item_phase.strip().lower()
                if normalized_phase in {"commentary", "analysis"}:
@@ -853,18 +548,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
            message_text = _extract_responses_message_text(item)
            if message_text:
                content_parts.append(message_text)
-                raw_message_item: Dict[str, Any] = {
-                    "type": "message",
-                    "role": "assistant",
-                    "status": _normalize_responses_message_status(item_status),
-                    "content": [{"type": "output_text", "text": message_text}],
-                }
-                item_id = getattr(item, "id", None)
-                if isinstance(item_id, str) and item_id:
-                    raw_message_item["id"] = item_id
-                if normalized_phase:
-                    raw_message_item["phase"] = normalized_phase
-                message_items_raw.append(raw_message_item)
        elif item_type == "reasoning":
            reasoning_text = _extract_responses_reasoning_text(item)
            if reasoning_text:
@@ -939,37 +622,6 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        if isinstance(out_text, str):
            final_text = out_text.strip()

-    # ── Tool-call leak recovery ──────────────────────────────────
-    # gpt-5.x on the Codex Responses API sometimes degenerates and emits
-    # what should be a structured `function_call` item as plain assistant
-    # text using the Harmony/Codex serialization (``to=functions.foo
-    # {json}`` or ``assistant to=functions.foo {json}``). The model
-    # intended to call a tool, but the intent never made it into
-    # ``response.output`` as a ``function_call`` item, so ``tool_calls``
-    # is empty here. If we pass this through, the parent sees a
-    # confident-looking summary with no audit trail (empty ``tool_trace``)
-    # and no tools actually ran — the Taiwan-embassy-email incident.
-    #
-    # Detection: leaked tokens always contain ``to=functions.<name>`` and
-    # the assistant message has no real tool calls. Treat it as incomplete
-    # so the existing Codex-incomplete continuation path (3 retries,
-    # handled in run_agent.py) gets a chance to re-elicit a proper
-    # ``function_call`` item. The existing loop already handles message
-    # append, dedup, and retry budget.
-    leaked_tool_call_text = False
-    if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
-        leaked_tool_call_text = True
-        logger.warning(
-            "Codex response contains leaked tool-call text in assistant content "
-            "(no structured function_call items). Treating as incomplete so the "
-            "continuation path can re-elicit a proper tool call. Leaked snippet: %r",
-            final_text[:300],
-        )
-        # Clear the text so downstream code doesn't surface the garbage as
-        # a summary. The encrypted reasoning items (if any) are preserved
-        # so the model keeps its chain-of-thought on the retry.
-        final_text = ""
-
    assistant_message = SimpleNamespace(
        content=final_text,
        tool_calls=tool_calls,
@@ -977,13 +629,10 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        reasoning_content=None,
        reasoning_details=None,
        codex_reasoning_items=reasoning_items_raw or None,
-        codex_message_items=message_items_raw or None,
    )

    if tool_calls:
        finish_reason = "tool_calls"
-    elif leaked_tool_call_text:
-        finish_reason = "incomplete"
    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
        finish_reason = "incomplete"
    elif reasoning_items_raw and not final_text:
@@ -997,3 +646,5 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    else:
        finish_reason = "stop"
    return assistant_message, finish_reason
+
+
@@ -31,7 +31,6 @@ from agent.model_metadata import (
    get_model_context_length,
    estimate_messages_tokens_rough,
 )
-from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

@@ -64,47 +63,6 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


-def _content_text_for_contains(content: Any) -> str:
-    """Return a best-effort text view of message content.
-
-    Used only for substring checks when we need to know whether we've already
-    appended a note to a message. Keeps multimodal lists intact elsewhere.
-    """
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-            elif isinstance(item, dict):
-                text = item.get("text")
-                if isinstance(text, str):
-                    parts.append(text)
-        return "\n".join(part for part in parts if part)
-    return str(content)
-
-
-def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
-    """Append or prepend plain text to message content safely.
-
-    Compression sometimes needs to add a note or merge a summary into an
-    existing message. Message content may be plain text or a multimodal list of
-    blocks, so direct string concatenation is not always safe.
-    """
-    if content is None:
-        return text
-    if isinstance(content, str):
-        return text + content if prepend else content + text
-    if isinstance(content, list):
-        text_block = {"type": "text", "text": text}
-        return [text_block, *content] if prepend else [*content, text_block]
-    rendered = str(content)
-    return text + rendered if prepend else rendered + text
-
-
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -294,7 +252,6 @@ class ContextCompressor(ContextEngine):
        self._context_probed = False
        self._context_probe_persistable = False
        self._previous_summary = None
-        self._last_summary_error = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -318,13 +275,6 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
-        # Recalculate token budgets for the new context length so the
-        # compressor stays calibrated after a model switch (e.g. 200K → 32K).
-        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
-        self.tail_token_budget = target_tokens
-        self.max_summary_tokens = min(
-            int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
-        )

    def __init__(
        self,
@@ -397,7 +347,6 @@ class ContextCompressor(ContextEngine):
        self._last_compression_savings_pct: float = 100.0
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
-        self._last_summary_error: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -601,15 +550,11 @@ class ContextCompressor(ContextEngine):
        Includes tool call arguments and result content (up to
        ``_CONTENT_MAX`` chars per message) so the summarizer can preserve
        specific details like file paths, commands, and outputs.
-
-        All content is redacted before serialization to prevent secrets
-        (API keys, tokens, passwords) from leaking into the summary that
-        gets sent to the auxiliary model and persisted across compactions.
        """
        parts = []
        for msg in turns:
            role = msg.get("role", "unknown")
-            content = redact_sensitive_text(msg.get("content") or "")
+            content = msg.get("content") or ""

            # Tool results: keep enough content for the summarizer
            if role == "tool":
@@ -630,7 +575,7 @@ class ContextCompressor(ContextEngine):
                        if isinstance(tc, dict):
                            fn = tc.get("function", {})
                            name = fn.get("name", "?")
-                            args = redact_sensitive_text(fn.get("arguments", ""))
+                            args = fn.get("arguments", "")
                            # Truncate long arguments but keep enough for context
                            if len(args) > self._TOOL_ARGS_MAX:
                                args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -690,11 +635,7 @@ class ContextCompressor(ContextEngine):
            "only output the structured summary. "
            "Do NOT include any preamble, greeting, or prefix. "
            "Write the summary in the same language the user was using in the "
-            "conversation — do not translate or switch to English. "
-            "NEVER include API keys, tokens, passwords, secrets, credentials, "
-            "or connection strings in the summary — replace any that appear "
-            "with [REDACTED]. Note that the user had credentials present, but "
-            "do not preserve their values."
+            "conversation — do not translate or switch to English."
        )

        # Shared structured template (used by both paths).
@@ -751,7 +692,7 @@ Be specific with file paths, commands, line numbers, and results.]
 [What remains to be done — framed as context, not instructions]

 ## Critical Context
-[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
+[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]

 Target ~{summary_budget} tokens. Be CONCRETE — include file paths, command outputs, error messages, line numbers, and specific values. Avoid vague descriptions like "made some changes" — say exactly what changed.

@@ -791,7 +732,7 @@ Use this exact structure:
            prompt += f"""

 FOCUS TOPIC: "{focus_topic}"
-The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
+The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""

        try:
            call_kwargs = {
@@ -814,19 +755,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            # Handle cases where content is not a string (e.g., dict from llama.cpp)
            if not isinstance(content, str):
                content = str(content) if content else ""
-            # Redact the summary output as well — the summarizer LLM may
-            # ignore prompt instructions and echo back secrets verbatim.
-            summary = redact_sensitive_text(content.strip())
+            summary = content.strip()
            # Store for iterative updates on next compaction
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
            self._summary_model_fallen_back = False
-            self._last_summary_error = None
            return self._with_summary_prefix(summary)
        except RuntimeError:
            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
-            self._last_summary_error = "no auxiliary LLM provider configured"
            logging.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
                            "for %d seconds.",
@@ -859,15 +796,11 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately
+                return self._generate_summary(messages, summary_budget)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
-            err_text = str(e).strip() or e.__class__.__name__
-            if len(err_text) > 220:
-                err_text = err_text[:217].rstrip() + "..."
-            self._last_summary_error = err_text
            logging.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
@@ -1114,21 +1047,6 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        return max(cut_idx, head_end + 1)

-    # ------------------------------------------------------------------
-    # ContextEngine: manual /compress preflight
-    # ------------------------------------------------------------------
-
-    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
-        """Return True if there is a non-empty middle region to compact.
-
-        Overrides the ABC default so the gateway ``/compress`` guard can
-        skip the LLM call when the transcript is still entirely inside
-        the protected head/tail.
-        """
-        compress_start = self._align_boundary_forward(messages, self.protect_first_n)
-        compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
-        return compress_start < compress_end
-
    # ------------------------------------------------------------------
    # Main compression entry point
    # ------------------------------------------------------------------
@@ -1215,13 +1133,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content")
+                existing = msg.get("content") or ""
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in _content_text_for_contains(existing):
-                    msg["content"] = _append_text_to_content(
-                        existing,
-                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
-                    )
+                if _compression_note not in existing:
+                    msg["content"] = existing + "\n\n" + _compression_note
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1265,15 +1180,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                merged_prefix = (
+                original = msg.get("content") or ""
+                msg["content"] = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                )
-                msg["content"] = _append_text_to_content(
-                    msg.get("content"),
-                    merged_prefix,
-                    prepend=True,
+                    + original
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -78,7 +78,6 @@ class ContextEngine(ABC):
        self,
        messages: List[Dict[str, Any]],
        current_tokens: int = None,
-        focus_topic: str = None,
    ) -> List[Dict[str, Any]]:
        """Compact the message list and return the new message list.

@@ -87,12 +86,6 @@ class ContextEngine(ABC):
        context budget. The implementation is free to summarize, build a
        DAG, or do anything else — as long as the returned list is a valid
        OpenAI-format message sequence.
-
-        Args:
-            focus_topic: Optional topic string from manual ``/compress <focus>``.
-                Engines that support guided compression should prioritise
-                preserving information related to this topic.  Engines that
-                don't support it may simply ignore this argument.
        """

    # -- Optional: pre-flight check ----------------------------------------
@@ -105,21 +98,6 @@ class ContextEngine(ABC):
        """
        return False

-    # -- Optional: manual /compress preflight ------------------------------
-
-    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
-        """Quick check: is there anything in ``messages`` that can be compacted?
-
-        Used by the gateway ``/compress`` command as a preflight guard —
-        returning False lets the gateway report "nothing to compress yet"
-        without making an LLM call.
-
-        Default returns True (always attempt).  Engines with a cheap way
-        to introspect their own head/tail boundaries should override this
-        to return False when the transcript is still entirely protected.
-        """
-        return True
-
    # -- Optional: session lifecycle ---------------------------------------

    def on_session_start(self, session_id: str, **kwargs) -> None:
@@ -21,9 +21,6 @@ from pathlib import Path
 from types import SimpleNamespace
 from typing import Any

-from agent.file_safety import get_read_block_error, is_write_denied
-from agent.redact import redact_sensitive_text
-
 ACP_MARKER_BASE_URL = "acp://copilot"
 _DEFAULT_TIMEOUT_SECONDS = 900.0

@@ -46,47 +43,6 @@ def _resolve_args() -> list[str]:
    return shlex.split(raw)


-def _resolve_home_dir() -> str:
-    """Return a stable HOME for child ACP processes."""
-
-    try:
-        from hermes_constants import get_subprocess_home
-
-        profile_home = get_subprocess_home()
-        if profile_home:
-            return profile_home
-    except Exception:
-        pass
-
-    home = os.environ.get("HOME", "").strip()
-    if home:
-        return home
-
-    expanded = os.path.expanduser("~")
-    if expanded and expanded != "~":
-        return expanded
-
-    try:
-        import pwd
-
-        resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
-        if resolved:
-            return resolved
-    except Exception:
-        pass
-
-    # Last resort: /tmp (writable on any POSIX system). Avoids crashing the
-    # subprocess with no HOME; callers can set HERMES_HOME explicitly if they
-    # need a different writable dir.
-    return "/tmp"
-
-
-def _build_subprocess_env() -> dict[str, str]:
-    env = os.environ.copy()
-    env["HOME"] = _resolve_home_dir()
-    return env
-
-
 def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    return {
        "jsonrpc": "2.0",
@@ -98,18 +54,6 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    }


-def _permission_denied(message_id: Any) -> dict[str, Any]:
-    return {
-        "jsonrpc": "2.0",
-        "id": message_id,
-        "result": {
-            "outcome": {
-                "outcome": "cancelled",
-            }
-        },
-    }
-
-
 def _format_messages_as_prompt(
    messages: list[dict[str, Any]],
    model: str | None = None,
@@ -423,7 +367,6 @@ class CopilotACPClient:
                text=True,
                bufsize=1,
                cwd=self._acp_cwd,
-                env=_build_subprocess_env(),
            )
        except FileNotFoundError as exc:
            raise RuntimeError(
@@ -443,8 +386,6 @@ class CopilotACPClient:
        stderr_tail: deque[str] = deque(maxlen=40)

        def _stdout_reader() -> None:
-            if proc.stdout is None:
-                return
            for line in proc.stdout:
                try:
                    inbox.put(json.loads(line))
@@ -592,13 +533,18 @@ class CopilotACPClient:
        params = msg.get("params") or {}

        if method == "session/request_permission":
-            response = _permission_denied(message_id)
+            response = {
+                "jsonrpc": "2.0",
+                "id": message_id,
+                "result": {
+                    "outcome": {
+                        "outcome": "allow_once",
+                    }
+                },
+            }
        elif method == "fs/read_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                block_error = get_read_block_error(str(path))
-                if block_error:
-                    raise PermissionError(block_error)
                content = path.read_text() if path.exists() else ""
                line = params.get("line")
                limit = params.get("limit")
@@ -607,8 +553,6 @@ class CopilotACPClient:
                    start = line - 1
                    end = start + limit if isinstance(limit, int) and limit > 0 else None
                    content = "".join(lines[start:end])
-                if content:
-                    content = redact_sensitive_text(content)
                response = {
                    "jsonrpc": "2.0",
                    "id": message_id,
@@ -621,10 +565,6 @@ class CopilotACPClient:
        elif method == "fs/write_text_file":
            try:
                path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
-                if is_write_denied(str(path)):
-                    raise PermissionError(
-                        f"Write denied: '{path}' is a protected system/credential file."
-                    )
                path.parent.mkdir(parents=True, exist_ok=True)
                path.write_text(str(params.get("content") or ""))
                response = {
@@ -14,7 +14,6 @@ from datetime import datetime
 from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
-from hermes_cli.config import get_env_value
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@@ -456,61 +455,6 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

-    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
-        """Sync a Nous pool entry from auth.json if tokens differ.
-
-        Nous OAuth refresh tokens are single-use.  When another process
-        (e.g. a concurrent cron) refreshes the token via
-        ``resolve_nous_runtime_credentials``, it writes fresh tokens to
-        auth.json under ``_auth_store_lock``.  The pool entry's tokens
-        become stale.  This method detects that and adopts the newer pair,
-        avoiding a "refresh token reuse" revocation on the Nous Portal.
-        """
-        if self.provider != "nous" or entry.source != "device_code":
-            return entry
-        try:
-            with _auth_store_lock():
-                auth_store = _load_auth_store()
-                state = _load_provider_state(auth_store, "nous")
-            if not state:
-                return entry
-            store_refresh = state.get("refresh_token", "")
-            store_access = state.get("access_token", "")
-            if store_refresh and store_refresh != entry.refresh_token:
-                logger.debug(
-                    "Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
-                    entry.id,
-                )
-                field_updates: Dict[str, Any] = {
-                    "access_token": store_access,
-                    "refresh_token": store_refresh,
-                    "last_status": None,
-                    "last_status_at": None,
-                    "last_error_code": None,
-                }
-                if state.get("expires_at"):
-                    field_updates["expires_at"] = state["expires_at"]
-                if state.get("agent_key"):
-                    field_updates["agent_key"] = state["agent_key"]
-                if state.get("agent_key_expires_at"):
-                    field_updates["agent_key_expires_at"] = state["agent_key_expires_at"]
-                if state.get("inference_base_url"):
-                    field_updates["inference_base_url"] = state["inference_base_url"]
-                extra_updates = dict(entry.extra)
-                for extra_key in ("obtained_at", "expires_in", "agent_key_id",
-                                  "agent_key_expires_in", "agent_key_reused",
-                                  "agent_key_obtained_at"):
-                    val = state.get(extra_key)
-                    if val is not None:
-                        extra_updates[extra_key] = val
-                updated = replace(entry, extra=extra_updates, **field_updates)
-                self._replace_entry(entry, updated)
-                self._persist()
-                return updated
-        except Exception as exc:
-            logger.debug("Failed to sync Nous entry from auth.json: %s", exc)
-        return entry
-
    def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
        """Write refreshed pool entry tokens back to auth.json providers.

@@ -617,9 +561,6 @@ class CredentialPool:
                    last_refresh=refreshed.get("last_refresh"),
                )
            elif self.provider == "nous":
-                synced = self._sync_nous_entry_from_auth_store(entry)
-                if synced is not entry:
-                    entry = synced
                nous_state = {
                    "access_token": entry.access_token,
                    "refresh_token": entry.refresh_token,
@@ -694,26 +635,6 @@ class CredentialPool:
                    # Credentials file had a valid (non-expired) token — use it directly
                    logger.debug("Credentials file has valid token, using without refresh")
                    return synced
-            # For nous: another process may have consumed the refresh token
-            # between our proactive sync and the HTTP call.  Re-sync from
-            # auth.json and adopt the fresh tokens if available.
-            if self.provider == "nous":
-                synced = self._sync_nous_entry_from_auth_store(entry)
-                if synced.refresh_token != entry.refresh_token:
-                    logger.debug("Nous refresh failed but auth.json has newer tokens — adopting")
-                    updated = replace(
-                        synced,
-                        last_status=STATUS_OK,
-                        last_status_at=None,
-                        last_error_code=None,
-                        last_error_reason=None,
-                        last_error_message=None,
-                        last_error_reset_at=None,
-                    )
-                    self._replace_entry(synced, updated)
-                    self._persist()
-                    self._sync_device_code_entry_to_auth_store(updated)
-                    return updated
            self._mark_exhausted(entry, None)
            return None

@@ -777,17 +698,6 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
-            # For nous entries, sync from auth.json before status checks.
-            # Another process may have successfully refreshed via
-            # resolve_nous_runtime_credentials(), making this entry's
-            # exhausted status stale.
-            if (self.provider == "nous"
-                    and entry.source == "device_code"
-                    and entry.last_status == STATUS_EXHAUSTED):
-                synced = self._sync_nous_entry_from_auth_store(entry)
-                if synced is not entry:
-                    entry = synced
-                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -829,11 +739,8 @@ class CredentialPool:

        if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
            entry = min(available, key=lambda e: e.request_count)
-            # Increment usage counter so subsequent selections distribute load
-            updated = replace(entry, request_count=entry.request_count + 1)
-            self._replace_entry(entry, updated)
            self._current_id = entry.id
-            return updated
+            return entry

        if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
            entry = available[0]
@@ -1076,14 +983,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
    active_sources: Set[str] = set()
    auth_store = _load_auth_store()

-    # Shared suppression gate — used at every upsert site so
-    # `hermes auth remove <provider> <N>` is stable across all source types.
-    try:
-        from hermes_cli.auth import is_source_suppressed as _is_suppressed
-    except ImportError:
-        def _is_suppressed(_p, _s):  # type: ignore[misc]
-            return False
-
    if provider == "anthropic":
        # Only auto-discover external credentials (Claude Code, Hermes PKCE)
        # when the user has explicitly configured anthropic as their provider.
@@ -1103,8 +1002,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            ("claude_code", read_claude_code_credentials()),
        ):
            if creds and creds.get("accessToken"):
-                if _is_suppressed(provider, source_name):
-                    continue
+                # Check if user explicitly removed this source
+                try:
+                    from hermes_cli.auth import is_source_suppressed
+                    if is_source_suppressed(provider, source_name):
+                        continue
+                except ImportError:
+                    pass
                active_sources.add(source_name)
                changed |= _upsert_entry(
                    entries,
@@ -1122,7 +1026,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup

    elif provider == "nous":
        state = _load_provider_state(auth_store, "nous")
-        if state and not _is_suppressed(provider, "device_code"):
+        if state:
            active_sources.add("device_code")
            # Prefer a user-supplied label embedded in the singleton state
            # (set by persist_nous_credentials(label=...) when the user ran
@@ -1149,18 +1053,6 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                    "inference_base_url": state.get("inference_base_url"),
                    "agent_key": state.get("agent_key"),
                    "agent_key_expires_at": state.get("agent_key_expires_at"),
-                    # Carry the mint/refresh timestamps into the pool so
-                    # freshness-sensitive consumers (self-heal hooks, pool
-                    # pruning by age) can distinguish just-minted credentials
-                    # from stale ones.  Without these, fresh device_code
-                    # entries get obtained_at=None and look older than they
-                    # are (#15099).
-                    "obtained_at": state.get("obtained_at"),
-                    "expires_in": state.get("expires_in"),
-                    "agent_key_id": state.get("agent_key_id"),
-                    "agent_key_expires_in": state.get("agent_key_expires_in"),
-                    "agent_key_reused": state.get("agent_key_reused"),
-                    "agent_key_obtained_at": state.get("agent_key_obtained_at"),
                    "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
                    "label": seeded_label,
                },
@@ -1171,26 +1063,24 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN).  They don't live in
        # the auth store or credential pool, so we resolve them here.
        try:
-            from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
+            from hermes_cli.copilot_auth import resolve_copilot_token
            token, source = resolve_copilot_token()
            if token:
-                api_token = get_copilot_api_token(token)
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
-                if not _is_suppressed(provider, source_name):
-                    active_sources.add(source_name)
-                    pconfig = PROVIDER_REGISTRY.get(provider)
-                    changed |= _upsert_entry(
-                        entries,
-                        provider,
-                        source_name,
-                        {
-                            "source": source_name,
-                            "auth_type": AUTH_TYPE_API_KEY,
-                            "access_token": api_token,
-                            "base_url": pconfig.inference_base_url if pconfig else "",
-                            "label": source,
-                        },
-                    )
+                active_sources.add(source_name)
+                pconfig = PROVIDER_REGISTRY.get(provider)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_API_KEY,
+                        "access_token": token,
+                        "base_url": pconfig.inference_base_url if pconfig else "",
+                        "label": source,
+                    },
+                )
        except Exception as exc:
            logger.debug("Copilot token seed failed: %s", exc)

@@ -1206,21 +1096,20 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
            token = creds.get("api_key", "")
            if token:
                source_name = creds.get("source", "qwen-cli")
-                if not _is_suppressed(provider, source_name):
-                    active_sources.add(source_name)
-                    changed |= _upsert_entry(
-                        entries,
-                        provider,
-                        source_name,
-                        {
-                            "source": source_name,
-                            "auth_type": AUTH_TYPE_OAUTH,
-                            "access_token": token,
-                            "expires_at_ms": creds.get("expires_at_ms"),
-                            "base_url": creds.get("base_url", ""),
-                            "label": creds.get("auth_file", source_name),
-                        },
-                    )
+                active_sources.add(source_name)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_OAUTH,
+                        "access_token": token,
+                        "expires_at_ms": creds.get("expires_at_ms"),
+                        "base_url": creds.get("base_url", ""),
+                        "label": creds.get("auth_file", source_name),
+                    },
+                )
        except Exception as exc:
            logger.debug("Qwen OAuth token seed failed: %s", exc)

@@ -1229,7 +1118,13 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # the device_code source as suppressed so it won't be re-seeded from
        # the Hermes auth store.  Without this gate the removal is instantly
        # undone on the next load_pool() call.
-        if _is_suppressed(provider, "device_code"):
+        codex_suppressed = False
+        try:
+            from hermes_cli.auth import is_source_suppressed
+            codex_suppressed = is_source_suppressed(provider, "device_code")
+        except ImportError:
+            pass
+        if codex_suppressed:
            return changed, active_sources

        state = _load_provider_state(auth_store, "openai-codex")
@@ -1263,23 +1158,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
 def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
    changed = False
    active_sources: Set[str] = set()
-    # Honour user suppression — `hermes auth remove <provider> <N>` for an
-    # env-seeded credential marks the env:<VAR> source as suppressed so it
-    # won't be re-seeded from the user's shell environment or ~/.hermes/.env.
-    # Without this gate the removal is silently undone on the next
-    # load_pool() call whenever the var is still exported by the shell.
-    try:
-        from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
-    except ImportError:
-        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
-            return False
    if provider == "openrouter":
-        # Check both os.environ and ~/.hermes/.env file
-        token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
+        token = os.getenv("OPENROUTER_API_KEY", "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
-            if _is_source_suppressed(provider, source):
-                return changed, active_sources
            active_sources.add(source)
            changed |= _upsert_entry(
                entries,
@@ -1301,7 +1183,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool

    env_url = ""
    if pconfig.base_url_env_var:
-        env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")
+        env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")

    env_vars = list(pconfig.api_key_env_vars)
    if provider == "anthropic":
@@ -1312,13 +1194,10 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        ]

    for env_var in env_vars:
-        # Check both os.environ and ~/.hermes/.env file
-        token = (get_env_value(env_var) or "").strip()
+        token = os.getenv(env_var, "").strip()
        if not token:
            continue
        source = f"env:{env_var}"
-        if _is_source_suppressed(provider, source):
-            continue
        active_sources.add(source)
        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
        base_url = env_url or pconfig.inference_base_url
@@ -1363,13 +1242,6 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
    changed = False
    active_sources: Set[str] = set()

-    # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
-    try:
-        from hermes_cli.auth import is_source_suppressed as _is_suppressed
-    except ImportError:
-        def _is_suppressed(_p, _s):  # type: ignore[misc]
-            return False
-
    # Seed from the custom_providers config entry's api_key field
    cp_config = _get_custom_provider_config(pool_key)
    if cp_config:
@@ -1378,20 +1250,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
        name = str(cp_config.get("name") or "").strip()
        if api_key:
            source = f"config:{name}"
-            if not _is_suppressed(pool_key, source):
-                active_sources.add(source)
-                changed |= _upsert_entry(
-                    entries,
-                    pool_key,
-                    source,
-                    {
-                        "source": source,
-                        "auth_type": AUTH_TYPE_API_KEY,
-                        "access_token": api_key,
-                        "base_url": base_url,
-                        "label": name or source,
-                    },
-                )
+            active_sources.add(source)
+            changed |= _upsert_entry(
+                entries,
+                pool_key,
+                source,
+                {
+                    "source": source,
+                    "auth_type": AUTH_TYPE_API_KEY,
+                    "access_token": api_key,
+                    "base_url": base_url,
+                    "label": name or source,
+                },
+            )

    # Seed from model.api_key if model.provider=='custom' and model.base_url matches
    try:
@@ -1411,20 +1282,19 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
                matched_key = get_custom_provider_pool_key(model_base_url)
                if matched_key == pool_key:
                    source = "model_config"
-                    if not _is_suppressed(pool_key, source):
-                        active_sources.add(source)
-                        changed |= _upsert_entry(
-                            entries,
-                            pool_key,
-                            source,
-                            {
-                                "source": source,
-                                "auth_type": AUTH_TYPE_API_KEY,
-                                "access_token": model_api_key,
-                                "base_url": model_base_url,
-                                "label": "model_config",
-                            },
-                        )
+                    active_sources.add(source)
+                    changed |= _upsert_entry(
+                        entries,
+                        pool_key,
+                        source,
+                        {
+                            "source": source,
+                            "auth_type": AUTH_TYPE_API_KEY,
+                            "access_token": model_api_key,
+                            "base_url": model_base_url,
+                            "label": "model_config",
+                        },
+                    )
    except Exception:
        pass

@@ -1,401 +0,0 @@
-"""Unified removal contract for every credential source Hermes reads from.
-
-Hermes seeds its credential pool from many places:
-
-    env:<VAR>     — os.environ / ~/.hermes/.env
-    claude_code   — ~/.claude/.credentials.json
-    hermes_pkce   — ~/.hermes/.anthropic_oauth.json
-    device_code   — auth.json providers.<provider> (nous, openai-codex, ...)
-    qwen-cli      — ~/.qwen/oauth_creds.json
-    gh_cli        — gh auth token
-    config:<name> — custom_providers config entry
-    model_config  — model.api_key when model.provider == "custom"
-    manual        — user ran `hermes auth add`
-
-Each source has its own reader inside ``agent.credential_pool._seed_from_*``
-(which keep their existing shape — we haven't restructured them).  What we
-unify here is **removal**:
-
-    ``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
-
-Before this module, every source had an ad-hoc removal branch in
-``auth_remove_command``, and several sources had no branch at all — so
-``auth remove`` silently reverted on the next ``load_pool()`` call for
-qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
-custom-config sources.
-
-Now every source registers a ``RemovalStep`` that does exactly three things
-in the same shape:
-
-    1. Clean up whatever externally-readable state the source reads from
-       (.env line, auth.json block, OAuth file, etc.)
-    2. Suppress the ``(provider, source_id)`` in auth.json so the
-       corresponding ``_seed_from_*`` branch skips the upsert on re-load
-    3. Return ``RemovalResult`` describing what was cleaned and any
-       diagnostic hints the user should see (shell-exported env vars,
-       external credential files we deliberately don't delete, etc.)
-
-Adding a new credential source is:
-    - wire up a reader branch in ``_seed_from_*`` (existing pattern)
-    - gate that reader behind ``is_source_suppressed(provider, source_id)``
-    - register a ``RemovalStep`` here
-
-No more per-source if/elif chain in ``auth_remove_command``.
-"""
-
-from __future__ import annotations
-
-import os
-from dataclasses import dataclass, field
-from pathlib import Path
-from typing import Callable, List, Optional
-
-
-@dataclass
-class RemovalResult:
-    """Outcome of removing a credential source.
-
-    Attributes:
-        cleaned: Short strings describing external state that was actually
-            mutated (``"Cleared XAI_API_KEY from .env"``,
-            ``"Cleared openai-codex OAuth tokens from auth store"``).
-            Printed as plain lines to the user.
-        hints: Diagnostic lines ABOUT state the user may need to clean up
-            themselves or is deliberately left intact (shell-exported env
-            var, Claude Code credential file we don't delete, etc.).
-            Printed as plain lines to the user.  Always non-destructive.
-        suppress: Whether to call ``suppress_credential_source`` after
-            cleanup so future ``load_pool`` calls skip this source.
-            Default True — almost every source needs this to stay sticky.
-            The only legitimate False is ``manual`` entries, which aren't
-            seeded from anywhere external.
-    """
-
-    cleaned: List[str] = field(default_factory=list)
-    hints: List[str] = field(default_factory=list)
-    suppress: bool = True
-
-
-@dataclass
-class RemovalStep:
-    """How to remove one specific credential source cleanly.
-
-    Attributes:
-        provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
-            Special value ``"*"`` means "matches any provider" — used for
-            sources like ``manual`` that aren't provider-specific.
-        source_id: Source identifier as it appears in
-            ``PooledCredential.source``.  May be a literal (``"claude_code"``)
-            or a prefix pattern matched via ``match_fn``.
-        match_fn: Optional predicate overriding literal ``source_id``
-            matching.  Gets the removed entry's source string.  Used for
-            ``env:*`` (any env-seeded key), ``config:*`` (any custom
-            pool), and ``manual:*`` (any manual-source variant).
-        remove_fn: ``(provider, removed_entry) -> RemovalResult``.  Does the
-            actual cleanup and returns what happened for the user.
-        description: One-line human-readable description for docs / tests.
-    """
-
-    provider: str
-    source_id: str
-    remove_fn: Callable[..., RemovalResult]
-    match_fn: Optional[Callable[[str], bool]] = None
-    description: str = ""
-
-    def matches(self, provider: str, source: str) -> bool:
-        if self.provider != "*" and self.provider != provider:
-            return False
-        if self.match_fn is not None:
-            return self.match_fn(source)
-        return source == self.source_id
-
-
-_REGISTRY: List[RemovalStep] = []
-
-
-def register(step: RemovalStep) -> RemovalStep:
-    _REGISTRY.append(step)
-    return step
-
-
-def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
-    """Return the first matching RemovalStep, or None if unregistered.
-
-    Unregistered sources fall through to the default remove path in
-    ``auth_remove_command``: the pool entry is already gone (that happens
-    before dispatch), no external cleanup, no suppression.  This is the
-    correct behaviour for ``manual`` entries — they were only ever stored
-    in the pool, nothing external to clean up.
-    """
-    for step in _REGISTRY:
-        if step.matches(provider, source):
-            return step
-    return None
-
-
-# ---------------------------------------------------------------------------
-# Individual RemovalStep implementations — one per source.
-# ---------------------------------------------------------------------------
-# Each remove_fn is intentionally small and single-purpose.  Adding a new
-# credential source means adding ONE entry here — no other changes to
-# auth_remove_command.
-
-
-def _remove_env_source(provider: str, removed) -> RemovalResult:
-    """env:<VAR> — the most common case.
-
-    Handles three user situations:
-      1. Var lives only in ~/.hermes/.env  → clear it
-      2. Var lives only in the user's shell (shell profile, systemd
-         EnvironmentFile, launchd plist) → hint them where to unset it
-      3. Var lives in both → clear from .env, hint about shell
-    """
-    from hermes_cli.config import get_env_path, remove_env_value
-
-    result = RemovalResult()
-    env_var = removed.source[len("env:"):]
-    if not env_var:
-        return result
-
-    # Detect shell vs .env BEFORE remove_env_value pops os.environ.
-    env_in_process = bool(os.getenv(env_var))
-    env_in_dotenv = False
-    try:
-        env_path = get_env_path()
-        if env_path.exists():
-            env_in_dotenv = any(
-                line.strip().startswith(f"{env_var}=")
-                for line in env_path.read_text(errors="replace").splitlines()
-            )
-    except OSError:
-        pass
-    shell_exported = env_in_process and not env_in_dotenv
-
-    cleared = remove_env_value(env_var)
-    if cleared:
-        result.cleaned.append(f"Cleared {env_var} from .env")
-
-    if shell_exported:
-        result.hints.extend([
-            f"Note: {env_var} is still set in your shell environment "
-            f"(not in ~/.hermes/.env).",
-            "  Unset it there (shell profile, systemd EnvironmentFile, "
-            "launchd plist, etc.) or it will keep being visible to Hermes.",
-            f"  The pool entry is now suppressed — Hermes will ignore "
-            f"{env_var} until you run `hermes auth add {provider}`.",
-        ])
-    else:
-        result.hints.append(
-            f"Suppressed env:{env_var} — it will not be re-seeded even "
-            f"if the variable is re-exported later."
-        )
-    return result
-
-
-def _remove_claude_code(provider: str, removed) -> RemovalResult:
-    """~/.claude/.credentials.json is owned by Claude Code itself.
-
-    We don't delete it — the user's Claude Code install still needs to
-    work.  We just suppress it so Hermes stops reading it.
-    """
-    return RemovalResult(hints=[
-        "Suppressed claude_code credential — it will not be re-seeded.",
-        "Note: Claude Code credentials still live in ~/.claude/.credentials.json",
-        "Run `hermes auth add anthropic` to re-enable if needed.",
-    ])
-
-
-def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
-    """~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
-    from hermes_constants import get_hermes_home
-
-    result = RemovalResult()
-    oauth_file = get_hermes_home() / ".anthropic_oauth.json"
-    if oauth_file.exists():
-        try:
-            oauth_file.unlink()
-            result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
-        except OSError as exc:
-            result.hints.append(f"Could not delete {oauth_file}: {exc}")
-    return result
-
-
-def _clear_auth_store_provider(provider: str) -> bool:
-    """Delete auth_store.providers[provider].  Returns True if deleted."""
-    from hermes_cli.auth import (
-        _auth_store_lock,
-        _load_auth_store,
-        _save_auth_store,
-    )
-
-    with _auth_store_lock():
-        auth_store = _load_auth_store()
-        providers_dict = auth_store.get("providers")
-        if isinstance(providers_dict, dict) and provider in providers_dict:
-            del providers_dict[provider]
-            _save_auth_store(auth_store)
-            return True
-    return False
-
-
-def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
-    """Nous OAuth lives in auth.json providers.nous — clear it and suppress.
-
-    We suppress in addition to clearing because nothing else stops the
-    user's next `hermes login` run from writing providers.nous again
-    before they decide to.  Suppression forces them to go through
-    `hermes auth add nous` to re-engage, which is the documented re-add
-    path and clears the suppression atomically.
-    """
-    result = RemovalResult()
-    if _clear_auth_store_provider(provider):
-        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
-    return result
-
-
-def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
-    """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
-
-    refresh_codex_oauth_pure() writes both every time, so clearing only
-    the Hermes auth store is not enough — _seed_from_singletons() would
-    re-import from ~/.codex/auth.json on the next load_pool() call and
-    the removal would be instantly undone.  We suppress instead of
-    deleting Codex CLI's file, so the Codex CLI itself keeps working.
-
-    The canonical source name in ``_seed_from_singletons`` is
-    ``"device_code"`` (no prefix).  Entries may show up in the pool as
-    either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
-    via ``hermes auth add openai-codex``), but in both cases the re-seed
-    gate lives at the ``"device_code"`` suppression key.  We suppress
-    that canonical key here; the central dispatcher also suppresses
-    ``removed.source`` which is fine — belt-and-suspenders, idempotent.
-    """
-    from hermes_cli.auth import suppress_credential_source
-
-    result = RemovalResult()
-    if _clear_auth_store_provider(provider):
-        result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
-    # Suppress the canonical re-seed source, not just whatever source the
-    # removed entry had.  Otherwise `manual:device_code` removals wouldn't
-    # block the `device_code` re-seed path.
-    suppress_credential_source(provider, "device_code")
-    result.hints.extend([
-        "Suppressed openai-codex device_code source — it will not be re-seeded.",
-        "Note: Codex CLI credentials still live in ~/.codex/auth.json",
-        "Run `hermes auth add openai-codex` to re-enable if needed.",
-    ])
-    return result
-
-
-def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
-    """~/.qwen/oauth_creds.json is owned by the Qwen CLI.
-
-    Same pattern as claude_code — suppress, don't delete.  The user's
-    Qwen CLI install still reads from that file.
-    """
-    return RemovalResult(hints=[
-        "Suppressed qwen-cli credential — it will not be re-seeded.",
-        "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
-        "Run `hermes auth add qwen-oauth` to re-enable if needed.",
-    ])
-
-
-def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
-    """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
-
-    Copilot is special: the same token can be seeded as multiple source
-    entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
-    ``_seed_from_env``), so removing one entry without suppressing the
-    others lets the duplicates resurrect.  We suppress ALL known copilot
-    sources here so removal is stable regardless of which entry the
-    user clicked.
-
-    We don't touch the user's gh CLI or shell state — just suppress so
-    Hermes stops picking the token up.
-    """
-    # Suppress ALL copilot source variants up-front so no path resurrects
-    # the pool entry.  The central dispatcher in auth_remove_command will
-    # ALSO suppress removed.source, but it's idempotent so double-calling
-    # is harmless.
-    from hermes_cli.auth import suppress_credential_source
-    suppress_credential_source(provider, "gh_cli")
-    for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
-        suppress_credential_source(provider, f"env:{env_var}")
-
-    return RemovalResult(hints=[
-        "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
-        "Note: Your gh CLI / shell environment is unchanged.",
-        "Run `hermes auth add copilot` to re-enable if needed.",
-    ])
-
-
-def _remove_custom_config(provider: str, removed) -> RemovalResult:
-    """Custom provider pools are seeded from custom_providers config or
-    model.api_key.  Both are in config.yaml — modifying that from here
-    is more invasive than suppression.  We suppress; the user can edit
-    config.yaml if they want to remove the key from disk entirely.
-    """
-    source_label = removed.source
-    return RemovalResult(hints=[
-        f"Suppressed {source_label} — it will not be re-seeded.",
-        "Note: The underlying value in config.yaml is unchanged.  Edit it "
-        "directly if you want to remove the credential from disk.",
-    ])
-
-
-def _register_all_sources() -> None:
-    """Called once on module import.
-
-    ORDER MATTERS — ``find_removal_step`` returns the first match.  Put
-    provider-specific steps before the generic ``env:*`` step so that e.g.
-    copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
-    doesn't touch the user's shell), not the generic env-var removal
-    (which would try to clear .env).
-    """
-    register(RemovalStep(
-        provider="copilot", source_id="gh_cli",
-        match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
-        remove_fn=_remove_copilot_gh,
-        description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
-    ))
-    register(RemovalStep(
-        provider="*", source_id="env:",
-        match_fn=lambda src: src.startswith("env:"),
-        remove_fn=_remove_env_source,
-        description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
-    ))
-    register(RemovalStep(
-        provider="anthropic", source_id="claude_code",
-        remove_fn=_remove_claude_code,
-        description="~/.claude/.credentials.json",
-    ))
-    register(RemovalStep(
-        provider="anthropic", source_id="hermes_pkce",
-        remove_fn=_remove_hermes_pkce,
-        description="~/.hermes/.anthropic_oauth.json",
-    ))
-    register(RemovalStep(
-        provider="nous", source_id="device_code",
-        remove_fn=_remove_nous_device_code,
-        description="auth.json providers.nous",
-    ))
-    register(RemovalStep(
-        provider="openai-codex", source_id="device_code",
-        match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
-        remove_fn=_remove_codex_device_code,
-        description="auth.json providers.openai-codex + ~/.codex/auth.json",
-    ))
-    register(RemovalStep(
-        provider="qwen-oauth", source_id="qwen-cli",
-        remove_fn=_remove_qwen_cli,
-        description="~/.qwen/oauth_creds.json",
-    ))
-    register(RemovalStep(
-        provider="*", source_id="config:",
-        match_fn=lambda src: src.startswith("config:") or src == "model_config",
-        remove_fn=_remove_custom_config,
-        description="Custom provider config.yaml api_key field",
-    ))
-
-
-_register_all_sources()
@@ -45,7 +45,6 @@ class FailoverReason(enum.Enum):

    # Model
    model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
-    provider_policy_blocked = "provider_policy_blocked"  # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy

    # Request format
    format_error = "format_error"        # 400 bad request — abort or strip + retry
@@ -195,29 +194,6 @@ _MODEL_NOT_FOUND_PATTERNS = [
    "unsupported model",
 ]

-# OpenRouter aggregator policy-block patterns.
-#
-# When a user's OpenRouter account privacy setting (or a per-request
-# `provider.data_collection: deny` preference) excludes the only endpoint
-# serving a model, OpenRouter returns 404 with a *specific* message that is
-# distinct from "model not found":
-#
-#   "No endpoints available matching your guardrail restrictions and
-#    data policy. Configure: https://openrouter.ai/settings/privacy"
-#
-# We classify this as `provider_policy_blocked` rather than
-# `model_not_found` because:
-#   - The model *exists* — model_not_found is misleading in logs
-#   - Provider fallback won't help: the account-level setting applies to
-#     every call on the same OpenRouter account
-#   - The error body already contains the fix URL, so the user gets
-#     actionable guidance without us rewriting the message
-_PROVIDER_POLICY_BLOCKED_PATTERNS = [
-    "no endpoints available matching your guardrail",
-    "no endpoints available matching your data policy",
-    "no endpoints found matching your data policy",
-]
-
 # Auth patterns (non-status-code signals)
 _AUTH_PATTERNS = [
    "invalid api key",
@@ -244,25 +220,12 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
-    # SSL/TLS transport errors — transient mid-stream handshake/record
-    # failures that should retry rather than surface as a stalled session.
-    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
-    # the type names here so provider-wrapped SSL errors (e.g. when the
-    # SDK re-raises without preserving the exception chain) still classify
-    # as transport rather than falling through to the unknown bucket.
-    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
-    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level).
-# These are the "ambiguous" patterns — a plain connection close could be
-# transient transport hiccup OR server-side context overflow rejection
-# (common when the API gateway disconnects instead of returning an HTTP
-# error for oversized requests).  A large session + one of these patterns
-# triggers the context-overflow-with-compression recovery path.
+# Server disconnect patterns (no status code, but transport-level)
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -273,40 +236,6 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

-# SSL/TLS transient failure patterns — intentionally distinct from
-# _SERVER_DISCONNECT_PATTERNS above.
-#
-# An SSL alert mid-stream is almost always a transport-layer hiccup
-# (flaky network, mid-session TLS renegotiation failure, load balancer
-# dropping the connection) — NOT a server-side context overflow signal.
-# So we want the retry path but NOT the compression path; lumping these
-# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
-# expensive) context compression on any large-session SSL hiccup.
-#
-# The OpenSSL library constructs error codes by prepending a format string
-# to the uppercased alert reason; OpenSSL 3.x changed the separator
-# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
-# which silently stopped matching anything explicit.  Matching on the
-# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
-# survives future OpenSSL format churn without code changes.
-_SSL_TRANSIENT_PATTERNS = [
-    # Space-separated (human-readable form, Python ssl module, most SDKs)
-    "bad record mac",
-    "ssl alert",
-    "tls alert",
-    "ssl handshake failure",
-    "tlsv1 alert",
-    "sslv3 alert",
-    # Underscore-separated (OpenSSL error code tokens, e.g.
-    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
-    "bad_record_mac",
-    "ssl_alert",
-    "tls_alert",
-    "tls_alert_internal_error",
-    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
-    "[ssl:",
-]
-

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -326,10 +255,9 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. SSL/TLS transient alert patterns → retry as timeout
+      5. Transport error heuristics
      6. Server disconnect + large session → context overflow
-      7. Transport error heuristics
-      8. Fallback: unknown (retryable with backoff)
+      7. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -343,11 +271,6 @@ def classify_api_error(
    """
    status_code = _extract_status_code(error)
    error_type = type(error).__name__
-    # Copilot/GitHub Models RateLimitError may not set .status_code; force 429
-    # so downstream rate-limit handling (classifier reason, pool rotation,
-    # fallback gating) fires correctly instead of misclassifying as generic.
-    if status_code is None and error_type == "RateLimitError":
-        status_code = 429
    body = _extract_error_body(error)
    error_code = _extract_error_code(body)

@@ -465,18 +388,7 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
-    # SSL alerts mid-stream are transport hiccups, not server-side context
-    # overflow signals.  Classify before the disconnect check so a large
-    # session doesn't incorrectly trigger context compression when the real
-    # cause is a flaky TLS handshake.  Also matches when the error is
-    # wrapped in a generic exception whose message string carries the SSL
-    # alert text but the type isn't ssl.SSLError (happens with some SDKs
-    # that re-raise without chaining).
-    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
-        return _result(FailoverReason.timeout, retryable=True)
-
-    # ── 6. Server disconnect + large session → context overflow ─────
+    # ── 5. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -493,12 +405,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Transport / timeout heuristics ───────────────────────────
+    # ── 6. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 8. Fallback: unknown ────────────────────────────────────────
+    # ── 7. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -552,33 +464,17 @@ def _classify_by_status(
        return _classify_402(error_msg, result_fn)

    if status_code == 404:
-        # OpenRouter policy-block 404 — distinct from "model not found".
-        # The model exists; the user's account privacy setting excludes the
-        # only endpoint serving it. Falling back to another provider won't
-        # help (same account setting applies).  The error body already
-        # contains the fix URL, so just surface it.
-        if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
-            return result_fn(
-                FailoverReason.provider_policy_blocked,
-                retryable=False,
-                should_fallback=False,
-            )
        if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
            return result_fn(
                FailoverReason.model_not_found,
                retryable=False,
                should_fallback=True,
            )
-        # Generic 404 with no "model not found" signal — could be a wrong
-        # endpoint path (common with local llama.cpp / Ollama / vLLM when
-        # the URL is slightly misconfigured), a proxy routing glitch, or
-        # a transient backend issue.  Classifying these as model_not_found
-        # silently falls back to a different provider and tells the model
-        # the model is missing, which is wrong and wastes a turn.  Treat
-        # as unknown so the retry loop surfaces the real error instead.
+        # Generic 404 — could be model or endpoint
        return result_fn(
-            FailoverReason.unknown,
-            retryable=True,
+            FailoverReason.model_not_found,
+            retryable=False,
+            should_fallback=True,
        )

    if status_code == 413:
@@ -680,12 +576,6 @@ def _classify_400(
        )

    # Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
-    if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
-        return result_fn(
-            FailoverReason.provider_policy_blocked,
-            retryable=False,
-            should_fallback=False,
-        )
    if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
        return result_fn(
            FailoverReason.model_not_found,
@@ -858,15 +748,6 @@ def _classify_by_message(
            should_fallback=True,
        )

-    # Provider policy-block (aggregator-side guardrail) — check before
-    # model_not_found so we don't mis-label as a missing model.
-    if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
-        return result_fn(
-            FailoverReason.provider_policy_blocked,
-            retryable=False,
-            should_fallback=False,
-        )
-
    # Model not found patterns
    if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
        return result_fn(
@@ -1,111 +0,0 @@
-"""Shared file safety rules used by both tools and ACP shims."""
-
-from __future__ import annotations
-
-import os
-from pathlib import Path
-from typing import Optional
-
-
-def _hermes_home_path() -> Path:
-    """Resolve the active HERMES_HOME (profile-aware) without circular imports."""
-    try:
-        from hermes_constants import get_hermes_home  # local import to avoid cycles
-        return get_hermes_home()
-    except Exception:
-        return Path(os.path.expanduser("~/.hermes"))
-
-
-def build_write_denied_paths(home: str) -> set[str]:
-    """Return exact sensitive paths that must never be written."""
-    hermes_home = _hermes_home_path()
-    return {
-        os.path.realpath(p)
-        for p in [
-            os.path.join(home, ".ssh", "authorized_keys"),
-            os.path.join(home, ".ssh", "id_rsa"),
-            os.path.join(home, ".ssh", "id_ed25519"),
-            os.path.join(home, ".ssh", "config"),
-            str(hermes_home / ".env"),
-            os.path.join(home, ".bashrc"),
-            os.path.join(home, ".zshrc"),
-            os.path.join(home, ".profile"),
-            os.path.join(home, ".bash_profile"),
-            os.path.join(home, ".zprofile"),
-            os.path.join(home, ".netrc"),
-            os.path.join(home, ".pgpass"),
-            os.path.join(home, ".npmrc"),
-            os.path.join(home, ".pypirc"),
-            "/etc/sudoers",
-            "/etc/passwd",
-            "/etc/shadow",
-        ]
-    }
-
-
-def build_write_denied_prefixes(home: str) -> list[str]:
-    """Return sensitive directory prefixes that must never be written."""
-    return [
-        os.path.realpath(p) + os.sep
-        for p in [
-            os.path.join(home, ".ssh"),
-            os.path.join(home, ".aws"),
-            os.path.join(home, ".gnupg"),
-            os.path.join(home, ".kube"),
-            "/etc/sudoers.d",
-            "/etc/systemd",
-            os.path.join(home, ".docker"),
-            os.path.join(home, ".azure"),
-            os.path.join(home, ".config", "gh"),
-        ]
-    ]
-
-
-def get_safe_write_root() -> Optional[str]:
-    """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
-    root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
-    if not root:
-        return None
-    try:
-        return os.path.realpath(os.path.expanduser(root))
-    except Exception:
-        return None
-
-
-def is_write_denied(path: str) -> bool:
-    """Return True if path is blocked by the write denylist or safe root."""
-    home = os.path.realpath(os.path.expanduser("~"))
-    resolved = os.path.realpath(os.path.expanduser(str(path)))
-
-    if resolved in build_write_denied_paths(home):
-        return True
-    for prefix in build_write_denied_prefixes(home):
-        if resolved.startswith(prefix):
-            return True
-
-    safe_root = get_safe_write_root()
-    if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
-        return True
-
-    return False
-
-
-def get_read_block_error(path: str) -> Optional[str]:
-    """Return an error message when a read targets internal Hermes cache files."""
-    resolved = Path(path).expanduser().resolve()
-    hermes_home = _hermes_home_path().resolve()
-    blocked_dirs = [
-        hermes_home / "skills" / ".hub" / "index-cache",
-        hermes_home / "skills" / ".hub",
-    ]
-    for blocked in blocked_dirs:
-        try:
-            resolved.relative_to(blocked)
-        except ValueError:
-            continue
-        return (
-            f"Access denied: {path} is an internal Hermes cache file "
-            "and cannot be read directly to prevent prompt injection. "
-            "Use the skills_list or skill_view tools instead."
-        )
-    return None
@@ -799,8 +799,7 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    _raw_details = err_obj.get("details")
-    err_details_list = _raw_details if isinstance(_raw_details, list) else []
+    err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []

    # Extract google.rpc.ErrorInfo reason + metadata.  There may be more
    # than one ErrorInfo (rare), so we pick the first one with a reason.
@@ -44,97 +44,6 @@ def is_native_gemini_base_url(base_url: str) -> bool:
    return not normalized.endswith("/openai")


-def probe_gemini_tier(
-    api_key: str,
-    base_url: str = DEFAULT_GEMINI_BASE_URL,
-    *,
-    model: str = "gemini-2.5-flash",
-    timeout: float = 10.0,
-) -> str:
-    """Probe a Google AI Studio API key and return its tier.
-
-    Returns one of:
-
-    - ``"free"``    -- key is on the free tier (unusable with Hermes)
-    - ``"paid"``    -- key is on a paid tier
-    - ``"unknown"`` -- probe failed; callers should proceed without blocking.
-    """
-    key = (api_key or "").strip()
-    if not key:
-        return "unknown"
-
-    normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
-    if not normalized_base:
-        normalized_base = DEFAULT_GEMINI_BASE_URL
-    if normalized_base.lower().endswith("/openai"):
-        normalized_base = normalized_base[: -len("/openai")]
-
-    url = f"{normalized_base}/models/{model}:generateContent"
-    payload = {
-        "contents": [{"role": "user", "parts": [{"text": "hi"}]}],
-        "generationConfig": {"maxOutputTokens": 1},
-    }
-
-    try:
-        with httpx.Client(timeout=timeout) as client:
-            resp = client.post(
-                url,
-                params={"key": key},
-                json=payload,
-                headers={"Content-Type": "application/json"},
-            )
-    except Exception as exc:
-        logger.debug("probe_gemini_tier: network error: %s", exc)
-        return "unknown"
-
-    headers_lower = {k.lower(): v for k, v in resp.headers.items()}
-    rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
-    if rpd_header:
-        try:
-            rpd_val = int(rpd_header)
-        except (TypeError, ValueError):
-            rpd_val = None
-        # Published free-tier daily caps (Dec 2025):
-        #   gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
-        # Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
-        if rpd_val is not None and rpd_val <= 1000:
-            return "free"
-        if rpd_val is not None and rpd_val > 1000:
-            return "paid"
-
-    if resp.status_code == 429:
-        body_text = ""
-        try:
-            body_text = resp.text or ""
-        except Exception:
-            body_text = ""
-        if "free_tier" in body_text.lower():
-            return "free"
-        return "paid"
-
-    if 200 <= resp.status_code < 300:
-        return "paid"
-
-    return "unknown"
-
-
-def is_free_tier_quota_error(error_message: str) -> bool:
-    """Return True when a Gemini 429 message indicates free-tier exhaustion."""
-    if not error_message:
-        return False
-    return "free_tier" in error_message.lower()
-
-
-_FREE_TIER_GUIDANCE = (
-    "\n\nYour Google API key is on the free tier (<= 250 requests/day for "
-    "gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
-    "so the free tier is exhausted in a handful of messages and cannot sustain "
-    "an agent session. Enable billing on your Google Cloud project and "
-    "regenerate the key in a billing-enabled project: "
-    "https://aistudio.google.com/apikey"
-)
-
-
 class GeminiAPIError(Exception):
    """Error shape compatible with Hermes retry/error classification."""

@@ -704,8 +613,7 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
        err_obj = {}
    err_status = str(err_obj.get("status") or "").strip()
    err_message = str(err_obj.get("message") or "").strip()
-    _raw_details = err_obj.get("details")
-    details_list = _raw_details if isinstance(_raw_details, list) else []
+    details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else []

    reason = ""
    retry_after: Optional[float] = None
@@ -741,12 +649,6 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
    else:
        message = f"Gemini returned HTTP {status}: {body_text[:500]}"

-    # Free-tier quota exhaustion -> append actionable guidance so users who
-    # bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
-    # that the free tier cannot sustain an agent session.
-    if status == 429 and is_free_tier_quota_error(err_message or body_text):
-        message = message + _FREE_TIER_GUIDANCE
-
    return GeminiAPIError(
        message,
        code=code,
@@ -801,13 +703,6 @@ class GeminiNativeClient:
        http_client: Optional[httpx.Client] = None,
        **_: Any,
    ) -> None:
-        if not (api_key or "").strip():
-            raise RuntimeError(
-                "Gemini native client requires an API key, but none was provided. "
-                "Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
-                "(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
-                "to configure the Google provider."
-            )
        self.api_key = api_key
        normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
        if normalized_base.endswith("/openai"):
@@ -73,20 +73,6 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
            ]
            continue
        cleaned[key] = value
-
-    # Gemini's Schema validator requires every ``enum`` entry to be a string,
-    # even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
-    # OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
-    # ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
-    # so we only drop the ``enum`` when it would collide with Gemini's rule.
-    # Keeping ``type: integer`` plus the human-readable description gives the
-    # model enough guidance; the tool handler still validates the value.
-    enum_val = cleaned.get("enum")
-    type_val = cleaned.get("type")
-    if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
-        if any(not isinstance(item, str) for item in enum_val):
-            cleaned.pop("enum", None)
-
    return cleaned


@@ -1,242 +0,0 @@
-"""
-Image Generation Provider ABC
-=============================
-
-Defines the pluggable-backend interface for image generation. Providers register
-instances via ``PluginContext.register_image_gen_provider()``; the active one
-(selected via ``image_gen.provider`` in ``config.yaml``) services every
-``image_generate`` tool call.
-
-Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
-as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
-via ``plugins.enabled``).
-
-Response shape
--------------
-All providers return a dict that :func:`success_response` / :func:`error_response`
-produce. The tool wrapper JSON-serializes it. Keys:
-
-    success        bool
-    image          str | None       URL or absolute file path
-    model          str              provider-specific model identifier
-    prompt         str              echoed prompt
-    aspect_ratio   str              "landscape" | "square" | "portrait"
-    provider       str              provider name (for diagnostics)
-    error          str              only when success=False
-    error_type     str              only when success=False
-"""
-
-from __future__ import annotations
-
-import abc
-import base64
-import datetime
-import logging
-import uuid
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
-
-logger = logging.getLogger(__name__)
-
-
-VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
-DEFAULT_ASPECT_RATIO = "landscape"
-
-
-# ---------------------------------------------------------------------------
-# ABC
-# ---------------------------------------------------------------------------
-
-
-class ImageGenProvider(abc.ABC):
-    """Abstract base class for an image generation backend.
-
-    Subclasses must implement :meth:`generate`. Everything else has sane
-    defaults — override only what your provider needs.
-    """
-
-    @property
-    @abc.abstractmethod
-    def name(self) -> str:
-        """Stable short identifier used in ``image_gen.provider`` config.
-
-        Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
-        """
-
-    @property
-    def display_name(self) -> str:
-        """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
-        return self.name.title()
-
-    def is_available(self) -> bool:
-        """Return True when this provider can service calls.
-
-        Typically checks for a required API key. Default: True
-        (providers with no external dependencies are always available).
-        """
-        return True
-
-    def list_models(self) -> List[Dict[str, Any]]:
-        """Return catalog entries for ``hermes tools`` model picker.
-
-        Each entry::
-
-            {
-                "id": "gpt-image-1.5",               # required
-                "display": "GPT Image 1.5",          # optional; defaults to id
-                "speed": "~10s",                     # optional
-                "strengths": "...",                  # optional
-                "price": "$...",                     # optional
-            }
-
-        Default: empty list (provider has no user-selectable models).
-        """
-        return []
-
-    def get_setup_schema(self) -> Dict[str, Any]:
-        """Return provider metadata for the ``hermes tools`` picker.
-
-        Used by ``tools_config.py`` to inject this provider as a row in
-        the Image Generation provider list. Shape::
-
-            {
-                "name": "OpenAI",                     # picker label
-                "badge": "paid",                      # optional short tag
-                "tag": "One-line description...",     # optional subtitle
-                "env_vars": [                         # keys to prompt for
-                    {"key": "OPENAI_API_KEY",
-                     "prompt": "OpenAI API key",
-                     "url": "https://platform.openai.com/api-keys"},
-                ],
-            }
-
-        Default: minimal entry derived from ``display_name``. Override to
-        expose API key prompts and custom badges.
-        """
-        return {
-            "name": self.display_name,
-            "badge": "",
-            "tag": "",
-            "env_vars": [],
-        }
-
-    def default_model(self) -> Optional[str]:
-        """Return the default model id, or None if not applicable."""
-        models = self.list_models()
-        if models:
-            return models[0].get("id")
-        return None
-
-    @abc.abstractmethod
-    def generate(
-        self,
-        prompt: str,
-        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
-        **kwargs: Any,
-    ) -> Dict[str, Any]:
-        """Generate an image.
-
-        Implementations should return the dict from :func:`success_response`
-        or :func:`error_response`. ``kwargs`` may contain forward-compat
-        parameters future versions of the schema will expose — implementations
-        should ignore unknown keys.
-        """
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def resolve_aspect_ratio(value: Optional[str]) -> str:
-    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
-
-    Invalid values are coerced rather than rejected so the tool surface is
-    forgiving of agent mistakes.
-    """
-    if not isinstance(value, str):
-        return DEFAULT_ASPECT_RATIO
-    v = value.strip().lower()
-    if v in VALID_ASPECT_RATIOS:
-        return v
-    return DEFAULT_ASPECT_RATIO
-
-
-def _images_cache_dir() -> Path:
-    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
-    from hermes_constants import get_hermes_home
-
-    path = get_hermes_home() / "cache" / "images"
-    path.mkdir(parents=True, exist_ok=True)
-    return path
-
-
-def save_b64_image(
-    b64_data: str,
-    *,
-    prefix: str = "image",
-    extension: str = "png",
-) -> Path:
-    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
-
-    Returns the absolute :class:`Path` to the saved file.
-
-    Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.
-    """
-    raw = base64.b64decode(b64_data)
-    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    short = uuid.uuid4().hex[:8]
-    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
-    path.write_bytes(raw)
-    return path
-
-
-def success_response(
-    *,
-    image: str,
-    model: str,
-    prompt: str,
-    aspect_ratio: str,
-    provider: str,
-    extra: Optional[Dict[str, Any]] = None,
-) -> Dict[str, Any]:
-    """Build a uniform success response dict.
-
-    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
-    providers like OpenAI). Callers that need to pass through additional
-    backend-specific fields can supply ``extra``.
-    """
-    payload: Dict[str, Any] = {
-        "success": True,
-        "image": image,
-        "model": model,
-        "prompt": prompt,
-        "aspect_ratio": aspect_ratio,
-        "provider": provider,
-    }
-    if extra:
-        for k, v in extra.items():
-            payload.setdefault(k, v)
-    return payload
-
-
-def error_response(
-    *,
-    error: str,
-    error_type: str = "provider_error",
-    provider: str = "",
-    model: str = "",
-    prompt: str = "",
-    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
-) -> Dict[str, Any]:
-    """Build a uniform error response dict."""
-    return {
-        "success": False,
-        "image": None,
-        "error": error,
-        "error_type": error_type,
-        "model": model,
-        "prompt": prompt,
-        "aspect_ratio": aspect_ratio,
-        "provider": provider,
-    }
@@ -1,120 +0,0 @@
-"""
-Image Generation Provider Registry
-==================================
-
-Central map of registered providers. Populated by plugins at import-time via
-``PluginContext.register_image_gen_provider()``; consumed by the
-``image_generate`` tool to dispatch each call to the active backend.
-
-Active selection
----------------
-The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
-If unset, :func:`get_active_provider` applies fallback logic:
-
-1. If exactly one provider is registered, use it.
-2. Otherwise if a provider named ``fal`` is registered, use it (legacy
-   default — matches pre-plugin behavior).
-3. Otherwise return ``None`` (the tool surfaces a helpful error pointing
-   the user at ``hermes tools``).
-"""
-
-from __future__ import annotations
-
-import logging
-import threading
-from typing import Dict, List, Optional
-
-from agent.image_gen_provider import ImageGenProvider
-
-logger = logging.getLogger(__name__)
-
-
-_providers: Dict[str, ImageGenProvider] = {}
-_lock = threading.Lock()
-
-
-def register_provider(provider: ImageGenProvider) -> None:
-    """Register an image generation provider.
-
-    Re-registration (same ``name``) overwrites the previous entry and logs
-    a debug message — this makes hot-reload scenarios (tests, dev loops)
-    behave predictably.
-    """
-    if not isinstance(provider, ImageGenProvider):
-        raise TypeError(
-            f"register_provider() expects an ImageGenProvider instance, "
-            f"got {type(provider).__name__}"
-        )
-    name = provider.name
-    if not isinstance(name, str) or not name.strip():
-        raise ValueError("Image gen provider .name must be a non-empty string")
-    with _lock:
-        existing = _providers.get(name)
-        _providers[name] = provider
-    if existing is not None:
-        logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__)
-    else:
-        logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__)
-
-
-def list_providers() -> List[ImageGenProvider]:
-    """Return all registered providers, sorted by name."""
-    with _lock:
-        items = list(_providers.values())
-    return sorted(items, key=lambda p: p.name)
-
-
-def get_provider(name: str) -> Optional[ImageGenProvider]:
-    """Return the provider registered under *name*, or None."""
-    if not isinstance(name, str):
-        return None
-    with _lock:
-        return _providers.get(name.strip())
-
-
-def get_active_provider() -> Optional[ImageGenProvider]:
-    """Resolve the currently-active provider.
-
-    Reads ``image_gen.provider`` from config.yaml; falls back per the
-    module docstring.
-    """
-    configured: Optional[str] = None
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config()
-        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
-        if isinstance(section, dict):
-            raw = section.get("provider")
-            if isinstance(raw, str) and raw.strip():
-                configured = raw.strip()
-    except Exception as exc:
-        logger.debug("Could not read image_gen.provider from config: %s", exc)
-
-    with _lock:
-        snapshot = dict(_providers)
-
-    if configured:
-        provider = snapshot.get(configured)
-        if provider is not None:
-            return provider
-        logger.debug(
-            "image_gen.provider='%s' configured but not registered; falling back",
-            configured,
-        )
-
-    # Fallback: single-provider case
-    if len(snapshot) == 1:
-        return next(iter(snapshot.values()))
-
-    # Fallback: prefer legacy FAL for backward compat
-    if "fal" in snapshot:
-        return snapshot["fal"]
-
-    return None
-
-
-def _reset_for_tests() -> None:
-    """Clear the registry. **Test-only.**"""
-    with _lock:
-        _providers.clear()
@@ -124,7 +124,6 @@ class InsightsEngine:
        # Gather raw data
        sessions = self._get_sessions(cutoff, source)
        tool_usage = self._get_tool_usage(cutoff, source)
-        skill_usage = self._get_skill_usage(cutoff, source)
        message_stats = self._get_message_stats(cutoff, source)

        if not sessions:
@@ -136,15 +135,6 @@ class InsightsEngine:
                "models": [],
                "platforms": [],
                "tools": [],
-                "skills": {
-                    "summary": {
-                        "total_skill_loads": 0,
-                        "total_skill_edits": 0,
-                        "total_skill_actions": 0,
-                        "distinct_skills_used": 0,
-                    },
-                    "top_skills": [],
-                },
                "activity": {},
                "top_sessions": [],
            }
@@ -154,7 +144,6 @@ class InsightsEngine:
        models = self._compute_model_breakdown(sessions)
        platforms = self._compute_platform_breakdown(sessions)
        tools = self._compute_tool_breakdown(tool_usage)
-        skills = self._compute_skill_breakdown(skill_usage)
        activity = self._compute_activity_patterns(sessions)
        top_sessions = self._compute_top_sessions(sessions)

@@ -167,7 +156,6 @@ class InsightsEngine:
            "models": models,
            "platforms": platforms,
            "tools": tools,
-            "skills": skills,
            "activity": activity,
            "top_sessions": top_sessions,
        }
@@ -296,82 +284,6 @@ class InsightsEngine:
            for name, count in tool_counts.most_common()
        ]

-    def _get_skill_usage(self, cutoff: float, source: str = None) -> List[Dict]:
-        """Extract per-skill usage from assistant tool calls."""
-        skill_counts: Dict[str, Dict[str, Any]] = {}
-
-        if source:
-            cursor = self._conn.execute(
-                """SELECT m.tool_calls, m.timestamp
-                   FROM messages m
-                   JOIN sessions s ON s.id = m.session_id
-                   WHERE s.started_at >= ? AND s.source = ?
-                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
-                (cutoff, source),
-            )
-        else:
-            cursor = self._conn.execute(
-                """SELECT m.tool_calls, m.timestamp
-                   FROM messages m
-                   JOIN sessions s ON s.id = m.session_id
-                   WHERE s.started_at >= ?
-                     AND m.role = 'assistant' AND m.tool_calls IS NOT NULL""",
-                (cutoff,),
-            )
-
-        for row in cursor.fetchall():
-            try:
-                calls = row["tool_calls"]
-                if isinstance(calls, str):
-                    calls = json.loads(calls)
-                if not isinstance(calls, list):
-                    continue
-            except (json.JSONDecodeError, TypeError):
-                continue
-
-            timestamp = row["timestamp"]
-            for call in calls:
-                if not isinstance(call, dict):
-                    continue
-                func = call.get("function", {})
-                tool_name = func.get("name")
-                if tool_name not in {"skill_view", "skill_manage"}:
-                    continue
-
-                args = func.get("arguments")
-                if isinstance(args, str):
-                    try:
-                        args = json.loads(args)
-                    except (json.JSONDecodeError, TypeError):
-                        continue
-                if not isinstance(args, dict):
-                    continue
-
-                skill_name = args.get("name")
-                if not isinstance(skill_name, str) or not skill_name.strip():
-                    continue
-
-                entry = skill_counts.setdefault(
-                    skill_name,
-                    {
-                        "skill": skill_name,
-                        "view_count": 0,
-                        "manage_count": 0,
-                        "last_used_at": None,
-                    },
-                )
-                if tool_name == "skill_view":
-                    entry["view_count"] += 1
-                else:
-                    entry["manage_count"] += 1
-
-                if timestamp is not None and (
-                    entry["last_used_at"] is None or timestamp > entry["last_used_at"]
-                ):
-                    entry["last_used_at"] = timestamp
-
-        return list(skill_counts.values())
-
    def _get_message_stats(self, cutoff: float, source: str = None) -> Dict:
        """Get aggregate message statistics."""
        if source:
@@ -563,46 +475,6 @@ class InsightsEngine:
            })
        return result

-    def _compute_skill_breakdown(self, skill_usage: List[Dict]) -> Dict[str, Any]:
-        """Process per-skill usage into summary + ranked list."""
-        total_skill_loads = sum(s["view_count"] for s in skill_usage) if skill_usage else 0
-        total_skill_edits = sum(s["manage_count"] for s in skill_usage) if skill_usage else 0
-        total_skill_actions = total_skill_loads + total_skill_edits
-
-        top_skills = []
-        for skill in skill_usage:
-            total_count = skill["view_count"] + skill["manage_count"]
-            percentage = (total_count / total_skill_actions * 100) if total_skill_actions else 0
-            top_skills.append({
-                "skill": skill["skill"],
-                "view_count": skill["view_count"],
-                "manage_count": skill["manage_count"],
-                "total_count": total_count,
-                "percentage": percentage,
-                "last_used_at": skill.get("last_used_at"),
-            })
-
-        top_skills.sort(
-            key=lambda s: (
-                s["total_count"],
-                s["view_count"],
-                s["manage_count"],
-                s["last_used_at"] or 0,
-                s["skill"],
-            ),
-            reverse=True,
-        )
-
-        return {
-            "summary": {
-                "total_skill_loads": total_skill_loads,
-                "total_skill_edits": total_skill_edits,
-                "total_skill_actions": total_skill_actions,
-                "distinct_skills_used": len(skill_usage),
-            },
-            "top_skills": top_skills,
-        }
-
    def _compute_activity_patterns(self, sessions: List[Dict]) -> Dict:
        """Analyze activity patterns by day of week and hour."""
        day_counts = Counter()  # 0=Monday ... 6=Sunday
@@ -798,28 +670,6 @@ class InsightsEngine:
                lines.append(f"  ... and {len(report['tools']) - 15} more tools")
            lines.append("")

-        # Skill usage
-        skills = report.get("skills", {})
-        top_skills = skills.get("top_skills", [])
-        if top_skills:
-            lines.append("  🧠 Top Skills")
-            lines.append("  " + "─" * 56)
-            lines.append(f"  {'Skill':<28} {'Loads':>7} {'Edits':>7} {'Last used':>11}")
-            for skill in top_skills[:10]:
-                last_used = "—"
-                if skill.get("last_used_at"):
-                    last_used = datetime.fromtimestamp(skill["last_used_at"]).strftime("%b %d")
-                lines.append(
-                    f"  {skill['skill'][:28]:<28} {skill['view_count']:>7,} {skill['manage_count']:>7,} {last_used:>11}"
-                )
-            summary = skills.get("summary", {})
-            lines.append(
-                f"  Distinct skills: {summary.get('distinct_skills_used', 0)}  "
-                f"Loads: {summary.get('total_skill_loads', 0):,}  "
-                f"Edits: {summary.get('total_skill_edits', 0):,}"
-            )
-            lines.append("")
-
        # Activity patterns
        act = report.get("activity", {})
        if act.get("by_day"):
@@ -903,18 +753,6 @@ class InsightsEngine:
                lines.append(f"  {t['tool']} — {t['count']:,} calls ({t['percentage']:.1f}%)")
            lines.append("")

-        skills = report.get("skills", {})
-        if skills.get("top_skills"):
-            lines.append("**🧠 Top Skills:**")
-            for skill in skills["top_skills"][:5]:
-                suffix = ""
-                if skill.get("last_used_at"):
-                    suffix = f", last used {datetime.fromtimestamp(skill['last_used_at']).strftime('%b %d')}"
-                lines.append(
-                    f"  {skill['skill']} — {skill['view_count']:,} loads, {skill['manage_count']:,} edits{suffix}"
-                )
-            lines.append("")
-
        # Activity summary
        act = report.get("activity", {})
        if act.get("busiest_day") and act.get("busiest_hour"):
@@ -31,7 +31,6 @@ from __future__ import annotations
 import json
 import logging
 import re
-import inspect
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
@@ -313,39 +312,7 @@ class MemoryManager:
                )
        return "\n\n".join(parts)

-    @staticmethod
-    def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
-        """Return how to pass metadata to a provider's memory-write hook."""
-        try:
-            signature = inspect.signature(provider.on_memory_write)
-        except (TypeError, ValueError):
-            return "keyword"
-
-        params = list(signature.parameters.values())
-        if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
-            return "keyword"
-        if "metadata" in signature.parameters:
-            return "keyword"
-
-        accepted = [
-            p for p in params
-            if p.kind in (
-                inspect.Parameter.POSITIONAL_ONLY,
-                inspect.Parameter.POSITIONAL_OR_KEYWORD,
-                inspect.Parameter.KEYWORD_ONLY,
-            )
-        ]
-        if len(accepted) >= 4:
-            return "positional"
-        return "legacy"
-
-    def on_memory_write(
-        self,
-        action: str,
-        target: str,
-        content: str,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> None:
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Notify external providers when the built-in memory tool writes.

        Skips the builtin provider itself (it's the source of the write).
@@ -354,15 +321,7 @@ class MemoryManager:
            if provider.name == "builtin":
                continue
            try:
-                metadata_mode = self._provider_memory_write_metadata_mode(provider)
-                if metadata_mode == "keyword":
-                    provider.on_memory_write(
-                        action, target, content, metadata=dict(metadata or {})
-                    )
-                elif metadata_mode == "positional":
-                    provider.on_memory_write(action, target, content, dict(metadata or {}))
-                else:
-                    provider.on_memory_write(action, target, content)
+                provider.on_memory_write(action, target, content)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_memory_write failed: %s",
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
  on_pre_compress(messages) -> str       — extract before context compression
-  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
+  on_memory_write(action, target, content) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
 """

@@ -34,7 +34,7 @@ from __future__ import annotations

 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List

 logger = logging.getLogger(__name__)

@@ -220,21 +220,12 @@ class MemoryProvider(ABC):
          should all have ``env_var`` set and this method stays no-op).
        """

-    def on_memory_write(
-        self,
-        action: str,
-        target: str,
-        content: str,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> None:
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Called when the built-in memory tool writes an entry.

        action: 'add', 'replace', or 'remove'
        target: 'memory' or 'user'
        content: the entry content
-        metadata: structured provenance for the write, when available. Common
-          keys include ``write_origin``, ``execution_context``, ``session_id``,
-          ``parent_session_id``, ``platform``, and ``tool_name``.

        Use to mirror built-in memory writes to your backend.
        """
@@ -4,9 +4,7 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

-import ipaddress
 import logging
-import os
 import re
 import time
 from pathlib import Path
@@ -16,37 +14,16 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from utils import base_url_host_matches, base_url_hostname
-
 from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)

-
-def _resolve_requests_verify() -> bool | str:
-    """Resolve SSL verify setting for `requests` calls from env vars.
-
-    The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
-    by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
-    and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
-    that a single env var can cover both `requests` and `httpx` callsites
-    inside the same process.
-
-    Returns either a filesystem path to a CA bundle, or True to defer to
-    the requests default (certifi).
-    """
-    for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"):
-        val = os.getenv(env_var)
-        if val and os.path.isfile(val):
-            return val
-    return True
-
 # Provider names that can appear as a "provider:" prefix before a model ID.
 # Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b")
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "stepfun", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "ollama-cloud", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
@@ -57,7 +34,7 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
    "ollama",
-    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
+    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
    "arcee-ai", "arceeai",
    "xai", "x-ai", "x.ai", "grok",
@@ -72,13 +49,6 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


-# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
-# block, so without an explicit check Ollama reached over Tailscale (e.g.
-# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
-# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
-_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
-
-
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -106,11 +76,9 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

 # Descending tiers for context length probing when the model is unknown.
-# We start at 256K (covers GPT-5.x, many current large-context models) and
-# step down on context-length errors until one works.  Tier[0] is also the
-# default fallback when no detection method succeeds.
+# We start at 128K (a safe default for most modern models) and step down
+# on context-length errors until one works.
 CONTEXT_PROBE_TIERS = [
-    256_000,
    128_000,
    64_000,
    32_000,
@@ -145,14 +113,10 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
-    # GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
-    # ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
-    # provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
-    # This hardcoded value is only reached when every probe misses.
-    "gpt-5.5": 1050000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
+    "gpt-5.3-codex-spark": 128000,    # Spark variant has reduced 128k context
    "gpt-5.1-chat": 128000,           # Chat variant has 128k context
    "gpt-5": 400000,                  # GPT-5.x base, mini, codex variants (400k)
    "gpt-4.1": 1047576,
@@ -160,22 +124,10 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
-    "gemma-4": 256000,  # Gemma 4 family
-    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
-    # DeepSeek — V4 family ships with a 1M context window. The legacy
-    # aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
-    # mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
-    # and inherit the same 1M window. The ``deepseek`` substring entry
-    # below remains as a 128K fallback for older / unknown DeepSeek model
-    # ids (e.g. via custom endpoints).
-    # https://api-docs.deepseek.com/zh-cn/quick_start/pricing
-    "deepseek-v4-pro": 1_000_000,
-    "deepseek-v4-flash": 1_000_000,
-    "deepseek-chat": 1_000_000,
-    "deepseek-reasoner": 1_000_000,
+    # DeepSeek
    "deepseek": 128000,
    # Meta
    "llama": 131072,
@@ -217,15 +169,12 @@ DEFAULT_CONTEXT_LENGTHS = {
    "Qwen/Qwen3.5-35B-A3B": 131072,
    "deepseek-ai/DeepSeek-V3.2": 65536,
    "moonshotai/Kimi-K2.5": 262144,
-    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
-    "XiaomiMiMo/MiMo-V2-Flash": 262144,
-    "mimo-v2-pro": 1048576,
-    "mimo-v2.5-pro": 1048576,
-    "mimo-v2.5": 1048576,
-    "mimo-v2-omni": 262144,
-    "mimo-v2-flash": 262144,
+    "XiaomiMiMo/MiMo-V2-Flash": 256000,
+    "mimo-v2-pro": 1000000,
+    "mimo-v2-omni": 256000,
+    "mimo-v2-flash": 256000,
    "zai-org/GLM-5": 202752,
 }

@@ -240,7 +189,6 @@ _CONTEXT_LENGTH_KEYS = (
    "max_seq_len",
    "n_ctx_train",
    "n_ctx",
-    "ctx_size",
 )

 _MAX_COMPLETION_KEYS = (
@@ -263,15 +211,8 @@ def _normalize_base_url(base_url: str) -> str:
    return (base_url or "").strip().rstrip("/")


-def _auth_headers(api_key: str = "") -> Dict[str, str]:
-    token = str(api_key or "").strip()
-    if not token:
-        return {}
-    return {"Authorization": f"Bearer {token}"}
-
-
 def _is_openrouter_base_url(base_url: str) -> bool:
-    return base_url_host_matches(base_url, "openrouter.ai")
+    return "openrouter.ai" in _normalize_base_url(base_url).lower()


 def _is_custom_endpoint(base_url: str) -> bool:
@@ -284,12 +225,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "chatgpt.com": "openai",
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
-    "open.bigmodel.cn": "zai",
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
-    "api.stepfun.ai": "stepfun",
-    "api.stepfun.com": "stepfun",
    "api.arcee.ai": "arcee",
    "api.minimax": "minimax",
    "dashscope.aliyuncs.com": "alibaba",
@@ -334,15 +272,7 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine.
-
-    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
-    container-internal DNS names (``host.docker.internal`` et al.),
-    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
-    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
-    is included so remote-but-trusted Ollama boxes reached over a
-    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
-    """
+    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -357,17 +287,14 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
+    # RFC-1918 private ranges and link-local
+    import ipaddress
    try:
        addr = ipaddress.ip_address(host)
-        if addr.is_private or addr.is_loopback or addr.is_link_local:
-            return True
-        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
-            return True
+        return addr.is_private or addr.is_loopback or addr.is_link_local
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
-    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -378,14 +305,12 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
-            if first == 100 and 64 <= second <= 127:
-                return True
        except ValueError:
            pass
    return False


-def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
+def detect_local_server_type(base_url: str) -> Optional[str]:
    """Detect which local server is running at base_url by probing known endpoints.

    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
@@ -397,10 +322,8 @@ def detect_local_server_type(base_url: str, api_key: str = "") -> Optional[str]:
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

-    headers = _auth_headers(api_key)
-
    try:
-        with httpx.Client(timeout=2.0, headers=headers) as client:
+        with httpx.Client(timeout=2.0) as client:
            # LM Studio exposes /api/v1/models — check first (most specific)
            try:
                r = client.get(f"{server_url}/api/v1/models")
@@ -528,7 +451,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
        return _model_metadata_cache

    try:
-        response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
+        response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
        response.raise_for_status()
        data = response.json()

@@ -587,64 +510,10 @@ def fetch_endpoint_model_metadata(
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    last_error: Optional[Exception] = None

-    if is_local_endpoint(normalized):
-        try:
-            if detect_local_server_type(normalized, api_key=api_key) == "lm-studio":
-                server_url = normalized[:-3].rstrip("/") if normalized.endswith("/v1") else normalized
-                response = requests.get(
-                    server_url.rstrip("/") + "/api/v1/models",
-                    headers=headers,
-                    timeout=10,
-                    verify=_resolve_requests_verify(),
-                )
-                response.raise_for_status()
-                payload = response.json()
-                cache: Dict[str, Dict[str, Any]] = {}
-                for model in payload.get("models", []):
-                    if not isinstance(model, dict):
-                        continue
-                    model_id = model.get("key") or model.get("id")
-                    if not model_id:
-                        continue
-                    entry: Dict[str, Any] = {"name": model.get("name", model_id)}
-
-                    context_length = None
-                    for inst in model.get("loaded_instances", []) or []:
-                        if not isinstance(inst, dict):
-                            continue
-                        cfg = inst.get("config", {})
-                        ctx = cfg.get("context_length") if isinstance(cfg, dict) else None
-                        if isinstance(ctx, int) and ctx > 0:
-                            context_length = ctx
-                            break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
-                    if context_length is not None:
-                        entry["context_length"] = context_length
-
-                    max_completion_tokens = _extract_max_completion_tokens(model)
-                    if max_completion_tokens is not None:
-                        entry["max_completion_tokens"] = max_completion_tokens
-
-                    pricing = _extract_pricing(model)
-                    if pricing:
-                        entry["pricing"] = pricing
-
-                    _add_model_aliases(cache, model_id, entry)
-                    alt_id = model.get("id")
-                    if isinstance(alt_id, str) and alt_id and alt_id != model_id:
-                        _add_model_aliases(cache, alt_id, entry)
-
-                _endpoint_model_metadata_cache[normalized] = cache
-                _endpoint_model_metadata_cache_time[normalized] = time.time()
-                return cache
-        except Exception as exc:
-            last_error = exc
-
    for candidate in candidates:
        url = candidate.rstrip("/") + "/models"
        try:
-            response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
+            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            payload = response.json()
            cache: Dict[str, Dict[str, Any]] = {}
@@ -675,10 +544,9 @@ def fetch_endpoint_model_metadata(
                try:
                    # Try /v1/props first (current llama.cpp); fall back to /props for older builds
                    base = candidate.rstrip("/").replace("/v1", "")
-                    _verify = _resolve_requests_verify()
-                    props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify)
+                    props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
                    if not props_resp.ok:
-                        props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify)
+                        props_resp = requests.get(base + "/props", headers=headers, timeout=5)
                    if props_resp.ok:
                        props = props_resp.json()
                        gen_settings = props.get("default_generation_settings", {})
@@ -750,22 +618,6 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
    return cache.get(key)


-def _invalidate_cached_context_length(model: str, base_url: str) -> None:
-    """Drop a stale cache entry so it gets re-resolved on the next lookup."""
-    key = f"{model}@{base_url}"
-    cache = _load_context_cache()
-    if key not in cache:
-        return
-    del cache[key]
-    path = _get_context_cache_path()
-    try:
-        path.parent.mkdir(parents=True, exist_ok=True)
-        with open(path, "w") as f:
-            yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
-    except Exception as e:
-        logger.debug("Failed to invalidate context length cache entry %s: %s", key, e)
-
-
 def get_next_probe_tier(current_length: int) -> Optional[int]:
    """Return the next lower probe tier, or None if already at minimum."""
    for tier in CONTEXT_PROBE_TIERS:
@@ -864,7 +716,7 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
    return False


-def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Optional[int]:
+def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]:
    """Query an Ollama server for the model's context length.

    Returns the model's maximum context from GGUF metadata via ``/api/show``,
@@ -882,16 +734,14 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
        server_url = server_url[:-3]

    try:
-        server_type = detect_local_server_type(base_url, api_key=api_key)
+        server_type = detect_local_server_type(base_url)
    except Exception:
        return None
    if server_type != "ollama":
        return None

-    headers = _auth_headers(api_key)
-
    try:
-        with httpx.Client(timeout=3.0, headers=headers) as client:
+        with httpx.Client(timeout=3.0) as client:
            resp = client.post(f"{server_url}/api/show", json={"name": bare_model})
            if resp.status_code != 200:
                return None
@@ -919,7 +769,7 @@ def query_ollama_num_ctx(model: str, base_url: str, api_key: str = "") -> Option
    return None


-def _query_local_context_length(model: str, base_url: str, api_key: str = "") -> Optional[int]:
+def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
    """Query a local server for the model's context length."""
    import httpx

@@ -932,15 +782,13 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
    if server_url.endswith("/v1"):
        server_url = server_url[:-3]

-    headers = _auth_headers(api_key)
-
    try:
-        server_type = detect_local_server_type(base_url, api_key=api_key)
+        server_type = detect_local_server_type(base_url)
    except Exception:
        server_type = None

    try:
-        with httpx.Client(timeout=3.0, headers=headers) as client:
+        with httpx.Client(timeout=3.0) as client:
            # Ollama: /api/show returns model details with context info
            if server_type == "ollama":
                resp = client.post(f"{server_url}/api/show", json={"name": model})
@@ -1043,7 +891,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
        }
-        resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
+        resp = requests.get(url, headers=headers, timeout=10)
        if resp.status_code != 200:
            return None
        data = resp.json()
@@ -1057,116 +905,6 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
    return None


-# Known ChatGPT Codex OAuth context windows (observed via live
-# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
-# `context_window` values, which are what Codex actually enforces — the
-# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
-# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
-#
-# Used as a fallback when the live probe fails (no token, network error).
-# Longest keys first so substring match picks the most specific entry.
-_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
-    "gpt-5.1-codex-max": 272_000,
-    "gpt-5.1-codex-mini": 272_000,
-    "gpt-5.3-codex": 272_000,
-    "gpt-5.2-codex": 272_000,
-    "gpt-5.4-mini": 272_000,
-    "gpt-5.5": 272_000,
-    "gpt-5.4": 272_000,
-    "gpt-5.2": 272_000,
-    "gpt-5": 272_000,
-}
-
-
-_codex_oauth_context_cache: Dict[str, int] = {}
-_codex_oauth_context_cache_time: float = 0.0
-_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600  # 1 hour
-
-
-def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
-    """Probe the ChatGPT Codex /models endpoint for per-slug context windows.
-
-    Codex OAuth imposes its own context limits that differ from the direct
-    OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
-    `context_window` field in each model entry is the authoritative source.
-
-    Returns a ``{slug: context_window}`` dict. Empty on failure.
-    """
-    global _codex_oauth_context_cache, _codex_oauth_context_cache_time
-    now = time.time()
-    if (
-        _codex_oauth_context_cache
-        and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
-    ):
-        return _codex_oauth_context_cache
-
-    try:
-        resp = requests.get(
-            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
-            headers={"Authorization": f"Bearer {access_token}"},
-            timeout=10,
-            verify=_resolve_requests_verify(),
-        )
-        if resp.status_code != 200:
-            logger.debug(
-                "Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
-                resp.status_code,
-            )
-            return {}
-        data = resp.json()
-    except Exception as exc:
-        logger.debug("Codex /models probe failed: %s", exc)
-        return {}
-
-    entries = data.get("models", []) if isinstance(data, dict) else []
-    result: Dict[str, int] = {}
-    for item in entries:
-        if not isinstance(item, dict):
-            continue
-        slug = item.get("slug")
-        ctx = item.get("context_window")
-        if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0:
-            result[slug.strip()] = ctx
-
-    if result:
-        _codex_oauth_context_cache = result
-        _codex_oauth_context_cache_time = now
-    return result
-
-
-def _resolve_codex_oauth_context_length(
-    model: str, access_token: str = ""
-) -> Optional[int]:
-    """Resolve a Codex OAuth model's real context window.
-
-    Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
-    have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
-    """
-    model_bare = _strip_provider_prefix(model).strip()
-    if not model_bare:
-        return None
-
-    if access_token:
-        live = _fetch_codex_oauth_context_lengths(access_token)
-        if model_bare in live:
-            return live[model_bare]
-        # Case-insensitive match in case casing drifts
-        model_lower = model_bare.lower()
-        for slug, ctx in live.items():
-            if slug.lower() == model_lower:
-                return ctx
-
-    # Fallback: longest-key-first substring match over hardcoded defaults.
-    model_lower = model_bare.lower()
-    for slug, ctx in sorted(
-        _CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True
-    ):
-        if slug in model_lower:
-            return ctx
-
-    return None
-
-
 def _resolve_nous_context_length(model: str) -> Optional[int]:
    """Resolve Nous Portal model context length via OpenRouter metadata.

@@ -1206,14 +944,12 @@ def get_model_context_length(
    api_key: str = "",
    config_context_length: int | None = None,
    provider: str = "",
-    custom_providers: list | None = None,
 ) -> int:
    """Get the context length for a model.

    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing)
-    1b. AWS Bedrock static table (must precede custom-endpoint probe)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
@@ -1227,23 +963,6 @@ def get_model_context_length(
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
        return config_context_length

-    # 0b. custom_providers per-model override — check before any probe.
-    # This closes the gap where /model switch and display paths used to fall
-    # back to 128K despite the user having a per-model context_length set.
-    # See #15779.
-    if custom_providers and base_url and model:
-        try:
-            from hermes_cli.config import get_custom_provider_context_length
-            cp_ctx = get_custom_provider_context_length(
-                model=model,
-                base_url=base_url,
-                custom_providers=custom_providers,
-            )
-            if cp_ctx:
-                return cp_ctx
-        except Exception:
-            pass  # fall through to probing
-
    # Normalise provider-prefixed model names (e.g. "local:model-name" →
    # "model-name") so cache lookups and server queries use the bare ID that
    # local servers actually know about.  Ollama "model:tag" colons are preserved.
@@ -1253,41 +972,7 @@ def get_model_context_length(
    if base_url:
        cached = get_cached_context_length(model, base_url)
        if cached is not None:
-            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
-            # resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
-            # models.dev and persisted it. Codex OAuth caps at 272K for every
-            # slug, so any cached Codex entry at or above 400K is a leftover
-            # from the old resolution path. Drop it and fall through to the
-            # live /models probe in step 5 below.
-            if provider == "openai-codex" and cached >= 400_000:
-                logger.info(
-                    "Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
-                    "re-resolving via live /models probe",
-                    model, base_url, f"{cached:,}",
-                )
-                _invalidate_cached_context_length(model, base_url)
-            else:
-                return cached
-
-    # 1b. AWS Bedrock — use static context length table.
-    # Bedrock's ListFoundationModels API doesn't expose context window sizes,
-    # so we maintain a curated table in bedrock_adapter.py that reflects
-    # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
-    # Anthropic API).  This must run BEFORE the custom-endpoint probe at
-    # step 2 — bedrock-runtime.<region>.amazonaws.com is not in
-    # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
-    # fail the /models probe (Bedrock doesn't expose that shape), and fall
-    # back to the 128K default before reaching the original step 4b branch.
-    if provider == "bedrock" or (
-        base_url
-        and base_url_hostname(base_url).startswith("bedrock-runtime.")
-        and base_url_host_matches(base_url, "amazonaws.com")
-    ):
-        try:
-            from agent.bedrock_adapter import get_bedrock_context_length
-            return get_bedrock_context_length(model)
-        except ImportError:
-            pass  # boto3 not installed — fall through to generic resolution
+            return cached

    # 2. Active endpoint metadata for truly custom/unknown endpoints.
    # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
@@ -1314,7 +999,7 @@ def get_model_context_length(
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
-                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
+                local_ctx = _query_local_context_length(model, base_url)
                if local_ctx and local_ctx > 0:
                    save_context_length(model, base_url, local_ctx)
                    return local_ctx
@@ -1328,13 +1013,21 @@ def get_model_context_length(

    # 4. Anthropic /v1/models API (only for regular API keys, not OAuth)
    if provider == "anthropic" or (
-        base_url and base_url_hostname(base_url) == "api.anthropic.com"
+        base_url and "api.anthropic.com" in base_url
    ):
        ctx = _query_anthropic_context_length(model, base_url or "https://api.anthropic.com", api_key)
        if ctx:
            return ctx

-    # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)
+    # 4b. AWS Bedrock — use static context length table.
+    # Bedrock's ListFoundationModels doesn't expose context window sizes,
+    # so we maintain a curated table in bedrock_adapter.py.
+    if provider == "bedrock" or (base_url and "bedrock-runtime" in base_url):
+        try:
+            from agent.bedrock_adapter import get_bedrock_context_length
+            return get_bedrock_context_length(model)
+        except ImportError:
+            pass  # boto3 not installed — fall through to generic resolution

    # 5. Provider-aware lookups (before generic OpenRouter cache)
    # These are provider-specific and take priority over the generic OR cache,
@@ -1348,32 +1041,10 @@ def get_model_context_length(
            if inferred:
                effective_provider = inferred

-    # 5a. Copilot live /models API — max_prompt_tokens from the user's account.
-    # This catches account-specific models (e.g. claude-opus-4.6-1m) that
-    # don't exist in models.dev. For models that ARE in models.dev, this
-    # returns the provider-enforced limit which is what users can actually use.
-    if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
-        try:
-            from hermes_cli.models import get_copilot_model_context
-            ctx = get_copilot_model_context(model, api_key=api_key)
-            if ctx:
-                return ctx
-        except Exception:
-            pass  # Fall through to models.dev
-
    if effective_provider == "nous":
        ctx = _resolve_nous_context_length(model)
        if ctx:
            return ctx
-    if effective_provider == "openai-codex":
-        # Codex OAuth enforces lower context limits than the direct OpenAI
-        # API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
-        # on Codex). Authoritative source is Codex's own /models endpoint.
-        codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
-        if codex_ctx:
-            if base_url:
-                save_context_length(model, base_url, codex_ctx)
-            return codex_ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
@@ -1383,7 +1054,7 @@ def get_model_context_length(
    # 6. OpenRouter live API metadata (provider-unaware fallback)
    metadata = fetch_model_metadata()
    if model in metadata:
-        return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)
+        return metadata[model].get("context_length", 128000)

    # 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
    # Only check `default_model in model` (is the key a substring of the input).
@@ -1398,7 +1069,7 @@ def get_model_context_length(

    # 9. Query local server as last resort
    if base_url and is_local_endpoint(base_url):
-        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
+        local_ctx = _query_local_context_length(model, base_url)
        if local_ctx and local_ctx > 0:
            save_context_length(model, base_url, local_ctx)
            return local_ctx
@@ -146,7 +146,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
-    "stepfun": "stepfun",
    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
    "minimax-cn": "minimax-cn",
@@ -418,9 +417,6 @@ def list_provider_models(provider: str) -> List[str]:

    Returns an empty list if the provider is unknown or has no data.
    """
-    from hermes_cli.models import normalize_provider
-    provider = normalize_provider(provider) or provider
-    
    models = _get_provider_models(provider)
    if models is None:
        return []
@@ -1,190 +0,0 @@
-"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
-
-Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
-tool calling.  Requests that violate it fail with HTTP 400:
-
-    tools.function.parameters is not a valid moonshot flavored json schema,
-    details: <...>
-
-Known rejection modes documented at
-https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
-and MoonshotAI/kimi-cli#1595:
-
-1. Every property schema must carry a ``type``.  Standard JSON Schema allows
-   type to be omitted (the value is then unconstrained); Moonshot refuses.
-2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
-   the parent.  Presence of both causes "type should be defined in anyOf
-   items instead of the parent schema".
-
-The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
-handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
-applies at MCP registration time for all providers.
-"""
-
-from __future__ import annotations
-
-import copy
-from typing import Any, Dict, List
-
-# Keys whose values are maps of name → schema (not schemas themselves).
-# When we recurse, we walk the values of these maps as schemas, but we do
-# NOT apply the missing-type repair to the map itself.
-_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
-
-# Keys whose values are lists of schemas.
-_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
-
-# Keys whose values are a single nested schema.
-_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
-
-
-def _repair_schema(node: Any, is_schema: bool = True) -> Any:
-    """Recursively apply Moonshot repairs to a schema node.
-
-    ``is_schema=True`` means this dict is a JSON Schema node and gets the
-    missing-type + anyOf-parent repairs applied.  ``is_schema=False`` means
-    it's a container map (e.g. the value of ``properties``) and we only
-    recurse into its values.
-    """
-    if isinstance(node, list):
-        # Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
-        # every element is itself a schema.
-        return [_repair_schema(item, is_schema=True) for item in node]
-    if not isinstance(node, dict):
-        return node
-
-    # Walk the dict, deciding per-key whether recursion is into a schema
-    # node, a container map, or a scalar.
-    repaired: Dict[str, Any] = {}
-    for key, value in node.items():
-        if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
-            # Map of name → schema.  Don't treat the map itself as a schema
-            # (it has no type / properties of its own), but each value is.
-            repaired[key] = {
-                sub_key: _repair_schema(sub_val, is_schema=True)
-                for sub_key, sub_val in value.items()
-            }
-        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
-            repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
-        elif key in _SCHEMA_NODE_KEYS:
-            # items / not / additionalProperties: single nested schema.
-            # additionalProperties can also be a bool — leave those alone.
-            if isinstance(value, dict):
-                repaired[key] = _repair_schema(value, is_schema=True)
-            else:
-                repaired[key] = value
-        else:
-            # Scalars (description, title, format, enum values, etc.) pass through.
-            repaired[key] = value
-
-    if not is_schema:
-        return repaired
-
-    # Rule 2: when anyOf is present, type belongs only on the children.
-    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
-        repaired.pop("type", None)
-        return repaired
-
-    # Rule 1: property schemas without type need one.  $ref nodes are exempt
-    # — their type comes from the referenced definition.
-    if "$ref" in repaired:
-        return repaired
-    return _fill_missing_type(repaired)
-
-
-def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
-    """Infer a reasonable ``type`` if this schema node has none."""
-    if "type" in node and node["type"] not in (None, ""):
-        return node
-
-    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
-    # → type of first enum value, else fall back to ``string`` (safest scalar).
-    if "properties" in node or "required" in node or "additionalProperties" in node:
-        inferred = "object"
-    elif "items" in node or "prefixItems" in node:
-        inferred = "array"
-    elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
-        sample = node["enum"][0]
-        if isinstance(sample, bool):
-            inferred = "boolean"
-        elif isinstance(sample, int):
-            inferred = "integer"
-        elif isinstance(sample, float):
-            inferred = "number"
-        else:
-            inferred = "string"
-    else:
-        inferred = "string"
-
-    return {**node, "type": inferred}
-
-
-def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
-    """Normalize tool parameters to a Moonshot-compatible object schema.
-
-    Returns a deep-copied schema with the two flavored-JSON-Schema repairs
-    applied.  Input is not mutated.
-    """
-    if not isinstance(parameters, dict):
-        return {"type": "object", "properties": {}}
-
-    repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
-    if not isinstance(repaired, dict):
-        return {"type": "object", "properties": {}}
-
-    # Top-level must be an object schema
-    if repaired.get("type") != "object":
-        repaired["type"] = "object"
-    if "properties" not in repaired:
-        repaired["properties"] = {}
-
-    return repaired
-
-
-def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
-    if not tools:
-        return tools
-
-    sanitized: List[Dict[str, Any]] = []
-    any_change = False
-    for tool in tools:
-        if not isinstance(tool, dict):
-            sanitized.append(tool)
-            continue
-        fn = tool.get("function")
-        if not isinstance(fn, dict):
-            sanitized.append(tool)
-            continue
-        params = fn.get("parameters")
-        repaired = sanitize_moonshot_tool_parameters(params)
-        if repaired is not params:
-            any_change = True
-            new_fn = {**fn, "parameters": repaired}
-            sanitized.append({**tool, "function": new_fn})
-        else:
-            sanitized.append(tool)
-
-    return sanitized if any_change else tools
-
-
-def is_moonshot_model(model: str | None) -> bool:
-    """True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
-
-    Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
-    prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
-    Detection by model name covers Nous / OpenRouter / other aggregators that
-    route to Moonshot's inference, where the base URL is the aggregator's, not
-    ``api.moonshot.ai``.
-    """
-    if not model:
-        return False
-    bare = model.strip().lower()
-    # Last path segment (covers aggregator-prefixed slugs)
-    tail = bare.rsplit("/", 1)[-1]
-    if tail.startswith("kimi-") or tail == "kimi":
-        return True
-    # Vendor-prefixed forms commonly used on aggregators
-    if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
-        return True
-    return False
@@ -180,145 +180,3 @@ def format_remaining(seconds: float) -> str:
    h, remainder = divmod(s, 3600)
    m = remainder // 60
    return f"{h}h {m}m" if m else f"{h}h"
-
-
-# Buckets with reset windows shorter than this are treated as transient
-# (upstream jitter, secondary throttling) rather than a genuine quota
-# exhaustion worth a cross-session breaker trip.
-_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
-
-
-def is_genuine_nous_rate_limit(
-    *,
-    headers: Optional[Mapping[str, str]] = None,
-    last_known_state: Optional[Any] = None,
-) -> bool:
-    """Decide whether a 429 from Nous Portal is a real account rate limit.
-
-    Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
-    MiMo, Hermes, ...) behind one endpoint.  A 429 can mean either:
-
-      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
-          exhausted — a genuine rate limit that will last until the
-          bucket resets.
-      (b) The upstream provider is out of capacity for a specific model
-          — transient, clears in seconds, and has nothing to do with
-          the caller's quota on Nous.
-
-    Tripping the cross-session breaker on (b) blocks ALL Nous requests
-    (and all models, since Nous is one provider key) for minutes even
-    though the caller's account is healthy and a different model would
-    have worked.  That's the bug users hit when DeepSeek V4 Pro 429s
-    trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
-
-    We tell the two apart by looking at:
-
-      1. The 429 response's own ``x-ratelimit-*`` headers.  Nous emits
-         the full suite on every response including 429s.  An exhausted
-         bucket (``remaining == 0`` with a reset window >= 60s) is
-         proof of (a).
-      2. The last-known-good rate-limit state captured by
-         ``_capture_rate_limits()`` on the previous successful
-         response.  If any bucket there was already near-exhausted with
-         a substantial reset window, the current 429 is almost
-         certainly (a) continuing from that condition.
-
-    If neither signal fires, we treat the 429 as (b): fail the single
-    request, let the retry loop or model-switch proceed, and do NOT
-    write the cross-session breaker file.
-
-    Returns True when the evidence points at (a).
-    """
-    # Signal 1: current 429 response headers.
-    state = _parse_buckets_from_headers(headers)
-    if _has_exhausted_bucket(state):
-        return True
-
-    # Signal 2: last-known-good state from a recent successful response.
-    # Accepts either a RateLimitState (dataclass from rate_limit_tracker)
-    # or a dict of bucket snapshots.
-    if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
-        return True
-
-    return False
-
-
-def _parse_buckets_from_headers(
-    headers: Optional[Mapping[str, str]],
-) -> dict[str, tuple[Optional[int], Optional[float]]]:
-    """Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
-
-    Returns empty dict when no rate-limit headers are present.
-    """
-    if not headers:
-        return {}
-
-    lowered = {k.lower(): v for k, v in headers.items()}
-    if not any(k.startswith("x-ratelimit-") for k in lowered):
-        return {}
-
-    def _maybe_int(raw: Optional[str]) -> Optional[int]:
-        if raw is None:
-            return None
-        try:
-            return int(float(raw))
-        except (TypeError, ValueError):
-            return None
-
-    def _maybe_float(raw: Optional[str]) -> Optional[float]:
-        if raw is None:
-            return None
-        try:
-            return float(raw)
-        except (TypeError, ValueError):
-            return None
-
-    result: dict[str, tuple[Optional[int], Optional[float]]] = {}
-    for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
-        remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
-        reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
-        if remaining is not None or reset is not None:
-            result[tag] = (remaining, reset)
-    return result
-
-
-def _has_exhausted_bucket(
-    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
-) -> bool:
-    """Return True when any bucket has remaining == 0 AND a meaningful reset window."""
-    for remaining, reset in buckets.values():
-        if remaining is None or remaining > 0:
-            continue
-        if reset is None:
-            continue
-        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
-            return True
-    return False
-
-
-def _has_exhausted_bucket_in_object(state: Any) -> bool:
-    """Check a RateLimitState-like object for an exhausted bucket.
-
-    Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
-    exposed as attributes ``requests_min``, ``requests_hour``,
-    ``tokens_min``, ``tokens_hour``) and falls back gracefully for any
-    object missing those attributes.
-    """
-    for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
-        bucket = getattr(state, attr, None)
-        if bucket is None:
-            continue
-        limit = getattr(bucket, "limit", 0) or 0
-        remaining = getattr(bucket, "remaining", 0) or 0
-        # Prefer the adjusted "remaining_seconds_now" property when present;
-        # fall back to raw reset_seconds.
-        reset = getattr(bucket, "remaining_seconds_now", None)
-        if reset is None:
-            reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
-        if limit <= 0:
-            continue
-        if remaining > 0:
-            continue
-        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
-            return True
-    return False
@@ -1,191 +0,0 @@
-"""
-Contextual first-touch onboarding hints.
-
-Instead of blocking first-run questionnaires, show a one-time hint the *first*
-time a user hits a behavior fork — message-while-running, first long-running
-tool, etc.  Each hint is shown once per install (tracked in ``config.yaml`` under
-``onboarding.seen.<flag>``) and then never again.
-
-Keep this module tiny and dependency-free so both the CLI and gateway can import
-it without pulling in heavy modules.
-"""
-
-from __future__ import annotations
-
-import logging
-from pathlib import Path
-from typing import Any, Mapping, Optional
-
-logger = logging.getLogger(__name__)
-
-
-# -------------------------------------------------------------------------
-# Flag names (stable — used as config.yaml keys under onboarding.seen)
-# -------------------------------------------------------------------------
-
-BUSY_INPUT_FLAG = "busy_input_prompt"
-TOOL_PROGRESS_FLAG = "tool_progress_prompt"
-OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
-
-
-# -------------------------------------------------------------------------
-# Hint content
-# -------------------------------------------------------------------------
-
-def busy_input_hint_gateway(mode: str) -> str:
-    """Hint shown the first time a user messages while the agent is busy.
-
-    ``mode`` is the effective busy_input_mode that was just applied, so the
-    message matches reality ("I just interrupted…" vs "I just queued…").
-    """
-    if mode == "queue":
-        return (
-            "💡 First-time tip — I queued your message instead of interrupting. "
-            "Send `/busy interrupt` to make new messages stop the current task "
-            "immediately, or `/busy status` to check. This notice won't appear again."
-        )
-    if mode == "steer":
-        return (
-            "💡 First-time tip — I steered your message into the current run; "
-            "it will arrive after the next tool call instead of interrupting. "
-            "Send `/busy interrupt` or `/busy queue` to change this, or "
-            "`/busy status` to check. This notice won't appear again."
-        )
-    return (
-        "💡 First-time tip — I just interrupted my current task to answer you. "
-        "Send `/busy queue` to queue follow-ups for after the current task instead, "
-        "`/busy steer` to inject them mid-run without interrupting, or "
-        "`/busy status` to check. This notice won't appear again."
-    )
-
-
-def busy_input_hint_cli(mode: str) -> str:
-    """CLI version of the busy-input hint (plain text, no markdown)."""
-    if mode == "queue":
-        return (
-            "(tip) Your message was queued for the next turn. "
-            "Use /busy interrupt to make Enter stop the current run instead, "
-            "or /busy steer to inject mid-run. This tip only shows once."
-        )
-    if mode == "steer":
-        return (
-            "(tip) Your message was steered into the current run; it arrives "
-            "after the next tool call. Use /busy interrupt or /busy queue to "
-            "change this. This tip only shows once."
-        )
-    return (
-        "(tip) Your message interrupted the current run. "
-        "Use /busy queue to queue messages for the next turn instead, "
-        "or /busy steer to inject mid-run. This tip only shows once."
-    )
-
-
-def tool_progress_hint_gateway() -> str:
-    return (
-        "💡 First-time tip — that tool took a while and I'm streaming every step. "
-        "If the progress messages feel noisy, send `/verbose` to cycle modes "
-        "(all → new → off). This notice won't appear again."
-    )
-
-
-def tool_progress_hint_cli() -> str:
-    return (
-        "(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
-        "display modes (all -> new -> off -> verbose). This tip only shows once."
-    )
-
-
-def openclaw_residue_hint_cli() -> str:
-    """Banner shown the first time Hermes starts and finds ``~/.openclaw/``.
-
-    OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
-    otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
-    get carried forward and the agent dutifully reads them). ``hermes claw
-    cleanup`` renames the directory so the agent stops finding it.
-    """
-    return (
-        "Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
-        "After migrating, the agent can still get confused and read that "
-        "directory's config/memory instead of Hermes's.\n"
-        "Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
-        "This tip only shows once; rerun it any time with `hermes claw cleanup`."
-    )
-
-
-def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
-    """Return True if an OpenClaw workspace directory is present in ``$HOME``.
-
-    Pure filesystem check — no side effects. ``home`` override exists for tests.
-    """
-    base = home or Path.home()
-    try:
-        return (base / ".openclaw").is_dir()
-    except OSError:
-        return False
-
-
-# -------------------------------------------------------------------------
-# State read / write
-# -------------------------------------------------------------------------
-
-def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
-    onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
-    if not isinstance(onboarding, Mapping):
-        return {}
-    seen = onboarding.get("seen")
-    return seen if isinstance(seen, Mapping) else {}
-
-
-def is_seen(config: Mapping[str, Any], flag: str) -> bool:
-    """Return True if the user has already been shown this first-touch hint."""
-    return bool(_get_seen_dict(config).get(flag))
-
-
-def mark_seen(config_path: Path, flag: str) -> bool:
-    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
-
-    Uses the atomic YAML writer so a concurrent process can't observe a
-    partially-written file.  Returns True on success, False on any error
-    (including the config file being absent — onboarding is best-effort).
-    """
-    try:
-        import yaml
-        from utils import atomic_yaml_write
-    except Exception as e:  # pragma: no cover — dependency issue
-        logger.debug("onboarding: failed to import yaml/utils: %s", e)
-        return False
-
-    try:
-        cfg: dict = {}
-        if config_path.exists():
-            with open(config_path, encoding="utf-8") as f:
-                cfg = yaml.safe_load(f) or {}
-        if not isinstance(cfg.get("onboarding"), dict):
-            cfg["onboarding"] = {}
-        seen = cfg["onboarding"].get("seen")
-        if not isinstance(seen, dict):
-            seen = {}
-            cfg["onboarding"]["seen"] = seen
-        if seen.get(flag) is True:
-            return True  # already marked — nothing to do
-        seen[flag] = True
-        atomic_yaml_write(config_path, cfg)
-        return True
-    except Exception as e:
-        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
-        return False
-
-
-__all__ = [
-    "BUSY_INPUT_FLAG",
-    "TOOL_PROGRESS_FLAG",
-    "OPENCLAW_RESIDUE_FLAG",
-    "busy_input_hint_gateway",
-    "busy_input_hint_cli",
-    "tool_progress_hint_gateway",
-    "tool_progress_hint_cli",
-    "openclaw_residue_hint_cli",
-    "detect_openclaw_residue",
-    "is_seen",
-    "mark_seen",
-]
@@ -350,13 +350,7 @@ PLATFORM_HINTS = {
    ),
    "cli": (
        "You are a CLI AI Agent. Try not to use markdown but simple text "
-        "renderable inside a terminal. "
-        "File delivery: there is no attachment channel — the user reads your "
-        "response directly in their terminal. Do NOT emit MEDIA:/path tags "
-        "(those are only intercepted on messaging platforms like Telegram, "
-        "Discord, Slack, etc.; on the CLI they render as literal text). "
-        "When referring to a file you created or changed, just state its "
-        "absolute path in plain text; the user can open it from there."
+        "renderable inside a terminal."
    ),
    "sms": (
        "You are communicating via SMS. Keep responses concise and use plain text "
@@ -370,32 +364,6 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
-    "mattermost": (
-        "You are in a Mattermost workspace communicating with your user. "
-        "Mattermost renders standard Markdown — headings, bold, italic, code "
-        "blocks, and tables all work. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
-        "attachments, audio and video as file attachments. "
-        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
-    ),
-    "matrix": (
-        "You are in a Matrix room communicating with your user. "
-        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
-        "the adapter converts your Markdown to HTML for rich display. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
-        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
-        "and other files as downloadable attachments."
-    ),
-    "feishu": (
-        "You are in a Feishu (Lark) workspace communicating with your user. "
-        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
-        "links are supported. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
-        "inline, audio files as voice messages, and other files as attachments."
-    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -422,29 +390,6 @@ PLATFORM_HINTS = {
        "your response. Images are sent as native photos, and other files arrive as downloadable "
        "documents."
    ),
-    "yuanbao": (
-        "You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
-        "Markdown formatting is supported (code blocks, tables, bold/italic). "
-        "You CAN send media files natively — to deliver a file to the user, include "
-        "MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
-        "Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
-        "and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
-        "(max 50 MB). You can also include image URLs in markdown format ![alt](url) and "
-        "they will be downloaded and sent as native photos. "
-        "Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
-        "whenever a file delivery is appropriate.\n\n"
-        "Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
-        "When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
-        "you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
-        "  1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
-        "     '捂脸', '合十') to discover matching sticker_ids.\n"
-        "  2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
-        "     TIMFaceElem that renders as a native sticker in the chat.\n"
-        "DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
-        "them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
-        "image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
-        "— when a sticker is the right response, use yb_send_sticker."
-    ),
 }

 # ---------------------------------------------------------------------------
@@ -848,11 +793,6 @@ def build_skills_system_prompt(
            "Skills also encode the user's preferred approach, conventions, and quality standards "
            "for tasks like code review, planning, and testing — load them even for tasks you "
            "already know how to do, because the skill defines how it should be done here.\n"
-            "Whenever the user asks you to configure, set up, install, enable, disable, modify, "
-            "or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
-            "skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
-            "first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
-            "`hermes setup`) so you don't have to guess or invent workarounds.\n"
            "If a skill has issues, fix it with skill_manage(action='patch').\n"
            "After difficult/iterative tasks, offer to save as a skill. "
            "If a skill you loaded was missing steps, had wrong commands, or needed "
@@ -13,48 +13,6 @@ import re

 logger = logging.getLogger(__name__)

-# Sensitive query-string parameter names (case-insensitive exact match).
-# Ported from nearai/ironclaw#2529 — catches tokens whose values don't match
-# any known vendor prefix regex (e.g. opaque tokens, short OAuth codes).
-_SENSITIVE_QUERY_PARAMS = frozenset({
-    "access_token",
-    "refresh_token",
-    "id_token",
-    "token",
-    "api_key",
-    "apikey",
-    "client_secret",
-    "password",
-    "auth",
-    "jwt",
-    "session",
-    "secret",
-    "key",
-    "code",           # OAuth authorization codes
-    "signature",      # pre-signed URL signatures
-    "x-amz-signature",
-})
-
-# Sensitive form-urlencoded / JSON body key names (case-insensitive exact match).
-# Exact match, NOT substring — "token_count" and "session_id" must NOT match.
-# Ported from nearai/ironclaw#2529.
-_SENSITIVE_BODY_KEYS = frozenset({
-    "access_token",
-    "refresh_token",
-    "id_token",
-    "token",
-    "api_key",
-    "apikey",
-    "client_secret",
-    "password",
-    "auth",
-    "jwt",
-    "secret",
-    "private_key",
-    "authorization",
-    "key",
-})
-
 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
 # `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
 _REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
@@ -150,30 +108,6 @@ _DISCORD_MENTION_RE = re.compile(r"<@!?(\d{17,20})>")
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")

-# URLs containing query strings — matches `scheme://...?...[# or end]`.
-# Used to scan text for URLs whose query params may contain secrets.
-# Ported from nearai/ironclaw#2529.
-_URL_WITH_QUERY_RE = re.compile(
-    r"(https?|wss?|ftp)://"          # scheme
-    r"([^\s/?#]+)"                    # authority (may include userinfo)
-    r"([^\s?#]*)"                     # path
-    r"\?([^\s#]+)"                    # query (required)
-    r"(#\S*)?",                       # optional fragment
-)
-
-# URLs containing userinfo — `scheme://user:password@host` for ANY scheme
-# (not just DB protocols already covered by _DB_CONNSTR_RE above).
-# Catches things like `https://user:token@api.example.com/v1/foo`.
-_URL_USERINFO_RE = re.compile(
-    r"(https?|wss?|ftp)://([^/\s:@]+):([^/\s@]+)@",
-)
-
-# Form-urlencoded body detection: conservative — only applies when the entire
-# text looks like a query string (k=v&k=v pattern with no newlines).
-_FORM_BODY_RE = re.compile(
-    r"^[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*(?:&[A-Za-z_][A-Za-z0-9_.-]*=[^&\s]*)+$"
-)
-
 # Compile known prefix patterns into one alternation
 _PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
@@ -187,72 +121,6 @@ def _mask_token(token: str) -> str:
    return f"{token[:6]}...{token[-4:]}"


-def _redact_query_string(query: str) -> str:
-    """Redact sensitive parameter values in a URL query string.
-
-    Handles `k=v&k=v` format. Sensitive keys (case-insensitive) have values
-    replaced with `***`. Non-sensitive keys pass through unchanged.
-    Empty or malformed pairs are preserved as-is.
-    """
-    if not query:
-        return query
-    parts = []
-    for pair in query.split("&"):
-        if "=" not in pair:
-            parts.append(pair)
-            continue
-        key, _, value = pair.partition("=")
-        if key.lower() in _SENSITIVE_QUERY_PARAMS:
-            parts.append(f"{key}=***")
-        else:
-            parts.append(pair)
-    return "&".join(parts)
-
-
-def _redact_url_query_params(text: str) -> str:
-    """Scan text for URLs with query strings and redact sensitive params.
-
-    Catches opaque tokens that don't match vendor prefix regexes, e.g.
-    `https://example.com/cb?code=ABC123&state=xyz` → `...?code=***&state=xyz`.
-    """
-    def _sub(m: re.Match) -> str:
-        scheme = m.group(1)
-        authority = m.group(2)
-        path = m.group(3)
-        query = _redact_query_string(m.group(4))
-        fragment = m.group(5) or ""
-        return f"{scheme}://{authority}{path}?{query}{fragment}"
-    return _URL_WITH_QUERY_RE.sub(_sub, text)
-
-
-def _redact_url_userinfo(text: str) -> str:
-    """Strip `user:password@` from HTTP/WS/FTP URLs.
-
-    DB protocols (postgres, mysql, mongodb, redis, amqp) are handled
-    separately by `_DB_CONNSTR_RE`.
-    """
-    return _URL_USERINFO_RE.sub(
-        lambda m: f"{m.group(1)}://{m.group(2)}:***@",
-        text,
-    )
-
-
-def _redact_form_body(text: str) -> str:
-    """Redact sensitive values in a form-urlencoded body.
-
-    Only applies when the entire input looks like a pure form body
-    (k=v&k=v with no newlines, no other text). Single-line non-form
-    text passes through unchanged. This is a conservative pass — the
-    `_redact_url_query_params` function handles embedded query strings.
-    """
-    if not text or "\n" in text or "&" not in text:
-        return text
-    # The body-body form check is strict: only trigger on clean k=v&k=v.
-    if not _FORM_BODY_RE.match(text.strip()):
-        return text
-    return _redact_query_string(text.strip())
-
-
 def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

@@ -305,16 +173,6 @@ def redact_sensitive_text(text: str) -> str:
    # JWT tokens (eyJ... — base64-encoded JSON headers)
    text = _JWT_RE.sub(lambda m: _mask_token(m.group(0)), text)

-    # URL userinfo (http(s)://user:pass@host) — redact for non-DB schemes.
-    # DB schemes are handled above by _DB_CONNSTR_RE.
-    text = _redact_url_userinfo(text)
-
-    # URL query params containing opaque tokens (?access_token=…&code=…)
-    text = _redact_url_query_params(text)
-
-    # Form-urlencoded bodies (only triggers on clean k=v&k=v inputs).
-    text = _redact_form_body(text)
-
    # Discord user/role mentions (<@snowflake_id>)
    text = _DISCORD_MENTION_RE.sub(lambda m: f"<@{'!' if '!' in m.group(0) else ''}***>", text)

@@ -1,835 +0,0 @@
-"""
-Shell-script hooks bridge.
-
-Reads the ``hooks:`` block from ``cli-config.yaml``, prompts the user for
-consent on first use of each ``(event, command)`` pair, and registers
-callbacks on the existing plugin hook manager so every existing
-``invoke_hook()`` site dispatches to the configured shell scripts — with
-zero changes to call sites.
-
-Design notes
------------
-* Python plugins and shell hooks compose naturally: both flow through
-  :func:`hermes_cli.plugins.invoke_hook` and its aggregators.  Python
-  plugins are registered first (via ``discover_and_load()``) so their
-  block decisions win ties over shell-hook blocks.
-* Subprocess execution uses ``shlex.split(os.path.expanduser(command))``
-  with ``shell=False`` — no shell injection footguns.  Users that need
-  pipes/redirection wrap their logic in a script.
-* First-use consent is gated by the allowlist under
-  ``~/.hermes/shell-hooks-allowlist.json``.  Non-TTY callers must pass
-  ``accept_hooks=True`` (resolved from ``--accept-hooks``,
-  ``HERMES_ACCEPT_HOOKS``, or ``hooks_auto_accept: true`` in config)
-  for registration to succeed without a prompt.
-* Registration is idempotent — safe to invoke from both the CLI entry
-  point (``hermes_cli/main.py``) and the gateway entry point
-  (``gateway/run.py``).
-
-Wire protocol
-------------
-**stdin** (JSON, piped to the script)::
-
-    {
-        "hook_event_name": "pre_tool_call",
-        "tool_name":       "terminal",
-        "tool_input":      {"command": "rm -rf /"},
-        "session_id":      "sess_abc123",
-        "cwd":             "/home/user/project",
-        "extra":           {...}   # event-specific kwargs
-    }
-
-**stdout** (JSON, optional — anything else is ignored)::
-
-    # Block a pre_tool_call (either shape accepted; normalised internally):
-    {"decision": "block", "reason":  "Forbidden command"}   # Claude-Code-style
-    {"action":   "block", "message": "Forbidden command"}   # Hermes-canonical
-
-    # Inject context for pre_llm_call:
-    {"context": "Today is Friday"}
-
-    # Silent no-op:
-    <empty or any non-matching JSON object>
-"""
-
-from __future__ import annotations
-
-import difflib
-import json
-import logging
-import os
-import re
-import shlex
-import subprocess
-import sys
-import tempfile
-import threading
-import time
-from contextlib import contextmanager
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple
-
-try:
-    import fcntl  # POSIX only; Windows falls back to best-effort without flock.
-except ImportError:  # pragma: no cover
-    fcntl = None  # type: ignore[assignment]
-
-from hermes_constants import get_hermes_home
-
-logger = logging.getLogger(__name__)
-
-DEFAULT_TIMEOUT_SECONDS = 60
-MAX_TIMEOUT_SECONDS = 300
-ALLOWLIST_FILENAME = "shell-hooks-allowlist.json"
-
-# (event, matcher, command) triples that have been wired to the plugin
-# manager in the current process.  Matcher is part of the key because
-# the same script can legitimately register for different matchers under
-# the same event (e.g. one entry per tool the user wants to gate).
-# Second registration attempts for the exact same triple become no-ops
-# so the CLI and gateway can both call register_from_config() safely.
-_registered: Set[Tuple[str, Optional[str], str]] = set()
-_registered_lock = threading.Lock()
-
-# Intra-process lock for allowlist read-modify-write on platforms that
-# lack ``fcntl`` (non-POSIX).  Kept separate from ``_registered_lock``
-# because ``register_from_config`` already holds ``_registered_lock`` when
-# it triggers ``_record_approval`` — reusing it here would self-deadlock
-# (``threading.Lock`` is non-reentrant).  POSIX callers use the sibling
-# ``.lock`` file via ``fcntl.flock`` and bypass this.
-_allowlist_write_lock = threading.Lock()
-
-
-@dataclass
-class ShellHookSpec:
-    """Parsed and validated representation of a single ``hooks:`` entry."""
-
-    event: str
-    command: str
-    matcher: Optional[str] = None
-    timeout: int = DEFAULT_TIMEOUT_SECONDS
-    compiled_matcher: Optional[re.Pattern] = field(default=None, repr=False)
-
-    def __post_init__(self) -> None:
-        # Strip whitespace introduced by YAML quirks (e.g. multi-line string
-        # folding) — a matcher of " terminal" would otherwise silently fail
-        # to match "terminal" without any diagnostic.
-        if isinstance(self.matcher, str):
-            stripped = self.matcher.strip()
-            self.matcher = stripped if stripped else None
-        if self.matcher:
-            try:
-                self.compiled_matcher = re.compile(self.matcher)
-            except re.error as exc:
-                logger.warning(
-                    "shell hook matcher %r is invalid (%s) — treating as "
-                    "literal equality", self.matcher, exc,
-                )
-                self.compiled_matcher = None
-
-    def matches_tool(self, tool_name: Optional[str]) -> bool:
-        if not self.matcher:
-            return True
-        if tool_name is None:
-            return False
-        if self.compiled_matcher is not None:
-            return self.compiled_matcher.fullmatch(tool_name) is not None
-        # compiled_matcher is None only when the regex failed to compile,
-        # in which case we already warned and fall back to literal equality.
-        return tool_name == self.matcher
-
-
-# ---------------------------------------------------------------------------
-# Public API
-# ---------------------------------------------------------------------------
-
-def register_from_config(
-    cfg: Optional[Dict[str, Any]],
-    *,
-    accept_hooks: bool = False,
-) -> List[ShellHookSpec]:
-    """Register every configured shell hook on the plugin manager.
-
-    ``cfg`` is the full parsed config dict (``hermes_cli.config.load_config``
-    output).  The ``hooks:`` key is read out of it.  Missing, empty, or
-    non-dict ``hooks`` is treated as zero configured hooks.
-
-    ``accept_hooks=True`` skips the TTY consent prompt — the caller is
-    promising that the user has opted in via a flag, env var, or config
-    setting.  ``HERMES_ACCEPT_HOOKS=1`` and ``hooks_auto_accept: true`` are
-    also honored inside this function so either CLI or gateway call sites
-    pick them up.
-
-    Returns the list of :class:`ShellHookSpec` entries that ended up wired
-    up on the plugin manager.  Skipped entries (unknown events, malformed,
-    not allowlisted, already registered) are logged but not returned.
-    """
-    if not isinstance(cfg, dict):
-        return []
-
-    effective_accept = _resolve_effective_accept(cfg, accept_hooks)
-
-    specs = _parse_hooks_block(cfg.get("hooks"))
-    if not specs:
-        return []
-
-    registered: List[ShellHookSpec] = []
-
-    # Import lazily — avoids circular imports at module-load time.
-    from hermes_cli.plugins import get_plugin_manager
-
-    manager = get_plugin_manager()
-
-    # Idempotence + allowlist read happen under the lock; the TTY
-    # prompt runs outside so other threads aren't parked on a blocking
-    # input().  Mutation re-takes the lock with a defensive idempotence
-    # re-check in case two callers ever race through the prompt.
-    for spec in specs:
-        key = (spec.event, spec.matcher, spec.command)
-        with _registered_lock:
-            if key in _registered:
-                continue
-            already_allowlisted = _is_allowlisted(spec.event, spec.command)
-
-        if not already_allowlisted:
-            if not _prompt_and_record(
-                spec.event, spec.command, accept_hooks=effective_accept,
-            ):
-                logger.warning(
-                    "shell hook for %s (%s) not allowlisted — skipped. "
-                    "Use --accept-hooks / HERMES_ACCEPT_HOOKS=1 / "
-                    "hooks_auto_accept: true, or approve at the TTY "
-                    "prompt next run.",
-                    spec.event, spec.command,
-                )
-                continue
-
-        with _registered_lock:
-            if key in _registered:
-                continue
-            manager._hooks.setdefault(spec.event, []).append(_make_callback(spec))
-            _registered.add(key)
-            registered.append(spec)
-            logger.info(
-                "shell hook registered: %s -> %s (matcher=%s, timeout=%ds)",
-                spec.event, spec.command, spec.matcher, spec.timeout,
-            )
-
-    return registered
-
-
-def iter_configured_hooks(cfg: Optional[Dict[str, Any]]) -> List[ShellHookSpec]:
-    """Return the parsed ``ShellHookSpec`` entries from config without
-    registering anything.  Used by ``hermes hooks list`` and ``doctor``."""
-    if not isinstance(cfg, dict):
-        return []
-    return _parse_hooks_block(cfg.get("hooks"))
-
-
-def reset_for_tests() -> None:
-    """Clear the idempotence set.  Test-only helper."""
-    with _registered_lock:
-        _registered.clear()
-
-
-# ---------------------------------------------------------------------------
-# Config parsing
-# ---------------------------------------------------------------------------
-
-def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
-    """Normalise the ``hooks:`` dict into a flat list of ``ShellHookSpec``.
-
-    Malformed entries warn-and-skip — we never raise from config parsing
-    because a broken hook must not crash the agent.
-    """
-    from hermes_cli.plugins import VALID_HOOKS
-
-    if not isinstance(hooks_cfg, dict):
-        return []
-
-    specs: List[ShellHookSpec] = []
-
-    for event_name, entries in hooks_cfg.items():
-        if event_name not in VALID_HOOKS:
-            suggestion = difflib.get_close_matches(
-                str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
-            )
-            if suggestion:
-                logger.warning(
-                    "unknown hook event %r in hooks: config — did you mean %r?",
-                    event_name, suggestion[0],
-                )
-            else:
-                logger.warning(
-                    "unknown hook event %r in hooks: config (valid: %s)",
-                    event_name, ", ".join(sorted(VALID_HOOKS)),
-                )
-            continue
-
-        if entries is None:
-            continue
-
-        if not isinstance(entries, list):
-            logger.warning(
-                "hooks.%s must be a list of hook definitions; got %s",
-                event_name, type(entries).__name__,
-            )
-            continue
-
-        for i, raw in enumerate(entries):
-            spec = _parse_single_entry(event_name, i, raw)
-            if spec is not None:
-                specs.append(spec)
-
-    return specs
-
-
-def _parse_single_entry(
-    event: str, index: int, raw: Any,
-) -> Optional[ShellHookSpec]:
-    if not isinstance(raw, dict):
-        logger.warning(
-            "hooks.%s[%d] must be a mapping with a 'command' key; got %s",
-            event, index, type(raw).__name__,
-        )
-        return None
-
-    command = raw.get("command")
-    if not isinstance(command, str) or not command.strip():
-        logger.warning(
-            "hooks.%s[%d] is missing a non-empty 'command' field",
-            event, index,
-        )
-        return None
-
-    matcher = raw.get("matcher")
-    if matcher is not None and not isinstance(matcher, str):
-        logger.warning(
-            "hooks.%s[%d].matcher must be a string regex; ignoring",
-            event, index,
-        )
-        matcher = None
-
-    if matcher is not None and event not in ("pre_tool_call", "post_tool_call"):
-        logger.warning(
-            "hooks.%s[%d].matcher=%r will be ignored at runtime — the "
-            "matcher field is only honored for pre_tool_call / "
-            "post_tool_call.  The hook will fire on every %s event.",
-            event, index, matcher, event,
-        )
-        matcher = None
-
-    timeout_raw = raw.get("timeout", DEFAULT_TIMEOUT_SECONDS)
-    try:
-        timeout = int(timeout_raw)
-    except (TypeError, ValueError):
-        logger.warning(
-            "hooks.%s[%d].timeout must be an int (got %r); using default %ds",
-            event, index, timeout_raw, DEFAULT_TIMEOUT_SECONDS,
-        )
-        timeout = DEFAULT_TIMEOUT_SECONDS
-
-    if timeout < 1:
-        logger.warning(
-            "hooks.%s[%d].timeout must be >=1; using default %ds",
-            event, index, DEFAULT_TIMEOUT_SECONDS,
-        )
-        timeout = DEFAULT_TIMEOUT_SECONDS
-
-    if timeout > MAX_TIMEOUT_SECONDS:
-        logger.warning(
-            "hooks.%s[%d].timeout=%ds exceeds max %ds; clamping",
-            event, index, timeout, MAX_TIMEOUT_SECONDS,
-        )
-        timeout = MAX_TIMEOUT_SECONDS
-
-    return ShellHookSpec(
-        event=event,
-        command=command.strip(),
-        matcher=matcher,
-        timeout=timeout,
-    )
-
-
-# ---------------------------------------------------------------------------
-# Subprocess callback
-# ---------------------------------------------------------------------------
-
-_TOP_LEVEL_PAYLOAD_KEYS = {"tool_name", "args", "session_id", "parent_session_id"}
-
-
-def _spawn(spec: ShellHookSpec, stdin_json: str) -> Dict[str, Any]:
-    """Run ``spec.command`` as a subprocess with ``stdin_json`` on stdin.
-
-    Returns a diagnostic dict with the same keys for every outcome
-    (``returncode``, ``stdout``, ``stderr``, ``timed_out``,
-    ``elapsed_seconds``, ``error``).  This is the single place the
-    subprocess is actually invoked — both the live callback path
-    (:func:`_make_callback`) and the CLI test helper (:func:`run_once`)
-    go through it.
-    """
-    result: Dict[str, Any] = {
-        "returncode": None,
-        "stdout": "",
-        "stderr": "",
-        "timed_out": False,
-        "elapsed_seconds": 0.0,
-        "error": None,
-    }
-    try:
-        argv = shlex.split(os.path.expanduser(spec.command))
-    except ValueError as exc:
-        result["error"] = f"command {spec.command!r} cannot be parsed: {exc}"
-        return result
-    if not argv:
-        result["error"] = "empty command"
-        return result
-
-    t0 = time.monotonic()
-    try:
-        proc = subprocess.run(
-            argv,
-            input=stdin_json,
-            capture_output=True,
-            timeout=spec.timeout,
-            text=True,
-            shell=False,
-        )
-    except subprocess.TimeoutExpired:
-        result["timed_out"] = True
-        result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
-        return result
-    except FileNotFoundError:
-        result["error"] = "command not found"
-        return result
-    except PermissionError:
-        result["error"] = "command not executable"
-        return result
-    except Exception as exc:  # pragma: no cover — defensive
-        result["error"] = str(exc)
-        return result
-
-    result["returncode"] = proc.returncode
-    result["stdout"] = proc.stdout or ""
-    result["stderr"] = proc.stderr or ""
-    result["elapsed_seconds"] = round(time.monotonic() - t0, 3)
-    return result
-
-
-def _make_callback(spec: ShellHookSpec) -> Callable[..., Optional[Dict[str, Any]]]:
-    """Build the closure that ``invoke_hook()`` will call per firing."""
-
-    def _callback(**kwargs: Any) -> Optional[Dict[str, Any]]:
-        # Matcher gate — only meaningful for tool-scoped events.
-        if spec.event in ("pre_tool_call", "post_tool_call"):
-            if not spec.matches_tool(kwargs.get("tool_name")):
-                return None
-
-        r = _spawn(spec, _serialize_payload(spec.event, kwargs))
-
-        if r["error"]:
-            logger.warning(
-                "shell hook failed (event=%s command=%s): %s",
-                spec.event, spec.command, r["error"],
-            )
-            return None
-        if r["timed_out"]:
-            logger.warning(
-                "shell hook timed out after %.2fs (event=%s command=%s)",
-                r["elapsed_seconds"], spec.event, spec.command,
-            )
-            return None
-
-        stderr = r["stderr"].strip()
-        if stderr:
-            logger.debug(
-                "shell hook stderr (event=%s command=%s): %s",
-                spec.event, spec.command, stderr[:400],
-            )
-        # Non-zero exits: log but still parse stdout so scripts that
-        # signal failure via exit code can also return a block directive.
-        if r["returncode"] != 0:
-            logger.warning(
-                "shell hook exited %d (event=%s command=%s); stderr=%s",
-                r["returncode"], spec.event, spec.command, stderr[:400],
-            )
-        return _parse_response(spec.event, r["stdout"])
-
-    _callback.__name__ = f"shell_hook[{spec.event}:{spec.command}]"
-    _callback.__qualname__ = _callback.__name__
-    return _callback
-
-
-def _serialize_payload(event: str, kwargs: Dict[str, Any]) -> str:
-    """Render the stdin JSON payload.  Unserialisable values are
-    stringified via ``default=str`` rather than dropped."""
-    extras = {k: v for k, v in kwargs.items() if k not in _TOP_LEVEL_PAYLOAD_KEYS}
-    try:
-        cwd = str(Path.cwd())
-    except OSError:
-        cwd = ""
-    payload = {
-        "hook_event_name": event,
-        "tool_name": kwargs.get("tool_name"),
-        "tool_input": kwargs.get("args") if isinstance(kwargs.get("args"), dict) else None,
-        "session_id": kwargs.get("session_id") or kwargs.get("parent_session_id") or "",
-        "cwd": cwd,
-        "extra": extras,
-    }
-    return json.dumps(payload, ensure_ascii=False, default=str)
-
-
-def _parse_response(event: str, stdout: str) -> Optional[Dict[str, Any]]:
-    """Translate stdout JSON into a Hermes wire-shape dict.
-
-    For ``pre_tool_call`` the Claude-Code-style ``{"decision": "block",
-    "reason": "..."}`` payload is translated into the canonical Hermes
-    ``{"action": "block", "message": "..."}`` shape expected by
-    :func:`hermes_cli.plugins.get_pre_tool_call_block_message`.  This is
-    the single most important correctness invariant in this module —
-    skipping the translation silently breaks every ``pre_tool_call``
-    block directive.
-
-    For ``pre_llm_call``, ``{"context": "..."}`` is passed through
-    unchanged to match the existing plugin-hook contract.
-
-    Anything else returns ``None``.
-    """
-    stdout = (stdout or "").strip()
-    if not stdout:
-        return None
-
-    try:
-        data = json.loads(stdout)
-    except json.JSONDecodeError:
-        logger.warning(
-            "shell hook stdout was not valid JSON (event=%s): %s",
-            event, stdout[:200],
-        )
-        return None
-
-    if not isinstance(data, dict):
-        return None
-
-    if event == "pre_tool_call":
-        if data.get("action") == "block":
-            message = data.get("message") or data.get("reason") or ""
-            if isinstance(message, str) and message:
-                return {"action": "block", "message": message}
-        if data.get("decision") == "block":
-            message = data.get("reason") or data.get("message") or ""
-            if isinstance(message, str) and message:
-                return {"action": "block", "message": message}
-        return None
-
-    context = data.get("context")
-    if isinstance(context, str) and context.strip():
-        return {"context": context}
-
-    return None
-
-
-# ---------------------------------------------------------------------------
-# Allowlist / consent
-# ---------------------------------------------------------------------------
-
-def allowlist_path() -> Path:
-    """Path to the per-user shell-hook allowlist file."""
-    return get_hermes_home() / ALLOWLIST_FILENAME
-
-
-def load_allowlist() -> Dict[str, Any]:
-    """Return the parsed allowlist, or an empty skeleton if absent."""
-    try:
-        raw = json.loads(allowlist_path().read_text())
-    except (FileNotFoundError, json.JSONDecodeError, OSError):
-        return {"approvals": []}
-    if not isinstance(raw, dict):
-        return {"approvals": []}
-    approvals = raw.get("approvals")
-    if not isinstance(approvals, list):
-        raw["approvals"] = []
-    return raw
-
-
-def save_allowlist(data: Dict[str, Any]) -> None:
-    """Atomically persist the allowlist via per-process ``mkstemp`` +
-    ``os.replace``.  Cross-process read-modify-write races are handled
-    by :func:`_locked_update_approvals` (``fcntl.flock``).  On OSError
-    the failure is logged; the in-process hook still registers but
-    the approval won't survive across runs."""
-    p = allowlist_path()
-    try:
-        p.parent.mkdir(parents=True, exist_ok=True)
-        fd, tmp_path = tempfile.mkstemp(
-            prefix=f"{p.name}.", suffix=".tmp", dir=str(p.parent),
-        )
-        try:
-            with os.fdopen(fd, "w") as fh:
-                fh.write(json.dumps(data, indent=2, sort_keys=True))
-            os.replace(tmp_path, p)
-        except Exception:
-            try:
-                os.unlink(tmp_path)
-            except OSError:
-                pass
-            raise
-    except OSError as exc:
-        logger.warning(
-            "Failed to persist shell hook allowlist to %s: %s. "
-            "The approval is in-memory for this run, but the next "
-            "startup will re-prompt (or skip registration on non-TTY "
-            "runs without --accept-hooks / HERMES_ACCEPT_HOOKS).",
-            p, exc,
-        )
-
-
-def _is_allowlisted(event: str, command: str) -> bool:
-    data = load_allowlist()
-    return any(
-        isinstance(e, dict)
-        and e.get("event") == event
-        and e.get("command") == command
-        for e in data.get("approvals", [])
-    )
-
-
-@contextmanager
-def _locked_update_approvals() -> Iterator[Dict[str, Any]]:
-    """Serialise read-modify-write on the allowlist across processes.
-
-    Holds an exclusive ``flock`` on a sibling lock file for the duration
-    of the update so concurrent ``_record_approval``/``revoke`` callers
-    cannot clobber each other's changes (the race Codex reproduced with
-    20–50 simultaneous writers).  Falls back to an in-process lock on
-    platforms without ``fcntl``.
-    """
-    p = allowlist_path()
-    p.parent.mkdir(parents=True, exist_ok=True)
-    lock_path = p.with_suffix(p.suffix + ".lock")
-
-    if fcntl is None:  # pragma: no cover — non-POSIX fallback
-        with _allowlist_write_lock:
-            data = load_allowlist()
-            yield data
-            save_allowlist(data)
-        return
-
-    with open(lock_path, "a+") as lock_fh:
-        fcntl.flock(lock_fh.fileno(), fcntl.LOCK_EX)
-        try:
-            data = load_allowlist()
-            yield data
-            save_allowlist(data)
-        finally:
-            fcntl.flock(lock_fh.fileno(), fcntl.LOCK_UN)
-
-
-def _prompt_and_record(
-    event: str, command: str, *, accept_hooks: bool,
-) -> bool:
-    """Decide whether to approve an unseen ``(event, command)`` pair.
-    Returns ``True`` iff the approval was granted and recorded.
-    """
-    if accept_hooks:
-        _record_approval(event, command)
-        logger.info(
-            "shell hook auto-approved via --accept-hooks / env / config: "
-            "%s -> %s", event, command,
-        )
-        return True
-
-    if not sys.stdin.isatty():
-        return False
-
-    print(
-        f"\n⚠ Hermes is about to register a shell hook that will run a\n"
-        f"  command on your behalf.\n\n"
-        f"    Event:   {event}\n"
-        f"    Command: {command}\n\n"
-        f"  Commands run with your full user credentials.  Only approve\n"
-        f"  commands you trust."
-    )
-    try:
-        answer = input("Allow this hook to run? [y/N]: ").strip().lower()
-    except (EOFError, KeyboardInterrupt):
-        print()  # keep the terminal tidy after ^C
-        return False
-
-    if answer in ("y", "yes"):
-        _record_approval(event, command)
-        return True
-
-    return False
-
-
-def _record_approval(event: str, command: str) -> None:
-    entry = {
-        "event": event,
-        "command": command,
-        "approved_at": _utc_now_iso(),
-        "script_mtime_at_approval": script_mtime_iso(command),
-    }
-    with _locked_update_approvals() as data:
-        data["approvals"] = [
-            e for e in data.get("approvals", [])
-            if not (
-                isinstance(e, dict)
-                and e.get("event") == event
-                and e.get("command") == command
-            )
-        ] + [entry]
-
-
-def _utc_now_iso() -> str:
-    return datetime.now(tz=timezone.utc).isoformat().replace("+00:00", "Z")
-
-
-def revoke(command: str) -> int:
-    """Remove every allowlist entry matching ``command``.
-
-    Returns the number of entries removed.  Does not unregister any
-    callbacks that are already live on the plugin manager in the current
-    process — restart the CLI / gateway to drop them.
-    """
-    with _locked_update_approvals() as data:
-        before = len(data.get("approvals", []))
-        data["approvals"] = [
-            e for e in data.get("approvals", [])
-            if not (isinstance(e, dict) and e.get("command") == command)
-        ]
-        after = len(data["approvals"])
-    return before - after
-
-
-_SCRIPT_EXTENSIONS: Tuple[str, ...] = (
-    ".sh", ".bash", ".zsh", ".fish",
-    ".py", ".pyw",
-    ".rb", ".pl", ".lua",
-    ".js", ".mjs", ".cjs", ".ts",
-)
-
-
-def _command_script_path(command: str) -> str:
-    """Return the script path from ``command`` for doctor / drift checks.
-
-    Prefers a token ending in a known script extension, then a token
-    containing ``/`` or leading ``~``, then the first token.  Handles
-    ``python3 /path/hook.py``, ``/usr/bin/env bash hook.sh``, and the
-    common bare-path form.
-    """
-    try:
-        parts = shlex.split(command)
-    except ValueError:
-        return command
-    if not parts:
-        return command
-    for part in parts:
-        if part.lower().endswith(_SCRIPT_EXTENSIONS):
-            return part
-    for part in parts:
-        if "/" in part or part.startswith("~"):
-            return part
-    return parts[0]
-
-
-# ---------------------------------------------------------------------------
-# Helpers for accept-hooks resolution
-# ---------------------------------------------------------------------------
-
-def _resolve_effective_accept(
-    cfg: Dict[str, Any], accept_hooks_arg: bool,
-) -> bool:
-    """Combine all three opt-in channels into a single boolean.
-
-    Precedence (any truthy source flips us on):
-      1. ``--accept-hooks`` flag (CLI) / explicit argument
-      2. ``HERMES_ACCEPT_HOOKS`` env var
-      3. ``hooks_auto_accept: true`` in ``cli-config.yaml``
-    """
-    if accept_hooks_arg:
-        return True
-    env = os.environ.get("HERMES_ACCEPT_HOOKS", "").strip().lower()
-    if env in ("1", "true", "yes", "on"):
-        return True
-    cfg_val = cfg.get("hooks_auto_accept", False)
-    if isinstance(cfg_val, bool):
-        return cfg_val
-    if isinstance(cfg_val, str):
-        return cfg_val.strip().lower() in ("1", "true", "yes", "on")
-    return False
-
-
-# ---------------------------------------------------------------------------
-# Introspection (used by `hermes hooks` CLI)
-# ---------------------------------------------------------------------------
-
-def allowlist_entry_for(event: str, command: str) -> Optional[Dict[str, Any]]:
-    """Return the allowlist record for this pair, if any."""
-    for e in load_allowlist().get("approvals", []):
-        if (
-            isinstance(e, dict)
-            and e.get("event") == event
-            and e.get("command") == command
-        ):
-            return e
-    return None
-
-
-def script_mtime_iso(command: str) -> Optional[str]:
-    """ISO-8601 mtime of the resolved script path, or ``None`` if the
-    script is missing."""
-    path = _command_script_path(command)
-    if not path:
-        return None
-    try:
-        expanded = os.path.expanduser(path)
-        return datetime.fromtimestamp(
-            os.path.getmtime(expanded), tz=timezone.utc,
-        ).isoformat().replace("+00:00", "Z")
-    except OSError:
-        return None
-
-
-def script_is_executable(command: str) -> bool:
-    """Return ``True`` iff ``command`` is runnable as configured.
-
-    For a bare invocation (``/path/hook.sh``) the script itself must be
-    executable.  For interpreter-prefixed commands (``python3
-    /path/hook.py``, ``/usr/bin/env bash hook.sh``) the script just has
-    to be readable — the interpreter doesn't care about the ``X_OK``
-    bit.  Mirrors what ``_spawn`` would actually do at runtime."""
-    path = _command_script_path(command)
-    if not path:
-        return False
-    expanded = os.path.expanduser(path)
-    if not os.path.isfile(expanded):
-        return False
-    try:
-        argv = shlex.split(command)
-    except ValueError:
-        return False
-    is_bare_invocation = bool(argv) and argv[0] == path
-    required = os.X_OK if is_bare_invocation else os.R_OK
-    return os.access(expanded, required)
-
-
-def run_once(
-    spec: ShellHookSpec, kwargs: Dict[str, Any],
-) -> Dict[str, Any]:
-    """Fire a single shell-hook invocation with a synthetic payload.
-    Used by ``hermes hooks test`` and ``hermes hooks doctor``.
-
-    ``kwargs`` is the same dict that :func:`hermes_cli.plugins.invoke_hook`
-    would pass at runtime.  It is routed through :func:`_serialize_payload`
-    so the synthetic stdin exactly matches what a real hook firing would
-    produce — otherwise scripts tested via ``hermes hooks test`` could
-    diverge silently from production behaviour.
-
-    Returns the :func:`_spawn` diagnostic dict plus a ``parsed`` field
-    holding the canonical Hermes-wire-shape response."""
-    stdin_json = _serialize_payload(spec.event, kwargs)
-    result = _spawn(spec, stdin_json)
-    result["parsed"] = _parse_response(spec.event, result["stdout"])
-    return result
@@ -1,29 +1,49 @@
-"""Shared slash command helpers for skills.
+"""Shared slash command helpers for skills and built-in prompt-style modes.

 Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
-can invoke skills via /skill-name commands.
+can invoke skills via /skill-name commands and prompt-only built-ins like
+/plan.
 """

 import json
 import logging
 import re
+from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional

 from hermes_constants import display_hermes_home
-from agent.skill_preprocessing import (
-    expand_inline_shell as _expand_inline_shell,
-    load_skills_config as _load_skills_config,
-    substitute_template_vars as _substitute_template_vars,
-)

 logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
+_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 # Patterns for sanitizing skill names into clean hyphen-separated slugs.
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

+
+def build_plan_path(
+    user_instruction: str = "",
+    *,
+    now: datetime | None = None,
+) -> Path:
+    """Return the default workspace-relative markdown path for a /plan invocation.
+
+    Relative paths are intentional: file tools are task/backend-aware and resolve
+    them against the active working directory for local, docker, ssh, modal,
+    daytona, and similar terminal backends. That keeps the plan with the active
+    workspace instead of the Hermes host's global home directory.
+    """
+    slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
+    slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
+    if slug:
+        slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
+    slug = slug or "conversation-plan"
+    timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
+    return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
+
+
 def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
    """Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
    raw_identifier = (skill_identifier or "").strip()
@@ -42,9 +62,7 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
        else:
            normalized = raw_identifier.lstrip("/")

-        loaded_skill = json.loads(
-            skill_view(normalized, task_id=task_id, preprocess=False)
-        )
+        loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
    except Exception:
        return None

@@ -115,36 +133,14 @@ def _build_skill_message(
    activation_note: str,
    user_instruction: str = "",
    runtime_note: str = "",
-    session_id: str | None = None,
 ) -> str:
    """Format a loaded skill into a user/system message payload."""
    from tools.skills_tool import SKILLS_DIR

    content = str(loaded_skill.get("content") or "")

-    # ── Template substitution and inline-shell expansion ──
-    # Done before anything else so downstream blocks (setup notes,
-    # supporting-file hints) see the expanded content.
-    skills_cfg = _load_skills_config()
-    if skills_cfg.get("template_vars", True):
-        content = _substitute_template_vars(content, skill_dir, session_id)
-    if skills_cfg.get("inline_shell", False):
-        timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
-        content = _expand_inline_shell(content, skill_dir, timeout)
-
    parts = [activation_note, "", content.strip()]

-    # ── Inject the absolute skill directory so the agent can reference
-    #    bundled scripts without an extra skill_view() round-trip. ──
-    if skill_dir:
-        parts.append("")
-        parts.append(f"[Skill directory: {skill_dir}]")
-        parts.append(
-            "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
-            "`templates/config.yaml`) against that directory, then run them "
-            "with the terminal tool using the absolute path."
-        )
-
    # ── Inject resolved skill config values ──
    _inject_skill_config(loaded_skill, parts)

@@ -192,13 +188,11 @@ def _build_skill_message(
            # Skill is from an external dir — use the skill name instead
            skill_view_target = skill_dir.name
        parts.append("")
-        parts.append("[This skill has supporting files:]")
+        parts.append("[This skill has supporting files you can load with the skill_view tool:]")
        for sf in supporting:
-            parts.append(f"- {sf}  ->  {skill_dir / sf}")
+            parts.append(f"- {sf}")
        parts.append(
-            f'\nLoad any of these with skill_view(name="{skill_view_target}", '
-            f'file_path="<path>"), or run scripts directly by absolute path '
-            f"(e.g. `node {skill_dir}/scripts/foo.js`)."
+            f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
        )

    if user_instruction:
@@ -222,7 +216,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    _skill_commands = {}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
-        from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
+        from agent.skill_utils import get_external_skills_dirs
        disabled = _get_disabled_skill_names()
        seen_names: set = set()

@@ -233,7 +227,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
        dirs_to_scan.extend(get_external_skills_dirs())

        for scan_dir in dirs_to_scan:
-            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
+            for skill_md in scan_dir.rglob("SKILL.md"):
                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                    continue
                try:
@@ -329,7 +323,7 @@ def build_skill_invocation_message(

    loaded_skill, skill_dir, skill_name = loaded
    activation_note = (
-        f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
+        f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
        "you to follow its instructions. The full skill content is loaded below.]"
    )
    return _build_skill_message(
@@ -338,7 +332,6 @@ def build_skill_invocation_message(
        activation_note,
        user_instruction=user_instruction,
        runtime_note=runtime_note,
-        session_id=task_id,
    )


@@ -368,7 +361,7 @@ def build_preloaded_skills_prompt(

        loaded_skill, skill_dir, skill_name = loaded
        activation_note = (
-            f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
+            f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
            "preloaded. Treat its instructions as active guidance for the duration of this "
            "session unless the user overrides them.]"
        )
@@ -377,7 +370,6 @@ def build_preloaded_skills_prompt(
                loaded_skill,
                skill_dir,
                activation_note,
-                session_id=task_id,
            )
        )
        loaded_names.append(skill_name)
@@ -1,131 +0,0 @@
-"""Shared SKILL.md preprocessing helpers."""
-
-import logging
-import re
-import subprocess
-from pathlib import Path
-
-logger = logging.getLogger(__name__)
-
-# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
-# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
-# left as-is so the user can debug them.
-_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
-
-# Matches inline shell snippets like:  !`date +%Y-%m-%d`
-# Non-greedy, single-line only -- no newlines inside the backticks.
-_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
-
-# Cap inline-shell output so a runaway command can't blow out the context.
-_INLINE_SHELL_MAX_OUTPUT = 4000
-
-
-def load_skills_config() -> dict:
-    """Load the ``skills`` section of config.yaml (best-effort)."""
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config() or {}
-        skills_cfg = cfg.get("skills")
-        if isinstance(skills_cfg, dict):
-            return skills_cfg
-    except Exception:
-        logger.debug("Could not read skills config", exc_info=True)
-    return {}
-
-
-def substitute_template_vars(
-    content: str,
-    skill_dir: Path | None,
-    session_id: str | None,
-) -> str:
-    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
-
-    Only substitutes tokens for which a concrete value is available --
-    unresolved tokens are left in place so the author can spot them.
-    """
-    if not content:
-        return content
-
-    skill_dir_str = str(skill_dir) if skill_dir else None
-
-    def _replace(match: re.Match) -> str:
-        token = match.group(1)
-        if token == "HERMES_SKILL_DIR" and skill_dir_str:
-            return skill_dir_str
-        if token == "HERMES_SESSION_ID" and session_id:
-            return str(session_id)
-        return match.group(0)
-
-    return _SKILL_TEMPLATE_RE.sub(_replace, content)
-
-
-def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
-    """Execute a single inline-shell snippet and return its stdout (trimmed).
-
-    Failures return a short ``[inline-shell error: ...]`` marker instead of
-    raising, so one bad snippet can't wreck the whole skill message.
-    """
-    try:
-        completed = subprocess.run(
-            ["bash", "-c", command],
-            cwd=str(cwd) if cwd else None,
-            capture_output=True,
-            text=True,
-            timeout=max(1, int(timeout)),
-            check=False,
-        )
-    except subprocess.TimeoutExpired:
-        return f"[inline-shell timeout after {timeout}s: {command}]"
-    except FileNotFoundError:
-        return "[inline-shell error: bash not found]"
-    except Exception as exc:
-        return f"[inline-shell error: {exc}]"
-
-    output = (completed.stdout or "").rstrip("\n")
-    if not output and completed.stderr:
-        output = completed.stderr.rstrip("\n")
-    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
-        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
-    return output
-
-
-def expand_inline_shell(
-    content: str,
-    skill_dir: Path | None,
-    timeout: int,
-) -> str:
-    """Replace every !`cmd` snippet in ``content`` with its stdout.
-
-    Runs each snippet with the skill directory as CWD so relative paths in
-    the snippet work the way the author expects.
-    """
-    if "!`" not in content:
-        return content
-
-    def _replace(match: re.Match) -> str:
-        cmd = match.group(1).strip()
-        if not cmd:
-            return ""
-        return run_inline_shell(cmd, skill_dir, timeout)
-
-    return _INLINE_SHELL_RE.sub(_replace, content)
-
-
-def preprocess_skill_content(
-    content: str,
-    skill_dir: Path | None,
-    session_id: str | None = None,
-    skills_cfg: dict | None = None,
-) -> str:
-    """Apply configured SKILL.md template and inline-shell preprocessing."""
-    if not content:
-        return content
-
-    cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
-    if cfg.get("template_vars", True):
-        content = substitute_template_vars(content, skill_dir, session_id)
-    if cfg.get("inline_shell", False):
-        timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
-        content = expand_inline_shell(content, skill_dir, timeout)
-    return content
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
    Excludes ``.git``, ``.github``, ``.hub`` directories.
    """
    matches = []
-    for root, dirs, files in os.walk(skills_dir, followlinks=True):
+    for root, dirs, files in os.walk(skills_dir):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
        response = call_llm(
            task="title_generation",
            messages=messages,
-            max_tokens=500,
+            max_tokens=30,
            temperature=0.3,
            timeout=timeout,
        )
@@ -1,56 +0,0 @@
-"""Transport layer types and registry for provider response normalization.
-
-Usage:
-    from agent.transports import get_transport
-    transport = get_transport("anthropic_messages")
-    result = transport.normalize_response(raw_response)
-"""
-
-from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason  # noqa: F401
-
-_REGISTRY: dict = {}
-
-
-def register_transport(api_mode: str, transport_cls: type) -> None:
-    """Register a transport class for an api_mode string."""
-    _REGISTRY[api_mode] = transport_cls
-
-
-def get_transport(api_mode: str):
-    """Get a transport instance for the given api_mode.
-
-    Returns None if no transport is registered for this api_mode.
-    This allows gradual migration — call sites can check for None
-    and fall back to the legacy code path.
-    """
-    cls = _REGISTRY.get(api_mode)
-    if cls is None:
-        # The registry can be partially populated when a specific transport
-        # module was imported directly (for example chat_completions before
-        # codex).  Discover on misses, not only when the registry is empty, so
-        # test/order-dependent imports do not make valid api_modes unavailable.
-        _discover_transports()
-        cls = _REGISTRY.get(api_mode)
-    if cls is None:
-        return None
-    return cls()
-
-
-def _discover_transports() -> None:
-    """Import all transport modules to trigger auto-registration."""
-    try:
-        import agent.transports.anthropic  # noqa: F401
-    except ImportError:
-        pass
-    try:
-        import agent.transports.codex  # noqa: F401
-    except ImportError:
-        pass
-    try:
-        import agent.transports.chat_completions  # noqa: F401
-    except ImportError:
-        pass
-    try:
-        import agent.transports.bedrock  # noqa: F401
-    except ImportError:
-        pass
@@ -1,177 +0,0 @@
-"""Anthropic Messages API transport.
-
-Delegates to the existing adapter functions in agent/anthropic_adapter.py.
-This transport owns format conversion and normalization — NOT client lifecycle.
-"""
-
-from typing import Any, Dict, List, Optional
-
-from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse
-
-
-class AnthropicTransport(ProviderTransport):
-    """Transport for api_mode='anthropic_messages'.
-
-    Wraps the existing functions in anthropic_adapter.py behind the
-    ProviderTransport ABC.  Each method delegates — no logic is duplicated.
-    """
-
-    @property
-    def api_mode(self) -> str:
-        return "anthropic_messages"
-
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI messages to Anthropic (system, messages) tuple.
-
-        kwargs:
-            base_url: Optional[str] — affects thinking signature handling.
-        """
-        from agent.anthropic_adapter import convert_messages_to_anthropic
-
-        base_url = kwargs.get("base_url")
-        return convert_messages_to_anthropic(messages, base_url=base_url)
-
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
-        """Convert OpenAI tool schemas to Anthropic input_schema format."""
-        from agent.anthropic_adapter import convert_tools_to_anthropic
-
-        return convert_tools_to_anthropic(tools)
-
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build Anthropic messages.create() kwargs.
-
-        Calls convert_messages and convert_tools internally.
-
-        params (all optional):
-            max_tokens: int
-            reasoning_config: dict | None
-            tool_choice: str | None
-            is_oauth: bool
-            preserve_dots: bool
-            context_length: int | None
-            base_url: str | None
-            fast_mode: bool
-        """
-        from agent.anthropic_adapter import build_anthropic_kwargs
-
-        return build_anthropic_kwargs(
-            model=model,
-            messages=messages,
-            tools=tools,
-            max_tokens=params.get("max_tokens", 16384),
-            reasoning_config=params.get("reasoning_config"),
-            tool_choice=params.get("tool_choice"),
-            is_oauth=params.get("is_oauth", False),
-            preserve_dots=params.get("preserve_dots", False),
-            context_length=params.get("context_length"),
-            base_url=params.get("base_url"),
-            fast_mode=params.get("fast_mode", False),
-        )
-
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize Anthropic response to NormalizedResponse.
-
-        Parses content blocks (text, thinking, tool_use), maps stop_reason
-        to OpenAI finish_reason, and collects reasoning_details in provider_data.
-        """
-        import json
-        from agent.anthropic_adapter import _to_plain_data
-        from agent.transports.types import ToolCall
-
-        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        _MCP_PREFIX = "mcp_"
-
-        text_parts = []
-        reasoning_parts = []
-        reasoning_details = []
-        tool_calls = []
-
-        for block in response.content:
-            if block.type == "text":
-                text_parts.append(block.text)
-            elif block.type == "thinking":
-                reasoning_parts.append(block.thinking)
-                block_dict = _to_plain_data(block)
-                if isinstance(block_dict, dict):
-                    reasoning_details.append(block_dict)
-            elif block.type == "tool_use":
-                name = block.name
-                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
-                    name = name[len(_MCP_PREFIX):]
-                tool_calls.append(
-                    ToolCall(
-                        id=block.id,
-                        name=name,
-                        arguments=json.dumps(block.input),
-                    )
-                )
-
-        finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
-
-        provider_data = {}
-        if reasoning_details:
-            provider_data["reasoning_details"] = reasoning_details
-
-        return NormalizedResponse(
-            content="\n".join(text_parts) if text_parts else None,
-            tool_calls=tool_calls or None,
-            finish_reason=finish_reason,
-            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
-            usage=None,
-            provider_data=provider_data or None,
-        )
-
-    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid.
-
-        An empty content list is legitimate when ``stop_reason == "end_turn"``
-        — the model's canonical way of signalling "nothing more to add" after
-        a tool turn that already delivered the user-facing text. Treating it
-        as invalid falsely retries a completed response.
-        """
-        if response is None:
-            return False
-        content_blocks = getattr(response, "content", None)
-        if not isinstance(content_blocks, list):
-            return False
-        if not content_blocks:
-            return getattr(response, "stop_reason", None) == "end_turn"
-        return True
-
-    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
-        """Extract Anthropic cache_read and cache_creation token counts."""
-        usage = getattr(response, "usage", None)
-        if usage is None:
-            return None
-        cached = getattr(usage, "cache_read_input_tokens", 0) or 0
-        written = getattr(usage, "cache_creation_input_tokens", 0) or 0
-        if cached or written:
-            return {"cached_tokens": cached, "creation_tokens": written}
-        return None
-
-    # Promote the adapter's canonical mapping to module level so it's shared
-    _STOP_REASON_MAP = {
-        "end_turn": "stop",
-        "tool_use": "tool_calls",
-        "max_tokens": "length",
-        "stop_sequence": "stop",
-        "refusal": "content_filter",
-        "model_context_window_exceeded": "length",
-    }
-
-    def map_finish_reason(self, raw_reason: str) -> str:
-        """Map Anthropic stop_reason to OpenAI finish_reason."""
-        return self._STOP_REASON_MAP.get(raw_reason, "stop")
-
-
-# Auto-register on import
-from agent.transports import register_transport  # noqa: E402
-
-register_transport("anthropic_messages", AnthropicTransport)
@@ -1,89 +0,0 @@
-"""Abstract base for provider transports.
-
-A transport owns the data path for one api_mode:
-  convert_messages → convert_tools → build_kwargs → normalize_response
-
-It does NOT own: client construction, streaming, credential refresh,
-prompt caching, interrupt handling, or retry logic.  Those stay on AIAgent.
-"""
-
-from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional
-
-from agent.transports.types import NormalizedResponse
-
-
-class ProviderTransport(ABC):
-    """Base class for provider-specific format conversion and normalization."""
-
-    @property
-    @abstractmethod
-    def api_mode(self) -> str:
-        """The api_mode string this transport handles (e.g. 'anthropic_messages')."""
-        ...
-
-    @abstractmethod
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI-format messages to provider-native format.
-
-        Returns provider-specific structure (e.g. (system, messages) for Anthropic,
-        or the messages list unchanged for chat_completions).
-        """
-        ...
-
-    @abstractmethod
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
-        """Convert OpenAI-format tool definitions to provider-native format.
-
-        Returns provider-specific tool list (e.g. Anthropic input_schema format).
-        """
-        ...
-
-    @abstractmethod
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build the complete API call kwargs dict.
-
-        This is the primary entry point — it typically calls convert_messages()
-        and convert_tools() internally, then adds model-specific config.
-
-        Returns a dict ready to be passed to the provider's SDK client.
-        """
-        ...
-
-    @abstractmethod
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize a raw provider response to the shared NormalizedResponse type.
-
-        This is the only method that returns a transport-layer type.
-        """
-        ...
-
-    def validate_response(self, response: Any) -> bool:
-        """Optional: check if the raw response is structurally valid.
-
-        Returns True if valid, False if the response should be treated as invalid.
-        Default implementation always returns True.
-        """
-        return True
-
-    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
-        """Optional: extract provider-specific cache hit/creation stats.
-
-        Returns dict with 'cached_tokens' and 'creation_tokens', or None.
-        Default returns None.
-        """
-        return None
-
-    def map_finish_reason(self, raw_reason: str) -> str:
-        """Optional: map provider-specific stop reason to OpenAI equivalent.
-
-        Default returns the raw reason unchanged.  Override for providers
-        with different stop reason vocabularies.
-        """
-        return raw_reason
@@ -1,154 +0,0 @@
-"""AWS Bedrock Converse API transport.
-
-Delegates to the existing adapter functions in agent/bedrock_adapter.py.
-Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport
-owns format conversion and normalization, while client construction and
-boto3 calls stay on AIAgent.
-"""
-
-from typing import Any, Dict, List, Optional
-
-from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
-
-
-class BedrockTransport(ProviderTransport):
-    """Transport for api_mode='bedrock_converse'."""
-
-    @property
-    def api_mode(self) -> str:
-        return "bedrock_converse"
-
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI messages to Bedrock Converse format."""
-        from agent.bedrock_adapter import convert_messages_to_converse
-        return convert_messages_to_converse(messages)
-
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
-        """Convert OpenAI tool schemas to Bedrock Converse toolConfig."""
-        from agent.bedrock_adapter import convert_tools_to_converse
-        return convert_tools_to_converse(tools)
-
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build Bedrock converse() kwargs.
-
-        Calls convert_messages and convert_tools internally.
-
-        params:
-            max_tokens: int — output token limit (default 4096)
-            temperature: float | None
-            guardrail_config: dict | None — Bedrock guardrails
-            region: str — AWS region (default 'us-east-1')
-        """
-        from agent.bedrock_adapter import build_converse_kwargs
-
-        region = params.get("region", "us-east-1")
-        guardrail = params.get("guardrail_config")
-
-        kwargs = build_converse_kwargs(
-            model=model,
-            messages=messages,
-            tools=tools,
-            max_tokens=params.get("max_tokens", 4096),
-            temperature=params.get("temperature"),
-            guardrail_config=guardrail,
-        )
-        # Sentinel keys for dispatch — agent pops these before the boto3 call
-        kwargs["__bedrock_converse__"] = True
-        kwargs["__bedrock_region__"] = region
-        return kwargs
-
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize Bedrock response to NormalizedResponse.
-
-        Handles two shapes:
-        1. Raw boto3 dict (from direct converse() calls)
-        2. Already-normalized SimpleNamespace with .choices (from dispatch site)
-        """
-        from agent.bedrock_adapter import normalize_converse_response
-
-        # Normalize to OpenAI-compatible SimpleNamespace
-        if hasattr(response, "choices") and response.choices:
-            # Already normalized at dispatch site
-            ns = response
-        else:
-            # Raw boto3 dict
-            ns = normalize_converse_response(response)
-
-        choice = ns.choices[0]
-        msg = choice.message
-        finish_reason = choice.finish_reason or "stop"
-
-        tool_calls = None
-        if msg.tool_calls:
-            tool_calls = [
-                ToolCall(
-                    id=tc.id,
-                    name=tc.function.name,
-                    arguments=tc.function.arguments,
-                )
-                for tc in msg.tool_calls
-            ]
-
-        usage = None
-        if hasattr(ns, "usage") and ns.usage:
-            u = ns.usage
-            usage = Usage(
-                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
-                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
-                total_tokens=getattr(u, "total_tokens", 0) or 0,
-            )
-
-        reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None)
-
-        return NormalizedResponse(
-            content=msg.content,
-            tool_calls=tool_calls,
-            finish_reason=finish_reason,
-            reasoning=reasoning,
-            usage=usage,
-        )
-
-    def validate_response(self, response: Any) -> bool:
-        """Check Bedrock response structure.
-
-        After normalize_converse_response, the response has OpenAI-compatible
-        .choices — same check as chat_completions.
-        """
-        if response is None:
-            return False
-        # Raw Bedrock dict response — check for 'output' key
-        if isinstance(response, dict):
-            return "output" in response
-        # Already-normalized SimpleNamespace
-        if hasattr(response, "choices"):
-            return bool(response.choices)
-        return False
-
-    def map_finish_reason(self, raw_reason: str) -> str:
-        """Map Bedrock stop reason to OpenAI finish_reason.
-
-        The adapter already does this mapping inside normalize_converse_response,
-        so this is only used for direct access to raw responses.
-        """
-        _MAP = {
-            "end_turn": "stop",
-            "tool_use": "tool_calls",
-            "max_tokens": "length",
-            "stop_sequence": "stop",
-            "guardrail_intervened": "content_filter",
-            "content_filtered": "content_filter",
-        }
-        return _MAP.get(raw_reason, "stop")
-
-
-# Auto-register on import
-from agent.transports import register_transport  # noqa: E402
-
-register_transport("bedrock_converse", BedrockTransport)
@@ -1,394 +0,0 @@
-"""OpenAI Chat Completions transport.
-
-Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible
-providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.).
-
-Messages and tools are already in OpenAI format — convert_messages and
-convert_tools are near-identity.  The complexity lives in build_kwargs
-which has provider-specific conditionals for max_tokens defaults,
-reasoning configuration, temperature handling, and extra_body assembly.
-"""
-
-import copy
-from typing import Any, Dict, List, Optional
-
-from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
-from agent.prompt_builder import DEVELOPER_ROLE_MODELS
-from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
-
-
-class ChatCompletionsTransport(ProviderTransport):
-    """Transport for api_mode='chat_completions'.
-
-    The default path for OpenAI-compatible providers.
-    """
-
-    @property
-    def api_mode(self) -> str:
-        return "chat_completions"
-
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
-        """Messages are already in OpenAI format — sanitize Codex leaks only.
-
-        Strips Codex Responses API fields (``codex_reasoning_items`` /
-        ``codex_message_items`` on the message, ``call_id``/``response_item_id``
-        on tool_calls) that strict chat-completions providers reject with 400/422.
-        """
-        needs_sanitize = False
-        for msg in messages:
-            if not isinstance(msg, dict):
-                continue
-            if "codex_reasoning_items" in msg or "codex_message_items" in msg:
-                needs_sanitize = True
-                break
-            tool_calls = msg.get("tool_calls")
-            if isinstance(tool_calls, list):
-                for tc in tool_calls:
-                    if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
-                        needs_sanitize = True
-                        break
-                if needs_sanitize:
-                    break
-
-        if not needs_sanitize:
-            return messages
-
-        sanitized = copy.deepcopy(messages)
-        for msg in sanitized:
-            if not isinstance(msg, dict):
-                continue
-            msg.pop("codex_reasoning_items", None)
-            msg.pop("codex_message_items", None)
-            tool_calls = msg.get("tool_calls")
-            if isinstance(tool_calls, list):
-                for tc in tool_calls:
-                    if isinstance(tc, dict):
-                        tc.pop("call_id", None)
-                        tc.pop("response_item_id", None)
-        return sanitized
-
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-        """Tools are already in OpenAI format — identity."""
-        return tools
-
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build chat.completions.create() kwargs.
-
-        This is the most complex transport method — it handles ~16 providers
-        via params rather than subclasses.
-
-        params:
-            timeout: float — API call timeout
-            max_tokens: int | None — user-configured max tokens
-            ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
-            max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
-            reasoning_config: dict | None
-            request_overrides: dict | None
-            session_id: str | None
-            qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
-            model_lower: str — lowercase model name for pattern matching
-            # Provider detection flags (all optional, default False)
-            is_openrouter: bool
-            is_nous: bool
-            is_qwen_portal: bool
-            is_github_models: bool
-            is_nvidia_nim: bool
-            is_kimi: bool
-            is_custom_provider: bool
-            ollama_num_ctx: int | None
-            # Provider routing
-            provider_preferences: dict | None
-            # Qwen-specific
-            qwen_prepare_fn: callable | None — runs AFTER codex sanitization
-            qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
-            # Temperature
-            fixed_temperature: Any — from _fixed_temperature_for_model()
-            omit_temperature: bool
-            # Reasoning
-            supports_reasoning: bool
-            github_reasoning_extra: dict | None
-            # Claude on OpenRouter/Nous max output
-            anthropic_max_output: int | None
-            # Extra
-            extra_body_additions: dict | None — pre-built extra_body entries
-        """
-        # Codex sanitization: drop reasoning_items / call_id / response_item_id
-        sanitized = self.convert_messages(messages)
-
-        # Qwen portal prep AFTER codex sanitization.  If sanitize already
-        # deepcopied, reuse that copy via the in-place variant to avoid a
-        # second deepcopy.
-        is_qwen = params.get("is_qwen_portal", False)
-        if is_qwen:
-            qwen_prep = params.get("qwen_prepare_fn")
-            qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
-            if sanitized is messages:
-                if qwen_prep is not None:
-                    sanitized = qwen_prep(sanitized)
-            else:
-                # Already deepcopied — transform in place
-                if qwen_prep_inplace is not None:
-                    qwen_prep_inplace(sanitized)
-                elif qwen_prep is not None:
-                    sanitized = qwen_prep(sanitized)
-
-        # Developer role swap for GPT-5/Codex models
-        model_lower = params.get("model_lower", (model or "").lower())
-        if (
-            sanitized
-            and isinstance(sanitized[0], dict)
-            and sanitized[0].get("role") == "system"
-            and any(p in model_lower for p in DEVELOPER_ROLE_MODELS)
-        ):
-            sanitized = list(sanitized)
-            sanitized[0] = {**sanitized[0], "role": "developer"}
-
-        api_kwargs: Dict[str, Any] = {
-            "model": model,
-            "messages": sanitized,
-        }
-
-        timeout = params.get("timeout")
-        if timeout is not None:
-            api_kwargs["timeout"] = timeout
-
-        # Temperature
-        fixed_temp = params.get("fixed_temperature")
-        omit_temp = params.get("omit_temperature", False)
-        if omit_temp:
-            api_kwargs.pop("temperature", None)
-        elif fixed_temp is not None:
-            api_kwargs["temperature"] = fixed_temp
-
-        # Qwen metadata (caller precomputes {sessionId, promptId})
-        qwen_meta = params.get("qwen_session_metadata")
-        if qwen_meta and is_qwen:
-            api_kwargs["metadata"] = qwen_meta
-
-        # Tools
-        if tools:
-            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
-            # tool parameters here keeps aggregator routes (Nous, OpenRouter,
-            # etc.) compatible, in addition to direct moonshot.ai endpoints.
-            if is_moonshot_model(model):
-                tools = sanitize_moonshot_tools(tools)
-            api_kwargs["tools"] = tools
-
-        # max_tokens resolution — priority: ephemeral > user > provider default
-        max_tokens_fn = params.get("max_tokens_param_fn")
-        ephemeral = params.get("ephemeral_max_output_tokens")
-        max_tokens = params.get("max_tokens")
-        anthropic_max_out = params.get("anthropic_max_output")
-        is_nvidia_nim = params.get("is_nvidia_nim", False)
-        is_kimi = params.get("is_kimi", False)
-        reasoning_config = params.get("reasoning_config")
-
-        if ephemeral is not None and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(ephemeral))
-        elif max_tokens is not None and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(max_tokens))
-        elif is_nvidia_nim and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(16384))
-        elif is_qwen and max_tokens_fn:
-            api_kwargs.update(max_tokens_fn(65536))
-        elif is_kimi and max_tokens_fn:
-            # Kimi/Moonshot: 32000 matches Kimi CLI's default
-            api_kwargs.update(max_tokens_fn(32000))
-        elif anthropic_max_out is not None:
-            api_kwargs["max_tokens"] = anthropic_max_out
-
-        # Kimi: top-level reasoning_effort (unless thinking disabled)
-        if is_kimi:
-            _kimi_thinking_off = bool(
-                reasoning_config
-                and isinstance(reasoning_config, dict)
-                and reasoning_config.get("enabled") is False
-            )
-            if not _kimi_thinking_off:
-                _kimi_effort = "medium"
-                if reasoning_config and isinstance(reasoning_config, dict):
-                    _e = (reasoning_config.get("effort") or "").strip().lower()
-                    if _e in ("low", "medium", "high"):
-                        _kimi_effort = _e
-                api_kwargs["reasoning_effort"] = _kimi_effort
-
-        # extra_body assembly
-        extra_body: Dict[str, Any] = {}
-
-        is_openrouter = params.get("is_openrouter", False)
-        is_nous = params.get("is_nous", False)
-        is_github_models = params.get("is_github_models", False)
-
-        provider_prefs = params.get("provider_preferences")
-        if provider_prefs and is_openrouter:
-            extra_body["provider"] = provider_prefs
-
-        # Kimi extra_body.thinking
-        if is_kimi:
-            _kimi_thinking_enabled = True
-            if reasoning_config and isinstance(reasoning_config, dict):
-                if reasoning_config.get("enabled") is False:
-                    _kimi_thinking_enabled = False
-            extra_body["thinking"] = {
-                "type": "enabled" if _kimi_thinking_enabled else "disabled",
-            }
-
-        # Reasoning
-        if params.get("supports_reasoning", False):
-            if is_github_models:
-                gh_reasoning = params.get("github_reasoning_extra")
-                if gh_reasoning is not None:
-                    extra_body["reasoning"] = gh_reasoning
-            else:
-                if reasoning_config is not None:
-                    rc = dict(reasoning_config)
-                    if is_nous and rc.get("enabled") is False:
-                        pass  # omit for Nous when disabled
-                    else:
-                        extra_body["reasoning"] = rc
-                else:
-                    extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-
-        if is_nous:
-            extra_body["tags"] = ["product=hermes-agent"]
-
-        # Ollama num_ctx
-        ollama_ctx = params.get("ollama_num_ctx")
-        if ollama_ctx:
-            options = extra_body.get("options", {})
-            options["num_ctx"] = ollama_ctx
-            extra_body["options"] = options
-
-        # Ollama/custom think=false
-        if params.get("is_custom_provider", False):
-            if reasoning_config and isinstance(reasoning_config, dict):
-                _effort = (reasoning_config.get("effort") or "").strip().lower()
-                _enabled = reasoning_config.get("enabled", True)
-                if _effort == "none" or _enabled is False:
-                    extra_body["think"] = False
-
-        if is_qwen:
-            extra_body["vl_high_resolution_images"] = True
-
-        # Merge any pre-built extra_body additions
-        additions = params.get("extra_body_additions")
-        if additions:
-            extra_body.update(additions)
-
-        if extra_body:
-            api_kwargs["extra_body"] = extra_body
-
-        # Request overrides last (service_tier etc.)
-        overrides = params.get("request_overrides")
-        if overrides:
-            api_kwargs.update(overrides)
-
-        return api_kwargs
-
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize OpenAI ChatCompletion to NormalizedResponse.
-
-        For chat_completions, this is near-identity — the response is already
-        in OpenAI format.  extra_content on tool_calls (Gemini thought_signature)
-        is preserved via ToolCall.provider_data.  reasoning_details (OpenRouter
-        unified format) and reasoning_content (DeepSeek/Moonshot) are also
-        preserved for downstream replay.
-        """
-        choice = response.choices[0]
-        msg = choice.message
-        finish_reason = choice.finish_reason or "stop"
-
-        tool_calls = None
-        if msg.tool_calls:
-            tool_calls = []
-            for tc in msg.tool_calls:
-                # Preserve provider-specific extras on the tool call.
-                # Gemini 3 thinking models attach extra_content with
-                # thought_signature — without replay on the next turn the API
-                # rejects the request with 400.
-                tc_provider_data: Dict[str, Any] = {}
-                extra = getattr(tc, "extra_content", None)
-                if extra is None and hasattr(tc, "model_extra"):
-                    extra = (tc.model_extra or {}).get("extra_content")
-                if extra is not None:
-                    if hasattr(extra, "model_dump"):
-                        try:
-                            extra = extra.model_dump()
-                        except Exception:
-                            pass
-                    tc_provider_data["extra_content"] = extra
-                tool_calls.append(ToolCall(
-                    id=tc.id,
-                    name=tc.function.name,
-                    arguments=tc.function.arguments,
-                    provider_data=tc_provider_data or None,
-                ))
-
-        usage = None
-        if hasattr(response, "usage") and response.usage:
-            u = response.usage
-            usage = Usage(
-                prompt_tokens=getattr(u, "prompt_tokens", 0) or 0,
-                completion_tokens=getattr(u, "completion_tokens", 0) or 0,
-                total_tokens=getattr(u, "total_tokens", 0) or 0,
-            )
-
-        # Preserve reasoning fields separately.  DeepSeek/Moonshot use
-        # ``reasoning_content``; others use ``reasoning``.  Downstream code
-        # (_extract_reasoning, thinking-prefill retry) reads both distinctly,
-        # so keep them apart in provider_data rather than merging.
-        reasoning = getattr(msg, "reasoning", None)
-        reasoning_content = getattr(msg, "reasoning_content", None)
-
-        provider_data: Dict[str, Any] = {}
-        if reasoning_content:
-            provider_data["reasoning_content"] = reasoning_content
-        rd = getattr(msg, "reasoning_details", None)
-        if rd:
-            provider_data["reasoning_details"] = rd
-
-        return NormalizedResponse(
-            content=msg.content,
-            tool_calls=tool_calls,
-            finish_reason=finish_reason,
-            reasoning=reasoning,
-            usage=usage,
-            provider_data=provider_data or None,
-        )
-
-    def validate_response(self, response: Any) -> bool:
-        """Check that response has valid choices."""
-        if response is None:
-            return False
-        if not hasattr(response, "choices") or response.choices is None:
-            return False
-        if not response.choices:
-            return False
-        return True
-
-    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
-        """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
-        usage = getattr(response, "usage", None)
-        if usage is None:
-            return None
-        details = getattr(usage, "prompt_tokens_details", None)
-        if details is None:
-            return None
-        cached = getattr(details, "cached_tokens", 0) or 0
-        written = getattr(details, "cache_write_tokens", 0) or 0
-        if cached or written:
-            return {"cached_tokens": cached, "creation_tokens": written}
-        return None
-
-
-# Auto-register on import
-from agent.transports import register_transport  # noqa: E402
-
-register_transport("chat_completions", ChatCompletionsTransport)
@@ -1,237 +0,0 @@
-"""OpenAI Responses API (Codex) transport.
-
-Delegates to the existing adapter functions in agent/codex_responses_adapter.py.
-This transport owns format conversion and normalization — NOT client lifecycle,
-streaming, or the _run_codex_stream() call path.
-"""
-
-from typing import Any, Dict, List, Optional
-
-from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
-
-
-class ResponsesApiTransport(ProviderTransport):
-    """Transport for api_mode='codex_responses'.
-
-    Wraps the functions extracted into codex_responses_adapter.py (PR 1).
-    """
-
-    @property
-    def api_mode(self) -> str:
-        return "codex_responses"
-
-    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
-        """Convert OpenAI chat messages to Responses API input items."""
-        from agent.codex_responses_adapter import _chat_messages_to_responses_input
-        return _chat_messages_to_responses_input(messages)
-
-    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
-        """Convert OpenAI tool schemas to Responses API function definitions."""
-        from agent.codex_responses_adapter import _responses_tools
-        return _responses_tools(tools)
-
-    def build_kwargs(
-        self,
-        model: str,
-        messages: List[Dict[str, Any]],
-        tools: Optional[List[Dict[str, Any]]] = None,
-        **params,
-    ) -> Dict[str, Any]:
-        """Build Responses API kwargs.
-
-        Calls convert_messages and convert_tools internally.
-
-        params:
-            instructions: str — system prompt (extracted from messages[0] if not given)
-            reasoning_config: dict | None — {effort, enabled}
-            session_id: str | None — used for prompt_cache_key + xAI conv header
-            max_tokens: int | None — max_output_tokens
-            request_overrides: dict | None — extra kwargs merged in
-            provider: str | None — provider name for backend-specific logic
-            base_url: str | None — endpoint URL
-            base_url_hostname: str | None — hostname for backend detection
-            is_github_responses: bool — Copilot/GitHub models backend
-            is_codex_backend: bool — chatgpt.com/backend-api/codex
-            is_xai_responses: bool — xAI/Grok backend
-            github_reasoning_extra: dict | None — Copilot reasoning params
-        """
-        from agent.codex_responses_adapter import (
-            _chat_messages_to_responses_input,
-            _responses_tools,
-        )
-
-        from run_agent import DEFAULT_AGENT_IDENTITY
-
-        instructions = params.get("instructions", "")
-        payload_messages = messages
-        if not instructions:
-            if messages and messages[0].get("role") == "system":
-                instructions = str(messages[0].get("content") or "").strip()
-                payload_messages = messages[1:]
-        if not instructions:
-            instructions = DEFAULT_AGENT_IDENTITY
-
-        is_github_responses = params.get("is_github_responses", False)
-        is_codex_backend = params.get("is_codex_backend", False)
-        is_xai_responses = params.get("is_xai_responses", False)
-
-        # Resolve reasoning effort
-        reasoning_effort = "medium"
-        reasoning_enabled = True
-        reasoning_config = params.get("reasoning_config")
-        if reasoning_config and isinstance(reasoning_config, dict):
-            if reasoning_config.get("enabled") is False:
-                reasoning_enabled = False
-            elif reasoning_config.get("effort"):
-                reasoning_effort = reasoning_config["effort"]
-
-        _effort_clamp = {"minimal": "low"}
-        reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
-
-        kwargs = {
-            "model": model,
-            "instructions": instructions,
-            "input": _chat_messages_to_responses_input(payload_messages),
-            "tools": _responses_tools(tools),
-            "tool_choice": "auto",
-            "parallel_tool_calls": True,
-            "store": False,
-        }
-
-        session_id = params.get("session_id")
-        if not is_github_responses and session_id:
-            kwargs["prompt_cache_key"] = session_id
-
-        if reasoning_enabled and is_xai_responses:
-            kwargs["include"] = ["reasoning.encrypted_content"]
-        elif reasoning_enabled:
-            if is_github_responses:
-                github_reasoning = params.get("github_reasoning_extra")
-                if github_reasoning is not None:
-                    kwargs["reasoning"] = github_reasoning
-            else:
-                kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"}
-                kwargs["include"] = ["reasoning.encrypted_content"]
-        elif not is_github_responses and not is_xai_responses:
-            kwargs["include"] = []
-
-        request_overrides = params.get("request_overrides")
-        if request_overrides:
-            kwargs.update(request_overrides)
-
-        if is_codex_backend:
-            prompt_cache_key = kwargs.get("prompt_cache_key")
-            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
-            if cache_scope_id:
-                existing_extra_headers = kwargs.get("extra_headers")
-                merged_extra_headers: Dict[str, str] = {}
-                if isinstance(existing_extra_headers, dict):
-                    merged_extra_headers.update(
-                        {
-                            str(key): str(value)
-                            for key, value in existing_extra_headers.items()
-                            if key and value is not None
-                        }
-                    )
-                merged_extra_headers["session_id"] = cache_scope_id
-                merged_extra_headers["x-client-request-id"] = cache_scope_id
-                kwargs["extra_headers"] = merged_extra_headers
-
-        max_tokens = params.get("max_tokens")
-        if max_tokens is not None and not is_codex_backend:
-            kwargs["max_output_tokens"] = max_tokens
-
-        if is_xai_responses and session_id:
-            kwargs["extra_headers"] = {"x-grok-conv-id": session_id}
-
-        return kwargs
-
-    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
-        """Normalize Codex Responses API response to NormalizedResponse."""
-        from agent.codex_responses_adapter import (
-            _normalize_codex_response,
-            _extract_responses_message_text,
-            _extract_responses_reasoning_text,
-        )
-
-        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
-        msg, finish_reason = _normalize_codex_response(response)
-
-        tool_calls = None
-        if msg and msg.tool_calls:
-            tool_calls = []
-            for tc in msg.tool_calls:
-                provider_data = {}
-                if hasattr(tc, "call_id") and tc.call_id:
-                    provider_data["call_id"] = tc.call_id
-                if hasattr(tc, "response_item_id") and tc.response_item_id:
-                    provider_data["response_item_id"] = tc.response_item_id
-                tool_calls.append(ToolCall(
-                    id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None),
-                    name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""),
-                    arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"),
-                    provider_data=provider_data or None,
-                ))
-
-        # Extract reasoning items for provider_data
-        provider_data = {}
-        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
-            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
-        if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
-            provider_data["codex_message_items"] = msg.codex_message_items
-        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
-            provider_data["reasoning_details"] = msg.reasoning_details
-
-        return NormalizedResponse(
-            content=msg.content if msg else None,
-            tool_calls=tool_calls,
-            finish_reason=finish_reason or "stop",
-            reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None,
-            usage=None,  # Codex usage is extracted separately in normalize_usage()
-            provider_data=provider_data or None,
-        )
-
-    def validate_response(self, response: Any) -> bool:
-        """Check Codex Responses API response has valid output structure.
-
-        Returns True only if response.output is a non-empty list.
-        Does NOT check output_text fallback — the caller handles that
-        with diagnostic logging for stream backfill recovery.
-        """
-        if response is None:
-            return False
-        output = getattr(response, "output", None)
-        if not isinstance(output, list) or not output:
-            return False
-        return True
-
-    def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict:
-        """Validate and sanitize Codex API kwargs before the call.
-
-        Normalizes input items, strips unsupported fields, validates structure.
-        """
-        from agent.codex_responses_adapter import _preflight_codex_api_kwargs
-        return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream)
-
-    def map_finish_reason(self, raw_reason: str) -> str:
-        """Map Codex response.status to OpenAI finish_reason.
-
-        Codex uses response.status ('completed', 'incomplete') +
-        response.incomplete_details.reason for granular mapping.
-        This method handles the simple status string; the caller
-        should check incomplete_details separately for 'max_output_tokens'.
-        """
-        _MAP = {
-            "completed": "stop",
-            "incomplete": "length",
-            "failed": "stop",
-            "cancelled": "stop",
-        }
-        return _MAP.get(raw_reason, "stop")
-
-
-# Auto-register on import
-from agent.transports import register_transport  # noqa: E402
-
-register_transport("codex_responses", ResponsesApiTransport)
@@ -1,161 +0,0 @@
-"""Shared types for normalized provider responses.
-
-These dataclasses define the canonical shape that all provider adapters
-normalize responses to.  The shared surface is intentionally minimal —
-only fields that every downstream consumer reads are top-level.
-Protocol-specific state goes in ``provider_data`` dicts (response-level
-and per-tool-call) so that protocol-aware code paths can access it
-without polluting the shared type.
-"""
-
-from __future__ import annotations
-
-import json
-from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
-
-
-@dataclass
-class ToolCall:
-    """A normalized tool call from any provider.
-
-    ``id`` is the protocol's canonical identifier — what gets used in
-    ``tool_call_id`` / ``tool_use_id`` when constructing tool result
-    messages.  May be ``None`` when the provider omits it; the agent
-    fills it via ``_deterministic_call_id()`` before storing in history.
-
-    ``provider_data`` carries per-tool-call protocol metadata that only
-    protocol-aware code reads:
-
-    * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
-    * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
-    * Others: ``None``
-    """
-
-    id: Optional[str]
-    name: str
-    arguments: str  # JSON string
-    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
-
-    # ── Backward compatibility ──────────────────────────────────
-    # The agent loop reads tc.function.name / tc.function.arguments
-    # throughout run_agent.py (45+ sites).  These properties let
-    # NormalizedResponse pass through without the _nr_to_assistant_message
-    # shim, while keeping ToolCall's canonical fields flat.
-    @property
-    def type(self) -> str:
-        return "function"
-
-    @property
-    def function(self) -> "ToolCall":
-        """Return self so tc.function.name / tc.function.arguments work."""
-        return self
-
-    @property
-    def call_id(self) -> Optional[str]:
-        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
-        return (self.provider_data or {}).get("call_id")
-
-    @property
-    def response_item_id(self) -> Optional[str]:
-        """Codex response_item_id from provider_data."""
-        return (self.provider_data or {}).get("response_item_id")
-
-    @property
-    def extra_content(self) -> Optional[Dict[str, Any]]:
-        """Gemini extra_content (thought_signature) from provider_data.
-
-        Gemini 3 thinking models attach ``extra_content`` with a
-        ``thought_signature`` to each tool call.  This signature must be
-        replayed on subsequent API calls — without it the API rejects the
-        request with HTTP 400.  The chat_completions transport stores this
-        in ``provider_data["extra_content"]``; this property exposes it so
-        ``_build_assistant_message`` can ``getattr(tc, "extra_content")``
-        uniformly.
-        """
-        return (self.provider_data or {}).get("extra_content")
-
-
-@dataclass
-class Usage:
-    """Token usage from an API response."""
-
-    prompt_tokens: int = 0
-    completion_tokens: int = 0
-    total_tokens: int = 0
-    cached_tokens: int = 0
-
-
-@dataclass
-class NormalizedResponse:
-    """Normalized API response from any provider.
-
-    Shared fields are truly cross-provider — every caller can rely on
-    them without branching on api_mode.  Protocol-specific state goes in
-    ``provider_data`` so that only protocol-aware code paths read it.
-
-    Response-level ``provider_data`` examples:
-
-    * Anthropic: ``{"reasoning_details": [...]}``
-    * Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
-    * Others: ``None``
-    """
-
-    content: Optional[str]
-    tool_calls: Optional[List[ToolCall]]
-    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
-    reasoning: Optional[str] = None
-    usage: Optional[Usage] = None
-    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
-
-    # ── Backward compatibility ──────────────────────────────────
-    # The shim _nr_to_assistant_message() mapped these from provider_data.
-    # These properties let NormalizedResponse pass through directly.
-    @property
-    def reasoning_content(self) -> Optional[str]:
-        pd = self.provider_data or {}
-        return pd.get("reasoning_content")
-
-    @property
-    def reasoning_details(self):
-        pd = self.provider_data or {}
-        return pd.get("reasoning_details")
-
-    @property
-    def codex_reasoning_items(self):
-        pd = self.provider_data or {}
-        return pd.get("codex_reasoning_items")
-
-    @property
-    def codex_message_items(self):
-        pd = self.provider_data or {}
-        return pd.get("codex_message_items")
-
-
-# ---------------------------------------------------------------------------
-# Factory helpers
-# ---------------------------------------------------------------------------
-
-def build_tool_call(
-    id: Optional[str],
-    name: str,
-    arguments: Any,
-    **provider_fields: Any,
-) -> ToolCall:
-    """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
-
-    Any extra keyword arguments are collected into ``provider_data``.
-    """
-    args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
-    pd = dict(provider_fields) if provider_fields else None
-    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
-
-
-def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
-    """Translate a provider-specific stop reason to the normalised set.
-
-    Falls back to ``"stop"`` for unknown or ``None`` reasons.
-    """
-    if reason is None:
-        return "stop"
-    return mapping.get(reason, "stop")
@@ -6,7 +6,6 @@ from decimal import Decimal
 from typing import Any, Dict, Literal, Optional

 from agent.model_metadata import fetch_endpoint_model_metadata, fetch_model_metadata
-from utils import base_url_host_matches

 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

@@ -394,7 +393,7 @@ def resolve_billing_route(

    if provider_name == "openai-codex":
        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
-    if provider_name == "openrouter" or base_url_host_matches(base_url or "", "openrouter.ai"):
+    if provider_name == "openrouter" or "openrouter.ai" in base:
        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
    if provider_name == "anthropic":
        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
@@ -533,22 +532,10 @@ def normalize_usage(
        prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
        output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
        details = getattr(response_usage, "prompt_tokens_details", None)
-        # Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
-        # top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
-        # AI Gateway, Cline) expose when routing Claude models — without this
-        # fallback, cache writes are undercounted as 0 and cache reads can be
-        # missed when the proxy only surfaces them at the top level.
-        # Port of cline/cline#10266.
        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
-        if not cache_read_tokens:
-            cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
        cache_write_tokens = _to_int(
            getattr(details, "cache_write_tokens", 0) if details else 0
        )
-        if not cache_write_tokens:
-            cache_write_tokens = _to_int(
-                getattr(response_usage, "cache_creation_input_tokens", 0)
-            )
        input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)

    reasoning_tokens = 0
@@ -444,7 +444,6 @@ def _process_batch_worker(args: Tuple) -> Dict[str, Any]:
            if not reasoning.get("has_any_reasoning", True):
                print(f"   🚫 Prompt {prompt_index} discarded (no reasoning in any turn)")
                discarded_no_reasoning += 1
-                completed_in_batch.append(prompt_index)
                continue
            
            # Get and normalize tool stats for consistent schema across all entries
@@ -951,9 +950,13 @@ class BatchRunner:
                    root_logger.setLevel(original_level)
        
        # Aggregate all batch statistics and update checkpoint
+        all_completed_prompts = list(completed_prompts_set)
        total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
-
+        
        for batch_result in results:
+            # Add newly completed prompts
+            all_completed_prompts.extend(batch_result.get("completed_prompts", []))
+            
            # Aggregate tool stats
            for tool_name, stats in batch_result.get("tool_stats", {}).items():
                if tool_name not in total_tool_stats:
@@ -973,7 +976,7 @@ class BatchRunner:
        
        # Save final checkpoint (best-effort; incremental writes already happened)
        try:
-            checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
+            checkpoint_data["completed_prompts"] = all_completed_prompts
            self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
        except Exception as ckpt_err:
            print(f"âš ï¸  Warning: Failed to save final checkpoint: {ckpt_err}")
@@ -1186,12 +1189,12 @@ def main(
    """
    # Handle list distributions
    if list_distributions:
-        from toolset_distributions import print_distribution_info
-
+        from toolset_distributions import list_distributions as get_all_dists, print_distribution_info
+        
        print("📊 Available Toolset Distributions")
        print("=" * 70)
-
-        all_dists = list_distributions()
+        
+        all_dists = get_all_dists()
        for dist_name in sorted(all_dists.keys()):
            print_distribution_info(dist_name)
        
@@ -326,16 +326,6 @@ compression:
  # To pin a specific model/provider for compression summaries, use the
  # auxiliary section below (auxiliary.compression.provider / model).

-# =============================================================================
-# Anthropic prompt caching TTL
-# =============================================================================
-# When prompt caching is active (Claude via OpenRouter or native Anthropic),
-# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
-# "1h". Other values are ignored and "5m" is used.
-#
-prompt_caching:
-  cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
-
 # =============================================================================
 # Auxiliary Models (Advanced — Experimental)
 # =============================================================================
@@ -517,13 +507,6 @@ agent:
  # finish, then interrupts anything still running after this timeout.
  # 0 = no drain, interrupt immediately.
  # restart_drain_timeout: 60
-
-  # Max app-level retry attempts for API errors (connection drops, provider
-  # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
-  # to 1 if you use fallback providers and want fast failover on flaky
-  # primaries (default 3). The OpenAI SDK does its own low-level retries
-  # underneath this wrapper — this is the Hermes-level loop.
-  # api_max_retries: 3
  
  # Enable verbose logging
  verbose: false
@@ -606,7 +589,6 @@ platform_toolsets:
  signal: [hermes-signal]
  homeassistant: [hermes-homeassistant]
  qqbot: [hermes-qqbot]
-  yuanbao: [hermes-yuanbao]

 # =============================================================================
 # Gateway Platform Settings
@@ -788,20 +770,10 @@ code_execution:
 # Subagent Delegation
 # =============================================================================
 # The delegate_task tool spawns child agents with isolated context.
-# Supports single tasks and batch mode (default 3 parallel, configurable).
+# Supports single tasks and batch mode (up to 3 parallel).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  # max_concurrent_children: 3                # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
-                                              # WARNING: values above 10 multiply API cost linearly.
-  # max_spawn_depth: 1                        # Delegation tree depth cap (range: 1-3, default: 1 = flat).
-                                              # Raise to 2 to allow workers to spawn their own subagents.
-                                              # Requires role="orchestrator" on intermediate agents.
-  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
-  # subagent_auto_approve: false              # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
-                                              # or auto-approve "once" (true) instead of blocking on stdin.
-                                              # The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
-                                              # Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
-  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
+  default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -825,9 +797,7 @@ delegation:
 # Display
 # =============================================================================
 display:
-  # Use compact banner mode (hides the ASCII-art banner, shows a single line).
-  #   true:  Compact single-line banner
-  #   false: Full ASCII banner with tool/skill summary (default)
+  # Use compact banner mode
  compact: false

  # Tool progress display level (CLI and gateway)
@@ -841,19 +811,12 @@ display:
  # Gateway-only natural mid-turn assistant updates.
  # When true, completed assistant status messages are sent as separate chat
  # messages. This is independent of tool_progress and gateway streaming.
-  #   true:  Send mid-turn assistant updates as separate messages (default)
-  #   false: Only send the final response
  interim_assistant_messages: true

-  # What Enter does when Hermes is already busy (CLI and gateway platforms).
+  # What Enter does when Hermes is already busy in the CLI.
  #   interrupt: Interrupt the current run and redirect Hermes (default)
  #   queue:     Queue your message for the next turn
-  #   steer:     Inject your message mid-run via /steer, arriving at the agent
-  #              after the next tool call — no interrupt, no role violation.
-  #              Falls back to 'queue' if the agent isn't running yet or if
-  #              images are attached (steer only carries text).
-  # Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
-  # Toggle at runtime with /busy <interrupt|queue|steer>.
+  # Ctrl+C always interrupts regardless of this setting.
  busy_input_mode: interrupt

  # Background process notifications (gateway/messaging only).
@@ -869,22 +832,17 @@ display:
  # Play terminal bell when agent finishes a response.
  # Useful for long-running tasks — your terminal will ding when the agent is done.
  # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
-  #   true:  Ring the terminal bell on each response
-  #   false: Silent (default)
  bell_on_complete: false

  # Show model reasoning/thinking before each response.
  # When enabled, a dim box shows the model's thought process above the response.
  # Toggle at runtime with /reasoning show or /reasoning hide.
-  #   true:  Show the reasoning box
-  #   false: Hide reasoning (default)
  show_reasoning: false

  # Stream tokens to the terminal as they arrive instead of waiting for the
  # full response. The response box opens on first token and text appears
  # line-by-line. Tool calls are still captured silently.
-  #   true:  Stream tokens as they arrive (default)
-  #   false: Wait for the full response before rendering
+  # Stream tokens to the terminal in real-time. Disable to wait for full responses.
  streaming: true

  # ───────────────────────────────────────────────────────────────────────────
@@ -894,15 +852,10 @@ display:
  # response box label, and branding text. Change at runtime with /skin <name>.
  #
  # Built-in skins:
-  #   default        — Classic Hermes gold/kawaii
-  #   ares           — Crimson/bronze war-god theme with spinner wings
-  #   mono           — Clean grayscale monochrome
-  #   slate          — Cool blue developer-focused
-  #   daylight       — Bright light-mode theme
-  #   warm-lightmode — Warm paper-tone light-mode theme
-  #   poseidon       — Sea-green/teal Olympian theme
-  #   sisyphus       — Earthy stone-and-moss theme
-  #   charizard      — Fiery orange dragon theme
+  #   default  — Classic Hermes gold/kawaii
+  #   ares     — Crimson/bronze war-god theme with spinner wings
+  #   mono     — Clean grayscale monochrome
+  #   slate    — Cool blue developer-focused
  #
  # Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
  # Schema (all fields optional, missing values inherit from default):
@@ -964,39 +917,3 @@ display:
 #   # Names and usernames are NOT affected (user-chosen, publicly visible).
 #   # Routing/delivery still uses the original values internally.
 #   redact_pii: false
-
-# =============================================================================
-# Shell-script hooks
-# =============================================================================
-# Register shell scripts as plugin-hook callbacks.  Each entry is executed as
-# a subprocess (shell=False, shlex.split) with a JSON payload on stdin.  On
-# stdout the script may return JSON that either blocks the tool call or
-# injects context into the next LLM call.
-#
-# Valid events (mirror hermes_cli.plugins.VALID_HOOKS):
-#   pre_tool_call, post_tool_call, pre_llm_call, post_llm_call,
-#   pre_api_request, post_api_request, on_session_start, on_session_end,
-#   on_session_finalize, on_session_reset, subagent_stop
-#
-# First-use consent: each (event, command) pair prompts once on a TTY, then
-# is persisted to ~/.hermes/shell-hooks-allowlist.json.  Non-interactive
-# runs (gateway, cron) need --accept-hooks, HERMES_ACCEPT_HOOKS=1, or the
-# hooks_auto_accept key below.
-#
-# See website/docs/user-guide/features/hooks.md for the full JSON wire
-# protocol and worked examples.
-#
-# hooks:
-#   pre_tool_call:
-#     - matcher: "terminal"
-#       command: "~/.hermes/agent-hooks/block-rm-rf.sh"
-#       timeout: 10
-#   post_tool_call:
-#     - matcher: "write_file|patch"
-#       command: "~/.hermes/agent-hooks/auto-format.sh"
-#   pre_llm_call:
-#     - command: "~/.hermes/agent-hooks/inject-cwd-context.sh"
-#   subagent_stop:
-#     - command: "~/.hermes/agent-hooks/log-orchestration.sh"
-#
-# hooks_auto_accept: false
@@ -9,14 +9,13 @@ import copy
 import json
 import logging
 import tempfile
-import threading
 import os
 import re
 import uuid
 from datetime import datetime, timedelta
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Optional, Dict, List, Any, Union
+from typing import Optional, Dict, List, Any

 logger = logging.getLogger(__name__)

@@ -35,11 +34,6 @@ except ImportError:
 HERMES_DIR = get_hermes_home().resolve()
 CRON_DIR = HERMES_DIR / "cron"
 JOBS_FILE = CRON_DIR / "jobs.json"
-
-# In-process lock protecting load_jobs→modify→save_jobs cycles.
-# Required when tick() runs jobs in parallel threads — without this,
-# concurrent mark_job_run / advance_next_run calls can clobber each other.
-_jobs_file_lock = threading.Lock()
 OUTPUT_DIR = CRON_DIR / "output"
 ONESHOT_GRACE_SECONDS = 120

@@ -371,39 +365,6 @@ def save_jobs(jobs: List[Dict[str, Any]]):
        raise


-def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
-    """Normalize and validate a cron job workdir.
-
-    Rules:
-      - Empty / None → None (feature off, preserves old behaviour).
-      - ``~`` is expanded.  Relative paths are rejected — cron jobs run detached
-        from any shell cwd, so relative paths have no stable meaning.
-      - The path must exist and be a directory at create/update time.  We do
-        NOT re-check at run time (a user might briefly unmount the dir; the
-        scheduler will just fall back to old behaviour with a logged warning).
-
-    Returns the absolute path string, or None when disabled.
-    Raises ValueError on invalid input.
-    """
-    if workdir is None:
-        return None
-    raw = str(workdir).strip()
-    if not raw:
-        return None
-    expanded = Path(raw).expanduser()
-    if not expanded.is_absolute():
-        raise ValueError(
-            f"Cron workdir must be an absolute path (got {raw!r}). "
-            f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous."
-        )
-    resolved = expanded.resolve()
-    if not resolved.exists():
-        raise ValueError(f"Cron workdir does not exist: {resolved}")
-    if not resolved.is_dir():
-        raise ValueError(f"Cron workdir is not a directory: {resolved}")
-    return str(resolved)
-
-
 def create_job(
    prompt: str,
    schedule: str,
@@ -417,9 +378,6 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
-    context_from: Optional[Union[str, List[str]]] = None,
-    enabled_toolsets: Optional[List[str]] = None,
-    workdir: Optional[str] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
@@ -439,18 +397,6 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
-        context_from: Optional job ID (or list of job IDs) whose most recent output
-                      is injected into the prompt as context before each run.
-                      Useful for chaining cron jobs: job A finds data, job B processes it.
-        enabled_toolsets: Optional list of toolset names to restrict the agent to.
-                          When set, only tools from these toolsets are loaded, reducing
-                          token overhead. When omitted, all default tools are loaded.
-        workdir: Optional absolute path.  When set, the job runs as if launched
-                from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
-                that directory are injected into the system prompt, and the
-                terminal/file/code_exec tools use it as their working directory
-                (via TERMINAL_CWD).  When unset, the old behaviour is preserved
-                (no context files injected, tools use the scheduler's cwd).

    Returns:
        The created job dict
@@ -481,17 +427,6 @@ def create_job(
    normalized_base_url = normalized_base_url or None
    normalized_script = str(script).strip() if isinstance(script, str) else None
    normalized_script = normalized_script or None
-    normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
-    normalized_toolsets = normalized_toolsets or None
-    normalized_workdir = _normalize_workdir(workdir)
-
-    # Normalize context_from: accept str or list of str, store as list or None
-    if isinstance(context_from, str):
-        context_from = [context_from.strip()] if context_from.strip() else None
-    elif isinstance(context_from, list):
-        context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
-    else:
-        context_from = None

    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
@@ -504,7 +439,6 @@ def create_job(
        "provider": normalized_provider,
        "base_url": normalized_base_url,
        "script": normalized_script,
-        "context_from": context_from,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -524,8 +458,6 @@ def create_job(
        # Delivery configuration
        "deliver": deliver,
        "origin": origin,  # Tracks where job was created for "origin" delivery
-        "enabled_toolsets": normalized_toolsets,
-        "workdir": normalized_workdir,
    }

    jobs = load_jobs()
@@ -559,15 +491,6 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
        if job["id"] != job_id:
            continue

-        # Validate / normalize workdir if present in updates.  Empty string or
-        # None both mean "clear the field" (restore old behaviour).
-        if "workdir" in updates:
-            _wd = updates["workdir"]
-            if _wd in (None, "", False):
-                updates["workdir"] = None
-            else:
-                updates["workdir"] = _normalize_workdir(_wd)
-
        updated = _apply_skill_fields({**job, **updates})
        schedule_changed = "schedule" in updates

@@ -671,44 +594,43 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
    ``delivery_error`` is tracked separately from the agent error — a job
    can succeed (agent produced output) but fail delivery (platform down).
    """
-    with _jobs_file_lock:
-        jobs = load_jobs()
-        for i, job in enumerate(jobs):
-            if job["id"] == job_id:
-                now = _hermes_now().isoformat()
-                job["last_run_at"] = now
-                job["last_status"] = "ok" if success else "error"
-                job["last_error"] = error if not success else None
-                # Track delivery failures separately — cleared on successful delivery
-                job["last_delivery_error"] = delivery_error
+    jobs = load_jobs()
+    for i, job in enumerate(jobs):
+        if job["id"] == job_id:
+            now = _hermes_now().isoformat()
+            job["last_run_at"] = now
+            job["last_status"] = "ok" if success else "error"
+            job["last_error"] = error if not success else None
+            # Track delivery failures separately — cleared on successful delivery
+            job["last_delivery_error"] = delivery_error
+            
+            # Increment completed count
+            if job.get("repeat"):
+                job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
                
-                # Increment completed count
-                if job.get("repeat"):
-                    job["repeat"]["completed"] = job["repeat"].get("completed", 0) + 1
-                    
-                    # Check if we've hit the repeat limit
-                    times = job["repeat"].get("times")
-                    completed = job["repeat"]["completed"]
-                    if times is not None and times > 0 and completed >= times:
-                        # Remove the job (limit reached)
-                        jobs.pop(i)
-                        save_jobs(jobs)
-                        return
-                
-                # Compute next run
-                job["next_run_at"] = compute_next_run(job["schedule"], now)
+                # Check if we've hit the repeat limit
+                times = job["repeat"].get("times")
+                completed = job["repeat"]["completed"]
+                if times is not None and times > 0 and completed >= times:
+                    # Remove the job (limit reached)
+                    jobs.pop(i)
+                    save_jobs(jobs)
+                    return
+            
+            # Compute next run
+            job["next_run_at"] = compute_next_run(job["schedule"], now)

-                # If no next run (one-shot completed), disable
-                if job["next_run_at"] is None:
-                    job["enabled"] = False
-                    job["state"] = "completed"
-                elif job.get("state") != "paused":
-                    job["state"] = "scheduled"
+            # If no next run (one-shot completed), disable
+            if job["next_run_at"] is None:
+                job["enabled"] = False
+                job["state"] = "completed"
+            elif job.get("state") != "paused":
+                job["state"] = "scheduled"

-                save_jobs(jobs)
-                return
+            save_jobs(jobs)
+            return

-        logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)
+    logger.warning("mark_job_run: job_id %s not found, skipping save", job_id)


 def advance_next_run(job_id: str) -> bool:
@@ -723,21 +645,20 @@ def advance_next_run(job_id: str) -> bool:

    Returns True if next_run_at was advanced, False otherwise.
    """
-    with _jobs_file_lock:
-        jobs = load_jobs()
-        for job in jobs:
-            if job["id"] == job_id:
-                kind = job.get("schedule", {}).get("kind")
-                if kind not in ("cron", "interval"):
-                    return False
-                now = _hermes_now().isoformat()
-                new_next = compute_next_run(job["schedule"], now)
-                if new_next and new_next != job.get("next_run_at"):
-                    job["next_run_at"] = new_next
-                    save_jobs(jobs)
-                    return True
+    jobs = load_jobs()
+    for job in jobs:
+        if job["id"] == job_id:
+            kind = job.get("schedule", {}).get("kind")
+            if kind not in ("cron", "interval"):
                return False
-        return False
+            now = _hermes_now().isoformat()
+            new_next = compute_next_run(job["schedule"], now)
+            if new_next and new_next != job.get("next_run_at"):
+                job["next_run_at"] = new_next
+                save_jobs(jobs)
+                return True
+            return False
+    return False


 def get_due_jobs() -> List[Dict[str, Any]]:
@@ -40,44 +40,13 @@ from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)

-
-def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
-    """Resolve the toolset list for a cron job.
-
-    Precedence:
-    1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
-       Keeps the agent's job-scoped toolset override intact — #6130.
-    2. Per-platform ``hermes tools`` config for the ``cron`` platform.
-       Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
-       so users can gate cron toolsets globally without recreating every job.
-    3. ``None`` on any lookup failure — AIAgent loads the full default set
-       (legacy behavior before this change, preserved as the safety net).
-
-    _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
-    ``_get_platform_tools`` for unconfigured platforms, so fresh installs
-    get cron WITHOUT ``moa`` by default (issue reported by Norbert —
-    surprise $4.63 run).
-    """
-    per_job = job.get("enabled_toolsets")
-    if per_job:
-        return per_job
-    try:
-        from hermes_cli.tools_config import _get_platform_tools  # lazy: avoid heavy import at cron module load
-        return sorted(_get_platform_tools(cfg or {}, "cron"))
-    except Exception as exc:
-        logger.warning(
-            "Cron toolset resolution failed, falling back to full default toolset: %s",
-            exc,
-        )
-        return None
-
 # Valid delivery platforms — used to validate user-supplied platform names
 # in cron delivery targets, preventing env var enumeration via crafted names.
 _KNOWN_DELIVERY_PLATFORMS = frozenset({
    "telegram", "discord", "slack", "whatsapp", "signal",
    "matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
    "wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
-    "qqbot", "yuanbao",
+    "qqbot",
 })

 # Platforms that support a configured cron/notification home target, mapped to
@@ -283,11 +252,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
                coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)

            future = asyncio.run_coroutine_threadsafe(coro, loop)
-            try:
-                result = future.result(timeout=30)
-            except TimeoutError:
-                future.cancel()
-                raise
+            result = future.result(timeout=30)
            if result and not getattr(result, "success", True):
                logger.warning(
                    "Job '%s': media send failed for %s: %s",
@@ -337,7 +302,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
        "sms": Platform.SMS,
        "bluebubbles": Platform.BLUEBUBBLES,
        "qqbot": Platform.QQBOT,
-        "yuanbao": Platform.YUANBAO,
    }

    # Optionally wrap the content with a header/footer so the user knows this
@@ -418,11 +382,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                        runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
                        loop,
                    )
-                    try:
-                        send_result = future.result(timeout=60)
-                    except TimeoutError:
-                        future.cancel()
-                        raise
+                    send_result = future.result(timeout=60)
                    if send_result and not getattr(send_result, "success", True):
                        err = getattr(send_result, "error", "unknown")
                        logger.warning(
@@ -462,6 +422,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                # prevent "coroutine was never awaited" RuntimeWarning, then retry in a
                # fresh thread that has no running loop.
                coro.close()
+                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                    future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files))
                    result = future.result(timeout=30)
@@ -672,51 +633,10 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
                f"{prompt}"
            )

-    # Inject output from referenced cron jobs as context.
-    context_from = job.get("context_from")
-    if context_from:
-        from cron.jobs import OUTPUT_DIR
-        if isinstance(context_from, str):
-            context_from = [context_from]
-        for source_job_id in context_from:
-            # Guard against path traversal — valid job IDs are 12-char hex strings
-            if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
-                logger.warning("context_from: skipping invalid job_id %r", source_job_id)
-                continue
-            try:
-                job_output_dir = OUTPUT_DIR / source_job_id
-                if not job_output_dir.exists():
-                    continue  # silent skip — no output yet
-                output_files = sorted(
-                    job_output_dir.glob("*.md"),
-                    key=lambda f: f.stat().st_mtime,
-                    reverse=True,
-                )
-                if not output_files:
-                    continue  # silent skip — no output yet
-                latest_output = output_files[0].read_text(encoding="utf-8").strip()
-                # Truncate to 8K characters to avoid prompt bloat
-                _MAX_CONTEXT_CHARS = 8000
-                if len(latest_output) > _MAX_CONTEXT_CHARS:
-                    latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
-                if latest_output:
-                    prompt = (
-                        f"## Output from job '{source_job_id}'\n"
-                        "The following is the most recent output from a preceding "
-                        "cron job. Use it as context for your analysis.\n\n"
-                        f"```\n{latest_output}\n```\n\n"
-                        f"{prompt}"
-                    )
-                else:
-                    continue  # silent skip — empty output
-            except (OSError, PermissionError) as e:
-                logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
-                # silent skip — do not pollute the prompt with error messages
-
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
-        "[IMPORTANT: You are running as a scheduled cron job. "
+        "[SYSTEM: You are running as a scheduled cron job. "
        "DELIVERY: Your final response will be automatically delivered "
        "to the user — do NOT use send_message or try to deliver "
        "the output yourself. Just produce your report/output as your "
@@ -752,7 +672,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
            parts.append("")
        parts.extend(
            [
-                f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
+                f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
                "",
                content,
            ]
@@ -760,7 +680,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:

    if skipped:
        notice = (
-            f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
+            f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
            f"and were skipped: {', '.join(skipped)}. "
            f"Start your response with a brief notice so the user is aware, e.g.: "
            f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
@@ -827,41 +747,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    # scheduler process — every job this process runs is a cron job.
    os.environ["HERMES_CRON_SESSION"] = "1"

-    # Use ContextVars for per-job session/delivery state so parallel jobs
-    # don't clobber each other's targets (os.environ is process-global).
-    from gateway.session_context import set_session_vars, clear_session_vars, _VAR_MAP
-
-    _ctx_tokens = set_session_vars(
-        platform=origin["platform"] if origin else "",
-        chat_id=str(origin["chat_id"]) if origin else "",
-        chat_name=origin.get("chat_name", "") if origin else "",
-    )
-
-    # Per-job working directory.  When set (and validated at create/update
-    # time), we point TERMINAL_CWD at it so:
-    #   - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md /
-    #     .cursorrules from the job's project dir, AND
-    #   - the terminal, file, and code-exec tools run commands from there.
-    #
-    # tick() serializes workdir-jobs outside the parallel pool, so mutating
-    # os.environ["TERMINAL_CWD"] here is safe for those jobs.  For workdir-less
-    # jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
-    # (skip_context_files=True, tools use whatever cwd the scheduler has).
-    _job_workdir = (job.get("workdir") or "").strip() or None
-    if _job_workdir and not Path(_job_workdir).is_dir():
-        # Directory was removed between create-time validation and now.  Log
-        # and drop back to old behaviour rather than crashing the job.
-        logger.warning(
-            "Job '%s': configured workdir %r no longer exists — running without it",
-            job_id, _job_workdir,
-        )
-        _job_workdir = None
-    _prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_")
-    if _job_workdir:
-        os.environ["TERMINAL_CWD"] = _job_workdir
-        logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
-
    try:
+        # Inject origin context so the agent's send_message tool knows the chat.
+        # Must be INSIDE the try block so the finally cleanup always runs.
+        if origin:
+            os.environ["HERMES_SESSION_PLATFORM"] = origin["platform"]
+            os.environ["HERMES_SESSION_CHAT_ID"] = str(origin["chat_id"])
+            if origin.get("chat_name"):
+                os.environ["HERMES_SESSION_CHAT_NAME"] = origin["chat_name"]
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
        from dotenv import load_dotenv
@@ -872,10 +765,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        delivery_target = _resolve_delivery_target(job)
        if delivery_target:
-            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_PLATFORM"].set(delivery_target["platform"])
-            _VAR_MAP["HERMES_CRON_AUTO_DELIVER_CHAT_ID"].set(str(delivery_target["chat_id"]))
+            os.environ["HERMES_CRON_AUTO_DELIVER_PLATFORM"] = delivery_target["platform"]
+            os.environ["HERMES_CRON_AUTO_DELIVER_CHAT_ID"] = str(delivery_target["chat_id"])
            if delivery_target.get("thread_id") is not None:
-                _VAR_MAP["HERMES_CRON_AUTO_DELIVER_THREAD_ID"].set(str(delivery_target["thread_id"]))
+                os.environ["HERMES_CRON_AUTO_DELIVER_THREAD_ID"] = str(delivery_target["thread_id"])

        model = job.get("model") or os.getenv("HERMES_MODEL") or ""

@@ -914,13 +807,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        prefill_messages = None
        prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "")
        if prefill_file:
+            import json as _json
            pfpath = Path(prefill_file).expanduser()
            if not pfpath.is_absolute():
                pfpath = _hermes_home / pfpath
            if pfpath.exists():
                try:
                    with open(pfpath, "r", encoding="utf-8") as _pf:
-                        prefill_messages = json.load(_pf)
+                        prefill_messages = _json.load(_pf)
                    if not isinstance(prefill_messages, list):
                        prefill_messages = None
                except Exception as e:
@@ -937,7 +831,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            resolve_runtime_provider,
            format_runtime_provider_error,
        )
-        from hermes_cli.auth import AuthError
        try:
            runtime_kwargs = {
                "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
@@ -945,28 +838,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            if job.get("base_url"):
                runtime_kwargs["explicit_base_url"] = job.get("base_url")
            runtime = resolve_runtime_provider(**runtime_kwargs)
-        except AuthError as auth_exc:
-            # Primary provider auth failed — try fallback chain before giving up.
-            logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
-            fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
-            fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
-            runtime = None
-            for entry in fb_list:
-                if not isinstance(entry, dict):
-                    continue
-                try:
-                    fb_kwargs = {"requested": entry.get("provider")}
-                    if entry.get("base_url"):
-                        fb_kwargs["explicit_base_url"] = entry["base_url"]
-                    if entry.get("api_key"):
-                        fb_kwargs["explicit_api_key"] = entry["api_key"]
-                    runtime = resolve_runtime_provider(**fb_kwargs)
-                    logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider"))
-                    break
-                except Exception as fb_exc:
-                    logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc)
-            if runtime is None:
-                raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
        except Exception as exc:
            message = format_runtime_provider_error(exc)
            raise RuntimeError(message) from exc
@@ -1006,13 +877,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            providers_ignored=pr.get("ignore"),
            providers_order=pr.get("order"),
            provider_sort=pr.get("sort"),
-            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            # When a workdir is configured, inject AGENTS.md / CLAUDE.md /
-            # .cursorrules from that directory; otherwise preserve the old
-            # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
-            skip_context_files=not bool(_job_workdir),
+            skip_context_files=True,  # Don't inject SOUL.md/AGENTS.md from scheduler cwd
            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
@@ -1096,12 +963,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"— last activity: {_last_desc}"
            )

-        # Guard against non-dict returns from run_conversation under error conditions
-        if not isinstance(result, dict):
-            raise RuntimeError(
-                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
-            )
-
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -1151,16 +1012,16 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        return False, output, "", error_msg

    finally:
-        # Restore TERMINAL_CWD to whatever it was before this job ran.  We
-        # only ever mutate it when the job has a workdir; see the setup block
-        # at the top of run_job for the serialization guarantee.
-        if _job_workdir:
-            if _prior_terminal_cwd == "_UNSET_":
-                os.environ.pop("TERMINAL_CWD", None)
-            else:
-                os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
-        # Clean up ContextVar session/delivery state for this job.
-        clear_session_vars(_ctx_tokens)
+        # Clean up injected env vars so they don't leak to other jobs
+        for key in (
+            "HERMES_SESSION_PLATFORM",
+            "HERMES_SESSION_CHAT_ID",
+            "HERMES_SESSION_CHAT_NAME",
+            "HERMES_CRON_AUTO_DELIVER_PLATFORM",
+            "HERMES_CRON_AUTO_DELIVER_CHAT_ID",
+            "HERMES_CRON_AUTO_DELIVER_THREAD_ID",
+        ):
+            os.environ.pop(key, None)
        if _session_db:
            try:
                _session_db.end_session(_cron_session_id, "cron_complete")
@@ -1213,41 +1074,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
        if verbose:
            logger.info("%s - %s job(s) due", _hermes_now().strftime('%H:%M:%S'), len(due_jobs))

-        # Advance next_run_at for all recurring jobs FIRST, under the file lock,
-        # before any execution begins.  This preserves at-most-once semantics.
+        executed = 0
        for job in due_jobs:
-            advance_next_run(job["id"])
-
-        # Resolve max parallel workers: env var > config.yaml > unbounded.
-        # Set HERMES_CRON_MAX_PARALLEL=1 to restore old serial behaviour.
-        _max_workers: Optional[int] = None
-        try:
-            _env_par = os.getenv("HERMES_CRON_MAX_PARALLEL", "").strip()
-            if _env_par:
-                _max_workers = int(_env_par) or None
-        except (ValueError, TypeError):
-            logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded")
-        if _max_workers is None:
            try:
-                _ucfg = load_config() or {}
-                _cfg_par = (
-                    _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {}
-                ).get("max_parallel_jobs")
-                if _cfg_par is not None:
-                    _max_workers = int(_cfg_par) or None
-            except Exception:
-                pass
+                # For recurring jobs (cron/interval), advance next_run_at to the
+                # next future occurrence BEFORE execution.  This way, if the
+                # process crashes mid-run, the job won't re-fire on restart.
+                # One-shot jobs are left alone so they can retry on restart.
+                advance_next_run(job["id"])

-        if verbose:
-            logger.info(
-                "Running %d job(s) in parallel (max_workers=%s)",
-                len(due_jobs),
-                _max_workers if _max_workers else "unbounded",
-            )
-
-        def _process_job(job: dict) -> bool:
-            """Run one due job end-to-end: execute, save, deliver, mark."""
-            try:
                success, output, final_response, error = run_job(job)

                output_file = save_job_output(job["id"], output)
@@ -1279,48 +1114,13 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                    error = "Agent completed but produced empty response (model error, timeout, or misconfiguration)"

                mark_job_run(job["id"], success, error, delivery_error=delivery_error)
-                return True
+                executed += 1

            except Exception as e:
                logger.error("Error processing job %s: %s", job['id'], e)
                mark_job_run(job["id"], False, str(e))
-                return False

-        # Partition due jobs: those with a per-job workdir mutate
-        # os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
-        # so they MUST run sequentially to avoid corrupting each other.  Jobs
-        # without a workdir leave env untouched and stay parallel-safe.
-        workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
-        parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
-
-        _results: list = []
-
-        # Sequential pass for workdir jobs.
-        for job in workdir_jobs:
-            _ctx = contextvars.copy_context()
-            _results.append(_ctx.run(_process_job, job))
-
-        # Parallel pass for the rest — same behaviour as before.
-        if parallel_jobs:
-            with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
-                _futures = []
-                for job in parallel_jobs:
-                    _ctx = contextvars.copy_context()
-                    _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
-                _results.extend(f.result() for f in _futures)
-
-        # Best-effort sweep of MCP stdio subprocesses that survived their
-        # session teardown during this tick.  Runs AFTER every job has
-        # finished so active sessions (including live user chats) are
-        # never touched — only PIDs explicitly detected as orphans in
-        # tools.mcp_tool._run_stdio's finally block are reaped.
-        try:
-            from tools.mcp_tool import _kill_orphaned_mcp_children
-            _kill_orphaned_mcp_children()
-        except Exception as _e:
-            logger.debug("Post-tick MCP orphan cleanup failed: %s", _e)
-
-        return sum(_results)
+        return executed
    finally:
        if fcntl:
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
@@ -1,52 +0,0 @@
-#
-# docker-compose.yml for Hermes Agent
-#
-# Usage:
-#   HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
-#
-# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
-# files created inside the container stay readable/writable on the host.
-# The entrypoint remaps the internal `hermes` user to these values via
-# usermod/groupmod + gosu.
-#
-# Security notes:
-#   - The dashboard service binds to 127.0.0.1 by default. It stores API
-#     keys; exposing it on LAN without auth is unsafe. If you want remote
-#     access, use an SSH tunnel or put it behind a reverse proxy that
-#     adds authentication — do NOT pass --insecure --host 0.0.0.0.
-#   - The gateway's API server is off unless you uncomment API_SERVER_KEY
-#     and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
-#     this on an internet-facing host.
-#
-services:
-  gateway:
-    build: .
-    image: hermes-agent
-    container_name: hermes
-    restart: unless-stopped
-    network_mode: host
-    volumes:
-      - ~/.hermes:/opt/data
-    environment:
-      - HERMES_UID=${HERMES_UID:-10000}
-      - HERMES_GID=${HERMES_GID:-10000}
-      # To expose the OpenAI-compatible API server beyond localhost,
-      # uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
-      # - API_SERVER_HOST=0.0.0.0
-      # - API_SERVER_KEY=${API_SERVER_KEY}
-    command: ["gateway", "run"]
-
-  dashboard:
-    image: hermes-agent
-    container_name: hermes-dashboard
-    restart: unless-stopped
-    network_mode: host
-    depends_on:
-      - gateway
-    volumes:
-      - ~/.hermes:/opt/data
-    environment:
-      - HERMES_UID=${HERMES_UID:-10000}
-      - HERMES_GID=${HERMES_GID:-10000}
-    # Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
-    command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
@@ -22,18 +22,9 @@ if [ "$(id -u)" = "0" ]; then
        groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
    fi

-    # Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
-    # files created by previous runs (under the old UID) become inaccessible.
-    # Always chown -R when UID was remapped; otherwise only if top-level is wrong.
    actual_hermes_uid=$(id -u hermes)
-    needs_chown=false
-    if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
-        needs_chown=true
-    elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
-        needs_chown=true
-    fi
-    if [ "$needs_chown" = true ]; then
-        echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
+    if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
+        echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
        # In rootless Podman the container's "root" is mapped to an unprivileged
        # host UID — chown will fail.  That's fine: the volume is already owned
        # by the mapped user on the host side.
@@ -41,15 +32,6 @@ if [ "$(id -u)" = "0" ]; then
            echo "Warning: chown failed (rootless container?) — continuing anyway"
    fi

-    # Ensure config.yaml is readable by the hermes runtime user even if it was
-    # edited on the host after initial ownership setup. Must run here (as root)
-    # rather than after the gosu drop, otherwise a non-root caller like
-    # `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
-    if [ -f "$HERMES_HOME/config.yaml" ]; then
-        chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
-        chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
-    fi
-
    echo "Dropping root privileges"
    exec gosu hermes "$0" "$@"
 fi
@@ -86,19 +68,4 @@ if [ -d "$INSTALL_DIR/skills" ]; then
    python3 "$INSTALL_DIR/tools/skills_sync.py"
 fi

-# Final exec: two supported invocation patterns.
-#
-#   docker run <image>                 -> exec `hermes` with no args (legacy default)
-#   docker run <image> chat -q "..."   -> exec `hermes chat -q "..."` (legacy wrap)
-#   docker run <image> sleep infinity  -> exec `sleep infinity` directly
-#   docker run <image> bash            -> exec `bash` directly
-#
-# If the first positional arg resolves to an executable on PATH, we assume the
-# caller wants to run it directly (needed by the launcher which runs long-lived
-# `sleep infinity` sandbox containers — see tools/environments/docker.py).
-# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
-# preserving the documented `docker run <image> <subcommand>` behavior.
-if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
-    exec "$@"
-fi
 exec hermes "$@"
@@ -53,6 +53,7 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str)
    try:
        loop = asyncio.get_running_loop()
        # We're in an async context -- need to run in thread
+        import concurrent.futures
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            future = pool.submit(
                handle_function_call, tool_name, arguments, task_id
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
 # Build / refresh
 # ---------------------------------------------------------------------------

-async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
+def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
    """
    Build a channel directory from connected platform adapters and session data.

@@ -72,7 +72,7 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
            if platform == Platform.DISCORD:
                platforms["discord"] = _build_discord(adapter)
            elif platform == Platform.SLACK:
-                platforms["slack"] = await _build_slack(adapter)
+                platforms["slack"] = _build_slack(adapter)
        except Exception as e:
            logger.warning("Channel directory: failed to build %s: %s", platform.value, e)

@@ -136,66 +136,21 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
    return channels


-async def _build_slack(adapter) -> List[Dict[str, Any]]:
-    """List Slack channels the bot has joined across all workspaces.
-
-    Uses ``users.conversations`` against each workspace's web client. Pulls
-    public + private channels the bot is a member of, then merges in DMs
-    discovered from session history (IMs aren't useful to enumerate
-    proactively).
-    """
-    team_clients = getattr(adapter, "_team_clients", None) or {}
-    if not team_clients:
+def _build_slack(adapter) -> List[Dict[str, str]]:
+    """List Slack channels the bot has joined."""
+    # Slack adapter may expose a web client
+    client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
+    if not client:
        return _build_from_sessions("slack")

-    channels: List[Dict[str, Any]] = []
-    seen_ids: set = set()
+    try:
+        from tools.send_message_tool import _send_slack  # noqa: F401
+        # Use the Slack Web API directly if available
+    except Exception:
+        pass

-    for team_id, client in team_clients.items():
-        try:
-            cursor: Optional[str] = None
-            for _page in range(20):  # safety cap on pagination
-                response = await client.users_conversations(
-                    types="public_channel,private_channel",
-                    exclude_archived=True,
-                    limit=200,
-                    cursor=cursor,
-                )
-                if not response.get("ok"):
-                    logger.warning(
-                        "Channel directory: users.conversations not ok for team %s: %s",
-                        team_id,
-                        response.get("error", "unknown"),
-                    )
-                    break
-                for ch in response.get("channels", []):
-                    cid = ch.get("id")
-                    name = ch.get("name")
-                    if not cid or not name or cid in seen_ids:
-                        continue
-                    seen_ids.add(cid)
-                    channels.append({
-                        "id": cid,
-                        "name": name,
-                        "type": "private" if ch.get("is_private") else "channel",
-                    })
-                cursor = (response.get("response_metadata") or {}).get("next_cursor")
-                if not cursor:
-                    break
-        except Exception as e:
-            logger.warning(
-                "Channel directory: failed to list Slack channels for team %s: %s",
-                team_id, e,
-            )
-            continue
-
-    # Merge in DM/group entries discovered from session history.
-    for entry in _build_from_sessions("slack"):
-        if entry.get("id") not in seen_ids:
-            channels.append(entry)
-            seen_ids.add(entry.get("id"))
-
-    return channels
+    # Fallback to session data
+    return _build_from_sessions("slack")


 def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
@@ -268,14 +223,6 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
    if not channels:
        return None

-    # 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
-    # raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
-    # in _parse_target_ref hasn't recognized them as explicit.
-    raw = name.strip()
-    for ch in channels:
-        if ch.get("id") == raw:
-            return ch["id"]
-
    query = _normalize_channel_query(name)

    # 1. Exact name match, including the display labels shown by send_message(action="list")
@@ -67,7 +67,6 @@ class Platform(Enum):
    WEIXIN = "weixin"
    BLUEBUBBLES = "bluebubbles"
    QQBOT = "qqbot"
-    YUANBAO = "yuanbao"


@dataclass
@@ -136,7 +135,7 @@ class SessionResetPolicy:
            mode=mode if mode is not None else "both",
            at_hour=at_hour if at_hour is not None else 4,
            idle_minutes=idle_minutes if idle_minutes is not None else 1440,
-            notify=_coerce_bool(notify, True),
+            notify=notify if notify is not None else True,
            notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
        )

@@ -179,7 +178,7 @@ class PlatformConfig:
            home_channel = HomeChannel.from_dict(data["home_channel"])
        
        return cls(
-            enabled=_coerce_bool(data.get("enabled"), False),
+            enabled=data.get("enabled", False),
            token=data.get("token"),
            api_key=data.get("api_key"),
            home_channel=home_channel,
@@ -196,14 +195,6 @@ class StreamingConfig:
    edit_interval: float = 1.0    # Seconds between message edits (Telegram rate-limits at ~1/s)
    buffer_threshold: int = 40    # Chars before forcing an edit
    cursor: str = " ▉"           # Cursor shown during streaming
-    # Ported from openclaw/openclaw#72038.  When >0, the final edit for
-    # a long-running streamed response is delivered as a fresh message
-    # if the original preview has been visible for at least this many
-    # seconds, so the platform's visible timestamp reflects completion
-    # time instead of the preview creation time.  Currently applied to
-    # Telegram only (other platforms ignore the setting).  Default 60s
-    # matches the OpenClaw rollout.  Set to 0 to disable.
-    fresh_final_after_seconds: float = 60.0

    def to_dict(self) -> Dict[str, Any]:
        return {
@@ -212,7 +203,6 @@ class StreamingConfig:
            "edit_interval": self.edit_interval,
            "buffer_threshold": self.buffer_threshold,
            "cursor": self.cursor,
-            "fresh_final_after_seconds": self.fresh_final_after_seconds,
        }

    @classmethod
@@ -225,9 +215,6 @@ class StreamingConfig:
            edit_interval=float(data.get("edit_interval", 1.0)),
            buffer_threshold=int(data.get("buffer_threshold", 40)),
            cursor=data.get("cursor", " ▉"),
-            fresh_final_after_seconds=float(
-                data.get("fresh_final_after_seconds", 60.0)
-            ),
        )


@@ -327,9 +314,6 @@ class GatewayConfig:
            # QQBot uses extra dict for app credentials
            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
                connected.append(platform)
-            # Yuanbao uses extra dict for app credentials
-            elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
-                connected.append(platform)
            # DingTalk uses client_id/client_secret from config.extra or env vars
            elif platform == Platform.DINGTALK and (
                config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
@@ -451,7 +435,7 @@ class GatewayConfig:
            reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
            quick_commands=quick_commands,
            sessions_dir=sessions_dir,
-            always_log_local=_coerce_bool(data.get("always_log_local"), True),
+            always_log_local=data.get("always_log_local", True),
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
            thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@@ -586,23 +570,13 @@ def load_gateway_config() -> GatewayConfig:
                    )
                if "reply_prefix" in platform_cfg:
                    bridged["reply_prefix"] = platform_cfg["reply_prefix"]
-                if "reply_in_thread" in platform_cfg:
-                    bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
                if "require_mention" in platform_cfg:
                    bridged["require_mention"] = platform_cfg["require_mention"]
                if "free_response_channels" in platform_cfg:
                    bridged["free_response_channels"] = platform_cfg["free_response_channels"]
                if "mention_patterns" in platform_cfg:
                    bridged["mention_patterns"] = platform_cfg["mention_patterns"]
-                if "dm_policy" in platform_cfg:
-                    bridged["dm_policy"] = platform_cfg["dm_policy"]
-                if "allow_from" in platform_cfg:
-                    bridged["allow_from"] = platform_cfg["allow_from"]
-                if "group_policy" in platform_cfg:
-                    bridged["group_policy"] = platform_cfg["group_policy"]
-                if "group_allow_from" in platform_cfg:
-                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
-                if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
+                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
                    channel_prompts = platform_cfg["channel_prompts"]
@@ -627,8 +601,6 @@ def load_gateway_config() -> GatewayConfig:
            if isinstance(slack_cfg, dict):
                if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
                    os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
-                if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
-                    os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
                if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
                    os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
                frc = slack_cfg.get("free_response_channels")
@@ -636,8 +608,6 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["SLACK_FREE_RESPONSE_CHANNELS"] = str(frc)
-                if "reactions" in slack_cfg and not os.getenv("SLACK_REACTIONS"):
-                    os.environ["SLACK_REACTIONS"] = str(slack_cfg["reactions"]).lower()

            # Discord settings → env vars (env vars take precedence)
            discord_cfg = yaml_cfg.get("discord", {})
@@ -692,7 +662,8 @@ def load_gateway_config() -> GatewayConfig:
                if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"):
                    os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower()
                if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"):
-                    os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"])
+                    import json as _json
+                    os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"])
                frc = telegram_cfg.get("free_response_chats")
                if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"):
                    if isinstance(frc, list):
@@ -707,11 +678,6 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
-                if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
-                    gac = telegram_cfg["group_allowed_chats"]
-                    if isinstance(gac, list):
-                        gac = ",".join(str(v) for v in gac)
-                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
                if "disable_link_previews" in telegram_cfg:
                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                    if not isinstance(plat_data, dict):
@@ -734,20 +700,6 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
-                if "dm_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_DM_POLICY"):
-                    os.environ["WHATSAPP_DM_POLICY"] = str(whatsapp_cfg["dm_policy"]).lower()
-                af = whatsapp_cfg.get("allow_from")
-                if af is not None and not os.getenv("WHATSAPP_ALLOWED_USERS"):
-                    if isinstance(af, list):
-                        af = ",".join(str(v) for v in af)
-                    os.environ["WHATSAPP_ALLOWED_USERS"] = str(af)
-                if "group_policy" in whatsapp_cfg and not os.getenv("WHATSAPP_GROUP_POLICY"):
-                    os.environ["WHATSAPP_GROUP_POLICY"] = str(whatsapp_cfg["group_policy"]).lower()
-                gaf = whatsapp_cfg.get("group_allow_from")
-                if gaf is not None and not os.getenv("WHATSAPP_GROUP_ALLOWED_USERS"):
-                    if isinstance(gaf, list):
-                        gaf = ",".join(str(v) for v in gaf)
-                    os.environ["WHATSAPP_GROUP_ALLOWED_USERS"] = str(gaf)

            # DingTalk settings → env vars (env vars take precedence)
            dingtalk_cfg = yaml_cfg.get("dingtalk", {})
@@ -938,12 +890,8 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
    slack_token = os.getenv("SLACK_BOT_TOKEN")
    if slack_token:
        if Platform.SLACK not in config.platforms:
-            # No yaml config for Slack — env-only setup, enable it
            config.platforms[Platform.SLACK] = PlatformConfig()
-            config.platforms[Platform.SLACK].enabled = True
-        # If yaml config exists, respect its enabled flag (don't override
-        # explicit enabled: false). Token is still stored so skills that
-        # send Slack messages can use it without activating the gateway adapter.
+        config.platforms[Platform.SLACK].enabled = True
        config.platforms[Platform.SLACK].token = slack_token
    slack_home = os.getenv("SLACK_HOME_CHANNEL")
    if slack_home and Platform.SLACK in config.platforms:
@@ -1289,6 +1237,7 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            if legacy_home:
                qq_home = legacy_home
                qq_home_name_env = "QQ_HOME_CHANNEL_NAME"
+                import logging
                logging.getLogger(__name__).warning(
                    "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL "
                    "in your .env for consistency with the platform key."
@@ -1300,48 +1249,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
            )

-    # Yuanbao — YUANBAO_APP_ID preferred
-    yuanbao_app_id = os.getenv("YUANBAO_APP_ID") or os.getenv("YUANBAO_APP_KEY")
-    yuanbao_app_secret = os.getenv("YUANBAO_APP_SECRET")
-    if yuanbao_app_id and yuanbao_app_secret:
-        if Platform.YUANBAO not in config.platforms:
-            config.platforms[Platform.YUANBAO] = PlatformConfig()
-        config.platforms[Platform.YUANBAO].enabled = True
-        extra = config.platforms[Platform.YUANBAO].extra
-        extra["app_id"] = yuanbao_app_id
-        extra["app_secret"] = yuanbao_app_secret
-        yuanbao_bot_id = os.getenv("YUANBAO_BOT_ID")
-        if yuanbao_bot_id:
-            extra["bot_id"] = yuanbao_bot_id
-        yuanbao_ws_url = os.getenv("YUANBAO_WS_URL")
-        if yuanbao_ws_url:
-            extra["ws_url"] = yuanbao_ws_url
-        yuanbao_api_domain = os.getenv("YUANBAO_API_DOMAIN")
-        if yuanbao_api_domain:
-            extra["api_domain"] = yuanbao_api_domain
-        yuanbao_route_env = os.getenv("YUANBAO_ROUTE_ENV")
-        if yuanbao_route_env:
-            extra["route_env"] = yuanbao_route_env
-        yuanbao_home = os.getenv("YUANBAO_HOME_CHANNEL")
-        if yuanbao_home:
-            config.platforms[Platform.YUANBAO].home_channel = HomeChannel(
-                platform=Platform.YUANBAO,
-                chat_id=yuanbao_home,
-                name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
-            )
-        yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
-        if yuanbao_dm_policy:
-            extra["dm_policy"] = yuanbao_dm_policy.strip().lower()
-        yuanbao_dm_allow_from = os.getenv("YUANBAO_DM_ALLOW_FROM")
-        if yuanbao_dm_allow_from:
-            extra["dm_allow_from"] = yuanbao_dm_allow_from
-        yuanbao_group_policy = os.getenv("YUANBAO_GROUP_POLICY")
-        if yuanbao_group_policy:
-            extra["group_policy"] = yuanbao_group_policy.strip().lower()
-        yuanbao_group_allow_from = os.getenv("YUANBAO_GROUP_ALLOW_FROM")
-        if yuanbao_group_allow_from:
-            extra["group_allow_from"] = yuanbao_group_allow_from
-
    # Session settings
    idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
    if idle_minutes:
@@ -79,9 +79,7 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
    "discord":     _TIER_HIGH,

    # Tier 2 — edit support, often customer/workspace channels
-    # Slack: tool_progress off by default — Bolt posts cannot be edited like CLI;
-    # "new"/"all" spam permanent lines in channels (hermes-agent#14663).
-    "slack":           {**_TIER_MEDIUM, "tool_progress": "off"},
+    "slack":           _TIER_MEDIUM,
    "mattermost":      _TIER_MEDIUM,
    "matrix":          _TIER_MEDIUM,
    "feishu":          _TIER_MEDIUM,
@@ -135,22 +135,9 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

-    def _resolve_handlers(self, event_type: str) -> List[Callable]:
-        """Return all handlers that should fire for ``event_type``.
-
-        Exact matches fire first, followed by wildcard matches (e.g.
-        ``command:*`` matches ``command:reset``).
-        """
-        handlers = list(self._handlers.get(event_type, []))
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-        return handlers
-
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event, discarding return values.
+        Fire all handlers registered for an event.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -164,7 +151,16 @@ class HookRegistry:
        if context is None:
            context = {}

-        for fn in self._resolve_handlers(event_type):
+        # Collect handlers: exact match + wildcard match
+        handlers = list(self._handlers.get(event_type, []))
+
+        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+
+        for fn in handlers:
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -172,32 +168,3 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
-
-    async def emit_collect(
-        self,
-        event_type: str,
-        context: Optional[Dict[str, Any]] = None,
-    ) -> List[Any]:
-        """Fire handlers and return their non-None return values in order.
-
-        Like :meth:`emit` but captures each handler's return value. Used for
-        decision-style hooks (e.g. ``command:<name>`` policies that want to
-        allow/deny/rewrite the command before normal dispatch).
-
-        Exceptions from individual handlers are logged but do not abort the
-        remaining handlers.
-        """
-        if context is None:
-            context = {}
-
-        results: List[Any] = []
-        for fn in self._resolve_handlers(event_type):
-            try:
-                result = fn(event_type, context)
-                if asyncio.iscoroutine(result):
-                    result = await result
-                if result is not None:
-                    results.append(result)
-            except Exception as e:
-                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
-        return results
@@ -28,7 +28,6 @@ def mirror_to_session(
    message_text: str,
    source_label: str = "cli",
    thread_id: Optional[str] = None,
-    user_id: Optional[str] = None,
 ) -> bool:
    """
    Append a delivery-mirror message to the target session's transcript.
@@ -40,20 +39,9 @@ def mirror_to_session(
    All errors are caught -- this is never fatal.
    """
    try:
-        session_id = _find_session_id(
-            platform,
-            str(chat_id),
-            thread_id=thread_id,
-            user_id=user_id,
-        )
+        session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
        if not session_id:
-            logger.debug(
-                "Mirror: no session found for %s:%s:%s:%s",
-                platform,
-                chat_id,
-                thread_id,
-                user_id,
-            )
+            logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
            return False

        mirror_msg = {
@@ -71,33 +59,17 @@ def mirror_to_session(
        return True

    except Exception as e:
-        logger.debug(
-            "Mirror failed for %s:%s:%s:%s: %s",
-            platform,
-            chat_id,
-            thread_id,
-            user_id,
-            e,
-        )
+        logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
        return False


-def _find_session_id(
-    platform: str,
-    chat_id: str,
-    thread_id: Optional[str] = None,
-    user_id: Optional[str] = None,
-) -> Optional[str]:
+def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
    """
    Find the active session_id for a platform + chat_id pair.

    Scans sessions.json entries and matches where origin.chat_id == chat_id
    on the right platform.  DM session keys don't embed the chat_id
    (e.g. "agent:main:telegram:dm"), so we check the origin dict.
-
-    When *user_id* is provided, prefer exact sender matches. If multiple
-    same-chat candidates exist and none matches the user, return None instead
-    of guessing and contaminating another participant's session.
    """
    if not _SESSIONS_INDEX.exists():
        return None
@@ -109,7 +81,8 @@ def _find_session_id(
        return None

    platform_lower = platform.lower()
-    candidates = []
+    best_match = None
+    best_updated = ""

    for _key, entry in data.items():
        origin = entry.get("origin") or {}
@@ -123,31 +96,12 @@ def _find_session_id(
            origin_thread_id = origin.get("thread_id")
            if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
                continue
-            candidates.append(entry)
+            updated = entry.get("updated_at", "")
+            if updated > best_updated:
+                best_updated = updated
+                best_match = entry.get("session_id")

-    if not candidates:
-        return None
-
-    if user_id:
-        exact_user_matches = [
-            entry for entry in candidates
-            if str((entry.get("origin") or {}).get("user_id") or "") == str(user_id)
-        ]
-        if exact_user_matches:
-            candidates = exact_user_matches
-        elif len(candidates) > 1:
-            return None
-    elif len(candidates) > 1:
-        distinct_user_ids = {
-            str((entry.get("origin") or {}).get("user_id") or "").strip()
-            for entry in candidates
-            if str((entry.get("origin") or {}).get("user_id") or "").strip()
-        }
-        if len(distinct_user_ids) > 1:
-            return None
-
-    best_entry = max(candidates, key=lambda entry: entry.get("updated_at", ""))
-    return best_entry.get("session_id")
+    return best_match


 def _append_to_jsonl(session_id: str, message: dict) -> None:
@@ -10,12 +10,10 @@ Each adapter handles:

 from .base import BasePlatformAdapter, MessageEvent, SendResult
 from .qqbot import QQAdapter
-from .yuanbao import YuanbaoAdapter

 __all__ = [
    "BasePlatformAdapter",
    "MessageEvent",
    "SendResult",
    "QQAdapter",
-    "YuanbaoAdapter",
 ]
@@ -9,7 +9,6 @@ Exposes an HTTP server with endpoints:
 - GET  /v1/models                  — lists hermes-agent as an available model
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
- POST /v1/runs/{run_id}/stop    — interrupt a running agent
 - GET  /health                     — health check
 - GET  /health/detailed            — rich status for cross-container dashboard probing

@@ -118,160 +117,6 @@ def _normalize_chat_content(
        return ""


-# Content part type aliases used by the OpenAI Chat Completions and Responses
-# APIs.  We accept both spellings on input and emit a single canonical internal
-# shape (``{"type": "text", ...}`` / ``{"type": "image_url", ...}``) that the
-# rest of the agent pipeline already understands.
-_TEXT_PART_TYPES = frozenset({"text", "input_text", "output_text"})
-_IMAGE_PART_TYPES = frozenset({"image_url", "input_image"})
-_FILE_PART_TYPES = frozenset({"file", "input_file"})
-
-
-def _normalize_multimodal_content(content: Any) -> Any:
-    """Validate and normalize multimodal content for the API server.
-
-    Returns a plain string when the content is text-only, or a list of
-    ``{"type": "text"|"image_url", ...}`` parts when images are present.
-    The output shape is the native OpenAI Chat Completions vision format,
-    which the agent pipeline accepts verbatim (OpenAI-wire providers) or
-    converts (``_preprocess_anthropic_content`` for Anthropic).
-
-    Raises ``ValueError`` with an OpenAI-style code on invalid input:
-      * ``unsupported_content_type`` — file/input_file/file_id parts, or
-        non-image ``data:`` URLs.
-      * ``invalid_image_url`` — missing URL or unsupported scheme.
-      * ``invalid_content_part`` — malformed text/image objects.
-
-    Callers translate the ValueError into a 400 response.
-    """
-    # Scalar passthrough mirrors ``_normalize_chat_content``.
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content[:MAX_NORMALIZED_TEXT_LENGTH] if len(content) > MAX_NORMALIZED_TEXT_LENGTH else content
-    if not isinstance(content, list):
-        # Mirror the legacy text-normalizer's fallback so callers that
-        # pre-existed image support still get a string back.
-        return _normalize_chat_content(content)
-
-    items = content[:MAX_CONTENT_LIST_SIZE] if len(content) > MAX_CONTENT_LIST_SIZE else content
-    normalized_parts: List[Dict[str, Any]] = []
-    text_accum_len = 0
-
-    for part in items:
-        if isinstance(part, str):
-            if part:
-                trimmed = part[:MAX_NORMALIZED_TEXT_LENGTH]
-                normalized_parts.append({"type": "text", "text": trimmed})
-                text_accum_len += len(trimmed)
-            continue
-
-        if not isinstance(part, dict):
-            # Ignore unknown scalars for forward compatibility with future
-            # Responses API additions (e.g. ``refusal``).  The same policy
-            # the text normalizer applies.
-            continue
-
-        raw_type = part.get("type")
-        part_type = str(raw_type or "").strip().lower()
-
-        if part_type in _TEXT_PART_TYPES:
-            text = part.get("text")
-            if text is None:
-                continue
-            if not isinstance(text, str):
-                text = str(text)
-            if text:
-                trimmed = text[:MAX_NORMALIZED_TEXT_LENGTH]
-                normalized_parts.append({"type": "text", "text": trimmed})
-                text_accum_len += len(trimmed)
-            continue
-
-        if part_type in _IMAGE_PART_TYPES:
-            detail = part.get("detail")
-            image_ref = part.get("image_url")
-            # OpenAI Responses sends ``input_image`` with a top-level
-            # ``image_url`` string; Chat Completions sends ``image_url`` as
-            # ``{"url": "...", "detail": "..."}``.  Support both.
-            if isinstance(image_ref, dict):
-                url_value = image_ref.get("url")
-                detail = image_ref.get("detail", detail)
-            else:
-                url_value = image_ref
-            if not isinstance(url_value, str) or not url_value.strip():
-                raise ValueError("invalid_image_url:Image parts must include a non-empty image URL.")
-            url_value = url_value.strip()
-            lowered = url_value.lower()
-            if lowered.startswith("data:"):
-                if not lowered.startswith("data:image/") or "," not in url_value:
-                    raise ValueError(
-                        "unsupported_content_type:Only image data URLs are supported. "
-                        "Non-image data payloads are not supported."
-                    )
-            elif not (lowered.startswith("http://") or lowered.startswith("https://")):
-                raise ValueError(
-                    "invalid_image_url:Image inputs must use http(s) URLs or data:image/... URLs."
-                )
-            image_part: Dict[str, Any] = {"type": "image_url", "image_url": {"url": url_value}}
-            if detail is not None:
-                if not isinstance(detail, str) or not detail.strip():
-                    raise ValueError("invalid_content_part:Image detail must be a non-empty string when provided.")
-                image_part["image_url"]["detail"] = detail.strip()
-            normalized_parts.append(image_part)
-            continue
-
-        if part_type in _FILE_PART_TYPES:
-            raise ValueError(
-                "unsupported_content_type:Inline image inputs are supported, "
-                "but uploaded files and document inputs are not supported on this endpoint."
-            )
-
-        # Unknown part type — reject explicitly so clients get a clear error
-        # instead of a silently dropped turn.
-        raise ValueError(
-            f"unsupported_content_type:Unsupported content part type {raw_type!r}. "
-            "Only text and image_url/input_image parts are supported."
-        )
-
-    if not normalized_parts:
-        return ""
-
-    # Text-only: collapse to a plain string so downstream logging/trajectory
-    # code sees the native shape and prompt caching on text-only turns is
-    # unaffected.
-    if all(p.get("type") == "text" for p in normalized_parts):
-        return "\n".join(p["text"] for p in normalized_parts if p.get("text"))
-
-    return normalized_parts
-
-
-def _content_has_visible_payload(content: Any) -> bool:
-    """True when content has any text or image attachment.  Used to reject empty turns."""
-    if isinstance(content, str):
-        return bool(content.strip())
-    if isinstance(content, list):
-        for part in content:
-            if isinstance(part, dict):
-                ptype = str(part.get("type") or "").strip().lower()
-                if ptype in _TEXT_PART_TYPES and str(part.get("text") or "").strip():
-                    return True
-                if ptype in _IMAGE_PART_TYPES:
-                    return True
-    return False
-
-
-def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Response":
-    """Translate a ``_normalize_multimodal_content`` ValueError into a 400 response."""
-    raw = str(exc)
-    code, _, message = raw.partition(":")
-    if not message:
-        code, message = "invalid_content_part", raw
-    return web.json_response(
-        _openai_error(message, code=code, param=param),
-        status=400,
-    )
-
-
 def check_api_server_requirements() -> bool:
    """Check if API server dependencies are available."""
    return AIOHTTP_AVAILABLE
@@ -324,6 +169,7 @@ class ResponseStore:
        ).fetchone()
        if row is None:
            return None
+        import time
        self._conn.execute(
            "UPDATE responses SET accessed_at = ? WHERE response_id = ?",
            (time.time(), response_id),
@@ -333,6 +179,7 @@ class ResponseStore:

    def put(self, response_id: str, data: Dict[str, Any]) -> None:
        """Store a response, evicting the oldest if at capacity."""
+        import time
        self._conn.execute(
            "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)",
            (response_id, json.dumps(data, default=str), time.time()),
@@ -468,12 +315,12 @@ class _IdempotencyCache:
    def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
        from collections import OrderedDict
        self._store = OrderedDict()
-        self._inflight: Dict[tuple[str, str], "asyncio.Task[Any]"] = {}
        self._ttl = ttl_seconds
        self._max = max_items

    def _purge(self):
-        now = time.time()
+        import time as _t
+        now = _t.time()
        expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
        for k in expired:
            self._store.pop(k, None)
@@ -485,27 +332,11 @@ class _IdempotencyCache:
        item = self._store.get(key)
        if item and item["fp"] == fingerprint:
            return item["resp"]
-
-        inflight_key = (key, fingerprint)
-        task = self._inflight.get(inflight_key)
-        if task is None:
-            async def _compute_and_store():
-                resp = await compute_coro()
-                import time as _t
-                self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
-                self._purge()
-                return resp
-
-            task = asyncio.create_task(_compute_and_store())
-            self._inflight[inflight_key] = task
-
-            def _clear_inflight(done_task: "asyncio.Task[Any]") -> None:
-                if self._inflight.get(inflight_key) is done_task:
-                    self._inflight.pop(inflight_key, None)
-
-            task.add_done_callback(_clear_inflight)
-
-        return await asyncio.shield(task)
+        resp = await compute_coro()
+        import time as _t
+        self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
+        self._purge()
+        return resp


 _idem_cache = _IdempotencyCache()
@@ -535,30 +366,6 @@ def _derive_chat_session_id(
    return f"api-{digest}"


-_CRON_AVAILABLE = False
-try:
-    from cron.jobs import (
-        list_jobs as _cron_list,
-        get_job as _cron_get,
-        create_job as _cron_create,
-        update_job as _cron_update,
-        remove_job as _cron_remove,
-        pause_job as _cron_pause,
-        resume_job as _cron_resume,
-        trigger_job as _cron_trigger,
-    )
-    _CRON_AVAILABLE = True
-except ImportError:
-    _cron_list = None
-    _cron_get = None
-    _cron_create = None
-    _cron_update = None
-    _cron_remove = None
-    _cron_pause = None
-    _cron_resume = None
-    _cron_trigger = None
-
-
 class APIServerAdapter(BasePlatformAdapter):
    """
    OpenAI-compatible HTTP API server adapter.
@@ -587,9 +394,6 @@ class APIServerAdapter(BasePlatformAdapter):
        self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
        # Creation timestamps for orphaned-run TTL sweep
        self._run_streams_created: Dict[str, float] = {}
-        # Active run agent/task references for stop support
-        self._active_run_agents: Dict[str, Any] = {}
-        self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
@@ -833,32 +637,26 @@ class APIServerAdapter(BasePlatformAdapter):
        system_prompt = None
        conversation_messages: List[Dict[str, str]] = []

-        for idx, msg in enumerate(messages):
+        for msg in messages:
            role = msg.get("role", "")
-            raw_content = msg.get("content", "")
+            content = _normalize_chat_content(msg.get("content", ""))
            if role == "system":
-                # System messages don't support images (Anthropic rejects, OpenAI
-                # text-model systems don't render them).  Flatten to text.
-                content = _normalize_chat_content(raw_content)
+                # Accumulate system messages
                if system_prompt is None:
                    system_prompt = content
                else:
                    system_prompt = system_prompt + "\n" + content
            elif role in ("user", "assistant"):
-                try:
-                    content = _normalize_multimodal_content(raw_content)
-                except ValueError as exc:
-                    return _multimodal_validation_error(exc, param=f"messages[{idx}].content")
                conversation_messages.append({"role": role, "content": content})

        # Extract the last user message as the primary input
-        user_message: Any = ""
+        user_message = ""
        history = []
        if conversation_messages:
            user_message = conversation_messages[-1].get("content", "")
            history = conversation_messages[:-1]

-        if not _content_has_visible_payload(user_message):
+        if not user_message:
            return web.json_response(
                {"error": {"message": "No user message found in messages", "type": "invalid_request_error"}},
                status=400,
@@ -1208,12 +1006,10 @@ class APIServerAdapter(BasePlatformAdapter):

        If the client disconnects mid-stream, ``agent.interrupt()`` is
        called so the agent stops issuing upstream LLM calls, then the
-        asyncio task is cancelled.  When ``store=True`` an initial
-        ``in_progress`` snapshot is persisted immediately after
-        ``response.created`` and disconnects update it to an
-        ``incomplete`` snapshot so GET /v1/responses/{id} and
-        ``previous_response_id`` chaining still have something to
-        recover from.
+        asyncio task is cancelled.  When ``store=True`` the full response
+        is persisted to the ResponseStore in a ``finally`` block so GET
+        /v1/responses/{id} and ``previous_response_id`` chaining work the
+        same as the batch path.
        """
        import queue as _q

@@ -1275,60 +1071,6 @@ class APIServerAdapter(BasePlatformAdapter):
        final_response_text = ""
        agent_error: Optional[str] = None
        usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
-        terminal_snapshot_persisted = False
-
-        def _persist_response_snapshot(
-            response_env: Dict[str, Any],
-            *,
-            conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
-        ) -> None:
-            if not store:
-                return
-            if conversation_history_snapshot is None:
-                conversation_history_snapshot = list(conversation_history)
-                conversation_history_snapshot.append({"role": "user", "content": user_message})
-            self._response_store.put(response_id, {
-                "response": response_env,
-                "conversation_history": conversation_history_snapshot,
-                "instructions": instructions,
-                "session_id": session_id,
-            })
-            if conversation:
-                self._response_store.set_conversation(conversation, response_id)
-
-        def _persist_incomplete_if_needed() -> None:
-            """Persist an ``incomplete`` snapshot if no terminal one was written.
-
-            Called from both the client-disconnect (``ConnectionResetError``)
-            and server-cancellation (``asyncio.CancelledError``) paths so
-            GET /v1/responses/{id} and ``previous_response_id`` chaining keep
-            working after abrupt stream termination.
-            """
-            if not store or terminal_snapshot_persisted:
-                return
-            incomplete_text = "".join(final_text_parts) or final_response_text
-            incomplete_items: List[Dict[str, Any]] = list(emitted_items)
-            if incomplete_text:
-                incomplete_items.append({
-                    "type": "message",
-                    "role": "assistant",
-                    "content": [{"type": "output_text", "text": incomplete_text}],
-                })
-            incomplete_env = _envelope("incomplete")
-            incomplete_env["output"] = incomplete_items
-            incomplete_env["usage"] = {
-                "input_tokens": usage.get("input_tokens", 0),
-                "output_tokens": usage.get("output_tokens", 0),
-                "total_tokens": usage.get("total_tokens", 0),
-            }
-            incomplete_history = list(conversation_history)
-            incomplete_history.append({"role": "user", "content": user_message})
-            if incomplete_text:
-                incomplete_history.append({"role": "assistant", "content": incomplete_text})
-            _persist_response_snapshot(
-                incomplete_env,
-                conversation_history_snapshot=incomplete_history,
-            )

        try:
            # response.created — initial envelope, status=in_progress
@@ -1338,7 +1080,6 @@ class APIServerAdapter(BasePlatformAdapter):
                "type": "response.created",
                "response": created_env,
            })
-            _persist_response_snapshot(created_env)
            last_activity = time.monotonic()

            async def _open_message_item() -> None:
@@ -1595,18 +1336,6 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
-                _failed_history = list(conversation_history)
-                _failed_history.append({"role": "user", "content": user_message})
-                if final_response_text or agent_error:
-                    _failed_history.append({
-                        "role": "assistant",
-                        "content": final_response_text or agent_error,
-                    })
-                _persist_response_snapshot(
-                    failed_env,
-                    conversation_history_snapshot=_failed_history,
-                )
-                terminal_snapshot_persisted = True
                await _write_event("response.failed", {
                    "type": "response.failed",
                    "response": failed_env,
@@ -1619,24 +1348,30 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
-                full_history = list(conversation_history)
-                full_history.append({"role": "user", "content": user_message})
-                if isinstance(result, dict) and result.get("messages"):
-                    full_history.extend(result["messages"])
-                else:
-                    full_history.append({"role": "assistant", "content": final_response_text})
-                _persist_response_snapshot(
-                    completed_env,
-                    conversation_history_snapshot=full_history,
-                )
-                terminal_snapshot_persisted = True
                await _write_event("response.completed", {
                    "type": "response.completed",
                    "response": completed_env,
                })

+                # Persist for future chaining / GET retrieval, mirroring
+                # the batch path behavior.
+                if store:
+                    full_history = list(conversation_history)
+                    full_history.append({"role": "user", "content": user_message})
+                    if isinstance(result, dict) and result.get("messages"):
+                        full_history.extend(result["messages"])
+                    else:
+                        full_history.append({"role": "assistant", "content": final_response_text})
+                    self._response_store.put(response_id, {
+                        "response": completed_env,
+                        "conversation_history": full_history,
+                        "instructions": instructions,
+                        "session_id": session_id,
+                    })
+                    if conversation:
+                        self._response_store.set_conversation(conversation, response_id)
+
        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
-            _persist_incomplete_if_needed()
            # Client disconnected — interrupt the agent so it stops
            # making upstream LLM calls, then cancel the task.
            agent = agent_ref[0] if agent_ref else None
@@ -1652,22 +1387,6 @@ class APIServerAdapter(BasePlatformAdapter):
                except (asyncio.CancelledError, Exception):
                    pass
            logger.info("SSE client disconnected; interrupted agent task %s", response_id)
-        except asyncio.CancelledError:
-            # Server-side cancellation (e.g. shutdown, request timeout) —
-            # persist an incomplete snapshot so GET /v1/responses/{id} and
-            # previous_response_id chaining still work, then re-raise so the
-            # runtime's cancellation semantics are respected.
-            _persist_incomplete_if_needed()
-            agent = agent_ref[0] if agent_ref else None
-            if agent is not None:
-                try:
-                    agent.interrupt("SSE task cancelled")
-                except Exception:
-                    pass
-            if not agent_task.done():
-                agent_task.cancel()
-            logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
-            raise

        return response

@@ -1705,19 +1424,16 @@ class APIServerAdapter(BasePlatformAdapter):
            # No error if conversation doesn't exist yet — it's a new conversation

        # Normalize input to message list
-        input_messages: List[Dict[str, Any]] = []
+        input_messages: List[Dict[str, str]] = []
        if isinstance(raw_input, str):
            input_messages = [{"role": "user", "content": raw_input}]
        elif isinstance(raw_input, list):
-            for idx, item in enumerate(raw_input):
+            for item in raw_input:
                if isinstance(item, str):
                    input_messages.append({"role": "user", "content": item})
                elif isinstance(item, dict):
                    role = item.get("role", "user")
-                    try:
-                        content = _normalize_multimodal_content(item.get("content", ""))
-                    except ValueError as exc:
-                        return _multimodal_validation_error(exc, param=f"input[{idx}].content")
+                    content = _normalize_chat_content(item.get("content", ""))
                    input_messages.append({"role": role, "content": content})
        else:
            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
@@ -1726,7 +1442,7 @@ class APIServerAdapter(BasePlatformAdapter):
        # This lets stateless clients supply their own history instead of
        # relying on server-side response chaining via previous_response_id.
        # Precedence: explicit conversation_history > previous_response_id.
-        conversation_history: List[Dict[str, Any]] = []
+        conversation_history: List[Dict[str, str]] = []
        raw_history = body.get("conversation_history")
        if raw_history:
            if not isinstance(raw_history, list):
@@ -1740,11 +1456,7 @@ class APIServerAdapter(BasePlatformAdapter):
                        _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"),
                        status=400,
                    )
-                try:
-                    entry_content = _normalize_multimodal_content(entry["content"])
-                except ValueError as exc:
-                    return _multimodal_validation_error(exc, param=f"conversation_history[{i}].content")
-                conversation_history.append({"role": str(entry["role"]), "content": entry_content})
+                conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])})
            if previous_response_id:
                logger.debug("Both conversation_history and previous_response_id provided; using conversation_history")

@@ -1764,8 +1476,8 @@ class APIServerAdapter(BasePlatformAdapter):
            conversation_history.append(msg)

        # Last input message is the user_message
-        user_message: Any = input_messages[-1].get("content", "") if input_messages else ""
-        if not _content_has_visible_payload(user_message):
+        user_message = input_messages[-1].get("content", "") if input_messages else ""
+        if not user_message:
            return web.json_response(_openai_error("No user message found in input"), status=400)

        # Truncation support
@@ -1970,16 +1682,44 @@ class APIServerAdapter(BasePlatformAdapter):
    # Cron jobs API
    # ------------------------------------------------------------------

+    # Check cron module availability once (not per-request)
+    _CRON_AVAILABLE = False
+    try:
+        from cron.jobs import (
+            list_jobs as _cron_list,
+            get_job as _cron_get,
+            create_job as _cron_create,
+            update_job as _cron_update,
+            remove_job as _cron_remove,
+            pause_job as _cron_pause,
+            resume_job as _cron_resume,
+            trigger_job as _cron_trigger,
+        )
+        # Wrap as staticmethod to prevent descriptor binding — these are plain
+        # module functions, not instance methods.  Without this, self._cron_*()
+        # injects ``self`` as the first positional argument and every call
+        # raises TypeError.
+        _cron_list = staticmethod(_cron_list)
+        _cron_get = staticmethod(_cron_get)
+        _cron_create = staticmethod(_cron_create)
+        _cron_update = staticmethod(_cron_update)
+        _cron_remove = staticmethod(_cron_remove)
+        _cron_pause = staticmethod(_cron_pause)
+        _cron_resume = staticmethod(_cron_resume)
+        _cron_trigger = staticmethod(_cron_trigger)
+        _CRON_AVAILABLE = True
+    except ImportError:
+        pass
+
    _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}")
    # Allowed fields for update — prevents clients injecting arbitrary keys
    _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"}
    _MAX_NAME_LENGTH = 200
    _MAX_PROMPT_LENGTH = 5000

-    @staticmethod
-    def _check_jobs_available() -> Optional["web.Response"]:
+    def _check_jobs_available(self) -> Optional["web.Response"]:
        """Return error response if cron module isn't available."""
-        if not _CRON_AVAILABLE:
+        if not self._CRON_AVAILABLE:
            return web.json_response(
                {"error": "Cron module not available"}, status=501,
            )
@@ -2004,7 +1744,7 @@ class APIServerAdapter(BasePlatformAdapter):
            return cron_err
        try:
            include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1")
-            jobs = _cron_list(include_disabled=include_disabled)
+            jobs = self._cron_list(include_disabled=include_disabled)
            return web.json_response({"jobs": jobs})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -2052,7 +1792,7 @@ class APIServerAdapter(BasePlatformAdapter):
            if repeat is not None:
                kwargs["repeat"] = repeat

-            job = _cron_create(**kwargs)
+            job = self._cron_create(**kwargs)
            return web.json_response({"job": job})
        except Exception as e:
            return web.json_response({"error": str(e)}, status=500)
@@ -2069,7 +1809,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_get(job_id)
+            job = self._cron_get(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2102,7 +1842,7 @@ class APIServerAdapter(BasePlatformAdapter):
                return web.json_response(
                    {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400,
                )
-            job = _cron_update(job_id, sanitized)
+            job = self._cron_update(job_id, sanitized)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2121,7 +1861,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            success = _cron_remove(job_id)
+            success = self._cron_remove(job_id)
            if not success:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"ok": True})
@@ -2140,7 +1880,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_pause(job_id)
+            job = self._cron_pause(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2159,7 +1899,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_resume(job_id)
+            job = self._cron_resume(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2178,7 +1918,7 @@ class APIServerAdapter(BasePlatformAdapter):
        if id_err:
            return id_err
        try:
-            job = _cron_trigger(job_id)
+            job = self._cron_trigger(job_id)
            if not job:
                return web.json_response({"error": "Job not found"}, status=404)
            return web.json_response({"job": job})
@@ -2445,7 +2185,6 @@ class APIServerAdapter(BasePlatformAdapter):
                    stream_delta_callback=_text_cb,
                    tool_progress_callback=event_cb,
                )
-                self._active_run_agents[run_id] = agent
                def _run_sync():
                    r = agent.run_conversation(
                        user_message=user_message,
@@ -2485,11 +2224,8 @@ class APIServerAdapter(BasePlatformAdapter):
                    q.put_nowait(None)
                except Exception:
                    pass
-                self._active_run_agents.pop(run_id, None)
-                self._active_run_tasks.pop(run_id, None)

        task = asyncio.create_task(_run_and_close())
-        self._active_run_tasks[run_id] = task
        try:
            self._background_tasks.add(task)
        except TypeError:
@@ -2548,44 +2284,6 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

-    async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
-        """POST /v1/runs/{run_id}/stop — interrupt a running agent."""
-        auth_err = self._check_auth(request)
-        if auth_err:
-            return auth_err
-
-        run_id = request.match_info["run_id"]
-        agent = self._active_run_agents.get(run_id)
-        task = self._active_run_tasks.get(run_id)
-
-        if agent is None and task is None:
-            return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
-
-        if agent is not None:
-            try:
-                agent.interrupt("Stop requested via API")
-            except Exception:
-                pass
-
-        if task is not None and not task.done():
-            task.cancel()
-            # Bounded wait: run_conversation() executes in the default
-            # executor thread which task.cancel() cannot preempt — we rely on
-            # agent.interrupt() above to break the loop. Cap the wait so a
-            # slow/unresponsive interrupt can't hang this handler.
-            try:
-                await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
-            except asyncio.TimeoutError:
-                logger.warning(
-                    "[api_server] stop for run %s timed out after 5s; "
-                    "agent may still be finishing the current step",
-                    run_id,
-                )
-            except (asyncio.CancelledError, Exception):
-                pass
-
-        return web.json_response({"run_id": run_id, "status": "stopping"})
-
    async def _sweep_orphaned_runs(self) -> None:
        """Periodically clean up run streams that were never consumed."""
        while True:
@@ -2600,8 +2298,6 @@ class APIServerAdapter(BasePlatformAdapter):
                logger.debug("[api_server] sweeping orphaned run %s", run_id)
                self._run_streams.pop(run_id, None)
                self._run_streams_created.pop(run_id, None)
-                self._active_run_agents.pop(run_id, None)
-                self._active_run_tasks.pop(run_id, None)

    # ------------------------------------------------------------------
    # BasePlatformAdapter interface
@@ -2637,7 +2333,6 @@ class APIServerAdapter(BasePlatformAdapter):
            # Structured event streaming
            self._app.router.add_post("/v1/runs", self._handle_runs)
            self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
-            self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
            # Start background sweep to clean up orphaned (unconsumed) run streams
            sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
            try:
@@ -19,8 +19,6 @@ import uuid
 from abc import ABC, abstractmethod
 from urllib.parse import urlsplit

-from utils import normalize_proxy_url
-
 logger = logging.getLogger(__name__)


@@ -148,102 +146,7 @@ def _detect_macos_system_proxy() -> str | None:
    return None


-def _split_host_port(value: str) -> tuple[str, int | None]:
-    raw = str(value or "").strip()
-    if not raw:
-        return "", None
-    if "://" in raw:
-        parsed = urlsplit(raw)
-        return (parsed.hostname or "").lower().rstrip("."), parsed.port
-    if raw.startswith("[") and "]" in raw:
-        host, _, rest = raw[1:].partition("]")
-        port = None
-        if rest.startswith(":") and rest[1:].isdigit():
-            port = int(rest[1:])
-        return host.lower().rstrip("."), port
-    if raw.count(":") == 1:
-        host, _, maybe_port = raw.rpartition(":")
-        if maybe_port.isdigit():
-            return host.lower().rstrip("."), int(maybe_port)
-    return raw.lower().strip("[]").rstrip("."), None
-
-
-def _no_proxy_entries() -> list[str]:
-    entries: list[str] = []
-    for key in ("NO_PROXY", "no_proxy"):
-        raw = os.environ.get(key, "")
-        entries.extend(part.strip() for part in raw.split(",") if part.strip())
-    return entries
-
-
-def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
-    token = str(entry or "").strip().lower()
-    if not token:
-        return False
-    if token == "*":
-        return True
-
-    token_host, token_port = _split_host_port(token)
-    if token_port is not None and port is not None and token_port != port:
-        return False
-    if token_port is not None and port is None:
-        return False
-    if not token_host:
-        return False
-
-    try:
-        network = ipaddress.ip_network(token_host, strict=False)
-        try:
-            return ipaddress.ip_address(host) in network
-        except ValueError:
-            return False
-    except ValueError:
-        pass
-
-    try:
-        token_ip = ipaddress.ip_address(token_host)
-        try:
-            return ipaddress.ip_address(host) == token_ip
-        except ValueError:
-            return False
-    except ValueError:
-        pass
-
-    if token_host.startswith("*."):
-        suffix = token_host[1:]
-        return host.endswith(suffix)
-    if token_host.startswith("."):
-        return host == token_host[1:] or host.endswith(token_host)
-    return host == token_host or host.endswith(f".{token_host}")
-
-
-def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
-    """Return True when NO_PROXY/no_proxy matches at least one target host.
-
-    Supports exact hosts, domain suffixes, wildcard suffixes, IP literals,
-    CIDR ranges, optional host:port entries, and ``*``.
-    """
-    entries = _no_proxy_entries()
-    if not entries or not target_hosts:
-        return False
-    if isinstance(target_hosts, str):
-        candidates = [target_hosts]
-    else:
-        candidates = list(target_hosts)
-    for candidate in candidates:
-        host, port = _split_host_port(str(candidate))
-        if not host:
-            continue
-        if any(_no_proxy_entry_matches(entry, host, port) for entry in entries):
-            return True
-    return False
-
-
-def resolve_proxy_url(
-    platform_env_var: str | None = None,
-    *,
-    target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
-) -> str | None:
+def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
    """Return a proxy URL from env vars, or macOS system proxy.

    Check order:
@@ -251,26 +154,18 @@ def resolve_proxy_url(
      1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
      2. macOS system proxy via ``scutil --proxy`` (auto-detect)

-    Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
-    of ``target_hosts``.
+    Returns *None* if no proxy is found.
    """
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
-            if should_bypass_proxy(target_hosts):
-                return None
-            return normalize_proxy_url(value)
+            return value
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
-            if should_bypass_proxy(target_hosts):
-                return None
-            return normalize_proxy_url(value)
-    detected = normalize_proxy_url(_detect_macos_system_proxy())
-    if detected and should_bypass_proxy(target_hosts):
-        return None
-    return detected
+            return value
+    return _detect_macos_system_proxy()


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -336,39 +231,6 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    return {}, {"proxy": proxy_url}


-def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
-    """Return True when ``hostname`` matches a ``NO_PROXY`` entry.
-
-    Supports comma- or whitespace-separated entries with optional leading dots
-    and ``*.`` wildcards, which match both the apex domain and subdomains.
-    """
-    raw = no_proxy_value
-    if raw is None:
-        raw = os.environ.get("NO_PROXY") or os.environ.get("no_proxy") or ""
-
-    raw = raw.strip()
-    if not raw:
-        return False
-
-    lower_hostname = hostname.lower()
-    for entry in re.split(r"[\s,]+", raw):
-        normalized = entry.strip().lower()
-        if not normalized:
-            continue
-        if normalized == "*":
-            return True
-
-        if normalized.startswith("*."):
-            normalized = normalized[2:]
-        elif normalized.startswith("."):
-            normalized = normalized[1:]
-
-        if lower_hostname == normalized or lower_hostname.endswith(f".{normalized}"):
-            return True
-
-    return False
-
-
 from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
@@ -529,9 +391,12 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

+    import asyncio
    import httpx
-    _log = logging.getLogger(__name__)
+    import logging as _logging
+    _log = _logging.getLogger(__name__)

+    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -549,6 +414,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                response.raise_for_status()
                return cache_image_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -564,6 +430,7 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
+    raise last_exc


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -643,9 +510,12 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
    if not is_safe_url(url):
        raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}")

+    import asyncio
    import httpx
-    _log = logging.getLogger(__name__)
+    import logging as _logging
+    _log = _logging.getLogger(__name__)

+    last_exc = None
    async with httpx.AsyncClient(
        timeout=30.0,
        follow_redirects=True,
@@ -663,6 +533,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                response.raise_for_status()
                return cache_audio_from_bytes(response.content, ext)
            except (httpx.TimeoutException, httpx.HTTPStatusError) as exc:
+                last_exc = exc
                if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429:
                    raise
                if attempt < retries:
@@ -678,39 +549,7 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-
-
-# ---------------------------------------------------------------------------
-# Video cache utilities
-#
-# Same pattern as image/audio cache -- videos from platforms are downloaded
-# here so the agent can reference them by local file path.
-# ---------------------------------------------------------------------------
-
-VIDEO_CACHE_DIR = get_hermes_dir("cache/videos", "video_cache")
-
-SUPPORTED_VIDEO_TYPES = {
-    ".mp4": "video/mp4",
-    ".mov": "video/quicktime",
-    ".webm": "video/webm",
-    ".mkv": "video/x-matroska",
-    ".avi": "video/x-msvideo",
-}
-
-
-def get_video_cache_dir() -> Path:
-    """Return the video cache directory, creating it if it doesn't exist."""
-    VIDEO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
-    return VIDEO_CACHE_DIR
-
-
-def cache_video_from_bytes(data: bytes, ext: str = ".mp4") -> str:
-    """Save raw video bytes to the cache and return the absolute file path."""
-    cache_dir = get_video_cache_dir()
-    filename = f"video_{uuid.uuid4().hex[:12]}{ext}"
-    filepath = cache_dir / filename
-    filepath.write_bytes(data)
-    return str(filepath)
+    raise last_exc


 # ---------------------------------------------------------------------------
@@ -726,15 +565,7 @@ SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
    ".md": "text/markdown",
    ".txt": "text/plain",
-    ".csv": "text/csv",
    ".log": "text/plain",
-    ".json": "application/json",
-    ".xml": "application/xml",
-    ".yaml": "application/yaml",
-    ".yml": "application/yaml",
-    ".toml": "application/toml",
-    ".ini": "text/plain",
-    ".cfg": "text/plain",
    ".zip": "application/zip",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
@@ -896,10 +727,7 @@ class MessageEvent:
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
-        args = parts[1] if len(parts) > 1 else ""
-        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
-        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
-        return args
+        return parts[1] if len(parts) > 1 else ""


@dataclass 
@@ -1023,61 +851,6 @@ def resolve_channel_prompt(
    return None


-def resolve_channel_skills(
-    config_extra: dict,
-    channel_id: str,
-    parent_id: str | None = None,
-) -> list[str] | None:
-    """Resolve auto-loaded skill(s) for a channel/thread from platform config.
-
-    Looks up ``channel_skill_bindings`` in the adapter's ``config.extra`` dict.
-
-    Config format::
-
-        channel_skill_bindings:
-          - id: "C0123"          # Slack channel ID or Discord channel/forum ID
-            skills: ["skill-a", "skill-b"]
-          - id: "D0ABCDE"
-            skill: "solo-skill"  # single string also accepted
-
-    Prefers an exact match on *channel_id*; falls back to *parent_id*
-    (useful for forum threads / Slack threads inheriting the parent channel's
-    binding).
-
-    Returns a deduplicated list of skill names (order preserved), or None if
-    no match is found.
-    """
-    bindings = config_extra.get("channel_skill_bindings") or []
-    if not isinstance(bindings, list) or not bindings:
-        return None
-    ids_to_check: set[str] = set()
-    if channel_id:
-        ids_to_check.add(str(channel_id))
-    if parent_id:
-        ids_to_check.add(str(parent_id))
-    if not ids_to_check:
-        return None
-    for entry in bindings:
-        if not isinstance(entry, dict):
-            continue
-        entry_id = str(entry.get("id", ""))
-        if entry_id in ids_to_check:
-            skills = entry.get("skills") or entry.get("skill")
-            if isinstance(skills, str):
-                s = skills.strip()
-                return [s] if s else None
-            if isinstance(skills, list) and skills:
-                seen: list[str] = []
-                for name in skills:
-                    if not isinstance(name, str):
-                        continue
-                    nm = name.strip()
-                    if nm and nm not in seen:
-                        seen.append(nm)
-                return seen or None
-    return None
-
-
 class BasePlatformAdapter(ABC):
    """
    Base class for platform adapters.
@@ -1099,16 +872,10 @@ class BasePlatformAdapter(ABC):
        self._fatal_error_retryable = True
        self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
        
-        # Track active message handlers per session for interrupt support.
-        # _active_sessions stores the per-session interrupt Event; _session_tasks
-        # maps session → the specific Task currently processing it so that
-        # session-terminating commands (/stop, /new, /reset) can cancel the
-        # right task and release the adapter-level guard deterministically.
-        # Without the owner-task map, an old task's finally block could delete
-        # a newer task's guard, leaving stale busy state.
+        # Track active message handlers per session for interrupt support
+        # Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
-        self._session_tasks: Dict[str, asyncio.Task] = {}
        # Background message-processing tasks spawned by handle_message().
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
@@ -1121,20 +888,7 @@ class BasePlatformAdapter(ABC):
        self._post_delivery_callbacks: Dict[str, Any] = {}
        self._expected_cancelled_tasks: set[asyncio.Task] = set()
        self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
-        # Auto-TTS on voice input: ``_auto_tts_default`` is the global default
-        # (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
-        # Per-chat overrides live in two sets populated from ``_voice_mode``:
-        #   - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
-        #     or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
-        #     the global default is False.
-        #   - ``_auto_tts_disabled_chats``: chat explicitly opted out via
-        #     ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
-        #     global default is True.
-        # The gate in _process_message() is:
-        #   fire if chat in _auto_tts_enabled_chats
-        #     OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
-        self._auto_tts_default: bool = False
-        self._auto_tts_enabled_chats: set = set()
+        # Chats where auto-TTS on voice input is disabled (set by /voice off)
        self._auto_tts_disabled_chats: set = set()
        # Chats where typing indicator is paused (e.g. during approval waits).
        # _keep_typing skips send_typing when the chat_id is in this set.
@@ -1156,21 +910,6 @@ class BasePlatformAdapter(ABC):
    def fatal_error_retryable(self) -> bool:
        return self._fatal_error_retryable

-    def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
-        """Whether auto-TTS on voice input should fire for ``chat_id``.
-
-        Decision layers (Issue #16007):
-          1. Explicit ``/voice on`` or ``/voice tts`` → always fire (even if
-             ``voice.auto_tts`` is False).
-          2. Explicit ``/voice off`` → never fire.
-          3. Fall back to the global ``voice.auto_tts`` config default.
-        """
-        if chat_id in self._auto_tts_enabled_chats:
-            return True
-        if chat_id in self._auto_tts_disabled_chats:
-            return False
-        return bool(self._auto_tts_default)
-
    def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
        self._fatal_error_handler = handler

@@ -1354,27 +1093,6 @@ class BasePlatformAdapter(ABC):
        """
        return SendResult(success=False, error="Not supported")

-    async def delete_message(
-        self,
-        chat_id: str,
-        message_id: str,
-    ) -> bool:
-        """
-        Delete a previously sent message.  Optional — platforms that don't
-        support deletion return ``False`` and callers fall back to leaving
-        the message in place.
-
-        Used by the stream consumer's fresh-final cleanup path (see
-        openclaw/openclaw#72038) to remove long-lived preview messages
-        after sending the completed reply as a fresh message so the
-        platform's visible timestamp reflects completion time.
-
-        Returns ``True`` on successful deletion, ``False`` otherwise.
-        Subclasses should override for platforms with a deletion API
-        (e.g. Telegram ``deleteMessage``).
-        """
-        return False
-
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
@@ -1600,7 +1318,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1934,222 +1652,6 @@ class BasePlatformAdapter(ABC):
            return f"{existing_text}\n\n{new_text}".strip()
        return existing_text

-    # ------------------------------------------------------------------
-    # Session task + guard ownership helpers
-    # ------------------------------------------------------------------
-    # These were introduced together with the _session_tasks owner map to
-    # make session lifecycle reconciliation deterministic across (a) the
-    # normal completion path, (b) /stop/ /new/ /reset bypass commands,
-    # and (c) stale-lock self-heal on the next inbound message.
-
-    def _release_session_guard(
-        self,
-        session_key: str,
-        *,
-        guard: Optional[asyncio.Event] = None,
-    ) -> None:
-        """Release the adapter-level guard for a session.
-
-        When ``guard`` is provided, only release the entry if it still points
-        at that exact Event.  This lets reset-like commands swap in a temporary
-        guard while the old processing task unwinds, without having the old
-        task's cleanup accidentally clear the replacement guard.
-        """
-        current_guard = self._active_sessions.get(session_key)
-        if current_guard is None:
-            return
-        if guard is not None and current_guard is not guard:
-            return
-        del self._active_sessions[session_key]
-
-    def _session_task_is_stale(self, session_key: str) -> bool:
-        """Return True if the owner task for ``session_key`` is done/cancelled.
-
-        A lock is "stale" when the adapter still has ``_active_sessions[key]``
-        AND a known owner task in ``_session_tasks`` that has already exited.
-        When there is no owner task at all, that usually means the guard was
-        installed by some path other than handle_message() (tests sometimes
-        install guards directly) — don't treat that as stale.  The on-entry
-        self-heal only needs to handle the production split-brain case where
-        an owner task was recorded, then exited without clearing its guard.
-        """
-        task = self._session_tasks.get(session_key)
-        if task is None:
-            return False
-        done = getattr(task, "done", None)
-        return bool(done and done())
-
-    def _heal_stale_session_lock(self, session_key: str) -> bool:
-        """Clear a stale session lock if the owner task is already gone.
-
-        Returns True if a stale lock was healed.  Returns False if there is
-        no lock, or the owner task is still alive (the normal busy case).
-
-        This is the on-entry safety net sidbin's issue #11016 analysis calls
-        for: without it, a split-brain — adapter still thinks the session is
-        active, but nothing is actually processing — traps the chat in
-        infinite "Interrupting current task..." until the gateway is
-        restarted.
-        """
-        if session_key not in self._active_sessions:
-            return False
-        if not self._session_task_is_stale(session_key):
-            return False
-        logger.warning(
-            "[%s] Healing stale session lock for %s (owner task is done/absent)",
-            self.name,
-            session_key,
-        )
-        self._active_sessions.pop(session_key, None)
-        self._pending_messages.pop(session_key, None)
-        self._session_tasks.pop(session_key, None)
-        return True
-
-    def _start_session_processing(
-        self,
-        event: MessageEvent,
-        session_key: str,
-        *,
-        interrupt_event: Optional[asyncio.Event] = None,
-    ) -> bool:
-        """Spawn a background processing task under the given session guard.
-
-        Returns True on success.  If the runtime stubs ``create_task`` with a
-        non-Task sentinel (some tests do this), the guard is rolled back and
-        False is returned so the caller isn't left holding a half-installed
-        session lock.
-        """
-        guard = interrupt_event or asyncio.Event()
-        self._active_sessions[session_key] = guard
-
-        task = asyncio.create_task(self._process_message_background(event, session_key))
-        self._session_tasks[session_key] = task
-        try:
-            self._background_tasks.add(task)
-        except TypeError:
-            # Tests stub create_task() with lightweight sentinels that are not
-            # hashable and do not support lifecycle callbacks.
-            self._session_tasks.pop(session_key, None)
-            self._release_session_guard(session_key, guard=guard)
-            return False
-        if hasattr(task, "add_done_callback"):
-            task.add_done_callback(self._background_tasks.discard)
-            task.add_done_callback(self._expected_cancelled_tasks.discard)
-        return True
-
-    async def cancel_session_processing(
-        self,
-        session_key: str,
-        *,
-        release_guard: bool = True,
-        discard_pending: bool = True,
-    ) -> None:
-        """Cancel in-flight processing for a single session.
-
-        ``release_guard=False`` keeps the adapter-level session guard in place
-        so reset-like commands can finish atomically before follow-up messages
-        are allowed to start a fresh background task.
-        """
-        task = self._session_tasks.pop(session_key, None)
-        if task is not None and not task.done():
-            logger.debug(
-                "[%s] Cancelling active processing for session %s",
-                self.name,
-                session_key,
-            )
-            self._expected_cancelled_tasks.add(task)
-            task.cancel()
-            try:
-                await task
-            except asyncio.CancelledError:
-                pass
-            except Exception:
-                logger.debug(
-                    "[%s] Session cancellation raised while unwinding %s",
-                    self.name,
-                    session_key,
-                    exc_info=True,
-                )
-        if discard_pending:
-            self._pending_messages.pop(session_key, None)
-        if release_guard:
-            self._release_session_guard(session_key)
-
-    async def _drain_pending_after_session_command(
-        self,
-        session_key: str,
-        command_guard: asyncio.Event,
-    ) -> None:
-        """Resume the latest queued follow-up once a session command completes.
-
-        Called at the tail of /stop, /new, and /reset dispatch.  Releases the
-        command-scoped guard, then — if a follow-up message landed while the
-        command was running — spawns a fresh processing task for it.
-        """
-        pending_event = self._pending_messages.pop(session_key, None)
-        self._release_session_guard(session_key, guard=command_guard)
-        if pending_event is None:
-            return
-        self._start_session_processing(pending_event, session_key)
-
-    async def _dispatch_active_session_command(
-        self,
-        event: MessageEvent,
-        session_key: str,
-        cmd: str,
-    ) -> None:
-        """Dispatch a reset-like bypass command while preserving guard ordering.
-
-        /stop, /new, and /reset must:
-          1. Keep the session guard installed while the runner processes the
-             command (so a racing follow-up message stays queued, not
-             dispatched as a second parallel run).
-          2. Cancel the old in-flight adapter task only AFTER the runner has
-             finished handling the command (so the runner sees consistent
-             state and its response is sent in order).
-          3. Release the command-scoped guard and drain the latest queued
-             follow-up exactly once, after 1 and 2 complete.
-        """
-        logger.debug(
-            "[%s] Command '/%s' bypassing active-session guard for %s",
-            self.name,
-            cmd,
-            session_key,
-        )
-
-        current_guard = self._active_sessions.get(session_key)
-        command_guard = asyncio.Event()
-        self._active_sessions[session_key] = command_guard
-        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-
-        try:
-            response = await self._message_handler(event)
-            # Old adapter task (if any) is cancelled AFTER the runner has
-            # fully handled the command — keeps ordering deterministic.
-            await self.cancel_session_processing(
-                session_key,
-                release_guard=False,
-                discard_pending=False,
-            )
-            if response:
-                await self._send_with_retry(
-                    chat_id=event.source.chat_id,
-                    content=response,
-                    reply_to=event.message_id,
-                    metadata=thread_meta,
-                )
-        except Exception:
-            # On failure, restore the original guard if one still exists so
-            # we don't leave the session in a half-reset state.
-            if self._active_sessions.get(session_key) is command_guard:
-                if session_key in self._session_tasks and current_guard is not None:
-                    self._active_sessions[session_key] = current_guard
-                else:
-                    self._release_session_guard(session_key, guard=command_guard)
-            raise
-
-        await self._drain_pending_after_session_command(session_key, command_guard)
-
    async def handle_message(self, event: MessageEvent) -> None:
        """
        Process an incoming message.
@@ -2166,15 +1668,7 @@ class BasePlatformAdapter(ABC):
            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
        )
-
-        # On-entry self-heal: if the adapter still has an _active_sessions
-        # entry for this key but the owner task has already exited (done or
-        # cancelled), the lock is stale.  Clear it and fall through to
-        # normal dispatch so the user isn't trapped behind a dead guard —
-        # this is the split-brain tail described in issue #11016.
-        if session_key in self._active_sessions:
-            self._heal_stale_session_lock(session_key)
-
+        
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
            # Certain commands must bypass the active-session guard and be
@@ -2191,23 +1685,6 @@ class BasePlatformAdapter(ABC):
            from hermes_cli.commands import should_bypass_active_session

            if should_bypass_active_session(cmd):
-                # /stop, /new, /reset must cancel the in-flight adapter task
-                # and preserve ordering of queued follow-ups.  Route those
-                # through the dedicated handoff path that serializes
-                # cancellation + runner response + pending drain.
-                if cmd in ("stop", "new", "reset"):
-                    try:
-                        await self._dispatch_active_session_command(event, session_key, cmd)
-                    except Exception as e:
-                        logger.error(
-                            "[%s] Command '/%s' dispatch failed: %s",
-                            self.name, cmd, e, exc_info=True,
-                        )
-                    return
-
-                # Other bypass commands (/approve, /deny, /status,
-                # /background, /restart) just need direct dispatch — they
-                # don't cancel the running task.
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
@@ -2253,9 +1730,19 @@ class BasePlatformAdapter(ABC):
        # starts would also pass the _active_sessions check and spawn a
        # duplicate task.  (grammY sequentialize / aiogram EventIsolation
        # pattern — set the guard synchronously, not inside the task.)
-        # _start_session_processing installs the guard AND the owner-task
-        # mapping atomically so stale-lock detection works.
-        self._start_session_processing(event, session_key)
+        self._active_sessions[session_key] = asyncio.Event()
+
+        # Spawn background task to process this message
+        task = asyncio.create_task(self._process_message_background(event, session_key))
+        try:
+            self._background_tasks.add(task)
+        except TypeError:
+            # Some tests stub create_task() with lightweight sentinels that are not
+            # hashable and do not support lifecycle callbacks.
+            return
+        if hasattr(task, "add_done_callback"):
+            task.add_done_callback(self._background_tasks.discard)
+            task.add_done_callback(self._expected_cancelled_tasks.discard)
    
    @staticmethod
    def _get_human_delay() -> float:
@@ -2267,6 +1754,8 @@ class BasePlatformAdapter(ABC):
          HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
          HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
        """
+        import random
+
        mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
        if mode == "off":
            return 0.0
@@ -2359,14 +1848,12 @@ class BasePlatformAdapter(ABC):
                    logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
                
                # Auto-TTS: if voice message, generate audio FIRST (before sending text)
-                # Gated via ``_should_auto_tts_for_chat``: fires when the chat has
-                # an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
-                # True globally and no ``/voice off`` has been issued.
+                # Skipped when the chat has voice mode disabled (/voice off)
                _tts_path = None
-                if (self._should_auto_tts_for_chat(event.source.chat_id)
-                        and event.message_type == MessageType.VOICE
+                if (event.message_type == MessageType.VOICE
                        and text_content
-                        and not media_files):
+                        and not media_files
+                        and event.source.chat_id not in self._auto_tts_disabled_chats):
                    try:
                        from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                        if check_tts_requirements():
@@ -2617,9 +2104,6 @@ class BasePlatformAdapter(ABC):
                drain_task = asyncio.create_task(
                    self._process_message_background(late_pending, session_key)
                )
-                # Hand ownership of the session to the drain task so stale-lock
-                # detection keeps working while it runs.
-                self._session_tasks[session_key] = drain_task
                try:
                    self._background_tasks.add(drain_task)
                    drain_task.add_done_callback(self._background_tasks.discard)
@@ -2629,14 +2113,9 @@ class BasePlatformAdapter(ABC):
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
-                # Clean up session tracking.  Guard-match both deletes so a
-                # reset-like command that already swapped in its own
-                # command_guard (and cancelled us) can't be accidentally
-                # cleared by our unwind.  The command owns the session now.
-                current_task = asyncio.current_task()
-                if current_task is not None and self._session_tasks.get(session_key) is current_task:
-                    del self._session_tasks[session_key]
-                self._release_session_guard(session_key, guard=interrupt_event)
+                # Clean up session tracking
+                if session_key in self._active_sessions:
+                    del self._active_sessions[session_key]
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.
@@ -2666,7 +2145,6 @@ class BasePlatformAdapter(ABC):
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
        self._expected_cancelled_tasks.clear()
-        self._session_tasks.clear()
        self._pending_messages.clear()
        self._active_sessions.clear()

@@ -2690,9 +2168,6 @@ class BasePlatformAdapter(ABC):
        user_id_alt: Optional[str] = None,
        chat_id_alt: Optional[str] = None,
        is_bot: bool = False,
-        guild_id: Optional[str] = None,
-        parent_chat_id: Optional[str] = None,
-        message_id: Optional[str] = None,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -2710,9 +2185,6 @@ class BasePlatformAdapter(ABC):
            user_id_alt=user_id_alt,
            chat_id_alt=chat_id_alt,
            is_bot=is_bot,
-            guild_id=str(guild_id) if guild_id else None,
-            parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
-            message_id=str(message_id) if message_id else None,
        )
    
    @abstractmethod
@@ -75,7 +75,7 @@ def _redact(text: str) -> str:
 def check_bluebubbles_requirements() -> bool:
    try:
        import aiohttp  # noqa: F401
-        import httpx  # noqa: F401
+        import httpx as _httpx  # noqa: F401
    except ImportError:
        return False
    return True
@@ -99,7 +99,6 @@ def _normalize_server_url(raw: str) -> str:

 class BlueBubblesAdapter(BasePlatformAdapter):
    platform = Platform.BLUEBUBBLES
-    SUPPORTS_MESSAGE_EDITING = False
    MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH

    def __init__(self, config: PlatformConfig):
@@ -392,13 +391,6 @@ class BlueBubblesAdapter(BasePlatformAdapter):
    # Text sending
    # ------------------------------------------------------------------

-    @staticmethod
-    def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
-        # Use the base splitter but skip pagination indicators — iMessage
-        # bubbles flow naturally without "(1/3)" suffixes.
-        chunks = BasePlatformAdapter.truncate_message(content, max_length)
-        return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks]
-
    async def send(
        self,
        chat_id: str,
@@ -406,19 +398,10 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        text = self.format_message(content)
+        text = strip_markdown(content or "")
        if not text:
            return SendResult(success=False, error="BlueBubbles send requires text")
-        # Split on paragraph breaks first (double newlines) so each thought
-        # becomes its own iMessage bubble, then truncate any that are still
-        # too long.
-        paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
-        chunks: List[str] = []
-        for para in (paragraphs or [text]):
-            if len(para) <= self.MAX_MESSAGE_LENGTH:
-                chunks.append(para)
-            else:
-                chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
+        chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
        last = SendResult(success=True)
        for chunk in chunks:
            guid = await self._resolve_chat_guid(chat_id)
@@ -23,7 +23,6 @@ from typing import Callable, Dict, Optional, Any
 logger = logging.getLogger(__name__)

 VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
-_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}

 try:
    import discord
@@ -528,7 +527,6 @@ class DiscordAdapter(BasePlatformAdapter):
        # Reply threading mode: "off" (no replies), "first" (reply on first
        # chunk only, default), "all" (reply-reference on every chunk).
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
-        self._slash_commands: bool = self.config.extra.get("slash_commands", True)

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -543,6 +541,7 @@ class DiscordAdapter(BasePlatformAdapter):
            # ctypes.util.find_library fails on macOS with Homebrew-installed libs,
            # so fall back to known Homebrew paths if needed.
            if not opus_path:
+                import sys
                _homebrew_paths = (
                    "/opt/homebrew/lib/libopus.dylib",  # Apple Silicon
                    "/usr/local/lib/libopus.dylib",     # Intel Mac
@@ -746,8 +745,7 @@ class DiscordAdapter(BasePlatformAdapter):
                    )

            # Register slash commands
-            if self._slash_commands:
-                self._register_slash_commands()
+            self._register_slash_commands()

            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -803,27 +801,8 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client:
            return
        try:
-            sync_policy = self._get_discord_command_sync_policy()
-            if sync_policy == "off":
-                logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
-                return
-
-            if sync_policy == "bulk":
-                synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
-                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
-                return
-
-            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
-            logger.info(
-                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
-                self.name,
-                summary["total"],
-                summary["unchanged"],
-                summary["updated"],
-                summary["recreated"],
-                summary["created"],
-                summary["deleted"],
-            )
+            synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
+            logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
        except asyncio.TimeoutError:
            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
        except asyncio.CancelledError:
@@ -831,183 +810,6 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)

-    def _get_discord_command_sync_policy(self) -> str:
-        raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
-        if raw in _DISCORD_COMMAND_SYNC_POLICIES:
-            return raw
-        if raw:
-            logger.warning(
-                "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
-                self.name,
-                raw,
-            )
-        return "safe"
-
-    def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
-        """Reduce command payloads to the semantic fields Hermes manages."""
-        contexts = payload.get("contexts")
-        integration_types = payload.get("integration_types")
-        return {
-            "type": int(payload.get("type", 1) or 1),
-            "name": str(payload.get("name", "") or ""),
-            "description": str(payload.get("description", "") or ""),
-            "default_member_permissions": self._normalize_permissions(
-                payload.get("default_member_permissions")
-            ),
-            "dm_permission": bool(payload.get("dm_permission", True)),
-            "nsfw": bool(payload.get("nsfw", False)),
-            "contexts": sorted(int(c) for c in contexts) if contexts else None,
-            "integration_types": (
-                sorted(int(i) for i in integration_types) if integration_types else None
-            ),
-            "options": [
-                self._canonicalize_app_command_option(item)
-                for item in payload.get("options", []) or []
-                if isinstance(item, dict)
-            ],
-        }
-
-    @staticmethod
-    def _normalize_permissions(value: Any) -> Optional[str]:
-        """Discord emits default_member_permissions as str server-side but discord.py
-        sets it as int locally. Normalize to str-or-None so the comparison is stable."""
-        if value is None:
-            return None
-        return str(value)
-
-    def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
-        """Build a canonical-ready dict from an AppCommand.
-
-        discord.py's AppCommand.to_dict() does NOT include nsfw,
-        dm_permission, or default_member_permissions (they live only on the
-        attributes). Pull them from the attributes so the canonicalizer sees
-        the real server-side values instead of defaults — otherwise any
-        command using non-default permissions would diff on every startup.
-        """
-        payload = dict(command.to_dict())
-        nsfw = getattr(command, "nsfw", None)
-        if nsfw is not None:
-            payload["nsfw"] = bool(nsfw)
-        guild_only = getattr(command, "guild_only", None)
-        if guild_only is not None:
-            payload["dm_permission"] = not bool(guild_only)
-        default_permissions = getattr(command, "default_member_permissions", None)
-        if default_permissions is not None:
-            payload["default_member_permissions"] = getattr(
-                default_permissions, "value", default_permissions
-            )
-        return payload
-
-    def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
-        return {
-            "type": int(payload.get("type", 0) or 0),
-            "name": str(payload.get("name", "") or ""),
-            "description": str(payload.get("description", "") or ""),
-            "required": bool(payload.get("required", False)),
-            "autocomplete": bool(payload.get("autocomplete", False)),
-            "choices": [
-                {
-                    "name": str(choice.get("name", "") or ""),
-                    "value": choice.get("value"),
-                }
-                for choice in payload.get("choices", []) or []
-                if isinstance(choice, dict)
-            ],
-            "channel_types": list(payload.get("channel_types", []) or []),
-            "min_value": payload.get("min_value"),
-            "max_value": payload.get("max_value"),
-            "min_length": payload.get("min_length"),
-            "max_length": payload.get("max_length"),
-            "options": [
-                self._canonicalize_app_command_option(item)
-                for item in payload.get("options", []) or []
-                if isinstance(item, dict)
-            ],
-        }
-
-    def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
-        """Fields supported by discord.py's edit_global_command route."""
-        canonical = self._canonicalize_app_command_payload(payload)
-        return {
-            "name": canonical["name"],
-            "description": canonical["description"],
-            "options": canonical["options"],
-        }
-
-    async def _safe_sync_slash_commands(self) -> Dict[str, int]:
-        """Diff existing global commands and only mutate the commands that changed."""
-        if not self._client:
-            return {
-                "total": 0,
-                "unchanged": 0,
-                "updated": 0,
-                "recreated": 0,
-                "created": 0,
-                "deleted": 0,
-            }
-
-        tree = self._client.tree
-        app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
-        if not app_id:
-            raise RuntimeError("Discord application ID is unavailable for slash command sync")
-
-        desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
-        desired_by_key = {
-            (int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
-            for payload in desired_payloads
-        }
-        existing_commands = await tree.fetch_commands()
-        existing_by_key = {
-            (
-                int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
-                str(command.name or "").lower(),
-            ): command
-            for command in existing_commands
-        }
-
-        unchanged = 0
-        updated = 0
-        recreated = 0
-        created = 0
-        deleted = 0
-        http = self._client.http
-
-        for key, desired in desired_by_key.items():
-            current = existing_by_key.pop(key, None)
-            if current is None:
-                await http.upsert_global_command(app_id, desired)
-                created += 1
-                continue
-
-            current_existing_payload = self._existing_command_to_payload(current)
-            current_payload = self._canonicalize_app_command_payload(current_existing_payload)
-            desired_payload = self._canonicalize_app_command_payload(desired)
-            if current_payload == desired_payload:
-                unchanged += 1
-                continue
-
-            if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
-                await http.delete_global_command(app_id, current.id)
-                await http.upsert_global_command(app_id, desired)
-                recreated += 1
-                continue
-
-            await http.edit_global_command(app_id, current.id, desired)
-            updated += 1
-
-        for current in existing_by_key.values():
-            await http.delete_global_command(app_id, current.id)
-            deleted += 1
-
-        return {
-            "total": len(desired_payloads),
-            "unchanged": unchanged,
-            "updated": updated,
-            "recreated": recreated,
-            "created": created,
-            "deleted": deleted,
-        }
-
    async def _add_reaction(self, message: Any, emoji: str) -> bool:
        """Add an emoji reaction to a Discord message."""
        if not message or not hasattr(message, "add_reaction"):
@@ -1620,7 +1422,8 @@ class DiscordAdapter(BasePlatformAdapter):
        speaking_user_ids: set = set()
        receiver = self._voice_receivers.get(guild_id)
        if receiver:
-            now = time.monotonic()
+            import time as _time
+            now = _time.monotonic()
            with receiver._lock:
                for ssrc, last_t in receiver._last_packet_time.items():
                    # Consider "speaking" if audio received within last 2 seconds
@@ -2246,6 +2049,10 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_usage(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/usage")

+        @tree.command(name="provider", description="Show available providers")
+        async def slash_provider(interaction: discord.Interaction):
+            await self._run_simple_slash(interaction, "/provider")
+
        @tree.command(name="help", description="Show available commands")
        async def slash_help(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/help")
@@ -2315,46 +2122,19 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_background(interaction: discord.Interaction, prompt: str):
            await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")

+        @tree.command(name="btw", description="Ephemeral side question using session context")
+        @discord.app_commands.describe(question="Your side question (no tools, not persisted)")
+        async def slash_btw(interaction: discord.Interaction, question: str):
+            await self._run_simple_slash(interaction, f"/btw {question}")
+
        # ── Auto-register any gateway-available commands not yet on the tree ──
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
-        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
-            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
-            discord_name = _name.lower()[:32]
-            desc = (_description or f"Run /{_name}")[:100]
-            has_args = bool(_args_hint)
-
-            if has_args:
-                def _make_args_handler(__name: str, __hint: str):
-                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
-                    async def _handler(interaction: discord.Interaction, args: str = ""):
-                        await self._run_simple_slash(
-                            interaction, f"/{__name} {args}".strip()
-                        )
-                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
-                    return _handler
-
-                handler = _make_args_handler(_name, _args_hint)
-            else:
-                def _make_simple_handler(__name: str):
-                    async def _handler(interaction: discord.Interaction):
-                        await self._run_simple_slash(interaction, f"/{__name}")
-                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
-                    return _handler
-
-                handler = _make_simple_handler(_name)
-
-            return discord.app_commands.Command(
-                name=discord_name,
-                description=desc,
-                callback=handler,
-            )
-
-        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

+            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2369,10 +2149,38 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                auto_cmd = _build_auto_slash_command(
-                    cmd_def.name,
-                    cmd_def.description,
-                    cmd_def.args_hint,
+                # Skip aliases that overlap with already-registered names
+                # (aliases for explicitly registered commands are handled above).
+                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
+                has_args = bool(cmd_def.args_hint)
+
+                if has_args:
+                    # Command takes optional arguments — create handler with
+                    # an optional ``args`` string parameter.
+                    def _make_args_handler(_name: str, _hint: str):
+                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
+                        async def _handler(interaction: discord.Interaction, args: str = ""):
+                            await self._run_simple_slash(
+                                interaction, f"/{_name} {args}".strip()
+                            )
+                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
+                        return _handler
+
+                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
+                else:
+                    # Parameterless command.
+                    def _make_simple_handler(_name: str):
+                        async def _handler(interaction: discord.Interaction):
+                            await self._run_simple_slash(interaction, f"/{_name}")
+                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
+                        return _handler
+
+                    handler = _make_simple_handler(cmd_def.name)
+
+                auto_cmd = discord.app_commands.Command(
+                    name=discord_name,
+                    description=desc,
+                    callback=handler,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2389,35 +2197,6 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

-        # ── Plugin-registered slash commands ──
-        # Plugins register via PluginContext.register_command(); we mirror
-        # those into Discord's native slash picker so users get the same
-        # autocomplete UX as for built-in commands. No per-platform plugin
-        # API needed — plugin commands are platform-agnostic.
-        try:
-            from hermes_cli.commands import _iter_plugin_command_entries
-
-            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
-                discord_name = plugin_name.lower()[:32]
-                if discord_name in already_registered:
-                    continue
-                auto_cmd = _build_auto_slash_command(
-                    plugin_name,
-                    plugin_desc,
-                    plugin_args_hint,
-                )
-                try:
-                    tree.add_command(auto_cmd)
-                    already_registered.add(discord_name)
-                except Exception:
-                    # Silently skip commands that fail registration (e.g.
-                    # name conflict with a subcommand group).
-                    pass
-        except Exception as e:
-            logger.warning(
-                "Discord auto-register from plugin commands failed: %s", e
-            )
-
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -2679,8 +2458,21 @@ class DiscordAdapter(BasePlatformAdapter):
                skills: ["skill-a", "skill-b"]
        Also checks parent_id so forum threads inherit the forum's bindings.
        """
-        from gateway.platforms.base import resolve_channel_skills
-        return resolve_channel_skills(self.config.extra, channel_id, parent_id)
+        bindings = self.config.extra.get("channel_skill_bindings", [])
+        if not bindings:
+            return None
+        ids_to_check = {channel_id}
+        if parent_id:
+            ids_to_check.add(parent_id)
+        for entry in bindings:
+            entry_id = str(entry.get("id", ""))
+            if entry_id in ids_to_check:
+                skills = entry.get("skills") or entry.get("skill")
+                if isinstance(skills, str):
+                    return [skills]
+                if isinstance(skills, list) and skills:
+                    return list(dict.fromkeys(skills))  # dedup, preserve order
+        return None

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
        """Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
@@ -2697,12 +2489,7 @@ class DiscordAdapter(BasePlatformAdapter):
        return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")

    def _discord_free_response_channels(self) -> set:
-        """Return Discord channel IDs where no bot mention is required.
-
-        A single ``"*"`` entry (either from a list or a comma-separated
-        string) is preserved in the returned set so callers can short-circuit
-        on wildcard membership, consistent with ``allowed_channels``.
-        """
+        """Return Discord channel IDs where no bot mention is required."""
        raw = self.config.extra.get("free_response_channels")
        if raw is None:
            raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
@@ -3175,17 +2962,6 @@ class DiscordAdapter(BasePlatformAdapter):
            parent_channel_id = self._get_parent_channel_id(message.channel)

        is_voice_linked_channel = False
-
-        # Save mention-stripped text before auto-threading since create_thread()
-        # can clobber message.content, breaking /command detection in channels.
-        raw_content = message.content.strip()
-        normalized_content = raw_content
-        mention_prefix = False
-        if self._client.user and self._client.user in message.mentions:
-            mention_prefix = True
-            normalized_content = normalized_content.replace(f"<@{self._client.user.id}>", "").strip()
-            normalized_content = normalized_content.replace(f"<@!{self._client.user.id}>", "").strip()
-            message.content = normalized_content
        if not isinstance(message.channel, discord.DMChannel):
            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
@@ -3195,14 +2971,14 @@ class DiscordAdapter(BasePlatformAdapter):
            allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
            if allowed_channels_raw:
                allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
-                if "*" not in allowed_channels and not (channel_ids & allowed_channels):
+                if not (channel_ids & allowed_channels):
                    logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
                    return

            # Check ignored channels - never respond even when mentioned
            ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
            ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
-            if "*" in ignored_channels or (channel_ids & ignored_channels):
+            if channel_ids & ignored_channels:
                logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
                return

@@ -3216,19 +2992,20 @@ class DiscordAdapter(BasePlatformAdapter):
            voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
            current_channel_id = str(message.channel.id)
            is_voice_linked_channel = current_channel_id in voice_linked_ids
-            is_free_channel = (
-                "*" in free_channels
-                or bool(channel_ids & free_channels)
-                or is_voice_linked_channel
-            )
+            is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
            in_bot_thread = is_thread and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
-                if self._client.user not in message.mentions and not mention_prefix:
+                if self._client.user not in message.mentions:
                    return
+
+            if self._client.user and self._client.user in message.mentions:
+                message.content = message.content.replace(f"<@{self._client.user.id}>", "").strip()
+                message.content = message.content.replace(f"<@!{self._client.user.id}>", "").strip()
+
        # Auto-thread: when enabled, automatically create a thread for every
        # @mention in a text channel so each conversation is isolated (like Slack).
        # Messages already inside threads or DMs are unaffected.
@@ -3243,7 +3020,6 @@ class DiscordAdapter(BasePlatformAdapter):
            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
                thread = await self._auto_create_thread(message)
                if thread:
-                    parent_channel_id = str(message.channel.id)
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
@@ -3251,7 +3027,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Determine message type
        msg_type = MessageType.TEXT
-        if normalized_content.startswith("/"):
+        if message.content.startswith("/"):
            msg_type = MessageType.COMMAND
        elif message.attachments:
            # Check attachment types
@@ -3303,9 +3079,6 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
-            guild_id=str(message.guild.id) if message.guild else None,
-            parent_chat_id=parent_channel_id,
-            message_id=str(message.id),
        )

        # Build media URLs -- download image attachments to local cache so the
@@ -3394,9 +3167,7 @@ class DiscordAdapter(BasePlatformAdapter):
                                att.filename, e, exc_info=True,
                            )

-        # Use normalized_content (saved before auto-threading) instead of message.content,
-        # to detect /slash commands in channel messages.
-        event_text = normalized_content
+        event_text = message.content
        if pending_text_injection:
            event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection

@@ -3857,15 +3628,6 @@ if DISCORD_AVAILABLE:

            self.resolved = True
            model_id = interaction.data["values"][0]
-            self.clear_items()
-            await interaction.response.edit_message(
-                embed=discord.Embed(
-                    title="⚙ Switching Model",
-                    description=f"Switching to `{model_id}`...",
-                    color=discord.Color.blue(),
-                ),
-                view=None,
-            )

            try:
                result_text = await self.on_model_selected(
@@ -3876,13 +3638,14 @@ if DISCORD_AVAILABLE:
            except Exception as exc:
                result_text = f"Error switching model: {exc}"

-            await interaction.edit_original_response(
+            self.clear_items()
+            await interaction.response.edit_message(
                embed=discord.Embed(
                    title="⚙ Model Switched",
                    description=result_text,
                    color=discord.Color.green(),
                ),
-                view=None,
+                view=self,
            )

        async def _on_back(self, interaction: discord.Interaction):
@@ -545,7 +545,6 @@ class EmailAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
-        **kwargs,
    ) -> SendResult:
        """Send a file as an email attachment."""
        try:
@@ -14,35 +14,6 @@ Supports:
 - Interactive card button-click events routed as synthetic COMMAND events
 - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
 - Verification token validation as second auth layer (matches openclaw)
-
-Feishu identity model
---------------------
-Feishu uses three user-ID tiers (official docs:
-https://open.feishu.cn/document/home/user-identity-introduction/introduction):
-
-  open_id  (ou_xxx)  — **App-scoped**.  The same person gets a different
-                        open_id under each Feishu app.  Always available in
-                        event payloads without extra permissions.
-  user_id  (u_xxx)   — **Tenant-scoped**.  Stable within a company but
-                        requires the ``contact:user.employee_id:readonly``
-                        scope.  May not be present.
-  union_id (on_xxx)  — **Developer-scoped**.  Same across all apps owned by
-                        one developer/ISV.  Best cross-app stable ID.
-
-For bots specifically:
-
-  app_id              — The application's canonical credential identifier.
-  bot open_id         — Returned by ``/bot/v3/info``.  This is the bot's own
-                        open_id *within its app context* and is what Feishu
-                        puts in ``mentions[].id.open_id`` when someone
-                        @-mentions the bot.  Used for mention gating only.
-
-In single-bot mode (what Hermes currently supports), open_id works as a
-de-facto unique user identifier since there is only one app context.
-
-Session-key participant isolation prefers ``union_id`` (via user_id_alt)
-over ``open_id`` (via user_id) so that sessions stay stable if the same
-user is seen through different apps in the future.
 """

 from __future__ import annotations
@@ -64,7 +35,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Any, Dict, List, Optional
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -102,9 +73,7 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
-    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
-    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -265,8 +234,6 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
-_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
-_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -310,36 +277,12 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


-@dataclass(frozen=True)
-class FeishuMentionRef:
-    name: str = ""
-    open_id: str = ""
-    is_all: bool = False
-    is_self: bool = False
-
-
-@dataclass(frozen=True)
-class _FeishuBotIdentity:
-    open_id: str = ""
-    user_id: str = ""
-    name: str = ""
-
-    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
-        # Precedence: open_id > user_id > name. IDs are authoritative when both
-        # sides have them; the next tier is only considered when either side
-        # lacks the current one.
-        if open_id and self.open_id:
-            return open_id == self.open_id
-        if user_id and self.user_id:
-            return user_id == self.user_id
-        return bool(self.name) and name == self.name
-
-
@dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
+    mentioned_ids: List[str] = field(default_factory=list)


@dataclass(frozen=True)
@@ -349,14 +292,14 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentions: List[FeishuMentionRef] = field(default_factory=list)
+    mentioned_ids: List[str] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass(frozen=True)
 class FeishuAdapterSettings:
-    app_id: str  # Canonical bot/app identifier (credential, not from event payloads)
+    app_id: str
    app_secret: str
    domain_name: str
    connection_mode: str
@@ -364,11 +307,7 @@ class FeishuAdapterSettings:
    verification_token: str
    group_policy: str
    allowed_group_users: frozenset[str]
-    # Bot's own open_id (app-scoped) — returned by /bot/v3/info.  Used only for
-    # @mention matching: Feishu puts this value in mentions[].id.open_id when
-    # a user @-mentions the bot in a group chat.
    bot_open_id: str
-    # Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
    bot_user_id: str
    bot_name: str
    dedup_cache_size: int
@@ -566,17 +505,14 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(
-    payload: Any,
-    *,
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
-) -> FeishuPostParseResult:
+def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
+    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -587,10 +523,7 @@ def parse_feishu_post_payload(
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(
-                _render_post_element(item, image_keys, media_refs, mentions_map)
-                for item in row
-            )
+            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
        )
        if row_text:
            parts.append(row_text)
@@ -599,6 +532,7 @@ def parse_feishu_post_payload(
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
+        mentioned_ids=mentioned_ids,
    )


@@ -650,7 +584,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+    mentioned_ids: List[str],
 ) -> str:
    if isinstance(element, str):
        return element
@@ -668,21 +602,19 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
-        # the real ref in mentions_map for the display name.
-        placeholder = str(element.get("user_id", "")).strip()
-        if placeholder == "@_all":
-            # Feishu SDK sometimes omits @_all from the top-level mentions
-            # payload; record it here so the caller's mention list stays complete.
-            if mentions_map is not None and "@_all" not in mentions_map:
-                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
-            return "@all"
-        ref = (mentions_map or {}).get(placeholder)
-        if ref is not None:
-            display_name = ref.name or ref.open_id or "user"
-        else:
-            display_name = str(element.get("user_name", "")).strip() or "user"
-        return f"@{_escape_markdown_text(display_name)}"
+        mentioned_id = (
+            str(element.get("open_id", "")).strip()
+            or str(element.get("user_id", "")).strip()
+        )
+        if mentioned_id and mentioned_id not in mentioned_ids:
+            mentioned_ids.append(mentioned_id)
+        display_name = (
+            str(element.get("user_name", "")).strip()
+            or str(element.get("name", "")).strip()
+            or str(element.get("text", "")).strip()
+            or mentioned_id
+        )
+        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -720,7 +652,8 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
+        value = element.get(key)
+        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -730,7 +663,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+    mentioned_ids: List[str],
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -738,17 +671,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
+        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
            if part
        )
    return ""
@@ -759,48 +692,31 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(
-    *,
-    message_type: str,
-    raw_content: str,
-    mentions: Optional[Sequence[Any]] = None,
-    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
-) -> FeishuNormalizedMessage:
+def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
-    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
-        text = str(payload.get("text", "") or "")
-        # Feishu SDK sometimes omits @_all from the mentions payload even when
-        # the text literal contains it (confirmed via im.v1.message.get).
-        if "@_all" in text and "@_all" not in mentions_map:
-            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(text, mentions_map),
-            mentions=list(mentions_map.values()),
+            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
        )
    if normalized_type == "post":
-        # The walker writes back to mentions_map if it encounters
-        # <at user_id="@_all">, so reading .values() after parsing is enough.
-        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
+        parsed_post = parse_feishu_post_payload(payload)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentions=list(mentions_map.values()),
+            mentioned_ids=list(parsed_post.mentioned_ids),
            relation_kind="post",
        )
-    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT,
-            mentions_map,
+            or FALLBACK_IMAGE_TEXT
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -808,7 +724,6 @@ def normalize_feishu_message(
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
-            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -820,7 +735,6 @@ def normalize_feishu_message(
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
-            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -1095,20 +1009,8 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(
-    text: str,
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
-) -> str:
-    def _sub(match: "re.Match[str]") -> str:
-        key = match.group(0)
-        ref = (mentions_map or {}).get(key)
-        if ref is None:
-            return " "
-        name = ref.name or ref.open_id or "user"
-        return f"@{name}"
-
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
-    cleaned = cleaned.replace("@_all", "@all")
+def _normalize_feishu_text(text: str) -> str:
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1127,117 +1029,6 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


-# ---------------------------------------------------------------------------
-# Mention helpers
-# ---------------------------------------------------------------------------
-
-
-def _extract_mention_ids(mention: Any) -> tuple[str, str]:
-    # Returns (open_id, user_id). im.v1.message.get hands back id as a string
-    # plus id_type discriminator; event payloads hand back a nested UserId
-    # object carrying both fields.
-    mention_id = getattr(mention, "id", None)
-    if isinstance(mention_id, str):
-        id_type = str(getattr(mention, "id_type", "") or "").lower()
-        if id_type == "open_id":
-            return mention_id, ""
-        if id_type == "user_id":
-            return "", mention_id
-        return "", ""
-    if mention_id is None:
-        return "", ""
-    return (
-        str(getattr(mention_id, "open_id", "") or ""),
-        str(getattr(mention_id, "user_id", "") or ""),
-    )
-
-
-def _build_mentions_map(
-    mentions: Optional[Sequence[Any]],
-    bot: _FeishuBotIdentity,
-) -> Dict[str, FeishuMentionRef]:
-    result: Dict[str, FeishuMentionRef] = {}
-    for mention in mentions or []:
-        key = str(getattr(mention, "key", "") or "")
-        if not key:
-            continue
-        if key == "@_all":
-            result[key] = FeishuMentionRef(is_all=True)
-            continue
-        open_id, user_id = _extract_mention_ids(mention)
-        name = str(getattr(mention, "name", "") or "").strip()
-        result[key] = FeishuMentionRef(
-            name=name,
-            open_id=open_id,
-            is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
-        )
-    return result
-
-
-def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
-    parts: List[str] = []
-    seen: set = set()
-    for ref in mentions:
-        if ref.is_self:
-            continue
-        signature = (ref.is_all, ref.open_id, ref.name)
-        if signature in seen:
-            continue
-        seen.add(signature)
-        if ref.is_all:
-            parts.append("@all")
-        elif ref.open_id:
-            parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
-        else:
-            parts.append(ref.name or "unknown")
-    return f"[Mentioned: {', '.join(parts)}]" if parts else ""
-
-
-def _strip_edge_self_mentions(
-    text: str,
-    mentions: Sequence[FeishuMentionRef],
-) -> str:
-    # Leading: strip consecutive self-mentions unconditionally.
-    # Trailing: strip only when followed by whitespace/terminal punct, so
-    # mid-sentence references ("don't @Bot again") stay intact.
-    # Leading word-boundary prevents @Al from eating @Alice.
-    if not text:
-        return text
-    self_names = [
-        f"@{ref.name or ref.open_id or 'user'}"
-        for ref in mentions
-        if ref.is_self
-    ]
-    if not self_names:
-        return text
-
-    remaining = text.lstrip()
-    while True:
-        for nm in self_names:
-            if not remaining.startswith(nm):
-                continue
-            after = remaining[len(nm):]
-            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
-                continue
-            remaining = after.lstrip()
-            break
-        else:
-            break
-
-    while True:
-        i = len(remaining)
-        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
-            i -= 1
-        body = remaining[:i]
-        tail = remaining[i:]
-        for nm in self_names:
-            if body.endswith(nm):
-                remaining = body[: -len(nm)].rstrip() + tail
-                break
-        else:
-            return remaining
-
-
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -1700,7 +1491,6 @@ class FeishuAdapter(BasePlatformAdapter):
        if not self._client:
            return SendResult(success=False, error="Not connected")

-        content = self.format_message(content)
        try:
            msg_type, payload = self._build_outbound_payload(content)
            body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2680,22 +2470,13 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
-
-        if inbound_type == MessageType.TEXT:
-            text = _strip_edge_self_mentions(text, mentions)
-            if text.startswith("/"):
-                inbound_type = MessageType.COMMAND
-
-        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
+        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
+            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
            return

-        if inbound_type != MessageType.COMMAND:
-            hint = _build_mention_hint(mentions)
-            if hint:
-                text = f"{hint}\n\n{text}" if text else hint
+        if inbound_type == MessageType.TEXT and text.startswith("/"):
+            inbound_type = MessageType.COMMAND

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -3154,20 +2935,14 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(
-        self, message: Any
-    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
+    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
+        """Extract text and cached media from a normalized Feishu message."""
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(
-            message_type=raw_type,
-            raw_content=raw_content,
-            mentions=getattr(message, "mentions", None),
-            bot=self._bot_identity(),
-        )
+        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -3184,7 +2959,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types, list(normalized.mentions)
+        return text, inbound_type, media_urls, media_types

    async def _download_feishu_message_resources(
        self,
@@ -3448,22 +3223,10 @@ class FeishuAdapter(BasePlatformAdapter):
        return "group"

    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
-        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
-
-        Preference order for the primary ``user_id`` field:
-          1. user_id  (tenant-scoped, most stable — requires permission scope)
-          2. open_id  (app-scoped, always available — different per bot app)
-
-        ``user_id_alt`` carries the union_id (developer-scoped, stable across
-        all apps by the same developer).  Session-key generation prefers
-        user_id_alt when present, so participant isolation stays stable even
-        if the primary ID is the app-scoped open_id.
-        """
        open_id = getattr(sender_id, "open_id", None) or None
        user_id = getattr(sender_id, "user_id", None) or None
        union_id = getattr(sender_id, "union_id", None) or None
-        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
-        primary_id = user_id or open_id
+        primary_id = open_id or user_id
        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
        return {
            "user_id": primary_id,
@@ -3545,31 +3308,15 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            parent_mentions = getattr(parent, "mentions", None) if parent else None
-            text = self._extract_text_from_raw_content(
-                msg_type=msg_type,
-                raw_content=raw_content,
-                mentions=parent_mentions,
-            )
+            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(
-        self,
-        *,
-        msg_type: str,
-        raw_content: str,
-        mentions: Optional[Sequence[Any]] = None,
-    ) -> Optional[str]:
-        normalized = normalize_feishu_message(
-            message_type=msg_type,
-            raw_content=raw_content,
-            mentions=mentions,
-            bot=self._bot_identity(),
-        )
+    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
+        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3639,10 +3386,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
-            mentions=getattr(message, "mentions", None),
-            bot=self._bot_identity(),
        )
-        return self._post_mentions_bot(normalized.mentions)
+        if normalized.mentioned_ids:
+            return self._post_mentions_bot(normalized.mentioned_ids)
+        return False

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3662,37 +3409,30 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        # IDs trump names: when both sides have open_id (or both user_id),
-        # match requires equal IDs. Name fallback only when either side
-        # lacks an ID.
+        """Check whether any mention targets the configured or inferred bot identity."""
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
-            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
+            mention_open_id = getattr(mention_id, "open_id", None)
+            mention_user_id = getattr(mention_id, "user_id", None)
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if mention_open_id and self._bot_open_id:
-                if mention_open_id == self._bot_open_id:
-                    return True
-                continue  # IDs differ — not the bot; skip name fallback.
-            if mention_user_id and self._bot_user_id:
-                if mention_user_id == self._bot_user_id:
-                    return True
-                continue
+            if self._bot_open_id and mention_open_id == self._bot_open_id:
+                return True
+            if self._bot_user_id and mention_user_id == self._bot_user_id:
+                return True
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
-        return any(m.is_self for m in mentions)
-
-    def _bot_identity(self) -> _FeishuBotIdentity:
-        return _FeishuBotIdentity(
-            open_id=self._bot_open_id,
-            user_id=self._bot_user_id,
-            name=self._bot_name,
-        )
+    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
+        if not mentioned_ids:
+            return False
+        if self._bot_open_id and self._bot_open_id in mentioned_ids:
+            return True
+        if self._bot_user_id and self._bot_user_id in mentioned_ids:
+            return True
+        return False

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3717,15 +3457,14 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                req = (
-                    BaseRequest.builder()
-                    .http_method(HttpMethod.GET)
-                    .uri("/open-apis/bot/v3/info")
-                    .token_types({AccessTokenType.TENANT})
-                    .build()
+                resp = await asyncio.to_thread(
+                    self._client.request,
+                    method="GET",
+                    url="/open-apis/bot/v3/info",
+                    body=None,
+                    raw_response=True,
                )
-                resp = await asyncio.to_thread(self._client.request, req)
-                content = getattr(getattr(resp, "raw", None), "content", None)
+                content = getattr(resp, "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4473,9 +4212,6 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:

    Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
-
-    Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
-    that Feishu puts in @mention payloads.  It is NOT the app_id.
    """
    if FEISHU_AVAILABLE:
        return _probe_bot_sdk(app_id, app_secret, domain)
@@ -4496,12 +4232,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
+    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("app_name") or bot.get("bot_name"),
+        "bot_name": bot.get("bot_name"),
        "bot_open_id": bot.get("open_id"),
    }

@@ -4510,18 +4246,13 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        req = (
-            BaseRequest.builder()
-            .http_method(HttpMethod.GET)
-            .uri("/open-apis/bot/v3/info")
-            .token_types({AccessTokenType.TENANT})
-            .build()
+        resp = client.request(
+            method="GET",
+            url="/open-apis/bot/v3/info",
+            body=None,
+            raw_response=True,
        )
-        resp = client.request(req)
-        content = getattr(getattr(resp, "raw", None), "content", None)
-        if content is None:
-            return None
-        return _parse_bot_response(json.loads(content))
+        return _parse_bot_response(json.loads(resp.content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -57,15 +57,6 @@ class MessageDeduplicator:
        if len(self._seen) > self._max_size:
            cutoff = now - self._ttl
            self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
-            if len(self._seen) > self._max_size:
-                # TTL pruning alone does not cap the cache when every entry is
-                # still fresh. Keep the newest entries so the helper's
-                # max_size bound is enforced under sustained traffic.
-                newest = sorted(
-                    self._seen.items(),
-                    key=lambda item: item[1],
-                )[-self._max_size:]
-                self._seen = dict(newest)
        return False

    def clear(self):
@@ -532,20 +532,6 @@ class MatrixAdapter(BasePlatformAdapter):
                )
                await crypto_store.open()

-                # Bind the store to the runtime device_id before any
-                # put_account() runs. PgCryptoStore defaults _device_id
-                # to "" and its crypto_account UPSERT never updates the
-                # device_id column on conflict — so once put_account
-                # writes blank, it stays blank forever. That breaks
-                # every downstream device-scoped olm operation: peer
-                # to-device ciphertext can't find our identity key and
-                # no megolm sessions ever land. Setting _device_id here
-                # (in-memory; the on-disk row may not exist yet) makes
-                # the first put_account write the correct value.
-                # DeviceID is a NewType(str) so plain str works at runtime.
-                if client.device_id:
-                    await crypto_store.put_device_id(client.device_id)
-
                crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
                olm = OlmMachine(client, crypto_store, crypto_state)

@@ -410,6 +410,7 @@ class MattermostAdapter(BasePlatformAdapter):
            logger.warning("Mattermost: blocked unsafe URL (SSRF protection)")
            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)

+        import asyncio
        import aiohttp

        last_exc = None
@@ -26,8 +26,9 @@ from .adapter import (  # noqa: F401
 # -- Onboard (QR-code scan-to-configure) -----------------------------------
 from .onboard import (  # noqa: F401
    BindStatus,
+    create_bind_task,
+    poll_bind_result,
    build_connect_url,
-    qr_register,
 )
 from .crypto import decrypt_secret, generate_bind_key  # noqa: F401

@@ -43,8 +44,9 @@ __all__ = [
    "_ssrf_redirect_guard",
    # onboard
    "BindStatus",
+    "create_bind_task",
+    "poll_bind_result",
    "build_connect_url",
-    "qr_register",
    # crypto
    "decrypt_secret",
    "generate_bind_key",
@@ -535,9 +535,6 @@ class QQAdapter(BasePlatformAdapter):
                    quick_disconnect_count = 0
                else:
                    backoff_idx += 1
-                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
-                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
-                        return

            except Exception as exc:
                if not self._running:
@@ -1089,8 +1086,11 @@ class QQAdapter(BasePlatformAdapter):
            return MessageType.VIDEO
        if "image" in first_type or "photo" in first_type:
            return MessageType.PHOTO
+        # Unknown content type with an attachment — don't assume PHOTO
+        # to prevent non-image files from being sent to vision analysis.
        logger.debug(
-            "Unknown media content_type '%s', defaulting to TEXT",
+            "[%s] Unknown media content_type '%s', defaulting to TEXT",
+            self._log_tag,
            first_type,
        )
        return MessageType.TEXT
@@ -1826,12 +1826,14 @@ class QQAdapter(BasePlatformAdapter):
            body["file_name"] = file_name

        # Retry transient upload failures
+        last_exc = None
        for attempt in range(3):
            try:
                return await self._api_request(
                    "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT
                )
            except RuntimeError as exc:
+                last_exc = exc
                err_msg = str(exc)
                if any(
                        kw in err_msg
@@ -1840,8 +1842,8 @@ class QQAdapter(BasePlatformAdapter):
                    raise
                if attempt < 2:
                    await asyncio.sleep(1.5 * (attempt + 1))
-                else:
-                    raise
+
+        raise last_exc  # type: ignore[misc]

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
@@ -1,10 +1,6 @@
 """
 QQBot scan-to-configure (QR code onboard) module.

-Mirrors the Feishu onboarding pattern: synchronous HTTP + a single public
-entry-point ``qr_register()`` that handles the full flow (create task →
-display QR code → poll → decrypt credentials).
-
 Calls the ``q.qq.com`` ``create_bind_task`` / ``poll_bind_result`` APIs to
 generate a QR-code URL and poll for scan completion.  On success the caller
 receives the bot's *app_id*, *client_secret* (decrypted locally), and the
@@ -16,20 +12,18 @@ Reference: https://bot.q.qq.com/wiki/develop/api-v2/
 from __future__ import annotations

 import logging
-import time
 from enum import IntEnum
-from typing import Optional, Tuple
+from typing import Tuple
 from urllib.parse import quote

 from .constants import (
    ONBOARD_API_TIMEOUT,
    ONBOARD_CREATE_PATH,
-    ONBOARD_POLL_INTERVAL,
    ONBOARD_POLL_PATH,
    PORTAL_HOST,
    QR_URL_TEMPLATE,
 )
-from .crypto import decrypt_secret, generate_bind_key
+from .crypto import generate_bind_key
 from .utils import get_api_headers

 logger = logging.getLogger(__name__)
@@ -41,7 +35,7 @@ logger = logging.getLogger(__name__)


 class BindStatus(IntEnum):
-    """Status codes returned by ``_poll_bind_result``."""
+    """Status codes returned by ``poll_bind_result``."""

    NONE = 0
    PENDING = 1
@@ -50,40 +44,18 @@ class BindStatus(IntEnum):


 # ---------------------------------------------------------------------------
-# QR rendering
-# ---------------------------------------------------------------------------
-
-try:
-    import qrcode as _qrcode_mod
-except (ImportError, TypeError):
-    _qrcode_mod = None  # type: ignore[assignment]
-
-
-def _render_qr(url: str) -> bool:
-    """Try to render a QR code in the terminal. Returns True if successful."""
-    if _qrcode_mod is None:
-        return False
-    try:
-        qr = _qrcode_mod.QRCode(
-            error_correction=_qrcode_mod.constants.ERROR_CORRECT_M,
-            border=2,
-        )
-        qr.add_data(url)
-        qr.make(fit=True)
-        qr.print_ascii(invert=True)
-        return True
-    except Exception:
-        return False
-
-
-# ---------------------------------------------------------------------------
-# Synchronous HTTP helpers (mirrors Feishu _post_registration pattern)
+# Public API
 # ---------------------------------------------------------------------------


-def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
+async def create_bind_task(
+    timeout: float = ONBOARD_API_TIMEOUT,
+) -> Tuple[str, str]:
    """Create a bind task and return *(task_id, aes_key_base64)*.

+    The AES key is generated locally and sent to the server so it can
+    encrypt the bot credentials before returning them.
+
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -92,8 +64,8 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
    url = f"https://{PORTAL_HOST}{ONBOARD_CREATE_PATH}"
    key = generate_bind_key()

-    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
-        resp = client.post(url, json={"key": key}, headers=get_api_headers())
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
+        resp = await client.post(url, json={"key": key}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -108,7 +80,7 @@ def _create_bind_task(timeout: float = ONBOARD_API_TIMEOUT) -> Tuple[str, str]:
    return task_id, key


-def _poll_bind_result(
+async def poll_bind_result(
    task_id: str,
    timeout: float = ONBOARD_API_TIMEOUT,
 ) -> Tuple[BindStatus, str, str, str]:
@@ -117,6 +89,12 @@ def _poll_bind_result(
    Returns:
        A 4-tuple of ``(status, bot_appid, bot_encrypt_secret, user_openid)``.

+        * ``bot_encrypt_secret`` is AES-256-GCM encrypted — decrypt it with
+          :func:`~gateway.platforms.qqbot.crypto.decrypt_secret` using the
+          key from :func:`create_bind_task`.
+        * ``user_openid`` is the OpenID of the person who scanned the code
+          (available when ``status == COMPLETED``).
+
    Raises:
        RuntimeError: If the API returns a non-zero ``retcode``.
    """
@@ -124,8 +102,8 @@ def _poll_bind_result(

    url = f"https://{PORTAL_HOST}{ONBOARD_POLL_PATH}"

-    with httpx.Client(timeout=timeout, follow_redirects=True) as client:
-        resp = client.post(url, json={"task_id": task_id}, headers=get_api_headers())
+    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
+        resp = await client.post(url, json={"task_id": task_id}, headers=get_api_headers())
        resp.raise_for_status()
        data = resp.json()

@@ -144,77 +122,3 @@ def _poll_bind_result(
 def build_connect_url(task_id: str) -> str:
    """Build the QR-code target URL for a given *task_id*."""
    return QR_URL_TEMPLATE.format(task_id=quote(task_id))
-
-
-# ---------------------------------------------------------------------------
-# Public entry-point
-# ---------------------------------------------------------------------------
-
-_MAX_REFRESHES = 3
-
-
-def qr_register(timeout_seconds: int = 600) -> Optional[dict]:
-    """Run the QQBot scan-to-configure QR registration flow.
-
-    Mirrors ``feishu.qr_register()``: handles create → display → poll →
-    decrypt in one call.  Unexpected errors propagate to the caller.
-
-    :returns:
-        ``{"app_id": ..., "client_secret": ..., "user_openid": ...}`` on
-        success, or ``None`` on failure / expiry / cancellation.
-    """
-    deadline = time.monotonic() + timeout_seconds
-
-    for refresh_count in range(_MAX_REFRESHES + 1):
-        # ── Create bind task ──
-        try:
-            task_id, aes_key = _create_bind_task()
-        except Exception as exc:
-            logger.warning("[QQBot onboard] Failed to create bind task: %s", exc)
-            return None
-
-        url = build_connect_url(task_id)
-
-        # ── Display QR code + URL ──
-        print()
-        if _render_qr(url):
-            print(f"  Scan the QR code above, or open this URL directly:\n  {url}")
-        else:
-            print(f"  Open this URL in QQ on your phone:\n  {url}")
-            print("  Tip: pip install qrcode  to display a scannable QR code here")
-        print()
-
-        # ── Poll loop ──
-        while time.monotonic() < deadline:
-            try:
-                status, app_id, encrypted_secret, user_openid = _poll_bind_result(task_id)
-            except Exception:
-                time.sleep(ONBOARD_POLL_INTERVAL)
-                continue
-
-            if status == BindStatus.COMPLETED:
-                client_secret = decrypt_secret(encrypted_secret, aes_key)
-                print()
-                print(f"  QR scan complete! (App ID: {app_id})")
-                if user_openid:
-                    print(f"  Scanner's OpenID: {user_openid}")
-                return {
-                    "app_id": app_id,
-                    "client_secret": client_secret,
-                    "user_openid": user_openid,
-                }
-
-            if status == BindStatus.EXPIRED:
-                if refresh_count >= _MAX_REFRESHES:
-                    logger.warning("[QQBot onboard] QR code expired %d times — giving up", _MAX_REFRESHES)
-                    return None
-                print(f"\n  QR code expired, refreshing... ({refresh_count + 1}/{_MAX_REFRESHES})")
-                break  # next for-loop iteration creates a new task
-
-            time.sleep(ONBOARD_POLL_INTERVAL)
-        else:
-            # deadline reached without completing
-            logger.warning("[QQBot onboard] Poll timed out after %ds", timeout_seconds)
-            return None
-
-    return None
@@ -71,10 +71,8 @@ from gateway.platforms.base import (
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
-    cache_video_from_bytes,
    cache_document_from_bytes,
    resolve_proxy_url,
-    SUPPORTED_VIDEO_TYPES,
    SUPPORTED_DOCUMENT_TYPES,
    utf16_len,
    _prefix_within_utf16_limit,
@@ -496,13 +494,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    "[%s] DM topic '%s' already exists in chat %s (will be mapped from incoming messages)",
                    self.name, name, chat_id,
                )
-            elif "not a forum" in error_text or "forums_disabled" in error_text:
-                logger.warning(
-                    "[%s] Cannot create DM topic '%s' in chat %s: Topics mode is not enabled. "
-                    "The user must open the DM with this bot in Telegram, tap the bot name "
-                    "at the top, and enable 'Topics' in chat settings before topics can be created.",
-                    self.name, name, chat_id,
-                )
            else:
                logger.warning(
                    "[%s] Failed to create DM topic '%s' in chat %s: %s",
@@ -703,6 +694,7 @@ class TelegramAdapter(BasePlatformAdapter):
                "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
            }

+            proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
@@ -713,8 +705,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    ", ".join(fallback_ips),
                )

-            proxy_targets = ["api.telegram.org", *fallback_ips]
-            proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
            if fallback_ips and not proxy_url and not disable_fallback:
                logger.info(
                    "[%s] Telegram fallback IPs active: %s",
@@ -795,28 +785,8 @@ class TelegramAdapter(BasePlatformAdapter):
                # Telegram pushes updates to our HTTP endpoint.  This
                # enables cloud platforms (Fly.io, Railway) to auto-wake
                # suspended machines on inbound HTTP traffic.
-                #
-                # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
-                # python-telegram-bot passes secret_token=None and the
-                # webhook endpoint accepts any HTTP POST — attackers can
-                # inject forged updates as if from Telegram. Refuse to
-                # start rather than silently run in fail-open mode.
-                # See GHSA-3vpc-7q5r-276h.
                webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
-                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
-                if not webhook_secret:
-                    raise RuntimeError(
-                        "TELEGRAM_WEBHOOK_SECRET is required when "
-                        "TELEGRAM_WEBHOOK_URL is set. Without it, the "
-                        "webhook endpoint accepts forged updates from "
-                        "anyone who can reach it — see "
-                        "https://github.com/NousResearch/hermes-agent/"
-                        "security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
-                        "Generate a secret and set it in your .env:\n"
-                        "  export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
-                        "Then register it with Telegram when setting the "
-                        "webhook via setWebhook's secret_token parameter."
-                    )
+                webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
                from urllib.parse import urlparse
                webhook_path = urlparse(webhook_url).path or "/telegram"

@@ -1209,31 +1179,6 @@ class TelegramAdapter(BasePlatformAdapter):
            )
            return SendResult(success=False, error=str(e))

-    async def delete_message(self, chat_id: str, message_id: str) -> bool:
-        """Delete a previously sent Telegram message.
-
-        Used by the stream consumer's fresh-final cleanup path (ported
-        from openclaw/openclaw#72038) to remove long-lived preview
-        messages after sending the completed reply as a fresh message.
-        Telegram's Bot API ``deleteMessage`` works for bot-posted
-        messages in the last 48 hours.  Failures are non-fatal — the
-        caller leaves the preview in place and logs at debug level.
-        """
-        if not self._bot:
-            return False
-        try:
-            await self._bot.delete_message(
-                chat_id=int(chat_id),
-                message_id=int(message_id),
-            )
-            return True
-        except Exception as e:
-            logger.debug(
-                "[%s] Failed to delete Telegram message %s: %s",
-                self.name, message_id, e,
-            )
-            return False
-
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
@@ -1759,6 +1704,7 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")
        
        try:
+            import os
            if not os.path.exists(audio_path):
                return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path))
            
@@ -1807,6 +1753,7 @@ class TelegramAdapter(BasePlatformAdapter):
            return SendResult(success=False, error="Not connected")

        try:
+            import os
            if not os.path.exists(image_path):
                return SendResult(success=False, error=self._missing_media_path_error("Image", image_path))

@@ -2119,7 +2066,7 @@ class TelegramAdapter(BasePlatformAdapter):
            url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
            return _ph(f'[{display}]({url})')

-        text = re.sub(r'\[([^\]]+)\]\(([^()]*(?:\([^()]*\)[^()]*)*)\)', _convert_link, text)
+        text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)

        # 4) Convert markdown headers (## Title) → bold *Title*
        def _convert_header(m):
@@ -2379,16 +2326,10 @@ class TelegramAdapter(BasePlatformAdapter):
        DMs remain unrestricted. Group/supergroup messages are accepted when:
        - the chat is explicitly allowlisted in ``free_response_chats``
        - ``require_mention`` is disabled
+        - the message is a command
        - the message replies to the bot
        - the bot is @mentioned
        - the text/caption matches a configured regex wake-word pattern
-
-        When ``require_mention`` is enabled, slash commands are not given
-        special treatment — they must pass the same mention/reply checks
-        as any other group message.  Users can still trigger commands via
-        the Telegram bot menu (``/command@botname``) or by explicitly
-        mentioning the bot (``@botname /command``), both of which are
-        recognised as mentions by :meth:`_message_mentions_bot`.
        """
        if not self._is_group_chat(message):
            return True
@@ -2403,6 +2344,8 @@ class TelegramAdapter(BasePlatformAdapter):
            return True
        if not self._telegram_require_mention():
            return True
+        if is_command:
+            return True
        if self._is_reply_to_bot(message):
            return True
        if self._message_mentions_bot(message):
@@ -2685,23 +2628,6 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception as e:
                logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)

-        elif msg.video:
-            try:
-                file_obj = await msg.video.get_file()
-                video_bytes = await file_obj.download_as_bytearray()
-                ext = ".mp4"
-                if getattr(file_obj, "file_path", None):
-                    for candidate in SUPPORTED_VIDEO_TYPES:
-                        if file_obj.file_path.lower().endswith(candidate):
-                            ext = candidate
-                            break
-                cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
-                event.media_urls = [cached_path]
-                event.media_types = [SUPPORTED_VIDEO_TYPES.get(ext, "video/mp4")]
-                logger.info("[Telegram] Cached user video at %s", cached_path)
-            except Exception as e:
-                logger.warning("[Telegram] Failed to cache video: %s", e, exc_info=True)
-
        # Download document files to cache for agent processing
        elif msg.document:
            doc = msg.document
@@ -2718,21 +2644,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
                    ext = mime_to_ext.get(doc.mime_type, "")

-                if not ext and doc.mime_type:
-                    video_mime_to_ext = {v: k for k, v in SUPPORTED_VIDEO_TYPES.items()}
-                    ext = video_mime_to_ext.get(doc.mime_type, "")
-
-                if ext in SUPPORTED_VIDEO_TYPES:
-                    file_obj = await doc.get_file()
-                    video_bytes = await file_obj.download_as_bytearray()
-                    cached_path = cache_video_from_bytes(bytes(video_bytes), ext=ext)
-                    event.media_urls = [cached_path]
-                    event.media_types = [SUPPORTED_VIDEO_TYPES[ext]]
-                    event.message_type = MessageType.VIDEO
-                    logger.info("[Telegram] Cached user video document at %s", cached_path)
-                    await self.handle_message(event)
-                    return
-
                # Check if supported
                if ext not in SUPPORTED_DOCUMENT_TYPES:
                    supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
@@ -2871,11 +2782,13 @@ class TelegramAdapter(BasePlatformAdapter):
            logger.info("[Telegram] Analyzing sticker at %s", cached_path)

            from tools.vision_tools import vision_analyze_tool
+            import json as _json
+
            result_json = await vision_analyze_tool(
                image_url=cached_path,
                user_prompt=STICKER_VISION_PROMPT,
            )
-            result = json.loads(result_json)
+            result = _json.loads(result_json)

            if result.get("success"):
                description = result.get("analysis", "a sticker")
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
 _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]


-def _resolve_proxy_url(target_hosts=None) -> str | None:
+def _resolve_proxy_url() -> str | None:
    # Delegate to shared implementation (env vars + macOS system proxy detection)
    from gateway.platforms.base import resolve_proxy_url
-    return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)
+    return resolve_proxy_url("TELEGRAM_PROXY")


 class TelegramFallbackTransport(httpx.AsyncBaseTransport):
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):

    def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
        self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
-        proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
+        proxy_url = _resolve_proxy_url()
        if proxy_url and "proxy" not in transport_kwargs:
            transport_kwargs["proxy"] = proxy_url
        self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
@@ -508,11 +508,6 @@ class WeComAdapter(BasePlatformAdapter):
        self._remember_chat_req_id(chat_id, self._payload_req_id(payload))

        text, reply_text = self._extract_text(body)
-        # Strip leading @mention in group chats so slash commands like
-        # "@BotName /approve" are correctly recognized as "/approve".
-        # Mirrors what the Telegram adapter does (re.sub @botname).
-        if is_group and text:
-            text = re.sub(r"^@\S+\s*", "", text).strip()
        media_urls, media_types = await self._extract_media(body)
        message_type = self._derive_message_type(body, text, media_types)
        has_reply_context = bool(reply_text and (text or media_urls))
@@ -629,16 +624,13 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            _raw_mixed = body.get("mixed")
-            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
-            _raw_items = mixed.get("msg_item")
-            items = _raw_items if isinstance(_raw_items, list) else []
+            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
+            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
                if str(item.get("msgtype") or "").lower() == "text":
-                    _raw_text = item.get("text")
-                    text_block = _raw_text if isinstance(_raw_text, dict) else {}
+                    text_block = item.get("text") if isinstance(item.get("text"), dict) else {}
                    content = str(text_block.get("content") or "").strip()
                    if content:
                        text_parts.append(content)
@@ -680,10 +672,8 @@ class WeComAdapter(BasePlatformAdapter):
        msgtype = str(body.get("msgtype") or "").lower()

        if msgtype == "mixed":
-            _raw_mixed = body.get("mixed")
-            mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {}
-            _raw_items = mixed.get("msg_item")
-            items = _raw_items if isinstance(_raw_items, list) else []
+            mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {}
+            items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else []
            for item in items:
                if not isinstance(item, dict):
                    continue
@@ -1469,134 +1459,3 @@ class WeComAdapter(BasePlatformAdapter):
            "name": chat_id,
            "type": "group" if chat_id and chat_id.lower().startswith("group") else "dm",
        }
-
-
-# ------------------------------------------------------------------
-# QR code scan flow for obtaining bot credentials
-# ------------------------------------------------------------------
-
-_QR_GENERATE_URL = "https://work.weixin.qq.com/ai/qc/generate"
-_QR_QUERY_URL = "https://work.weixin.qq.com/ai/qc/query_result"
-_QR_CODE_PAGE = "https://work.weixin.qq.com/ai/qc/gen?source=hermes&scode="
-_QR_POLL_INTERVAL = 3  # seconds
-_QR_POLL_TIMEOUT = 300  # 5 minutes
-
-
-def qr_scan_for_bot_info(
-    *,
-    timeout_seconds: int = _QR_POLL_TIMEOUT,
-) -> Optional[Dict[str, str]]:
-    """Run the WeCom QR scan flow to obtain bot_id and secret.
-
-    Fetches a QR code from WeCom, renders it in the terminal, and polls
-    until the user scans it or the timeout expires.
-
-    Returns ``{"bot_id": ..., "secret": ...}`` on success, ``None`` on
-    failure or timeout.
-
-    Note: the ``work.weixin.qq.com/ai/qc/{generate,query_result}`` endpoints
-    used here are not part of WeCom's public developer API — they back the
-    admin-console web UI's bot-creation flow and may change without notice.
-    The same pattern is used by the feishu/dingtalk QR setup wizards.
-    """
-    try:
-        import urllib.request
-        import urllib.parse
-    except ImportError:  # pragma: no cover
-        logger.error("urllib is required for WeCom QR scan")
-        return None
-
-    generate_url = f"{_QR_GENERATE_URL}?source=hermes"
-
-    # ── Step 1: Fetch QR code ──
-    print("  Connecting to WeCom...", end="", flush=True)
-    try:
-        req = urllib.request.Request(generate_url, headers={"User-Agent": "HermesAgent/1.0"})
-        with urllib.request.urlopen(req, timeout=15) as resp:
-            raw = json.loads(resp.read().decode("utf-8"))
-    except Exception as exc:
-        logger.error("WeCom QR: failed to fetch QR code: %s", exc)
-        print(f" failed: {exc}")
-        return None
-
-    data = raw.get("data") or {}
-    scode = str(data.get("scode") or "").strip()
-    auth_url = str(data.get("auth_url") or "").strip()
-
-    if not scode or not auth_url:
-        logger.error("WeCom QR: unexpected response format: %s", raw)
-        print(" failed: unexpected response format")
-        return None
-
-    print(" done.")
-
-    # ── Step 2: Render QR code in terminal ──
-    print()
-    qr_rendered = False
-    try:
-        import qrcode as _qrcode
-        qr = _qrcode.QRCode()
-        qr.add_data(auth_url)
-        qr.make(fit=True)
-        qr.print_ascii(invert=True)
-        qr_rendered = True
-    except ImportError:
-        pass
-    except Exception:
-        pass
-
-    page_url = f"{_QR_CODE_PAGE}{urllib.parse.quote(scode)}"
-    if qr_rendered:
-        print(f"\n  Scan the QR code above, or open this URL directly:\n  {page_url}")
-    else:
-        print(f"  Open this URL in WeCom on your phone:\n\n  {page_url}\n")
-        print("  Tip: pip install qrcode  to display a scannable QR code here next time")
-    print()
-    print("  Fetching configuration results...", end="", flush=True)
-
-    # ── Step 3: Poll for result ──
-    import time
-    deadline = time.time() + timeout_seconds
-    query_url = f"{_QR_QUERY_URL}?scode={urllib.parse.quote(scode)}"
-    poll_count = 0
-
-    while time.time() < deadline:
-        try:
-            req = urllib.request.Request(query_url, headers={"User-Agent": "HermesAgent/1.0"})
-            with urllib.request.urlopen(req, timeout=10) as resp:
-                result = json.loads(resp.read().decode("utf-8"))
-        except Exception as exc:
-            logger.debug("WeCom QR poll error: %s", exc)
-            time.sleep(_QR_POLL_INTERVAL)
-            continue
-
-        poll_count += 1
-        # Print a dot on every poll so progress is visible within 3s.
-        print(".", end="", flush=True)
-
-        result_data = result.get("data") or {}
-        status = str(result_data.get("status") or "").lower()
-
-        if status == "success":
-            print()  # newline after "Fetching configuration results..." dots
-            bot_info = result_data.get("bot_info") or {}
-            bot_id = str(bot_info.get("botid") or bot_info.get("bot_id") or "").strip()
-            secret = str(bot_info.get("secret") or "").strip()
-            if bot_id and secret:
-                return {"bot_id": bot_id, "secret": secret}
-            logger.warning(
-                "WeCom QR: scan reported success but bot_info missing or incomplete: %s",
-                result_data,
-            )
-            print(
-                "  QR scan reported success but no bot credentials were returned.\n"
-                "  This usually means the bot was not actually created on the WeCom side.\n"
-                "  Falling back to manual credential entry."
-            )
-            return None
-
-        time.sleep(_QR_POLL_INTERVAL)
-
-    print()  # newline after dots
-    print(f"  QR scan timed out ({timeout_seconds // 60} minutes). Please try again.")
-    return None
@@ -66,37 +66,6 @@ def _kill_port_process(port: int) -> None:
    except Exception:
        pass

-
-def _terminate_bridge_process(proc, *, force: bool = False) -> None:
-    """Terminate the bridge process using process-tree semantics where possible."""
-    if _IS_WINDOWS:
-        cmd = ["taskkill", "/PID", str(proc.pid), "/T"]
-        if force:
-            cmd.append("/F")
-        try:
-            result = subprocess.run(
-                cmd,
-                capture_output=True,
-                text=True,
-                timeout=10,
-            )
-        except FileNotFoundError:
-            if force:
-                proc.kill()
-            else:
-                proc.terminate()
-            return
-
-        if result.returncode != 0:
-            details = (result.stderr or result.stdout or "").strip()
-            raise OSError(details or f"taskkill failed for PID {proc.pid}")
-        return
-
-    import signal
-
-    sig = signal.SIGTERM if not force else signal.SIGKILL
-    os.killpg(os.getpgid(proc.pid), sig)
-
 import sys
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

@@ -149,10 +118,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
    - bridge_script: Path to the Node.js bridge script
    - bridge_port: Port for HTTP communication (default: 3000)
    - session_path: Path to store WhatsApp session data
-    - dm_policy: "open" | "allowlist" | "disabled" — how DMs are handled (default: "open")
-    - allow_from: List of sender IDs allowed in DMs (when dm_policy="allowlist")
-    - group_policy: "open" | "allowlist" | "disabled" — which groups are processed (default: "open")
-    - group_allow_from: List of group JIDs allowed (when group_policy="allowlist")
    """
    
    # WhatsApp message limits — practical UX limit, not protocol max.
@@ -175,10 +140,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
            get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
        ))
        self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
-        self._dm_policy = str(config.extra.get("dm_policy") or os.getenv("WHATSAPP_DM_POLICY", "open")).strip().lower()
-        self._allow_from = self._coerce_allow_list(config.extra.get("allow_from") or config.extra.get("allowFrom"))
-        self._group_policy = str(config.extra.get("group_policy") or os.getenv("WHATSAPP_GROUP_POLICY", "open")).strip().lower()
-        self._group_allow_from = self._coerce_allow_list(config.extra.get("group_allow_from") or config.extra.get("groupAllowFrom"))
        self._mention_patterns = self._compile_mention_patterns()
        self._message_queue: asyncio.Queue = asyncio.Queue()
        self._bridge_log_fh = None
@@ -202,33 +163,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
            return {str(part).strip() for part in raw if str(part).strip()}
        return {part.strip() for part in str(raw).split(",") if part.strip()}

-    @staticmethod
-    def _coerce_allow_list(raw) -> set[str]:
-        """Parse allow_from / group_allow_from from config or env var."""
-        if raw is None:
-            return set()
-        if isinstance(raw, list):
-            return {str(part).strip() for part in raw if str(part).strip()}
-        return {part.strip() for part in str(raw).split(",") if part.strip()}
-
-    def _is_dm_allowed(self, sender_id: str) -> bool:
-        """Check whether a DM from the given sender should be processed."""
-        if self._dm_policy == "disabled":
-            return False
-        if self._dm_policy == "allowlist":
-            return sender_id in self._allow_from
-        # "open" — all DMs allowed
-        return True
-
-    def _is_group_allowed(self, chat_id: str) -> bool:
-        """Check whether a group chat should be processed."""
-        if self._group_policy == "disabled":
-            return False
-        if self._group_policy == "allowlist":
-            return chat_id in self._group_allow_from
-        # "open" — all groups allowed
-        return True
-
    def _compile_mention_patterns(self):
        patterns = self.config.extra.get("mention_patterns")
        if patterns is None:
@@ -321,18 +255,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
        return cleaned.strip() or text

    def _should_process_message(self, data: Dict[str, Any]) -> bool:
-        is_group = data.get("isGroup", False)
-        if is_group:
-            chat_id = str(data.get("chatId") or "")
-            if not self._is_group_allowed(chat_id):
-                return False
-        else:
-            sender_id = str(data.get("senderId") or data.get("from") or "")
-            if not self._is_dm_allowed(sender_id):
-                return False
-            # DMs that pass the policy gate are always processed
+        if not data.get("isGroup"):
            return True
-        # Group messages: check mention / free-response settings
        chat_id = str(data.get("chatId") or "")
        if chat_id in self._whatsapp_free_response_chats():
            return True
@@ -399,6 +323,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
            
            # Check if bridge is already running and connected
            import aiohttp
+            import asyncio
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.get(
@@ -567,14 +492,22 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Stop the WhatsApp bridge and clean up any orphaned processes."""
        if self._bridge_process:
            try:
+                # Kill the entire process group so child node processes die too
+                import signal
                try:
-                    _terminate_bridge_process(self._bridge_process, force=False)
+                    if _IS_WINDOWS:
+                        self._bridge_process.terminate()
+                    else:
+                        os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
                except (ProcessLookupError, PermissionError):
                    self._bridge_process.terminate()
                await asyncio.sleep(1)
                if self._bridge_process.poll() is None:
                    try:
-                        _terminate_bridge_process(self._bridge_process, force=True)
+                        if _IS_WINDOWS:
+                            self._bridge_process.kill()
+                        else:
+                            os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
                    except (ProcessLookupError, PermissionError):
                        self._bridge_process.kill()
            except Exception as e:
@@ -840,17 +773,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
        """Send a video natively via bridge — plays inline in WhatsApp."""
        return await self._send_media_to_bridge(chat_id, video_path, "video", caption)

-    async def send_voice(
-        self,
-        chat_id: str,
-        audio_path: str,
-        caption: Optional[str] = None,
-        reply_to: Optional[str] = None,
-        **kwargs,
-    ) -> SendResult:
-        """Send an audio file as a WhatsApp voice message via bridge."""
-        return await self._send_media_to_bridge(chat_id, audio_path, "audio", caption)
-
    async def send_document(
        self,
        chat_id: str,
@@ -1,647 +0,0 @@
-"""
-yuanbao_media.py — 元宝平台媒体处理模块
-
-提供 COS 上传、文件下载、TIM 媒体消息构建等功能。
-移植自 TypeScript 版 media.ts（yuanbao-openclaw-plugin），
-使用 httpx 替代 cos-nodejs-sdk-v5，避免引入额外 SDK 依赖。
-
-COS 上传流程：
-  1. 调用 genUploadInfo 获取临时凭证（tmpSecretId/tmpSecretKey/sessionToken）
-  2. 用临时凭证通过 HMAC-SHA1 签名构建 Authorization 头
-  3. HTTP PUT 上传到 COS
-
-TIM 消息体构建：
-  - buildImageMsgBody() → TIMImageElem
-  - buildFileMsgBody()  → TIMFileElem
-"""
-
-from __future__ import annotations
-
-import hashlib
-import hmac
-import logging
-import os
-import re
-import secrets
-import struct
-import time
-import urllib.parse
-from datetime import datetime, timezone, timedelta
-from typing import Optional, Any
-
-import httpx
-
-logger = logging.getLogger(__name__)
-
-# ============ 常量 ============
-
-UPLOAD_INFO_PATH = "/api/resource/genUploadInfo"
-DEFAULT_API_DOMAIN = "yuanbao.tencent.com"
-DEFAULT_MAX_SIZE_MB = 50
-
-# COS 加速域名后缀（优先使用全球加速）
-COS_USE_ACCELERATE = True
-
-# ============ 类型映射 ============
-
-# MIME → image_format 数字（TIM 协议字段）
-_MIME_TO_IMAGE_FORMAT: dict[str, int] = {
-    "image/jpeg": 1,
-    "image/jpg": 1,
-    "image/gif": 2,
-    "image/png": 3,
-    "image/bmp": 4,
-    "image/webp": 255,
-    "image/heic": 255,
-    "image/tiff": 255,
-}
-
-# 文件扩展名 → MIME
-_EXT_TO_MIME: dict[str, str] = {
-    ".jpg": "image/jpeg",
-    ".jpeg": "image/jpeg",
-    ".png": "image/png",
-    ".gif": "image/gif",
-    ".webp": "image/webp",
-    ".bmp": "image/bmp",
-    ".heic": "image/heic",
-    ".tiff": "image/tiff",
-    ".ico": "image/x-icon",
-    ".pdf": "application/pdf",
-    ".doc": "application/msword",
-    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-    ".xls": "application/vnd.ms-excel",
-    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-    ".ppt": "application/vnd.ms-powerpoint",
-    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-    ".txt": "text/plain",
-    ".zip": "application/zip",
-    ".tar": "application/x-tar",
-    ".gz": "application/gzip",
-    ".mp3": "audio/mpeg",
-    ".mp4": "video/mp4",
-    ".wav": "audio/wav",
-    ".ogg": "audio/ogg",
-    ".webm": "video/webm",
-}
-
-
-# ============ 工具函数 ============
-
-def guess_mime_type(filename: str) -> str:
-    """根据文件扩展名猜测 MIME 类型。"""
-    ext = os.path.splitext(filename)[-1].lower()
-    return _EXT_TO_MIME.get(ext, "application/octet-stream")
-
-
-def is_image(filename: str, mime_type: str = "") -> bool:
-    """判断是否为图片类型。"""
-    if mime_type.startswith("image/"):
-        return True
-    ext = os.path.splitext(filename)[-1].lower()
-    return ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".heic", ".tiff", ".ico"}
-
-
-def get_image_format(mime_type: str) -> int:
-    """获取 TIM 图片格式编号。"""
-    return _MIME_TO_IMAGE_FORMAT.get(mime_type.lower(), 255)
-
-
-def md5_hex(data: bytes) -> str:
-    """计算 MD5 十六进制摘要。"""
-    return hashlib.md5(data).hexdigest()
-
-
-def generate_file_id() -> str:
-    """生成随机文件 ID（32 位 hex）。"""
-    return secrets.token_hex(16)
-
-
-
-# ============ 图片尺寸解析（纯 Python，无需 Pillow） ============
-
-def parse_image_size(data: bytes) -> Optional[dict[str, int]]:
-    """
-    解析图片宽高（支持 JPEG/PNG/GIF/WebP），无需第三方依赖。
-    返回 {"width": w, "height": h} 或 None（无法识别）。
-    """
-    return (
-        _parse_png_size(data)
-        or _parse_jpeg_size(data)
-        or _parse_gif_size(data)
-        or _parse_webp_size(data)
-    )
-
-
-def _parse_png_size(buf: bytes) -> Optional[dict[str, int]]:
-    if len(buf) < 24:
-        return None
-    if buf[:4] != b"\x89PNG":
-        return None
-    w = struct.unpack(">I", buf[16:20])[0]
-    h = struct.unpack(">I", buf[20:24])[0]
-    return {"width": w, "height": h}
-
-
-def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
-    if len(buf) < 4 or buf[0] != 0xFF or buf[1] != 0xD8:
-        return None
-    i = 2
-    while i < len(buf) - 9:
-        if buf[i] != 0xFF:
-            i += 1
-            continue
-        marker = buf[i + 1]
-        if marker in (0xC0, 0xC2):
-            h = struct.unpack(">H", buf[i + 5: i + 7])[0]
-            w = struct.unpack(">H", buf[i + 7: i + 9])[0]
-            return {"width": w, "height": h}
-        if i + 3 < len(buf):
-            i += 2 + struct.unpack(">H", buf[i + 2: i + 4])[0]
-        else:
-            break
-    return None
-
-
-def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
-    if len(buf) < 10:
-        return None
-    sig = buf[:6].decode("ascii", errors="replace")
-    if sig not in ("GIF87a", "GIF89a"):
-        return None
-    w = struct.unpack("<H", buf[6:8])[0]
-    h = struct.unpack("<H", buf[8:10])[0]
-    return {"width": w, "height": h}
-
-
-def _parse_webp_size(buf: bytes) -> Optional[dict[str, int]]:
-    if len(buf) < 16:
-        return None
-    if buf[:4] != b"RIFF" or buf[8:12] != b"WEBP":
-        return None
-    chunk = buf[12:16].decode("ascii", errors="replace")
-    if chunk == "VP8 ":
-        if len(buf) >= 30 and buf[23] == 0x9D and buf[24] == 0x01 and buf[25] == 0x2A:
-            w = struct.unpack("<H", buf[26:28])[0] & 0x3FFF
-            h = struct.unpack("<H", buf[28:30])[0] & 0x3FFF
-            return {"width": w, "height": h}
-    elif chunk == "VP8L":
-        if len(buf) >= 25 and buf[20] == 0x2F:
-            bits = struct.unpack("<I", buf[21:25])[0]
-            w = (bits & 0x3FFF) + 1
-            h = ((bits >> 14) & 0x3FFF) + 1
-            return {"width": w, "height": h}
-    elif chunk == "VP8X":
-        if len(buf) >= 30:
-            w = (buf[24] | (buf[25] << 8) | (buf[26] << 16)) + 1
-            h = (buf[27] | (buf[28] << 8) | (buf[29] << 16)) + 1
-            return {"width": w, "height": h}
-    return None
-
-
-# ============ URL 下载 ============
-
-async def download_url(
-    url: str,
-    max_size_mb: int = DEFAULT_MAX_SIZE_MB,
-) -> tuple[bytes, str]:
-    """
-    下载 URL 内容，返回 (bytes, content_type)。
-
-    Args:
-        url:          HTTP(S) URL
-        max_size_mb:  最大允许大小（MB），超过则抛出异常
-
-    Returns:
-        (data_bytes, content_type_string)
-
-    Raises:
-        ValueError:  内容超过大小限制
-        httpx.HTTPError: 网络/HTTP 错误
-    """
-    max_bytes = max_size_mb * 1024 * 1024
-    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
-        # 先 HEAD 检查大小
-        try:
-            head = await client.head(url)
-            content_length = int(head.headers.get("content-length", 0) or 0)
-            if content_length > 0 and content_length > max_bytes:
-                raise ValueError(
-                    f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
-                )
-        except httpx.HTTPStatusError:
-            pass  # 部分服务器不支持 HEAD，忽略
-
-        # GET 下载（流式读取，防止超限）
-        async with client.stream("GET", url) as resp:
-            resp.raise_for_status()
-
-            content_type = resp.headers.get("content-type", "").split(";")[0].strip()
-
-            chunks: list[bytes] = []
-            downloaded = 0
-            async for chunk in resp.aiter_bytes(65536):
-                downloaded += len(chunk)
-                if downloaded > max_bytes:
-                    raise ValueError(
-                        f"文件过大: 已超过 {max_size_mb} MB 限制"
-                    )
-                chunks.append(chunk)
-
-        data = b"".join(chunks)
-        return data, content_type
-
-
-# ============ COS 鉴权（HMAC-SHA1） ============
-
-def _cos_sign(
-    method: str,
-    path: str,
-    params: dict[str, str],
-    headers: dict[str, str],
-    secret_id: str,
-    secret_key: str,
-    start_time: Optional[int] = None,
-    expire_seconds: int = 3600,
-) -> str:
-    """
-    构建 COS 请求签名（q-sign-algorithm=sha1 方案）。
-    参考：https://cloud.tencent.com/document/product/436/7778
-
-    Args:
-        method:         HTTP 方法（小写，如 "put"）
-        path:           URL 路径（URL encode 后的小写）
-        params:         URL 查询参数 dict（用于签名）
-        headers:        参与签名的请求头 dict（key 需小写）
-        secret_id:      临时 SecretId（tmpSecretId）
-        secret_key:     临时 SecretKey（tmpSecretKey）
-        start_time:     签名起始 Unix 时间戳（默认 now）
-        expire_seconds: 签名有效期（秒，默认 3600）
-
-    Returns:
-        Authorization header 值（完整字符串）
-    """
-    now = int(time.time())
-    q_sign_time = f"{start_time or now};{(start_time or now) + expire_seconds}"
-
-    # Step 1: SignKey = HMAC-SHA1(SecretKey, q-sign-time)
-    sign_key = hmac.new(
-        secret_key.encode("utf-8"),
-        q_sign_time.encode("utf-8"),
-        hashlib.sha1,
-    ).hexdigest()
-
-    # Step 2: HttpString
-    # 参数和头部需按字典序排列，key 小写
-    sorted_params = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in params.items())
-    sorted_headers = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in headers.items())
-
-    url_param_list = ";".join(k for k, _ in sorted_params)
-    url_params = "&".join(f"{k}={v}" for k, v in sorted_params)
-    header_list = ";".join(k for k, _ in sorted_headers)
-    header_str = "&".join(f"{k}={v}" for k, v in sorted_headers)
-
-    http_string = "\n".join([
-        method.lower(),
-        path,
-        url_params,
-        header_str,
-        "",
-    ])
-
-    # Step 3: StringToSign = sha1 hash of HttpString
-    sha1_of_http = hashlib.sha1(http_string.encode("utf-8")).hexdigest()
-    string_to_sign = "\n".join([
-        "sha1",
-        q_sign_time,
-        sha1_of_http,
-        "",
-    ])
-
-    # Step 4: Signature = HMAC-SHA1(SignKey, StringToSign)
-    signature = hmac.new(
-        sign_key.encode("utf-8"),
-        string_to_sign.encode("utf-8"),
-        hashlib.sha1,
-    ).hexdigest()
-
-    return (
-        f"q-sign-algorithm=sha1"
-        f"&q-ak={secret_id}"
-        f"&q-sign-time={q_sign_time}"
-        f"&q-key-time={q_sign_time}"
-        f"&q-header-list={header_list}"
-        f"&q-url-param-list={url_param_list}"
-        f"&q-signature={signature}"
-    )
-
-
-# ============ 主要公开 API ============
-
-async def get_cos_credentials(
-    app_key: str,
-    api_domain: str,
-    token: str,
-    filename: str = "file",
-    file_id: Optional[str] = None,
-    bot_id: str = "",
-    route_env: str = "",
-) -> dict:
-    """
-    调用 genUploadInfo 接口获取 COS 临时密钥及上传配置。
-
-    Args:
-        app_key:        应用 Key（用于 X-ID 头）
-        api_domain:     API 域名（如 https://bot.yuanbao.tencent.com）
-        token:          当前有效的签票 token（X-Token 头）
-        filename:       待上传的文件名（含扩展名）
-        file_id:        客户端生成的唯一文件 ID（不传则自动生成）
-        bot_id:         Bot 账号 ID（用于 X-ID 头）
-
-    Returns:
-        COS 上传配置 dict，包含以下字段：
-            bucketName         (str)  — COS Bucket 名称
-            region             (str)  — COS 地域
-            location           (str)  — 上传 Key（对象路径）
-            encryptTmpSecretId (str)  — 临时 SecretId
-            encryptTmpSecretKey(str)  — 临时 SecretKey
-            encryptToken       (str)  — SessionToken
-            startTime          (int)  — 凭证起始时间戳（Unix）
-            expiredTime        (int)  — 凭证过期时间戳（Unix）
-            resourceUrl        (str)  — 上传后的公网访问 URL
-            resourceID         (str)  — 资源 ID（可选）
-
-    Raises:
-        RuntimeError: 接口返回非 0 code 或字段缺失
-    """
-    if file_id is None:
-        file_id = generate_file_id()
-
-    upload_url = f"{api_domain.rstrip('/')}{UPLOAD_INFO_PATH}"
-
-    headers = {
-        "Content-Type": "application/json",
-        "X-Token": token,
-        "X-ID": bot_id or app_key,
-        "X-Source": "web",
-    }
-    if route_env:
-        headers["X-Route-Env"] = route_env
-    body = {
-        "fileName": filename,
-        "fileId": file_id,
-        "docFrom": "localDoc",
-        "docOpenId": "",
-    }
-
-    async with httpx.AsyncClient(timeout=15.0) as client:
-        resp = await client.post(upload_url, json=body, headers=headers)
-        resp.raise_for_status()
-        result: dict[str, Any] = resp.json()
-
-    code = result.get("code")
-    if code != 0 and code is not None:
-        raise RuntimeError(
-            f"genUploadInfo 失败: code={code}, msg={result.get('msg', '')}"
-        )
-
-    data = result.get("data") or result
-    required_fields = ["bucketName", "location"]
-    missing = [f for f in required_fields if not data.get(f)]
-    if missing:
-        raise RuntimeError(
-            f"genUploadInfo 返回字段不完整: 缺少字段 {missing}"
-        )
-
-    return data
-
-
-async def upload_to_cos(
-    file_bytes: bytes,
-    filename: str,
-    content_type: str,
-    credentials: dict,
-    bucket: str,
-    region: str,
-) -> dict:
-    """
-    通过 httpx PUT 请求将文件上传到 COS。
-    使用临时凭证（tmpSecretId/tmpSecretKey/sessionToken）构建 HMAC-SHA1 签名。
-
-    Args:
-        file_bytes:   文件二进制内容
-        filename:     文件名（用于辅助计算 MIME、UUID）
-        content_type: MIME 类型（如 "image/jpeg"）
-        credentials:  get_cos_credentials() 返回的 dict，包含：
-                        encryptTmpSecretId  → tmpSecretId
-                        encryptTmpSecretKey → tmpSecretKey
-                        encryptToken        → sessionToken
-                        location            → COS key（对象路径）
-                        resourceUrl         → 上传后公网 URL
-                        startTime           → 凭证起始时间（Unix）
-                        expiredTime         → 凭证过期时间（Unix）
-        bucket:       COS Bucket 名称（如 chatbot-1234567890）
-        region:       COS 地域（如 ap-guangzhou）
-
-    Returns:
-        上传结果 dict，包含：
-            url       (str)           — COS 公网访问 URL
-            uuid      (str)           — 文件内容 MD5
-            size      (int)           — 文件大小（字节）
-            width     (int, optional) — 图片宽度（仅图片）
-            height    (int, optional) — 图片高度（仅图片）
-
-    Raises:
-        httpx.HTTPStatusError: COS 返回非 2xx 状态
-        RuntimeError:          credentials 字段缺失
-    """
-    secret_id: str = credentials.get("encryptTmpSecretId", "")
-    secret_key: str = credentials.get("encryptTmpSecretKey", "")
-    session_token: str = credentials.get("encryptToken", "")
-    cos_key: str = credentials.get("location", "")
-    resource_url: str = credentials.get("resourceUrl", "")
-    start_time: Optional[int] = credentials.get("startTime")
-    expired_time: Optional[int] = credentials.get("expiredTime")
-
-    if not secret_id or not secret_key or not cos_key:
-        raise RuntimeError(
-            f"COS credentials 不完整: secretId={bool(secret_id)}, "
-            f"secretKey={bool(secret_key)}, location={bool(cos_key)}"
-        )
-
-    # 构建 COS 上传 URL（优先使用全球加速域名）
-    if COS_USE_ACCELERATE:
-        cos_host = f"{bucket}.cos.accelerate.myqcloud.com"
-    else:
-        cos_host = f"{bucket}.cos.{region}.myqcloud.com"
-
-    # URL encode cos_key（保留 /）
-    encoded_key = urllib.parse.quote(cos_key, safe="/")
-    cos_url = f"https://{cos_host}/{encoded_key.lstrip('/')}"
-
-    # 确定 Content-Type
-    if not content_type or content_type == "application/octet-stream":
-        if is_image(filename):
-            content_type = guess_mime_type(filename)
-        else:
-            content_type = "application/octet-stream"
-
-    # 计算文件 MD5 + size
-    file_uuid = md5_hex(file_bytes)
-    file_size = len(file_bytes)
-
-    # 参与签名的请求头
-    sign_headers = {
-        "host": cos_host,
-        "content-type": content_type,
-        "x-cos-security-token": session_token,
-    }
-
-    # 计算签名有效期
-    now = int(time.time())
-    sign_start = start_time if start_time else now
-    sign_expire = (expired_time - now) if expired_time and expired_time > now else 3600
-
-    authorization = _cos_sign(
-        method="put",
-        path=f"/{encoded_key.lstrip('/')}",
-        params={},
-        headers=sign_headers,
-        secret_id=secret_id,
-        secret_key=secret_key,
-        start_time=sign_start,
-        expire_seconds=sign_expire,
-    )
-
-    put_headers = {
-        "Authorization": authorization,
-        "Content-Type": content_type,
-        "x-cos-security-token": session_token,
-    }
-
-    logger.info(
-        "COS PUT: bucket=%s region=%s key=%s size=%d mime=%s",
-        bucket, region, cos_key, file_size, content_type,
-    )
-
-    async with httpx.AsyncClient(timeout=120.0) as client:
-        resp = await client.put(
-            cos_url,
-            content=file_bytes,
-            headers=put_headers,
-        )
-        resp.raise_for_status()
-
-    # 解析图片尺寸（仅图片类型）
-    result: dict[str, Any] = {
-        "url": resource_url or cos_url,
-        "uuid": file_uuid,
-        "size": file_size,
-    }
-
-    if content_type.startswith("image/"):
-        size_info = parse_image_size(file_bytes)
-        if size_info:
-            result["width"] = size_info["width"]
-            result["height"] = size_info["height"]
-
-    logger.info(
-        "COS 上传成功: url=%s size=%d",
-        result["url"], file_size,
-    )
-    return result
-
-
-# ============ TIM 媒体消息构建 ============
-
-def build_image_msg_body(
-    url: str,
-    uuid: Optional[str] = None,
-    filename: Optional[str] = None,
-    size: int = 0,
-    width: int = 0,
-    height: int = 0,
-    mime_type: str = "",
-) -> list[dict]:
-    """
-    构建腾讯 IM TIMImageElem 消息体。
-    参考：https://cloud.tencent.com/document/product/269/2720
-
-    Args:
-        url:       图片公网访问 URL（COS resourceUrl）
-        uuid:      文件 UUID（MD5 或其他唯一标识）
-        filename:  文件名（uuid 为空时作为备用）
-        size:      文件大小（字节）
-        width:     图片宽度（像素）
-        height:    图片高度（像素）
-        mime_type: MIME 类型（用于确定 image_format）
-
-    Returns:
-        TIMImageElem 消息体列表（适合直接放入 msg_body）
-    """
-    _uuid = uuid or filename or _basename_from_url(url) or "image"
-    image_format = get_image_format(mime_type) if mime_type else 255
-
-    return [
-        {
-            "msg_type": "TIMImageElem",
-            "msg_content": {
-                "uuid": _uuid,
-                "image_format": image_format,
-                "image_info_array": [
-                    {
-                        "type": 1,       # 1 = 原图
-                        "size": size,
-                        "width": width,
-                        "height": height,
-                        "url": url,
-                    }
-                ],
-            },
-        }
-    ]
-
-
-def build_file_msg_body(
-    url: str,
-    filename: str,
-    uuid: Optional[str] = None,
-    size: int = 0,
-) -> list[dict]:
-    """
-    构建腾讯 IM TIMFileElem 消息体。
-    参考：https://cloud.tencent.com/document/product/269/2720
-
-    Args:
-        url:      文件公网访问 URL（COS resourceUrl）
-        filename: 文件名（含扩展名）
-        uuid:     文件 UUID（MD5 或其他唯一标识，不传则使用 filename）
-        size:     文件大小（字节）
-
-    Returns:
-        TIMFileElem 消息体列表（适合直接放入 msg_body）
-    """
-    _uuid = uuid or filename
-
-    return [
-        {
-            "msg_type": "TIMFileElem",
-            "msg_content": {
-                "uuid": _uuid,
-                "file_name": filename,
-                "file_size": size,
-                "url": url,
-            },
-        }
-    ]
-
-
-# ============ 内部工具 ============
-
-def _basename_from_url(url: str) -> str:
-    """从 URL 提取文件名。"""
-    try:
-        parsed = urllib.parse.urlparse(url)
-        return os.path.basename(parsed.path)
-    except Exception:
-        return ""
@@ -1,558 +0,0 @@
-"""
-Yuanbao sticker (TIMFaceElem) support.
-
-Ported from yuanbao-openclaw-plugin/src/sticker/.
-
-TIMFaceElem wire format:
-    {
-        "msg_type": "TIMFaceElem",
-        "msg_content": {
-            "index": 0,          # always 0 per Yuanbao convention
-            "data": "<json>",    # serialised sticker metadata
-        }
-    }
-
-The `data` field carries a JSON string with the sticker's metadata so the
-receiver can look up the correct asset in the emoji pack.
-"""
-
-from __future__ import annotations
-
-import json
-import random
-import re
-import unicodedata
-from typing import Optional
-
-# ---------------------------------------------------------------------------
-# Sticker catalogue – ported from builtin-stickers.json
-# Key   : canonical name (Chinese)
-# Value : {sticker_id, package_id, name, description, width, height, formats}
-# ---------------------------------------------------------------------------
-STICKER_MAP: dict[str, dict] = {
-    "六六六": {
-        "sticker_id": "278", "package_id": "1003", "name": "六六六",
-        "description": "666 厉害 牛 棒 绝了 好强 awesome",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "我想开了": {
-        "sticker_id": "262", "package_id": "1003", "name": "我想开了",
-        "description": "想开 佛系 释怀 顿悟 看淡了 无所谓",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "害羞": {
-        "sticker_id": "130", "package_id": "1003", "name": "害羞",
-        "description": "腼腆 不好意思 脸红 娇羞 羞涩 捂脸",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "比心": {
-        "sticker_id": "252", "package_id": "1003", "name": "比心",
-        "description": "笔芯 爱你 爱心手势 love heart 喜欢你",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "委屈": {
-        "sticker_id": "125", "package_id": "1003", "name": "委屈",
-        "description": "难过 想哭 可怜巴巴 瘪嘴 受伤 被欺负",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "亲亲": {
-        "sticker_id": "146", "package_id": "1003", "name": "亲亲",
-        "description": "么么 mua 亲一下 kiss 飞吻 啵",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "酷": {
-        "sticker_id": "131", "package_id": "1003", "name": "酷",
-        "description": "帅 墨镜 cool 高冷 有型 swagger",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "睡": {
-        "sticker_id": "145", "package_id": "1003", "name": "睡",
-        "description": "睡觉 困 zzZ 打盹 躺平 休眠 sleepy",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "发呆": {
-        "sticker_id": "152", "package_id": "1003", "name": "发呆",
-        "description": "懵 愣住 放空 呆滞 出神 脑子空白",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "可怜": {
-        "sticker_id": "157", "package_id": "1003", "name": "可怜",
-        "description": "卖萌 求饶 委屈巴巴 弱小 拜托 眼巴巴",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "摊手": {
-        "sticker_id": "200", "package_id": "1003", "name": "摊手",
-        "description": "无奈 没办法 耸肩 随便 那咋整 whatever",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "头大": {
-        "sticker_id": "213", "package_id": "1003", "name": "头大",
-        "description": "头疼 烦恼 郁闷 难搞 崩溃 一团乱",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "吓": {
-        "sticker_id": "256", "package_id": "1003", "name": "吓",
-        "description": "害怕 惊恐 震惊 吓一跳 恐怖 怂",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "吐血": {
-        "sticker_id": "203", "package_id": "1003", "name": "吐血",
-        "description": "无语 崩溃 被雷 内伤 一口老血 屮",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "哼": {
-        "sticker_id": "185", "package_id": "1003", "name": "哼",
-        "description": "傲娇 生气 不满 撇嘴 不理 赌气",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "嘿嘿": {
-        "sticker_id": "220", "package_id": "1003", "name": "嘿嘿",
-        "description": "坏笑 猥琐笑 偷笑 憨笑 得意 你懂的",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "头秃": {
-        "sticker_id": "218", "package_id": "1003", "name": "头秃",
-        "description": "程序员 加班 焦虑 没头发 秃了 肝爆",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "暗中观察": {
-        "sticker_id": "221", "package_id": "1003", "name": "暗中观察",
-        "description": "窥屏 潜水 偷偷看 角落 围观 屏住呼吸",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "我酸了": {
-        "sticker_id": "224", "package_id": "1003", "name": "我酸了",
-        "description": "嫉妒 柠檬精 羡慕 吃柠檬 眼红 恰柠檬",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "打call": {
-        "sticker_id": "246", "package_id": "1003", "name": "打call",
-        "description": "应援 加油 支持 喝彩 助威 call",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "庆祝": {
-        "sticker_id": "251", "package_id": "1003", "name": "庆祝",
-        "description": "祝贺 开心 耶 party 胜利 干杯",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "奋斗": {
-        "sticker_id": "151", "package_id": "1003", "name": "奋斗",
-        "description": "努力 加油 拼搏 冲 干劲 卷起来",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "惊讶": {
-        "sticker_id": "143", "package_id": "1003", "name": "惊讶",
-        "description": "震惊 哇 不敢相信 OMG 居然 这么离谱",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "疑问": {
-        "sticker_id": "144", "package_id": "1003", "name": "疑问",
-        "description": "问号 不懂 啥 为什么 啥情况 懵逼问",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "仔细分析": {
-        "sticker_id": "248", "package_id": "1003", "name": "仔细分析",
-        "description": "思考 推敲 认真 研究 琢磨 让我想想",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "撅嘴": {
-        "sticker_id": "184", "package_id": "1003", "name": "撅嘴",
-        "description": "嘟嘴 卖萌 不高兴 撒娇 嘴翘",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "泪奔": {
-        "sticker_id": "199", "package_id": "1003", "name": "泪奔",
-        "description": "大哭 伤心 破防 感动哭 泪流满面 呜呜",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "尊嘟假嘟": {
-        "sticker_id": "276", "package_id": "1003", "name": "尊嘟假嘟",
-        "description": "真的假的 真假 可爱问 你骗我 是不是",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "略略略": {
-        "sticker_id": "113", "package_id": "1003", "name": "略略略",
-        "description": "调皮 吐舌 不服 略 气死你 鬼脸",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "困": {
-        "sticker_id": "180", "package_id": "1003", "name": "困",
-        "description": "想睡 倦 打哈欠 睁不开眼 好困啊 sleepy",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "折磨": {
-        "sticker_id": "181", "package_id": "1003", "name": "折磨",
-        "description": "难受 痛苦 煎熬 蚌埠住了 受不了 要命",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "抠鼻": {
-        "sticker_id": "182", "package_id": "1003", "name": "抠鼻",
-        "description": "不屑 无聊 淡定 无所谓 鄙视 挖鼻",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "鼓掌": {
-        "sticker_id": "183", "package_id": "1003", "name": "鼓掌",
-        "description": "拍手 叫好 赞同 666 喝彩 掌声",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "斜眼笑": {
-        "sticker_id": "204", "package_id": "1003", "name": "斜眼笑",
-        "description": "滑稽 坏笑 doge 意味深长 阴阳怪气 嘿嘿嘿",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "辣眼睛": {
-        "sticker_id": "216", "package_id": "1003", "name": "辣眼睛",
-        "description": "看不下去 cringe 毁三观 太丑了 瞎了",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "哦哟": {
-        "sticker_id": "217", "package_id": "1003", "name": "哦哟",
-        "description": "惊讶 起哄 哇哦 有戏 不简单 哟",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "吃瓜": {
-        "sticker_id": "222", "package_id": "1003", "name": "吃瓜",
-        "description": "围观 看戏 八卦 路人 看热闹 板凳",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "狗头": {
-        "sticker_id": "225", "package_id": "1003", "name": "狗头",
-        "description": "doge 保命 开玩笑 滑稽 反讽 懂的都懂",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "敬礼": {
-        "sticker_id": "227", "package_id": "1003", "name": "敬礼",
-        "description": "salute 尊重 收到 遵命 致敬 报告",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "哦": {
-        "sticker_id": "231", "package_id": "1003", "name": "哦",
-        "description": "知道了 明白 敷衍 嗯 这样啊 收到",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "拿到红包": {
-        "sticker_id": "236", "package_id": "1003", "name": "拿到红包",
-        "description": "红包 谢谢老板 发财 开心 抢到了 欧气",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "牛吖": {
-        "sticker_id": "239", "package_id": "1003", "name": "牛吖",
-        "description": "牛 厉害 强 666 佩服 大佬",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "贴贴": {
-        "sticker_id": "272", "package_id": "1003", "name": "贴贴",
-        "description": "抱抱 亲昵 蹭蹭 亲密 靠靠 撒娇贴",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "爱心": {
-        "sticker_id": "138", "package_id": "1003", "name": "爱心",
-        "description": "心 love 喜欢你 红心 示爱 么么哒",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "晚安": {
-        "sticker_id": "170", "package_id": "1003", "name": "晚安",
-        "description": "好梦 睡了 night 早点休息 安啦 moon",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "太阳": {
-        "sticker_id": "176", "package_id": "1003", "name": "太阳",
-        "description": "晴天 早上好 阳光 morning 好天气 日",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "柠檬": {
-        "sticker_id": "266", "package_id": "1003", "name": "柠檬",
-        "description": "酸 嫉妒 柠檬精 羡慕 我酸 恰柠檬",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "大冤种": {
-        "sticker_id": "267", "package_id": "1003", "name": "大冤种",
-        "description": "倒霉 吃亏 自嘲 好心没好报 背锅 工具人",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "吐了": {
-        "sticker_id": "132", "package_id": "1003", "name": "吐了",
-        "description": "恶心 yue 受不了 嫌弃 想吐 生理不适",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "怒": {
-        "sticker_id": "134", "package_id": "1003", "name": "怒",
-        "description": "生气 愤怒 火大 暴躁 气炸 怼",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "玫瑰": {
-        "sticker_id": "165", "package_id": "1003", "name": "玫瑰",
-        "description": "花 示爱 表白 浪漫 送你花 情人节",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "凋谢": {
-        "sticker_id": "119", "package_id": "1003", "name": "凋谢",
-        "description": "花谢 失恋 难过 枯萎 心碎 凉了",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "点赞": {
-        "sticker_id": "159", "package_id": "1003", "name": "点赞",
-        "description": "赞 认同 好棒 good like 大拇指 顶",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "握手": {
-        "sticker_id": "164", "package_id": "1003", "name": "握手",
-        "description": "合作 你好 商务 hello deal 成交 友好",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "抱拳": {
-        "sticker_id": "163", "package_id": "1003", "name": "抱拳",
-        "description": "谢谢 失敬 江湖 承让 拜托 有礼",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "ok": {
-        "sticker_id": "169", "package_id": "1003", "name": "ok",
-        "description": "好的 收到 没问题 okay 行 可以 懂了",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "拳头": {
-        "sticker_id": "174", "package_id": "1003", "name": "拳头",
-        "description": "加油 干 冲 fight 力量 击拳 硬气",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "鞭炮": {
-        "sticker_id": "191", "package_id": "1003", "name": "鞭炮",
-        "description": "过年 喜庆 爆竹 春节 噼里啪啦 红",
-        "width": 128, "height": 128, "formats": "png",
-    },
-    "烟花": {
-        "sticker_id": "258", "package_id": "1003", "name": "烟花",
-        "description": "庆典 漂亮 新年 嘭 绽放 节日快乐",
-        "width": 128, "height": 128, "formats": "png",
-    },
-}
-
-
-def get_sticker_by_name(name: str) -> Optional[dict]:
-    """
-    按名称查找贴纸，支持模糊匹配。
-
-    匹配优先级：
-      1. 完全相等（name）
-      2. name 包含查询词（前缀/子串）
-      3. description 包含查询词（同义词搜索）
-      4. 通用模糊评分（与 sticker-search 同算法），命中即返回得分最高的一条
-
-    返回 sticker dict，找不到返回 None。
-    """
-    if not name:
-        return None
-
-    query = name.strip()
-
-    if query in STICKER_MAP:
-        return STICKER_MAP[query]
-
-    for key, sticker in STICKER_MAP.items():
-        if query in key or key in query:
-            return sticker
-
-    for sticker in STICKER_MAP.values():
-        desc = sticker.get("description", "")
-        if query in desc:
-            return sticker
-
-    matches = search_stickers(query, limit=1)
-    return matches[0] if matches else None
-
-
-def get_random_sticker(category: str = None) -> dict:
-    """
-    随机返回一个贴纸。
-
-    若指定 category，则在 description 中含有该关键词的贴纸里随机选取；
-    category 为 None 时从全表随机。
-    """
-    if category:
-        candidates = [
-            s for s in STICKER_MAP.values()
-            if category in s.get("description", "") or category in s.get("name", "")
-        ]
-        if candidates:
-            return random.choice(candidates)
-    return random.choice(list(STICKER_MAP.values()))
-
-
-def get_sticker_by_id(sticker_id: str) -> Optional[dict]:
-    """按 sticker_id 精确查找贴纸。"""
-    if not sticker_id:
-        return None
-    sid = str(sticker_id).strip()
-    for sticker in STICKER_MAP.values():
-        if sticker.get("sticker_id") == sid:
-            return sticker
-    return None
-
-
-# ---------------------------------------------------------------------------
-# 模糊搜索（对齐 chatbot-web yuanbao-openclaw-plugin/sticker-cache.ts.searchStickers）
-# ---------------------------------------------------------------------------
-
-_PUNCT_RE = re.compile(r"[\s\u3000\-_·.,，。!！?？\"“”'‘’、/\\]+")
-
-
-def _normalize_text(raw: str) -> str:
-    return unicodedata.normalize("NFKC", str(raw or "")).strip().lower()
-
-
-def _compact_text(raw: str) -> str:
-    return _PUNCT_RE.sub("", _normalize_text(raw))
-
-
-def _multiset_char_hit_ratio(needle: str, haystack: str) -> float:
-    if not needle:
-        return 0.0
-    bag: dict[str, int] = {}
-    for ch in haystack:
-        bag[ch] = bag.get(ch, 0) + 1
-    hits = 0
-    for ch in needle:
-        n = bag.get(ch, 0)
-        if n > 0:
-            hits += 1
-            bag[ch] = n - 1
-    return hits / len(needle)
-
-
-def _bigram_jaccard(a: str, b: str) -> float:
-    if len(a) < 2 or len(b) < 2:
-        return 0.0
-    A = {a[i:i + 2] for i in range(len(a) - 1)}
-    B = {b[i:i + 2] for i in range(len(b) - 1)}
-    inter = len(A & B)
-    union = len(A) + len(B) - inter
-    return inter / union if union else 0.0
-
-
-def _longest_subsequence_ratio(needle: str, haystack: str) -> float:
-    if not needle:
-        return 0.0
-    j = 0
-    for ch in haystack:
-        if j >= len(needle):
-            break
-        if ch == needle[j]:
-            j += 1
-    return j / len(needle)
-
-
-def _score_field(haystack: str, query: str) -> float:
-    hay = _normalize_text(haystack)
-    q = _normalize_text(query)
-    if not hay or not q:
-        return 0.0
-    hay_c = _compact_text(haystack)
-    q_c = _compact_text(query)
-    best = 0.0
-    if hay == q:
-        best = max(best, 100.0)
-    if q in hay:
-        best = max(best, 92 + min(6, len(q)))
-    if len(q) >= 2 and hay.startswith(q):
-        best = max(best, 88.0)
-    if q_c and q_c in hay_c:
-        best = max(best, 86.0)
-    best = max(best, _multiset_char_hit_ratio(q_c, hay_c) * 62)
-    best = max(best, _bigram_jaccard(q_c, hay_c) * 58)
-    best = max(best, _longest_subsequence_ratio(q_c, hay_c) * 52)
-    if len(q) == 1 and q in hay:
-        best = max(best, 68.0)
-    return best
-
-
-def search_stickers(query: str, limit: int = 10) -> list[dict]:
-    """
-    在内置贴纸表中按模糊匹配排序返回前 N 条结果。
-
-    评分综合 name/description 字段的子串、字符多重集覆盖、bigram Jaccard、子序列比例。
-    name 权重略高于 description（×0.88）。空 query 时按字典顺序返回前 N 条。
-    """
-    safe_limit = max(1, min(500, int(limit) if limit else 10))
-    if not query or not _normalize_text(query):
-        return list(STICKER_MAP.values())[:safe_limit]
-
-    scored: list[tuple[float, dict]] = []
-    for sticker in STICKER_MAP.values():
-        name_s = _score_field(sticker.get("name", ""), query)
-        desc_s = _score_field(sticker.get("description", ""), query) * 0.88
-        sid = str(sticker.get("sticker_id", "")).strip()
-        q_norm = _normalize_text(query)
-        id_s = 0.0
-        if sid and q_norm:
-            sid_norm = _normalize_text(sid)
-            if sid_norm == q_norm:
-                id_s = 100.0
-            elif q_norm in sid_norm:
-                id_s = 84.0
-        scored.append((max(name_s, desc_s, id_s), sticker))
-
-    scored.sort(key=lambda x: x[0], reverse=True)
-    top = scored[0][0] if scored else 0
-    if top <= 0:
-        return [s for _, s in scored[:safe_limit]]
-
-    if top >= 22:
-        floor = 18.0
-    elif top >= 12:
-        floor = max(10.0, top * 0.5)
-    else:
-        floor = max(6.0, top * 0.35)
-
-    filtered = [pair for pair in scored if pair[0] >= floor]
-    out = filtered if filtered else scored
-    return [s for _, s in out[:safe_limit]]
-
-
-def build_face_msg_body(
-    face_index: int,
-    face_type: int = 1,
-    data: Optional[str] = None,
-) -> list:
-    """
-    构造 TIMFaceElem 消息体。
-
-    Yuanbao 约定：
-      - index 固定传 0（服务端通过 data 字段识别具体表情）
-      - data 为 JSON 字符串，包含 sticker_id / package_id 等字段
-
-    Args:
-        face_index: 保留字段，暂时不影响 wire format（Yuanbao 固定 index=0）。
-                    当 face_index > 0 时视为旧版 QQ 表情 ID，直接放入 index。
-        face_type:  保留字段（兼容旧接口，当前未使用）。
-        data:       已序列化的 JSON 字符串；为 None 时仅传 index。
-
-    Returns:
-        符合 Yuanbao TIM 协议的 msg_body list，如::
-
-            [{"msg_type": "TIMFaceElem", "msg_content": {"index": 0, "data": "..."}}]
-    """
-    msg_content: dict = {"index": face_index}
-    if data is not None:
-        msg_content["data"] = data
-    return [{"msg_type": "TIMFaceElem", "msg_content": msg_content}]
-
-
-def build_sticker_msg_body(sticker: dict) -> list:
-    """
-    从 STICKER_MAP 中的 sticker dict 直接构造 TIMFaceElem 消息体。
-
-    这是 send_sticker() 的内部辅助，确保 data 字段与原始 JS 插件一致。
-    """
-    data_payload = json.dumps(
-        {
-            "sticker_id": sticker["sticker_id"],
-            "package_id": sticker["package_id"],
-            "width": sticker.get("width", 128),
-            "height": sticker.get("height", 128),
-            "formats": sticker.get("formats", "png"),
-            "name": sticker["name"],
-        },
-        ensure_ascii=False,
-        separators=(",", ":"),
-    )
-    return build_face_msg_body(face_index=0, data=data_payload)
@@ -60,10 +60,6 @@ from .config import (
    SessionResetPolicy,  # noqa: F401 — re-exported via gateway/__init__.py
    HomeChannel,
 )
-from .whatsapp_identity import (
-    canonical_whatsapp_identifier,
-    normalize_whatsapp_identifier,
-)


@dataclass
@@ -84,12 +80,9 @@ class SessionSource:
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
-    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
+    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
-    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
-    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
-    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    
    @property
    def description(self) -> str:
@@ -127,14 +120,8 @@ class SessionSource:
            d["user_id_alt"] = self.user_id_alt
        if self.chat_id_alt:
            d["chat_id_alt"] = self.chat_id_alt
-        if self.guild_id:
-            d["guild_id"] = self.guild_id
-        if self.parent_chat_id:
-            d["parent_chat_id"] = self.parent_chat_id
-        if self.message_id:
-            d["message_id"] = self.message_id
        return d
-
+    
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
        return cls(
@@ -148,9 +135,6 @@ class SessionSource:
            chat_topic=data.get("chat_topic"),
            user_id_alt=data.get("user_id_alt"),
            chat_id_alt=data.get("chat_id_alt"),
-            guild_id=data.get("guild_id"),
-            parent_chat_id=data.get("parent_chat_id"),
-            message_id=data.get("message_id"),
        )
    

@@ -168,7 +152,6 @@ class SessionContext:
    source: SessionSource
    connected_platforms: List[Platform]
    home_channels: Dict[Platform, HomeChannel]
-    shared_multi_user_session: bool = False
    
    # Session metadata
    session_key: str = ""
@@ -183,7 +166,6 @@ class SessionContext:
            "home_channels": {
                p.value: hc.to_dict() for p, hc in self.home_channels.items()
            },
-            "shared_multi_user_session": self.shared_multi_user_session,
            "session_key": self.session_key,
            "session_id": self.session_id,
            "created_at": self.created_at.isoformat() if self.created_at else None,
@@ -202,31 +184,6 @@ that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
 and the LLM needs the real ID to tag users."""


-def _discord_tools_loaded() -> bool:
-    """True iff the agent will actually have Discord tools this session.
-
-    Two conditions must hold:
-      1. The `discord` or `discord_admin` toolset is enabled for the
-         Discord platform via `hermes tools` (opt-in, default OFF).
-      2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
-         at registry time, so the toolset being enabled in config is not
-         enough if the token isn't configured.
-
-    Returns False (safe default — keeps the stale-API disclaimer) on any
-    error so a bad config can't silently promise tools the agent lacks.
-    """
-    if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
-        return False
-    try:
-        from hermes_cli.config import load_config
-        from hermes_cli.tools_config import _get_platform_tools
-        cfg = load_config()
-        enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
-        return "discord" in enabled or "discord_admin" in enabled
-    except Exception:
-        return False
-
-
 def build_session_context_prompt(
    context: SessionContext,
    *,
@@ -283,16 +240,18 @@ def build_session_context_prompt(
        lines.append(f"**Channel Topic:** {context.source.chat_topic}")

    # User identity.
-    # In shared multi-user sessions (shared threads OR shared non-thread groups
-    # when group_sessions_per_user=False), multiple users contribute to the same
-    # conversation.  Don't pin a single user name in the system prompt — it
-    # changes per-turn and would bust the prompt cache.  Instead, note that
-    # this is a multi-user session; individual sender names are prefixed on
-    # each user message by the gateway.
-    if context.shared_multi_user_session:
-        session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
+    # In shared thread sessions (non-DM with thread_id), multiple users
+    # contribute to the same conversation.  Don't pin a single user name
+    # in the system prompt — it changes per-turn and would bust the prompt
+    # cache.  Instead, note that this is a multi-user thread; individual
+    # sender names are prefixed on each user message by the gateway.
+    _is_shared_thread = (
+        context.source.chat_type != "dm"
+        and context.source.thread_id
+    )
+    if _is_shared_thread:
        lines.append(
-            f"**Session type:** {session_label} — messages are prefixed "
+            "**Session type:** Multi-user thread — messages are prefixed "
            "with [sender name]. Multiple users may participate."
        )
    elif context.source.user_name:
@@ -310,57 +269,17 @@ def build_session_context_prompt(
            "**Platform notes:** You are running inside Slack. "
            "You do NOT have access to Slack-specific APIs — you cannot search "
            "channel history, pin/unpin messages, manage channels, or list users. "
-            "Do not promise to perform these actions. The gateway may inline the "
-            "current message's Slack block/attachment payload when available, but "
-            "you still cannot call Slack APIs yourself."
+            "Do not promise to perform these actions. If the user asks, explain "
+            "that you can only read messages sent directly to you and respond."
        )
    elif context.source.platform == Platform.DISCORD:
-        # Inject the Discord IDs block only when the agent actually has
-        # Discord tools loaded this session — i.e. the user opted into
-        # `discord` / `discord_admin` via `hermes tools` AND the bot
-        # token is configured.  Otherwise keep the stale-API disclaimer
-        # honest so we never promise tools the agent lacks.
-        if _discord_tools_loaded():
-            src = context.source
-            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
-            if src.guild_id:
-                id_lines.append(f"  - Guild: `{src.guild_id}`")
-            if src.thread_id and src.parent_chat_id:
-                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
-                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
-            else:
-                id_lines.append(f"  - Channel: `{src.chat_id}`")
-            if src.message_id:
-                id_lines.append(f"  - Triggering message: `{src.message_id}`")
-            lines.extend(id_lines)
-        else:
-            lines.append("")
-            lines.append(
-                "**Platform notes:** You are running inside Discord. "
-                "You do NOT have access to Discord-specific APIs — you cannot search "
-                "channel history, pin messages, manage roles, or list server members. "
-                "Do not promise to perform these actions. If the user asks, explain "
-                "that you can only read messages sent directly to you and respond."
-            )
-    elif context.source.platform == Platform.BLUEBUBBLES:
        lines.append("")
        lines.append(
-            "**Platform notes:** You are responding via iMessage. "
-            "Keep responses short and conversational — think texts, not essays. "
-            "Structure longer replies as separate short thoughts, each separated "
-            "by a blank line (double newline). Each block between blank lines "
-            "will be delivered as its own iMessage bubble, so write accordingly: "
-            "one idea per bubble, 1–3 sentences each. "
-            "If the user needs a detailed answer, give the short version first "
-            "and offer to elaborate."
-        )
-    elif context.source.platform == Platform.YUANBAO:
-        lines.append("")
-        lines.append(
-            "**Platform notes:** You are running inside Yuanbao. "
-            "You CAN send private (DM) messages via the send_message tool. "
-            "Use target='yuanbao:direct:<account_id>' for DM "
-            "and target='yuanbao:group:<group_code>' for group chat."
+            "**Platform notes:** You are running inside Discord. "
+            "You do NOT have access to Discord-specific APIs — you cannot search "
+            "channel history, pin messages, manage roles, or list server members. "
+            "Do not promise to perform these actions. If the user asks, explain "
+            "that you can only read messages sent directly to you and respond."
        )

    # Connected platforms
@@ -448,11 +367,11 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
-    # Set by the background expiry watcher after it finalizes an expired
-    # session (invoking on_session_finalize hooks and evicting the cached
-    # agent).  Persisted to sessions.json so the flag survives gateway
-    # restarts — prevents redundant finalization runs.
-    expiry_finalized: bool = False
+    # Set by the background expiry watcher after it successfully flushes
+    # memories for this session.  Persisted to sessions.json so the flag
+    # survives gateway restarts (the old in-memory _pre_flushed_sessions
+    # set was lost on restart, causing redundant re-flushes).
+    memory_flushed: bool = False

    # When True the next call to get_or_create_session() will auto-reset
    # this session (create a new session_id) so the user starts fresh.
@@ -488,7 +407,7 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
-            "expiry_finalized": self.expiry_finalized,
+            "memory_flushed": self.memory_flushed,
            "suspended": self.suspended,
            "resume_pending": self.resume_pending,
            "resume_reason": self.resume_reason,
@@ -540,7 +459,7 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
-            expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
+            memory_flushed=data.get("memory_flushed", False),
            suspended=data.get("suspended", False),
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
@@ -548,27 +467,6 @@ class SessionEntry:
        )


-def is_shared_multi_user_session(
-    source: SessionSource,
-    *,
-    group_sessions_per_user: bool = True,
-    thread_sessions_per_user: bool = False,
-) -> bool:
-    """Return True when a non-DM session is shared across participants.
-
-    Mirrors the isolation rules in :func:`build_session_key`:
-      - DMs are never shared.
-      - Threads are shared unless ``thread_sessions_per_user`` is True.
-      - Non-thread group/channel sessions are shared unless
-        ``group_sessions_per_user`` is True (default: True = isolated).
-    """
-    if source.chat_type == "dm":
-        return False
-    if source.thread_id:
-        return not thread_sessions_per_user
-    return not group_sessions_per_user
-
-
 def build_session_key(
    source: SessionSource,
    group_sessions_per_user: bool = True,
@@ -599,24 +497,15 @@ def build_session_key(
    """
    platform = source.platform.value
    if source.chat_type == "dm":
-        dm_chat_id = source.chat_id
-        if source.platform == Platform.WHATSAPP:
-            dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
-
-        if dm_chat_id:
+        if source.chat_id:
            if source.thread_id:
-                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{dm_chat_id}"
+                return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
+            return f"agent:main:{platform}:dm:{source.chat_id}"
        if source.thread_id:
            return f"agent:main:{platform}:dm:{source.thread_id}"
        return f"agent:main:{platform}:dm"

    participant_id = source.user_id_alt or source.user_id
-    if participant_id and source.platform == Platform.WHATSAPP:
-        # Same JID/LID-flip bug as the DM case: without canonicalisation, a
-        # single group member gets two isolated per-user sessions when the
-        # bridge reshuffles alias forms.
-        participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
    key_parts = ["agent:main", platform, source.chat_type]

    if source.chat_id:
@@ -1237,11 +1126,6 @@ class SessionStore:
                    tool_name=message.get("tool_name"),
                    tool_calls=message.get("tool_calls"),
                    tool_call_id=message.get("tool_call_id"),
-                    reasoning=message.get("reasoning") if message.get("role") == "assistant" else None,
-                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
-                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
-                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
-                    codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1271,10 +1155,8 @@ class SessionStore:
                        tool_calls=msg.get("tool_calls"),
                        tool_call_id=msg.get("tool_call_id"),
                        reasoning=msg.get("reasoning") if role == "assistant" else None,
-                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
-                        codex_message_items=msg.get("codex_message_items") if role == "assistant" else None,
                    )
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
@@ -1356,11 +1238,6 @@ def build_session_context(
        source=source,
        connected_platforms=connected,
        home_channels=home_channels,
-        shared_multi_user_session=is_shared_multi_user_session(
-            source,
-            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
-            thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
-        ),
    )
    
    if session_entry:
@@ -56,12 +56,6 @@ _SESSION_USER_ID: ContextVar = ContextVar("HERMES_SESSION_USER_ID", default=_UNS
 _SESSION_USER_NAME: ContextVar = ContextVar("HERMES_SESSION_USER_NAME", default=_UNSET)
 _SESSION_KEY: ContextVar = ContextVar("HERMES_SESSION_KEY", default=_UNSET)

-# Cron auto-delivery vars — set per-job in run_job() so concurrent jobs
-# don't clobber each other's delivery targets.
-_CRON_AUTO_DELIVER_PLATFORM: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_PLATFORM", default=_UNSET)
-_CRON_AUTO_DELIVER_CHAT_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_CHAT_ID", default=_UNSET)
-_CRON_AUTO_DELIVER_THREAD_ID: ContextVar = ContextVar("HERMES_CRON_AUTO_DELIVER_THREAD_ID", default=_UNSET)
-
 _VAR_MAP = {
    "HERMES_SESSION_PLATFORM": _SESSION_PLATFORM,
    "HERMES_SESSION_CHAT_ID": _SESSION_CHAT_ID,
@@ -70,9 +64,6 @@ _VAR_MAP = {
    "HERMES_SESSION_USER_ID": _SESSION_USER_ID,
    "HERMES_SESSION_USER_NAME": _SESSION_USER_NAME,
    "HERMES_SESSION_KEY": _SESSION_KEY,
-    "HERMES_CRON_AUTO_DELIVER_PLATFORM": _CRON_AUTO_DELIVER_PLATFORM,
-    "HERMES_CRON_AUTO_DELIVER_CHAT_ID": _CRON_AUTO_DELIVER_CHAT_ID,
-    "HERMES_CRON_AUTO_DELIVER_THREAD_ID": _CRON_AUTO_DELIVER_THREAD_ID,
 }


@@ -22,18 +22,11 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

-if sys.platform == "win32":
-    import msvcrt
-else:
-    import fcntl
-
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
-_GATEWAY_LOCK_FILENAME = "gateway.lock"
-_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -42,14 +35,6 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


-def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
-    """Return the path to the runtime gateway lock file."""
-    if pid_path is not None:
-        return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
-    home = get_hermes_home()
-    return home / _GATEWAY_LOCK_FILENAME
-
-
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -113,11 +98,6 @@ def _get_process_start_time(pid: int) -> Optional[int]:
        return None


-def get_process_start_time(pid: int) -> Optional[int]:
-    """Public wrapper for retrieving a process start time when available."""
-    return _get_process_start_time(pid)
-
-
 def _read_process_cmdline(pid: int) -> Optional[str]:
    """Return the process command line as a space-separated string."""
    cmdline_path = Path(f"/proc/{pid}/cmdline")
@@ -141,7 +121,6 @@ def _looks_like_gateway_process(pid: int) -> bool:
        "hermes_cli.main gateway",
        "hermes_cli/main.py gateway",
        "hermes gateway",
-        "hermes-gateway",
        "gateway/run.py",
    )
    return any(pattern in cmdline for pattern in patterns)
@@ -233,160 +212,21 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


-def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
-    return _read_pid_record(lock_path or _get_gateway_lock_path())
-
-
-def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
-    if not record:
-        return None
-    try:
-        return int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        return None
-
-
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
-    """Delete a stale gateway PID file (and its sibling lock metadata).
-
-    Called from ``get_running_pid()`` after the runtime lock has already been
-    confirmed inactive, so the on-disk metadata is known to belong to a dead
-    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
-    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
-    ``--replace`` handoffs), this path force-unlinks both files so the next
-    startup sees a clean slate.
-    """
    if not cleanup_stale:
        return
    try:
-        pid_path.unlink(missing_ok=True)
+        if pid_path == _get_pid_path():
+            remove_pid_file()
+        else:
+            pid_path.unlink(missing_ok=True)
    except Exception:
        pass
-    try:
-        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
-    except Exception:
-        pass
-
-
-def _write_gateway_lock_record(handle) -> None:
-    handle.seek(0)
-    handle.truncate()
-    json.dump(_build_pid_record(), handle)
-    handle.flush()
-    try:
-        os.fsync(handle.fileno())
-    except OSError:
-        pass
-
-
-def _try_acquire_file_lock(handle) -> bool:
-    try:
-        if _IS_WINDOWS:
-            handle.seek(0, os.SEEK_END)
-            if handle.tell() == 0:
-                handle.write("\n")
-                handle.flush()
-            handle.seek(0)
-            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
-        else:
-            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-        return True
-    except (BlockingIOError, OSError):
-        return False
-
-
-def _release_file_lock(handle) -> None:
-    try:
-        if _IS_WINDOWS:
-            handle.seek(0)
-            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
-        else:
-            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
-    except OSError:
-        pass
-
-
-def acquire_gateway_runtime_lock() -> bool:
-    """Claim the cross-process runtime lock for the gateway.
-
-    Unlike the PID file, the lock is owned by the live process itself. If the
-    process dies abruptly, the OS releases the lock automatically.
-    """
-    global _gateway_lock_handle
-    if _gateway_lock_handle is not None:
-        return True
-
-    path = _get_gateway_lock_path()
-    path.parent.mkdir(parents=True, exist_ok=True)
-    handle = open(path, "a+", encoding="utf-8")
-    if not _try_acquire_file_lock(handle):
-        handle.close()
-        return False
-    _write_gateway_lock_record(handle)
-    _gateway_lock_handle = handle
-    return True
-
-
-def release_gateway_runtime_lock() -> None:
-    """Release the gateway runtime lock when owned by this process."""
-    global _gateway_lock_handle
-    handle = _gateway_lock_handle
-    if handle is None:
-        return
-    _gateway_lock_handle = None
-    _release_file_lock(handle)
-    try:
-        handle.close()
-    except OSError:
-        pass
-
-
-def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
-    """Return True when some process currently owns the gateway runtime lock."""
-    global _gateway_lock_handle
-    resolved_lock_path = lock_path or _get_gateway_lock_path()
-    if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
-        return True
-
-    if not resolved_lock_path.exists():
-        return False
-
-    handle = open(resolved_lock_path, "a+", encoding="utf-8")
-    try:
-        if _try_acquire_file_lock(handle):
-            _release_file_lock(handle)
-            return False
-        return True
-    finally:
-        try:
-            handle.close()
-        except OSError:
-            pass


 def write_pid_file() -> None:
-    """Write the current process PID and metadata to the gateway PID file.
-
-    Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
-    invocations race: exactly one process wins and the rest get
-    FileExistsError.
-    """
-    path = _get_pid_path()
-    path.parent.mkdir(parents=True, exist_ok=True)
-    record = json.dumps(_build_pid_record())
-    try:
-        fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
-    except FileExistsError:
-        raise  # Let caller decide: another gateway is racing us
-    try:
-        with os.fdopen(fd, "w", encoding="utf-8") as f:
-            f.write(record)
-    except Exception:
-        try:
-            path.unlink(missing_ok=True)
-        except OSError:
-            pass
-        raise
+    """Write the current process PID and metadata to the gateway PID file."""
+    _write_json_file(_get_pid_path(), _build_pid_record())


 def write_runtime_status(
@@ -501,8 +341,7 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
        if not stale:
            try:
                os.kill(existing_pid, 0)
-            except (ProcessLookupError, PermissionError, OSError):
-                # Windows raises OSError with WinError 87 for invalid pid check
+            except (ProcessLookupError, PermissionError):
                stale = True
            else:
                current_start = _get_process_start_time(existing_pid)
@@ -567,43 +406,17 @@ def release_scoped_lock(scope: str, identity: str) -> None:
        pass


-def release_all_scoped_locks(
-    *,
-    owner_pid: Optional[int] = None,
-    owner_start_time: Optional[int] = None,
-) -> int:
-    """Remove scoped lock files in the lock directory.
+def release_all_scoped_locks() -> int:
+    """Remove all scoped lock files in the lock directory.

    Called during --replace to clean up stale locks left by stopped/killed
-    gateway processes that did not release their locks gracefully. When an
-    ``owner_pid`` is provided, only lock records belonging to that gateway
-    process are removed. ``owner_start_time`` further narrows the match to
-    protect against PID reuse.
-
-    When no owner is provided, preserves the legacy behavior and removes every
-    scoped lock file in the directory.
-
+    gateway processes that did not release their locks gracefully.
    Returns the number of lock files removed.
    """
    lock_dir = _get_lock_dir()
    removed = 0
    if lock_dir.exists():
        for lock_file in lock_dir.glob("*.lock"):
-            if owner_pid is not None:
-                record = _read_json_file(lock_file)
-                if not isinstance(record, dict):
-                    continue
-                try:
-                    record_pid = int(record.get("pid"))
-                except (TypeError, ValueError):
-                    continue
-                if record_pid != owner_pid:
-                    continue
-                if (
-                    owner_start_time is not None
-                    and record.get("start_time") != owner_start_time
-                ):
-                    continue
            try:
                lock_file.unlink(missing_ok=True)
                removed += 1
@@ -750,46 +563,35 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
-    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
-    if not lock_active:
+    record = _read_pid_record(resolved_pid_path)
+    if not record:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    primary_record = _read_pid_record(resolved_pid_path)
-    fallback_record = _read_gateway_lock_record(resolved_lock_path)
+    try:
+        pid = int(record["pid"])
+    except (KeyError, TypeError, ValueError):
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-    for record in (primary_record, fallback_record):
-        pid = _pid_from_record(record)
-        if pid is None:
-            continue
+    try:
+        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+    except (ProcessLookupError, PermissionError):
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-        try:
-            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-        except ProcessLookupError:
-            continue
-        except PermissionError:
-            # The process exists but belongs to another user/service scope.
-            # With the runtime lock still held, prefer keeping it visible
-            # rather than deleting the PID file as "stale".
-            if _record_looks_like_gateway(record):
-                return pid
-            continue
-        except OSError:
-            # Windows raises OSError with WinError 87 for an invalid pid
-            # (process is definitely gone). Treat as "process doesn't exist".
-            continue
+    recorded_start = record.get("start_time")
+    current_start = _get_process_start_time(pid)
+    if recorded_start is not None and current_start is not None and current_start != recorded_start:
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-        recorded_start = record.get("start_time")
-        current_start = _get_process_start_time(pid)
-        if recorded_start is not None and current_start is not None and current_start != recorded_start:
-            continue
+    if not _looks_like_gateway_process(pid):
+        if not _record_looks_like_gateway(record):
+            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+            return None

-        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
-            return pid
-
-    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-    return None
+    return pid


 def is_gateway_running(
@@ -44,14 +44,6 @@ class StreamConsumerConfig:
    buffer_threshold: int = 40
    cursor: str = " ▉"
    buffer_only: bool = False
-    # When >0, the final edit for a streamed response is delivered as a
-    # fresh message if the original preview has been visible for at least
-    # this many seconds.  This makes the platform's visible timestamp
-    # reflect completion time instead of first-token time for long-running
-    # responses (e.g. reasoning models that stream slowly).  Ported from
-    # openclaw/openclaw#72038.  Default 0 = always edit in place (legacy
-    # behavior).  The gateway enables this selectively per-platform.
-    fresh_final_after_seconds: float = 0.0


 class GatewayStreamConsumer:
@@ -99,12 +91,6 @@ class GatewayStreamConsumer:
        self._queue: queue.Queue = queue.Queue()
        self._accumulated = ""
        self._message_id: Optional[str] = None
-        # Wall-clock timestamp (time.monotonic) when ``_message_id`` was
-        # first assigned from a successful first-send.  Used by the
-        # fresh-final logic to detect long-lived previews whose edit
-        # timestamps would be stale by completion time.  Ported from
-        # openclaw/openclaw#72038.
-        self._message_created_ts: Optional[float] = None
        self._already_sent = False
        self._edit_supported = True  # Disabled when progressive edits are no longer usable
        self._last_edit_time = 0.0
@@ -150,7 +136,6 @@ class GatewayStreamConsumer:
        if preserve_no_edit and self._message_id == "__no_edit__":
            return
        self._message_id = None
-        self._message_created_ts = None
        self._accumulated = ""
        self._last_sent_text = ""
        self._fallback_final_send = False
@@ -749,81 +734,6 @@ class GatewayStreamConsumer:
            logger.error("Commentary send error: %s", e)
            return False

-    def _should_send_fresh_final(self) -> bool:
-        """Return True when a long-lived preview should be replaced with a
-        fresh final message instead of an edit.
-
-        Conditions:
-        - Fresh-final is enabled (``fresh_final_after_seconds > 0``).
-        - We have a real preview message id (not the ``__no_edit__`` sentinel
-          and not ``None``).
-        - The preview has been visible for at least the configured threshold.
-
-        Ported from openclaw/openclaw#72038.
-        """
-        threshold = getattr(self.cfg, "fresh_final_after_seconds", 0.0) or 0.0
-        if threshold <= 0:
-            return False
-        if not self._message_id or self._message_id == "__no_edit__":
-            return False
-        if self._message_created_ts is None:
-            return False
-        age = time.monotonic() - self._message_created_ts
-        return age >= threshold
-
-    async def _try_fresh_final(self, text: str) -> bool:
-        """Send ``text`` as a brand-new message (best-effort delete the old
-        preview) so the platform's visible timestamp reflects completion
-        time.  Returns True on successful delivery, False on any failure so
-        the caller falls back to the normal edit path.
-
-        Ported from openclaw/openclaw#72038.
-        """
-        old_message_id = self._message_id
-        try:
-            result = await self.adapter.send(
-                chat_id=self.chat_id,
-                content=text,
-                metadata=self.metadata,
-            )
-        except Exception as e:
-            logger.debug("Fresh-final send failed, falling back to edit: %s", e)
-            return False
-        if not getattr(result, "success", False):
-            return False
-        # Successful fresh send — try to delete the stale preview so the
-        # user doesn't see the old edit-stuck message underneath.  Cleanup
-        # is best-effort; platforms that don't implement ``delete_message``
-        # just leave the preview behind (still an acceptable outcome —
-        # the visible final timestamp is the important part).
-        if old_message_id and old_message_id != "__no_edit__":
-            delete_fn = getattr(self.adapter, "delete_message", None)
-            if delete_fn is not None:
-                try:
-                    await delete_fn(self.chat_id, old_message_id)
-                except Exception as e:
-                    logger.debug(
-                        "Fresh-final preview cleanup failed (%s): %s",
-                        old_message_id, e,
-                    )
-        # Adopt the new message id as the current message so subsequent
-        # callers (e.g. overflow split loops, finalize retries) see a
-        # consistent state.
-        new_message_id = getattr(result, "message_id", None)
-        if new_message_id:
-            self._message_id = new_message_id
-            self._message_created_ts = time.monotonic()
-        else:
-            # Send succeeded but platform didn't return an id — treat the
-            # delivery as final-only and fall back to "__no_edit__" so we
-            # don't try to edit something we can't address.
-            self._message_id = "__no_edit__"
-            self._message_created_ts = None
-        self._already_sent = True
-        self._last_sent_text = text
-        self._final_response_sent = True
-        return True
-
    async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool:
        """Send or edit the streaming message.

@@ -876,22 +786,6 @@ class GatewayStreamConsumer:
                        finalize and self._adapter_requires_finalize
                    ):
                        return True
-                    # Fresh-final for long-lived previews: when finalizing
-                    # the last edit in a streaming sequence, if the
-                    # original preview has been visible for at least
-                    # ``fresh_final_after_seconds``, send the completed
-                    # reply as a fresh message so the platform's visible
-                    # timestamp reflects completion time instead of the
-                    # preview creation time.  Best-effort cleanup of the
-                    # old preview follows.  Ported from
-                    # openclaw/openclaw#72038.  Gated by config so the
-                    # legacy edit-in-place path stays the default.
-                    if (
-                        finalize
-                        and self._should_send_fresh_final()
-                        and await self._try_fresh_final(text)
-                    ):
-                        return True
                    # Edit existing message
                    result = await self.adapter.edit_message(
                        chat_id=self.chat_id,
@@ -958,10 +852,6 @@ class GatewayStreamConsumer:
                if result.success:
                    if result.message_id:
                        self._message_id = result.message_id
-                        # Track when the preview first became visible to
-                        # the user so fresh-final logic can detect stale
-                        # preview timestamps on long-running responses.
-                        self._message_created_ts = time.monotonic()
                    else:
                        self._edit_supported = False
                    self._already_sent = True
@@ -1,155 +0,0 @@
-"""Shared helpers for canonicalising WhatsApp sender identity.
-
-WhatsApp's bridge can surface the same human under two different JID shapes
-within a single conversation:
-
- LID form: ``999999999999999@lid``
- Phone form: ``15551234567@s.whatsapp.net``
-
-Both the authorisation path (:mod:`gateway.run`) and the session-key path
-(:mod:`gateway.session`) need to collapse these aliases to a single stable
-identity. This module is the single source of truth for that resolution so
-the two paths can never drift apart.
-
-Public helpers:
-
- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
-  down to the bare numeric identifier.
- :func:`canonical_whatsapp_identifier` — walk the bridge's
-  ``lid-mapping-*.json`` files and return a stable canonical identity
-  across phone/LID variants.
- :func:`expand_whatsapp_aliases` — return the full alias set for an
-  identifier. Used by authorisation code that needs to match any known
-  form of a sender against an allow-list.
-
-Plugins that need per-sender behaviour on WhatsApp (role-based routing,
-per-contact authorisation, policy gating in a gateway hook) should use
-``canonical_whatsapp_identifier`` so their bookkeeping lines up with
-Hermes' own session keys.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import re
-from typing import Set
-
-logger = logging.getLogger(__name__)
-
-# WhatsApp JIDs are numeric (or plus-prefixed numeric) with optional
-# ``@``, ``.`` and ``:`` separators. ``\w`` is pinned to ASCII so
-# full-width digits / Unicode word chars can't sneak through.
-_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$")
-
-from hermes_constants import get_hermes_home
-
-
-def normalize_whatsapp_identifier(value: str) -> str:
-    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
-
-    Accepts any of the identifier shapes the WhatsApp bridge may emit:
-    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
-    ``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``.
-    Returns just the numeric identifier (``"60123456789"``) suitable for
-    equality comparisons.
-
-    Useful for plugins that want to match sender IDs against
-    user-supplied config (phone numbers in ``config.yaml``) without
-    worrying about which variant the bridge happens to deliver.
-    """
-    return (
-        str(value or "")
-        .strip()
-        .replace("+", "", 1)
-        .split(":", 1)[0]
-        .split("@", 1)[0]
-    )
-
-
-def expand_whatsapp_aliases(identifier: str) -> Set[str]:
-    """Resolve WhatsApp phone/LID aliases via bridge session mapping files.
-
-    Returns the set of all identifiers transitively reachable through the
-    bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files,
-    starting from ``identifier``. The result always includes the
-    normalized input itself, so callers can safely ``in`` check against
-    the return value without a separate fallback branch.
-
-    Returns an empty set if ``identifier`` normalizes to empty.
-    """
-    normalized = normalize_whatsapp_identifier(identifier)
-    if not normalized:
-        return set()
-
-    session_dir = get_hermes_home() / "whatsapp" / "session"
-    resolved: Set[str] = set()
-    queue = [normalized]
-
-    while queue:
-        current = queue.pop(0)
-        if not current or current in resolved:
-            continue
-        # Defense-in-depth: reject identifiers that could sneak path
-        # separators / traversal segments into the ``lid-mapping-{current}``
-        # filename below. The hardcoded ``lid-mapping-`` prefix already
-        # prevents escape via pathlib's component split (an attacker can't
-        # create ``lid-mapping-..`` as a real directory in session_dir), but
-        # this keeps the identifier space to the characters WhatsApp JIDs
-        # actually use and avoids depending on that filesystem-layout
-        # invariant.
-        if not _SAFE_IDENTIFIER_RE.match(current):
-            continue
-
-        resolved.add(current)
-        for suffix in ("", "_reverse"):
-            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
-            if not mapping_path.exists():
-                continue
-            try:
-                mapped = normalize_whatsapp_identifier(
-                    json.loads(mapping_path.read_text(encoding="utf-8"))
-                )
-            except (OSError, json.JSONDecodeError) as exc:
-                logger.debug("whatsapp_identity: failed to read %s: %s", mapping_path, exc)
-                continue
-            if mapped and mapped not in resolved:
-                queue.append(mapped)
-
-    return resolved
-
-
-def canonical_whatsapp_identifier(identifier: str) -> str:
-    """Return a stable WhatsApp sender identity across phone-JID/LID variants.
-
-    WhatsApp may surface the same person under either a phone-format JID
-    (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
-    applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
-    member inside a group chat — both represent a user identity, and the
-    bridge may flip between the two for the same human.
-
-    This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
-    files, walks the mapping transitively, and picks the shortest
-    (numeric-preferred) alias as the canonical identity.
-    :func:`gateway.session.build_session_key` uses this for both WhatsApp
-    DM chat_ids and WhatsApp group participant_ids, so callers get the
-    same session-key identity Hermes itself uses.
-
-    Plugins that need per-sender behaviour (role-based routing,
-    authorisation, per-contact policy) should use this so their
-    bookkeeping lines up with Hermes' session bookkeeping even when
-    the bridge reshuffles aliases.
-
-    Returns an empty string if ``identifier`` normalizes to empty. If no
-    mapping files exist yet (fresh bridge install), returns the
-    normalized input unchanged.
-    """
-    normalized = normalize_whatsapp_identifier(identifier)
-    if not normalized:
-        return ""
-
-    # expand_whatsapp_aliases always includes `normalized` itself in the
-    # returned set, so the min() below degrades gracefully to `normalized`
-    # when no lid-mapping files are present.
-    aliases = expand_whatsapp_aliases(normalized)
-    return min(aliases, key=lambda candidate: (len(candidate), candidate))
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.11.0"
-__release_date__ = "2026.4.23"
+__version__ = "0.10.0"
+__release_date__ = "2026.4.16"
@@ -110,40 +110,18 @@ def _display_source(source: str) -> str:
    return source.split(":", 1)[1] if source.startswith("manual:") else source


-def _classify_exhausted_status(entry) -> tuple[str, bool]:
-    code = getattr(entry, "last_error_code", None)
-    reason = str(getattr(entry, "last_error_reason", "") or "").strip().lower()
-    message = str(getattr(entry, "last_error_message", "") or "").strip().lower()
-
-    if code == 429 or any(token in reason for token in ("rate_limit", "usage_limit", "quota", "exhausted")) or any(
-        token in message for token in ("rate limit", "usage limit", "quota", "too many requests")
-    ):
-        return "rate-limited", True
-
-    if code in {401, 403} or any(token in reason for token in ("invalid_token", "invalid_grant", "unauthorized", "forbidden", "auth")) or any(
-        token in message for token in ("unauthorized", "forbidden", "expired", "revoked", "invalid token", "authentication")
-    ):
-        return "auth failed", False
-
-    return "exhausted", True
-
-
-
 def _format_exhausted_status(entry) -> str:
    if entry.last_status != STATUS_EXHAUSTED:
        return ""
-    label, show_retry_window = _classify_exhausted_status(entry)
    reason = getattr(entry, "last_error_reason", None)
    reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else ""
    code = f" ({entry.last_error_code})" if entry.last_error_code else ""
-    if not show_retry_window:
-        return f" {label}{reason_text}{code} (re-auth may be required)"
    exhausted_until = _exhausted_until(entry)
    if exhausted_until is None:
-        return f" {label}{reason_text}{code}"
+        return f" exhausted{reason_text}{code}"
    remaining = max(0, int(math.ceil(exhausted_until - time.time())))
    if remaining <= 0:
-        return f" {label}{reason_text}{code} (ready to retry)"
+        return f" exhausted{reason_text}{code} (ready to retry)"
    minutes, seconds = divmod(remaining, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
@@ -155,7 +133,7 @@ def _format_exhausted_status(entry) -> str:
        wait = f"{minutes}m {seconds}s"
    else:
        wait = f"{seconds}s"
-    return f" {label}{reason_text}{code} ({wait} left)"
+    return f" exhausted{reason_text}{code} ({wait} left)"


 def auth_add_command(args) -> None:
@@ -174,23 +152,6 @@ def auth_add_command(args) -> None:

    pool = load_pool(provider)

-    # Clear ALL suppressions for this provider — re-adding a credential is
-    # a strong signal the user wants auth re-enabled.  This covers env:*
-    # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
-    # device_code (codex), etc.  One consistent re-engagement pattern.
-    # Matches the Codex device_code re-link pattern that predates this.
-    if not provider.startswith(CUSTOM_POOL_PREFIX):
-        try:
-            from hermes_cli.auth import (
-                _load_auth_store,
-                unsuppress_credential_source,
-            )
-            suppressed = _load_auth_store().get("suppressed_sources", {})
-            for src in list(suppressed.get(provider, []) or []):
-                unsuppress_credential_source(provider, src)
-        except Exception:
-            pass
-
    if requested_type == AUTH_TYPE_API_KEY:
        token = (getattr(args, "api_key", None) or "").strip()
        if not token:
@@ -377,28 +338,71 @@ def auth_remove_command(args) -> None:
        raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
    print(f"Removed {provider} credential #{index} ({removed.label})")

-    # Unified removal dispatch.  Every credential source Hermes reads from
-    # (env vars, external OAuth files, auth.json blocks, custom config)
-    # has a RemovalStep registered in agent.credential_sources.  The step
-    # handles its source-specific cleanup and we centralise suppression +
-    # user-facing output here so every source behaves identically from
-    # the user's perspective.
-    from agent.credential_sources import find_removal_step
-    from hermes_cli.auth import suppress_credential_source
+    # If this was an env-seeded credential, also clear the env var from .env
+    # so it doesn't get re-seeded on the next load_pool() call.
+    if removed.source.startswith("env:"):
+        env_var = removed.source[len("env:"):]
+        if env_var:
+            from hermes_cli.config import remove_env_value
+            cleared = remove_env_value(env_var)
+            if cleared:
+                print(f"Cleared {env_var} from .env")

-    step = find_removal_step(provider, removed.source)
-    if step is None:
-        # Unregistered source — e.g. "manual", which has nothing external
-        # to clean up.  The pool entry is already gone; we're done.
-        return
+    # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
+    # clear the underlying auth store / credential file so it doesn't get
+    # re-seeded on the next load_pool() call.
+    elif provider == "openai-codex" and (
+        removed.source == "device_code" or removed.source.endswith(":device_code")
+    ):
+        # Codex tokens live in TWO places: the Hermes auth store and
+        # ~/.codex/auth.json (the Codex CLI shared file).  On every refresh,
+        # refresh_codex_oauth_pure() writes to both.  So clearing only the
+        # Hermes auth store is not enough — _seed_from_singletons() will
+        # auto-import from ~/.codex/auth.json on the next load_pool() and
+        # the removal is instantly undone.  Mark the source as suppressed
+        # so auto-import is skipped; leave ~/.codex/auth.json untouched so
+        # the Codex CLI itself keeps working.
+        from hermes_cli.auth import (
+            _load_auth_store, _save_auth_store, _auth_store_lock,
+            suppress_credential_source,
+        )
+        with _auth_store_lock():
+            auth_store = _load_auth_store()
+            providers_dict = auth_store.get("providers")
+            if isinstance(providers_dict, dict) and provider in providers_dict:
+                del providers_dict[provider]
+                _save_auth_store(auth_store)
+                print(f"Cleared {provider} OAuth tokens from auth store")
+        suppress_credential_source(provider, "device_code")
+        print("Suppressed openai-codex device_code source — it will not be re-seeded.")
+        print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
+        print("Run `hermes auth add openai-codex` to re-enable if needed.")

-    result = step.remove_fn(provider, removed)
-    for line in result.cleaned:
-        print(line)
-    if result.suppress:
-        suppress_credential_source(provider, removed.source)
-    for line in result.hints:
-        print(line)
+    elif removed.source == "device_code" and provider == "nous":
+        from hermes_cli.auth import (
+            _load_auth_store, _save_auth_store, _auth_store_lock,
+        )
+        with _auth_store_lock():
+            auth_store = _load_auth_store()
+            providers_dict = auth_store.get("providers")
+            if isinstance(providers_dict, dict) and provider in providers_dict:
+                del providers_dict[provider]
+                _save_auth_store(auth_store)
+                print(f"Cleared {provider} OAuth tokens from auth store")
+
+    elif removed.source == "hermes_pkce" and provider == "anthropic":
+        from hermes_constants import get_hermes_home
+        oauth_file = get_hermes_home() / ".anthropic_oauth.json"
+        if oauth_file.exists():
+            oauth_file.unlink()
+            print("Cleared Hermes Anthropic OAuth credentials")
+
+    elif removed.source == "claude_code" and provider == "anthropic":
+        from hermes_cli.auth import suppress_credential_source
+        suppress_credential_source(provider, "claude_code")
+        print("Suppressed claude_code credential — it will not be re-seeded.")
+        print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
+        print("Run `hermes auth add anthropic` to re-enable if needed.")


 def auth_reset_command(args) -> None:
@@ -408,44 +412,6 @@ def auth_reset_command(args) -> None:
    print(f"Reset status on {count} {provider} credentials")


-def auth_status_command(args) -> None:
-    provider = _normalize_provider(getattr(args, "provider", "") or "")
-    if not provider:
-        raise SystemExit("Provider is required. Example: `hermes auth status spotify`.")
-    status = auth_mod.get_auth_status(provider)
-    if not status.get("logged_in"):
-        reason = status.get("error")
-        if reason:
-            print(f"{provider}: logged out ({reason})")
-        else:
-            print(f"{provider}: logged out")
-        return
-
-    print(f"{provider}: logged in")
-    for key in ("auth_type", "client_id", "redirect_uri", "scope", "expires_at", "api_base_url"):
-        value = status.get(key)
-        if value:
-            print(f"  {key}: {value}")
-
-
-def auth_logout_command(args) -> None:
-    auth_mod.logout_command(SimpleNamespace(provider=getattr(args, "provider", None)))
-
-
-def auth_spotify_command(args) -> None:
-    action = str(getattr(args, "spotify_action", "") or "login").strip().lower()
-    if action in {"", "login"}:
-        auth_mod.login_spotify_command(args)
-        return
-    if action == "status":
-        auth_status_command(SimpleNamespace(provider="spotify"))
-        return
-    if action == "logout":
-        auth_logout_command(SimpleNamespace(provider="spotify"))
-        return
-    raise SystemExit(f"Unknown Spotify auth action: {action}")
-
-
 def _interactive_auth() -> None:
    """Interactive credential pool management when `hermes auth` is called bare."""
    # Show current pool status first
@@ -643,14 +609,5 @@ def auth_command(args) -> None:
    if action == "reset":
        auth_reset_command(args)
        return
-    if action == "status":
-        auth_status_command(args)
-        return
-    if action == "logout":
-        auth_logout_command(args)
-        return
-    if action == "spotify":
-        auth_spotify_command(args)
-        return
    # No subcommand — launch interactive mode
    _interactive_auth()
--- a/Show More
+++ b/Show More