style(comfyui): format SKILL.md — table alignment, YAML tags, onboarding hint

feat(skills): add comfyui skill — CLI-driven image/video/audio generation
Agent-friendly ComfyUI skill using the comfyui-skill CLI (invoked via uvx, no persistent install). The CLI wraps ComfyUI's REST API into named 'skills' with parameter schemas — the agent works with friendly args like {"prompt": "a cat"} instead of raw node graphs. - SKILL.md: setup/onboarding, core workflow, decision tree, multi-server, image upload, model/node discovery, queue management, pitfalls - references/cli-reference.md: complete command map (27 leaf commands) - references/api-notes.md: underlying REST endpoints for debugging - scripts/comfyui_setup.sh: workspace initialization Based on comfyui-skill-cli by HuangYuChuh, original skill work by kshitijk4poor.
2026-04-29 13:50:17 +05:30 · 2026-04-29 12:12:09 +05:30 · 2026-04-28 22:33:33 -07:00 · 2026-04-28 22:33:33 -07:00 · 2026-04-28 22:33:33 -07:00 · 2026-04-28 22:33:33 -07:00
1128 changed files with 184285 additions and 17074 deletions
@@ -5,7 +5,9 @@

 # Dependencies
 node_modules
+**/node_modules
 .venv
+**/.venv

 # CI/CD
 .github
@@ -14,3 +16,6 @@ node_modules
 .env

 *.md
+
+# Runtime data (bind-mounted at /opt/data; must not leak into build context)
+data/
@@ -53,6 +53,9 @@ jobs:
      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

+      - name: Regenerate per-skill docs pages + catalogs
+        run: python3 website/scripts/generate-skill-docs.py
+
      - name: Build skills index (if not already present)
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -36,6 +36,9 @@ jobs:
      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

+      - name: Regenerate per-skill docs pages + catalogs
+        run: python3 website/scripts/generate-skill-docs.py
+
      - name: Lint docs diagrams
        run: npm run lint:diagrams
        working-directory: website
@@ -13,7 +13,7 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  check:
+  nix-lockfile-check:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    steps:
@@ -36,6 +36,12 @@ jobs:
          LINK_SHA: ${{ steps.sha.outputs.full }}
        run: nix run .#fix-lockfiles -- --check

+      - name: Fail if check crashed without reporting
+        if: steps.check.outputs.stale != 'true' && steps.check.outputs.stale != 'false'
+        run: |
+          echo "::error::fix-lockfiles exited without reporting stale status — likely an infrastructure or script failure"
+          exit 1
+
      - name: Post sticky PR comment (stale)
        if: steps.check.outputs.stale == 'true' && github.event_name == 'pull_request'
        uses: marocchino/sticky-pull-request-comment@52423e01640425a022ef5fd42c6fb5f633a02728  # v2.9.1
@@ -1,6 +1,13 @@
 name: Nix Lockfile Fix

 on:
+  push:
+    branches: [main]
+    paths:
+      - 'ui-tui/package-lock.json'
+      - 'ui-tui/package.json'
+      - 'web/package-lock.json'
+      - 'web/package.json'
  workflow_dispatch:
    inputs:
      pr_number:
@@ -19,9 +26,103 @@ concurrency:
  cancel-in-progress: false

 jobs:
+  # ── Auto-fix on main ───────────────────────────────────────────────
+  # Fires when a push to main touches package.json or package-lock.json
+  # in ui-tui/ or web/. Runs fix-lockfiles --apply and pushes the hash
+  # update commit directly to main so Nix builds never stay broken.
+  #
+  # Safety invariants:
+  #   1. The fix commit only touches nix/*.nix files, which are NOT in
+  #      the paths filter above, so this cannot re-trigger itself.
+  #   2. An explicit file-whitelist check before commit aborts if
+  #      fix-lockfiles ever modifies unexpected files.
+  #   3. Job-level concurrency with cancel-in-progress: true ensures
+  #      back-to-back pushes collapse to the newest; ref: main checkout
+  #      always operates on the latest branch state.
+  #   4. Uses a GitHub App token (not GITHUB_TOKEN) so the fix commit
+  #      triggers downstream nix.yml verification.
+  auto-fix-main:
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    timeout-minutes: 25
+    concurrency:
+      group: auto-fix-main
+      cancel-in-progress: true
+    steps:
+      - name: Generate GitHub App token
+        id: app-token
+        uses: actions/create-github-app-token@7bfa3a4717ef143a604ee0a99d859b8886a96d00  # v1.9.3
+        with:
+          app-id: ${{ secrets.APP_ID }}
+          private-key: ${{ secrets.APP_PRIVATE_KEY }}
+
+      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
+        with:
+          ref: main
+          token: ${{ steps.app-token.outputs.token }}
+
+      - uses: ./.github/actions/nix-setup
+
+      - name: Apply lockfile hashes
+        id: apply
+        run: nix run .#fix-lockfiles -- --apply
+
+      - name: Commit & push
+        if: steps.apply.outputs.changed == 'true'
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Ensure only nix files were modified — prevents accidental
+          # self-triggering if fix-lockfiles ever touches package files.
+          unexpected="$(git diff --name-only | grep -Ev '^nix/(tui|web)\.nix$' || true)"
+          if [ -n "$unexpected" ]; then
+            echo "::error::Unexpected modified files: $unexpected"
+            exit 1
+          fi
+
+          # Record the base SHA before committing — used to detect package
+          # file changes if we need to rebase after a non-fast-forward push.
+          BASE_SHA="$(git rev-parse HEAD)"
+
+          git config user.name 'github-actions[bot]'
+          git config user.email '41898282+github-actions[bot]@users.noreply.github.com'
+          git add nix/tui.nix nix/web.nix
+          git commit -m "fix(nix): auto-refresh npm lockfile hashes" \
+            -m "Source: $GITHUB_SHA" \
+            -m "Run: $GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID"
+
+          # Retry push with rebase in case main advanced with an unrelated
+          # commit during the nix build. Without this, a non-fast-forward
+          # rejection silently loses the fix. If package files changed during
+          # the rebase, abort — a fresh auto-fix run will handle the new state.
+          for attempt in 1 2 3; do
+            if git push origin HEAD:main; then
+              exit 0
+            fi
+            echo "::warning::Push attempt $attempt failed (non-fast-forward?), rebasing…"
+            git fetch origin main
+
+            # If package files changed between our base and the new main,
+            # our computed hashes are stale. Abort and let the next triggered
+            # run recompute from the correct package-lock state.
+            pkg_changed="$(git diff --name-only "$BASE_SHA"..origin/main -- \
+              'ui-tui/package-lock.json' 'ui-tui/package.json' \
+              'web/package-lock.json' 'web/package.json' || true)"
+            if [ -n "$pkg_changed" ]; then
+              echo "::warning::Package files changed since hash computation — aborting; a fresh run will recompute"
+              exit 0
+            fi
+
+            git rebase origin/main
+          done
+          echo "::error::Failed to push after 3 rebase attempts"
+          exit 1
+
+  # ── PR fix (manual / checkbox) ─────────────────────────────────────
+  # Existing behavior: run on manual dispatch OR when a task-list
+  # checkbox in the sticky lockfile-check comment flips from [ ] to [x].
  fix:
-    # Run on manual dispatch OR when a task-list checkbox in the sticky
-    # lockfile-check comment flips from `[ ]` to `[x]`.
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'issue_comment'
@@ -1,3 +1,4 @@
+.DS_Store
 /venv/
 /_pycache/
 *.pyc*
@@ -68,3 +69,4 @@ mini-swe-agent/
 .nix-stamps/
 result
 website/static/api/skills-index.json
+models-dev-upstream/
@@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent
 ## Development Environment

 ```bash
-source venv/bin/activate  # ALWAYS activate before running Python
+# Prefer .venv; fall back to venv if that's what your checkout has.
+source .venv/bin/activate   # or: source venv/bin/activate
 ```

+`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
+`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
+main checkout).
+
 ## Project Structure

+File counts shift constantly — don't treat the tree below as exhaustive.
+The canonical source is the filesystem. The notes call out the load-bearing
+entry points you'll actually edit.
+
 ```
 hermes-agent/
-├── run_agent.py          # AIAgent class — core conversation loop
+├── run_agent.py          # AIAgent class — core conversation loop (~12k LOC)
 ├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
 ├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
-├── cli.py                # HermesCLI class — interactive CLI orchestrator
+├── cli.py                # HermesCLI class — interactive CLI orchestrator (~11k LOC)
 ├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
-├── agent/                # Agent internals
-│   ├── prompt_builder.py     # System prompt assembly
-│   ├── context_compressor.py # Auto context compression
-│   ├── prompt_caching.py     # Anthropic prompt caching
-│   ├── auxiliary_client.py   # Auxiliary LLM client (vision, summarization)
-│   ├── model_metadata.py     # Model context lengths, token estimation
-│   ├── models_dev.py         # models.dev registry integration (provider-aware context)
-│   ├── display.py            # KawaiiSpinner, tool preview formatting
-│   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
-│   └── trajectory.py         # Trajectory saving helpers
-├── hermes_cli/           # CLI subcommands and setup
-│   ├── main.py           # Entry point — all `hermes` subcommands
-│   ├── config.py         # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
-│   ├── commands.py       # Slash command definitions + SlashCommandCompleter
-│   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
-│   ├── setup.py          # Interactive setup wizard
-│   ├── skin_engine.py    # Skin/theme engine — CLI visual customization
-│   ├── skills_config.py  # `hermes skills` — enable/disable skills per platform
-│   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
-│   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
-│   ├── models.py         # Model catalog, provider model lists
-│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
-│   └── auth.py           # Provider credential resolution
-├── tools/                # Tool implementations (one file per tool)
-│   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
-│   ├── approval.py       # Dangerous command detection
-│   ├── terminal_tool.py  # Terminal orchestration
-│   ├── process_registry.py # Background process management
-│   ├── file_tools.py     # File read/write/search/patch
-│   ├── web_tools.py      # Web search/extract (Parallel + Firecrawl)
-│   ├── browser_tool.py   # Browserbase browser automation
-│   ├── code_execution_tool.py # execute_code sandbox
-│   ├── delegate_tool.py  # Subagent delegation
-│   ├── mcp_tool.py       # MCP client (~1050 lines)
+├── hermes_constants.py   # get_hermes_home(), display_hermes_home() — profile-aware paths
+├── hermes_logging.py     # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
+├── batch_runner.py       # Parallel batch processing
+├── agent/                # Agent internals (provider adapters, memory, caching, compression, etc.)
+├── hermes_cli/           # CLI subcommands, setup wizard, plugins loader, skin engine
+├── tools/                # Tool implementations — auto-discovered via tools/registry.py
 │   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
-├── gateway/              # Messaging platform gateway
-│   ├── run.py            # Main loop, slash commands, message dispatch
-│   ├── session.py        # SessionStore — conversation persistence
-│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
+├── gateway/              # Messaging gateway — run.py + session.py + platforms/
+│   ├── platforms/        # Adapter per platform (telegram, discord, slack, whatsapp,
+│   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
+│   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
+│   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
+│   └── builtin_hooks/    # Extension point for always-registered gateway hooks (none shipped)
+├── plugins/              # Plugin system (see "Plugins" section below)
+│   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
+│   ├── context_engine/   # Context-engine plugins
+│   └── <others>/         # Dashboard, image-gen, disk-cleanup, examples, ...
+├── optional-skills/      # Heavier/niche skills shipped but NOT active by default
+├── skills/               # Built-in skills bundled with the repo
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
-│   ├── src/entry.tsx        # TTY gate + render()
-│   ├── src/app.tsx          # Main state machine and UI
-│   ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
-│   ├── src/app/             # Decomposed app logic (event handler, slash handler, stores, hooks)
-│   ├── src/components/      # Ink components (branding, markdown, prompts, pickers, etc.)
-│   ├── src/hooks/           # useCompletion, useInputHistory, useQueue, useVirtualHistory
-│   └── src/lib/             # Pure helpers (history, osc52, text, rpc, messages)
+│   └── src/              # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
 ├── tui_gateway/          # Python JSON-RPC backend for the TUI
-│   ├── entry.py             # stdio entrypoint
-│   ├── server.py            # RPC handlers and session logic
-│   ├── render.py            # Optional rich/ANSI bridge
-│   └── slash_worker.py      # Persistent HermesCLI subprocess for slash commands
 ├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
-├── cron/                 # Scheduler (jobs.py, scheduler.py)
+├── cron/                 # Scheduler — jobs.py, scheduler.py
 ├── environments/         # RL training environments (Atropos)
-├── tests/                # Pytest suite (~3000 tests)
-└── batch_runner.py       # Parallel batch processing
+├── scripts/              # run_tests.sh, release.py, auxiliary scripts
+├── website/              # Docusaurus docs site
+└── tests/                # Pytest suite (~15k tests across ~700 files as of Apr 2026)
 ```

-**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
+**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
+**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
+`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
+Browse with `hermes logs [--follow] [--level ...] [--session ...]`.

 ## File Dependency Chain

@@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/

 ## AIAgent Class (run_agent.py)

+The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
+session context, budget, credential pool, etc.). The signature below is the
+minimum subset you'll usually touch — read `run_agent.py` for the full list.
+
 ```python
 class AIAgent:
    def __init__(self,
-        model: str = "anthropic/claude-opus-4.6",
-        max_iterations: int = 90,
+        base_url: str = None,
+        api_key: str = None,
+        provider: str = None,
+        api_mode: str = None,              # "chat_completions" | "codex_responses" | ...
+        model: str = "",                   # empty → resolved from config/provider later
+        max_iterations: int = 90,          # tool-calling iterations (shared with subagents)
        enabled_toolsets: list = None,
        disabled_toolsets: list = None,
        quiet_mode: bool = False,
        save_trajectories: bool = False,
-        platform: str = None,           # "cli", "telegram", etc.
+        platform: str = None,              # "cli", "telegram", etc.
        session_id: str = None,
        skip_context_files: bool = False,
        skip_memory: bool = False,
-        # ... plus provider, api_mode, callbacks, routing params
+        credential_pool=None,
+        # ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
+        # checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
    ): ...

    def chat(self, message: str) -> str:
@@ -120,10 +113,13 @@ class AIAgent:

 ### Agent Loop

-The core loop is inside `run_conversation()` — entirely synchronous:
+The core loop is inside `run_conversation()` — entirely synchronous, with
+interrupt checks, budget tracking, and a one-turn grace call:

 ```python
-while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
+while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
+        or self._budget_grace_call:
+    if self._interrupt_requested: break
    response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
    if response.tool_calls:
        for tool_call in response.tool_calls:
@@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining >
        return response.content
 ```

-Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
+Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
+Reasoning content is stored in `assistant_msg["reasoning"]`.

 ---

@@ -243,6 +240,19 @@ npm run fmt       # prettier
 npm test          # vitest
 ```

+### TUI in the Dashboard (`hermes dashboard` → `/chat`)
+
+The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
+
+- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
+- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
+- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
+- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
+
+**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
+
+**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
+
 ---

 ## Adding New Tools
@@ -280,7 +290,7 @@ The registry handles schema collection, dispatch, availability checking, and err

 **State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.

-**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
+**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.

 ---

@@ -288,9 +298,13 @@ The registry handles schema collection, dispatch, availability checking, and err

 ### config.yaml options:
 1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
-2. Bump `_config_version` (currently 5) to trigger migration for existing users
+2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
+   ONLY if you need to actively migrate/transform existing user config
+   (renaming keys, changing structure). Adding a new key to an existing
+   section is handled automatically by the deep-merge and does NOT require
+   a version bump.

-### .env variables:
+### .env variables (SECRETS ONLY — API keys, tokens, passwords):
 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
 "NEW_API_KEY": {
@@ -302,13 +316,29 @@ The registry handles schema collection, dispatch, availability checking, and err
 },
 ```

-### Config loaders (two separate systems):
+Non-secret settings (timeouts, thresholds, feature flags, paths, display
+preferences) belong in `config.yaml`, not `.env`. If internal code needs an
+env var mirror for backward compatibility, bridge it from `config.yaml` to
+the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
+
+### Config loaders (three paths — know which one you're in):

 | Loader | Used by | Location |
 |--------|---------|----------|
-| `load_cli_config()` | CLI mode | `cli.py` |
-| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
-| Direct YAML load | Gateway | `gateway/run.py` |
+| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
+| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
+| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
+
+If you add a new key and the CLI sees it but the gateway doesn't (or vice
+versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
+
+### Working directory:
+- **CLI** — uses the process's current directory (`os.getcwd()`).
+- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
+  to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
+  removed** — the config loader prints a deprecation warning if it's set in
+  `.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
+  `terminal.cwd` in `config.yaml`.

 ---

@@ -401,7 +431,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.

 ---

+## Plugins
+
+Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
+repo-shipped plugins can be discovered alongside user-installed ones in
+`~/.hermes/plugins/` and pip-installed entry points.
+
+### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
+
+`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
+and pip entry points. Each plugin exposes a `register(ctx)` function that
+can:
+
+- Register Python-callback lifecycle hooks:
+  `pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
+  `on_session_start`, `on_session_end`
+- Register new tools via `ctx.register_tool(...)`
+- Register CLI subcommands via `ctx.register_cli_command(...)` — the
+  plugin's argparse tree is wired into `hermes` at startup so
+  `hermes <pluginname> <subcmd>` works with no change to `main.py`
+
+Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
+(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
+as a side effect of importing `model_tools.py`. Code paths that read plugin
+state without importing `model_tools.py` first must call `discover_plugins()`
+explicitly (it's idempotent).
+
+### Memory-provider plugins (`plugins/memory/<name>/`)
+
+Separate discovery system for pluggable memory backends. Current built-in
+providers include **honcho, mem0, supermemory, byterover, hindsight,
+holographic, openviking, retaindb**.
+
+Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
+and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
+`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
+`post_setup(hermes_home, config)` for setup-wizard integration.
+
+**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
+defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
+it at argparse setup time and wires it into `hermes <plugin>`. The
+framework only exposes CLI commands for the **currently active** memory
+provider (read from `memory.provider` in config.yaml), so disabled
+providers don't clutter `hermes --help`.
+
+**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
+(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
+If a plugin needs a capability the framework doesn't expose, expand the
+generic plugin surface (new hook, new ctx method) — never hardcode
+plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
+honcho argparse from `main.py` for exactly this reason.
+
+### Dashboard / context-engine / image-gen plugin directories
+
+`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
+etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
+Context engines plug into `agent/context_engine.py`; image-gen providers
+into `agent/image_gen_provider.py`.
+
+---
+
+## Skills
+
+Two parallel surfaces:
+
+- **`skills/`** — built-in skills shipped and loadable by default.
+  Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
+- **`optional-skills/`** — heavier or niche skills shipped with the repo but
+  NOT active by default. Installed explicitly via
+  `hermes skills install official/<category>/<skill>`. Adapter lives in
+  `tools/skills_hub.py` (`OptionalSkillSource`). Categories include
+  `autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
+  `devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
+  `research`, `security`, `web-development`.
+
+When reviewing skill PRs, check which directory they target — heavy-dep or
+niche skills belong in `optional-skills/`.
+
+### SKILL.md frontmatter
+
+Standard fields: `name`, `description`, `version`, `platforms`
+(OS-gating list: `[macos]`, `[linux, macos]`, ...),
+`metadata.hermes.tags`, `metadata.hermes.category`,
+`metadata.hermes.config` (config.yaml settings the skill needs — stored
+under `skills.config.<key>`, prompted during setup, injected at load time).
+
+---
+
 ## Important Policies
+
 ### Prompt Caching Must Not Break

 Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
@@ -411,9 +529,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i

 Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.

-### Working Directory Behavior
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
+Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
+must be **cache-aware**: default to deferred invalidation (change takes
+effect next session), with an opt-in `--now` flag for immediate
+invalidation. See `/skills install --now` for the canonical pattern.

 ### Background Process Notifications (Gateway)

@@ -435,7 +554,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
 `HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).

 The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
-`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
+`HERMES_HOME` before any module imports. All `get_hermes_home()` references
 automatically scope to the active profile.

 ### Rules for profile-safe code
@@ -492,8 +611,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
 for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
 has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.

-### DO NOT use `simple_term_menu` for interactive menus
-Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
+### DO NOT introduce new `simple_term_menu` usage
+Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
+the preferred UI is curses (stdlib) because `simple_term_menu` has
+ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
+interactive menus must use `hermes_cli/curses_ui.py` — see
+`hermes_cli/tools_config.py` for the canonical pattern.

 ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
 Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
@@ -504,6 +627,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
 ### DO NOT hardcode cross-tool references in schema descriptions
 Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.

+### The gateway has TWO message guards — both must bypass approval/control commands
+When an agent is running, messages pass through two sequential guards:
+(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
+`_pending_messages` when `session_key in self._active_sessions`, and
+(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
+`/queue`, `/status`, `/approve`, `/deny` before they reach
+`running_agent.interrupt()`. Any new command that must reach the runner
+while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
+guards and be dispatched inline, not via `_process_message_background()`
+(which races session lifecycle).
+
+### Squash merges from stale branches silently revert recent fixes
+Before squash-merging a PR, ensure the branch is up to date with `main`
+(`git fetch origin main && git reset --hard origin/main` in the worktree,
+then re-apply the PR's commits). A stale branch's version of an unrelated
+file will silently overwrite recent fixes on main when squashed. Verify
+with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
+red flag.
+
+### Don't wire in dead code without E2E validation
+Unused code that was never shipped was dead for a reason. Before wiring an
+unused module into a live code path, E2E test the real resolution chain
+with actual imports (not mocks) against a temp `HERMES_HOME`.
+
 ### Tests must not write to `~/.hermes/`
 The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.

@@ -559,7 +706,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
 pytest directly), at minimum activate the venv and pass `-n 4`:

 ```bash
-source venv/bin/activate
+source .venv/bin/activate   # or: source venv/bin/activate
 python -m pytest tests/ -q -n 4
 ```

@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
 We value contributions in this order:

 1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
-2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
+2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
 3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
 4. **Performance and robustness** — retry logic, error handling, graceful degradation.
 5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite

 | Requirement | Notes |
 |-------------|-------|
-| **Git** | With `--recurse-submodules` support |
+| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
 | **Python 3.11+** | uv will install it if missing |
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
-| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
+| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

 ### Clone and install

@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
 touch ~/.hermes/.env

 # Add at minimum an LLM provider key:
-echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
+echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
 ```

 ### Run
@@ -494,7 +494,7 @@ branding:
  agent_name: "My Agent"
  welcome: "Welcome message"
  response_label: " ⚔ Agent "
-  prompt_symbol: "⚔ ❯ "
+  prompt_symbol: "⚔"

 tool_prefix: "╎"             # Tool output line prefix
 ```
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl

 ## Cross-Platform Compatibility

-Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
+Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:

 ### Critical rules

@@ -597,7 +597,7 @@ refactor/description   # Code restructuring

 1. **Run tests**: `pytest tests/ -v`
 2. **Test manually**: Run `hermes` and exercise the code path you changed
-3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
+3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.

 ### PR description
@@ -10,9 +10,11 @@ ENV PYTHONUNBUFFERED=1
 ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright

 # Install system dependencies in one layer, clear APT cache
+# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
+# that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
+    build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -28,27 +30,44 @@ WORKDIR /opt/hermes
 # unless the lockfiles themselves change.
 COPY package.json package-lock.json ./
 COPY web/package.json web/package-lock.json web/
+COPY ui-tui/package.json ui-tui/package-lock.json ui-tui/
+COPY ui-tui/packages/hermes-ink/package.json ui-tui/packages/hermes-ink/package-lock.json ui-tui/packages/hermes-ink/

 RUN npm install --prefer-offline --no-audit && \
    npx playwright install --with-deps chromium --only-shell && \
    (cd web && npm install --prefer-offline --no-audit) && \
+    (cd ui-tui && npm install --prefer-offline --no-audit) && \
    npm cache clean --force

 # ---------- Source code ----------
 # .dockerignore excludes node_modules, so the installs above survive.
 COPY --chown=hermes:hermes . .

-# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
-RUN cd web && npm run build
+# Build browser dashboard and terminal UI assets.
+RUN cd web && npm run build && \
+    cd ../ui-tui && npm run build && \
+    rm -rf node_modules/@hermes/ink && \
+    rm -rf packages/hermes-ink/node_modules && \
+    cp -R packages/hermes-ink node_modules/@hermes/ink && \
+    npm install --omit=dev --prefer-offline --no-audit --prefix node_modules/@hermes/ink && \
+    rm -rf node_modules/@hermes/ink/node_modules/react && \
+    node --input-type=module -e "await import('@hermes/ink')"
+
+# ---------- Permissions ----------
+# Make install dir world-readable so any HERMES_UID can read it at runtime.
+# The venv needs to be traversable too.
+USER root
+RUN chmod -R a+rX /opt/hermes
+# Start as root so the entrypoint can usermod/groupmod + gosu.
+# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).

 # ---------- Python virtualenv ----------
-RUN chown hermes:hermes /opt/hermes
-USER hermes
 RUN uv venv && \
    uv pip install --no-cache-dir -e ".[all]"

 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
+ENV PATH="/opt/data/.local/bin:${PATH}"
 VOLUME [ "/opt/data" ]
-ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
+ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
@@ -13,7 +13,7 @@

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.

-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Volcengine](https://www.volcengine.com/product/ark), [BytePlus](https://www.byteplus.com/en/product/modelark), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
 | Set a personality | `/personality [name]` | `/personality [name]` |
 | Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
 | Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
-| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
+| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
 | Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
 | Platform-specific status | `/platforms` | `/status`, `/sethome` |

@@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
-python -m pytest tests/ -q
+scripts/run_tests.sh
 ```

-> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
-> ```bash
-> git submodule update --init tinker-atropos
-> uv pip install -e "./tinker-atropos"
-> ```
+> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.

 ---

@@ -173,7 +169,6 @@ python -m pytest tests/ -q
 - 💬 [Discord](https://discord.gg/NousResearch)
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---
@@ -0,0 +1,453 @@
+# Hermes Agent v0.11.0 (v2026.4.23)
+
+**Release Date:** April 23, 2026
+**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
+
+> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
+
+This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
+
+---
+
+## ✨ Highlights
+
+- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
+
+- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
+
+- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
+
+- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
+
+- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
+
+- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
+
+- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
+
+- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
+
+- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
+
+- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+
+- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
+
+- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
+
+- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Transport Layer (NEW)
+- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
+- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
+- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
+- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
+- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
+
+### Provider & Model Support
+- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
+- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
+- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
+- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
+- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
+- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
+- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
+- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
+- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
+- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
+- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
+- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
+- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
+- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
+- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
+- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
+- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
+- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
+- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
+- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
+- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
+- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
+- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
+- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
+- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
+- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
+- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
+- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
+- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
+- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
+- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
+- Fix: stream reasoning content through OpenAI-compatible providers that emit it
+
+### Agent Loop & Conversation
+- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
+- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
+- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
+- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
+- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
+- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
+- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
+- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
+- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
+- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
+- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
+- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
+- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
+- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
+- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
+- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
+- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
+- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
+- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
+- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
+- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
+- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
+- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
+
+### Session & Memory
+- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
+- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
+- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
+- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
+- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
+- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
+- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
+
+---
+
+## 🖥️ New Ink-based TUI
+
+A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
+
+### TUI Foundations
+- New TUI based on Ink + Python JSON-RPC backend
+- Prettier + ESLint + vitest tooling for `ui-tui/`
+- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
+- Persistent `_SlashWorker` subprocess for slash command dispatch
+
+### UX & Features
+- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
+- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
+- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
+- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
+- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
+- Sticky composer that freezes during scroll
+- OSC-52 clipboard support for copy across SSH sessions
+- Virtualized history rendering for performance
+- Slash command autocomplete via `complete.slash` RPC
+- Path autocomplete via `complete.path` RPC
+- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
+
+### Structural Refactors
+- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
+- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
+- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### New Platforms
+- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
+
+### Telegram
+- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
+- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
+- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
+- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
+- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
+- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
+- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
+- Fix: Markdown special char escaping in `send_exec_approval`
+- Fix: parentheses in URLs during MarkdownV2 link conversion
+- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
+- Many platform hint / streaming / session-key fixes
+
+### Discord
+- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
+- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
+- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
+- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
+- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
+- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
+- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
+
+### Feishu
+- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
+- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
+- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
+
+### DingTalk
+- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
+- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
+- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
+
+### WhatsApp
+- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
+- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
+
+### WeCom / Weixin
+- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
+
+### Signal
+- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
+
+### Slack
+- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
+
+### BlueBubbles (iMessage)
+- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
+
+### Gateway Core
+- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
+- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
+- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
+- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
+- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
+- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
+- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
+- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
+- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
+- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
+- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
+- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
+- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
+- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
+- Fix: unlink stale PID + lock files on cleanup
+- Fix: force-unlink stale PID file after `--replace` takeover
+
+---
+
+## 🔧 Tool System
+
+### Plugin Surface (major expansion)
+- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
+- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
+- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
+- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
+- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
+- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
+- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
+- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
+- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
+- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
+
+### Browser
+- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
+- Camofox hardening + connection stability across the window
+
+### Execute Code
+- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
+
+### Image Generation
+- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
+- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
+- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
+- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
+
+### TTS / STT / Voice
+- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
+- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
+- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
+- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
+- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
+
+### Webhook / Cron
+- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
+- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
+- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
+
+### Delegate
+- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
+- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+
+### File / Patch
+- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
+
+### API Server
+- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
+- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
+
+### Docker / Podman
+- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
+- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
+- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
+
+### MCP
+- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skill System
+- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
+- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
+- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
+- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
+- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
+- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
+- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
+
+### New Skills
+- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
+- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
+- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
+- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
+- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
+- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
+- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
+- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
+- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
+- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
+- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
+- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
+
+---
+
+## 📊 Web Dashboard
+
+- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
+- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
+- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
+- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
+- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
+- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
+- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
+- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
+- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
+- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
+
+---
+
+## 🖱️ CLI & User Experience
+
+- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
+- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
+- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
+- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
+- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
+- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
+- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
+- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
+- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
+- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
+
+---
+
+## 🔒 Security & Reliability
+
+- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
+- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
+- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
+- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
+- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
+- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
+- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
+- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
+- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
+
+---
+
+## 🐛 Notable Bug Fixes
+
+The `fix:` category in this window covers 482 PRs. Highlights:
+
+- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
+- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
+- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
+- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
+- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
+- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
+- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
+- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
+- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
+- Doctor checks for Kimi China credentials fixed
+- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
+- Rapid Telegram follow-ups no longer get cut off
+
+---
+
+## 🧪 Testing & CI
+
+- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
+- Hermetic test parity (`scripts/run_tests.sh`) held across this window
+- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
+
+---
+
+## 📚 Documentation
+
+- Atropos + wandb links in user guide
+- ACP / VS Code / Zed / JetBrains integration docs refresh
+- Webhook subscription docs updated for direct-delivery mode
+- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
+- Transport layer developer guide added
+- Website removed Discussions link from README
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** (Teknium)
+
+### Top Community Contributors (by merged PR count)
+- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
+- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
+- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
+- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
+- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
+- **@ethernet8023** — 3 PRs
+- **@benbarclay** — 3 PRs
+- **@Aslaaen** — 2 PRs
+
+### Also contributing
+@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
+
+### All Contributors (alphabetical)
+
+@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
+@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
+@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
+@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
+@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
+@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
+@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
+@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
+@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
+@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
+@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
+@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
+@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
+@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
+@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
+@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
+@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
+@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
+@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
+@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
+@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
+@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
+@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
+@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
+@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
+@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
+@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
+@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
+@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
+@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
+@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
+@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
+@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
+
+Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
+
+
+---
+
+**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
@@ -112,6 +112,17 @@ def main() -> None:
    import acp
    from .server import HermesACPAgent

+    # MCP tool discovery from config.yaml — run before asyncio.run() so
+    # it's safe to use blocking waits.  (ACP also registers per-session
+    # MCP servers dynamically via asyncio.to_thread inside the event
+    # loop; that path is unaffected.)  Moved from model_tools.py module
+    # scope to avoid freezing the gateway's loop on lazy import (#16856).
+    try:
+        from tools.mcp_tool import discover_mcp_tools
+        discover_mcp_tools()
+    except Exception:
+        logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
+
    agent = HermesACPAgent()
    try:
        asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
@@ -3,6 +3,7 @@
 from __future__ import annotations

 import asyncio
+import contextvars
 import logging
 import os
 from collections import defaultdict, deque
@@ -60,7 +61,7 @@ from acp_adapter.events import (
    make_tool_progress_cb,
 )
 from acp_adapter.permissions import make_approval_callback
-from acp_adapter.session import SessionManager, SessionState
+from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets

 logger = logging.getLogger(__name__)

@@ -287,7 +288,11 @@ class HermesACPAgent(acp.Agent):
        try:
            from model_tools import get_tool_definitions

-            enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
+            enabled_toolsets = _expand_acp_enabled_toolsets(
+                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
+                mcp_server_names=[server.name for server in mcp_servers],
+            )
+            state.agent.enabled_toolsets = enabled_toolsets
            disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
            state.agent.tools = get_tool_definitions(
                enabled_toolsets=enabled_toolsets,
@@ -570,6 +575,22 @@ class HermesACPAgent(acp.Agent):

        def _run_agent() -> dict:
            nonlocal previous_approval_cb, previous_interactive
+            # Bind HERMES_SESSION_KEY for this session so per-session caches
+            # (e.g. the interactive sudo password cache in tools.terminal_tool)
+            # scope to the ACP session rather than leaking across sessions
+            # that land on the same reused executor thread. This call runs
+            # inside a contextvars.copy_context() below, so the ContextVar
+            # write is isolated from other concurrent ACP sessions.
+            try:
+                from gateway.session_context import (
+                    clear_session_vars,
+                    set_session_vars,
+                )
+                session_tokens = set_session_vars(session_key=session_id)
+            except Exception:
+                session_tokens = None
+                clear_session_vars = None  # type: ignore[assignment]
+                logger.debug("Could not set ACP session context", exc_info=True)
            if approval_cb:
                try:
                    from tools import terminal_tool as _terminal_tool
@@ -603,9 +624,19 @@ class HermesACPAgent(acp.Agent):
                        _terminal_tool.set_approval_callback(previous_approval_cb)
                    except Exception:
                        logger.debug("Could not restore approval callback", exc_info=True)
+                if session_tokens is not None and clear_session_vars is not None:
+                    try:
+                        clear_session_vars(session_tokens)
+                    except Exception:
+                        logger.debug("Could not clear ACP session context", exc_info=True)

        try:
-            result = await loop.run_in_executor(_executor, _run_agent)
+            # Wrap the executor call in a fresh copy of the current context so
+            # concurrent ACP sessions on the shared ThreadPoolExecutor don't
+            # stomp on each other's ContextVar writes (HERMES_SESSION_KEY in
+            # particular — used by the interactive sudo password cache scope).
+            ctx = contextvars.copy_context()
+            result = await loop.run_in_executor(_executor, ctx.run, _run_agent)
        except Exception:
            logger.exception("Executor error for session %s", session_id)
            return PromptResponse(stop_reason="end_turn")
@@ -754,7 +785,9 @@ class HermesACPAgent(acp.Agent):
    def _cmd_tools(self, args: str, state: SessionState) -> str:
        try:
            from model_tools import get_tool_definitions
-            toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
+            toolsets = _expand_acp_enabled_toolsets(
+                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
+            )
            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
            if not tools:
                return "No tools available."
@@ -106,6 +106,24 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
        logger.debug("Failed to register ACP task cwd override", exc_info=True)


+def _expand_acp_enabled_toolsets(
+    toolsets: List[str] | None = None,
+    mcp_server_names: List[str] | None = None,
+) -> List[str]:
+    """Return ACP toolsets plus explicit MCP server toolsets for this session."""
+    expanded: List[str] = []
+    for name in list(toolsets or ["hermes-acp"]):
+        if name and name not in expanded:
+            expanded.append(name)
+
+    for server_name in list(mcp_server_names or []):
+        toolset_name = f"mcp-{server_name}"
+        if server_name and toolset_name not in expanded:
+            expanded.append(toolset_name)
+
+    return expanded
+
+
 def _clear_task_cwd(task_id: str) -> None:
    """Remove task-specific cwd overrides for an ACP session."""
    if not task_id:
@@ -537,9 +555,18 @@ class SessionManager:
        elif isinstance(model_cfg, str) and model_cfg.strip():
            default_model = model_cfg.strip()

+        configured_mcp_servers = [
+            name
+            for name, cfg in (config.get("mcp_servers") or {}).items()
+            if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
+        ]
+
        kwargs = {
            "platform": "acp",
-            "enabled_toolsets": ["hermes-acp"],
+            "enabled_toolsets": _expand_acp_enabled_toolsets(
+                ["hermes-acp"],
+                mcp_server_names=configured_mcp_servers,
+            ),
            "quiet_mode": True,
            "session_id": session_id,
            "model": model or default_model,
@@ -14,17 +14,33 @@ import copy
 import json
 import logging
 import os
+import platform
+import subprocess
 from pathlib import Path

 from hermes_constants import get_hermes_home
-from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
 from utils import normalize_proxy_env_vars

-try:
-    import anthropic as _anthropic_sdk
-except ImportError:
-    _anthropic_sdk = None  # type: ignore[assignment]
+# NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
+# ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
+# and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
+# read_claude_code_credentials_from_keychain) are all on cold user-triggered
+# paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
+# the module after the first call and returns None on ImportError.
+_anthropic_sdk: Any = ...  # sentinel — None means "tried and missing"
+
+
+def _get_anthropic_sdk():
+    """Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
+    global _anthropic_sdk
+    if _anthropic_sdk is ...:
+        try:
+            import anthropic as _sdk
+            _anthropic_sdk = _sdk
+        except ImportError:
+            _anthropic_sdk = None
+    return _anthropic_sdk

 logger = logging.getLogger(__name__)

@@ -117,6 +133,63 @@ def _get_anthropic_max_output(model: str) -> int:
    return best_val


+def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
+    """Return ``value`` floored to a positive int, or ``None`` if it is not a
+    finite positive number. Ported from openclaw/openclaw#66664.
+
+    Anthropic's Messages API rejects ``max_tokens`` values that are 0,
+    negative, non-integer, or non-finite with HTTP 400. Python's ``or``
+    idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
+    negative ints and fractional floats (``-1``, ``0.5``) through to the
+    API, producing a user-visible failure instead of a local error.
+    """
+    # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
+    # silently become 1 and ``False`` doesn't become 0.
+    if isinstance(value, bool):
+        return None
+    if not isinstance(value, (int, float)):
+        return None
+    try:
+        import math
+        if not math.isfinite(value):
+            return None
+    except Exception:
+        return None
+    floored = int(value)  # truncates toward zero for floats
+    return floored if floored > 0 else None
+
+
+def _resolve_anthropic_messages_max_tokens(
+    requested,
+    model: str,
+    context_length: Optional[int] = None,
+) -> int:
+    """Resolve the ``max_tokens`` budget for an Anthropic Messages call.
+
+    Prefers ``requested`` when it is a positive finite number; otherwise
+    falls back to the model's output ceiling. Raises ``ValueError`` if no
+    positive budget can be resolved (should not happen with current model
+    table defaults, but guards against a future regression where
+    ``_get_anthropic_max_output`` could return ``0``).
+
+    Separately, callers apply a context-window clamp — this resolver does
+    not, to keep the positive-value contract independent of endpoint
+    specifics.
+
+    Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
+    """
+    resolved = _resolve_positive_anthropic_max_tokens(requested)
+    if resolved is not None:
+        return resolved
+    fallback = _get_anthropic_max_output(model)
+    if fallback > 0:
+        return fallback
+    raise ValueError(
+        f"Anthropic Messages adapter requires a positive max_tokens value for "
+        f"model {model!r}; got {requested!r} and no model default resolved."
+    )
+
+
 def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6+ models that support adaptive thinking."""
    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -144,19 +217,33 @@ def _forbids_sampling_params(model: str) -> bool:


 # Beta headers for enhanced features (sent with ALL auth types).
-# As of Opus 4.7 (2026-04-16), both of these are GA on Claude 4.6+ — the
+# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
 # beta headers are still accepted (harmless no-op) but not required. Kept
 # here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
 # that still gate on the headers continue to get the enhanced features.
-# Migration guide: remove these if you no longer support ≤4.5 models.
+#
+# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
+# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
+# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
+# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
+# at 200K even though model_metadata.py advertises 1M. The header is a harmless
+# no-op on endpoints where 1M is GA.
+#
+# Migration guide: remove these if you no longer support ≤4.5 models or once
+# Bedrock/Azure promote 1M to GA.
 _COMMON_BETAS = [
    "interleaved-thinking-2025-05-14",
    "fine-grained-tool-streaming-2025-05-14",
+    "context-1m-2025-08-07",
 ]
 # MiniMax's Anthropic-compatible endpoints fail tool-use requests when
 # the fine-grained tool streaming beta is present.  Omit it so tool calls
 # fall back to the provider's default response path.
 _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
+# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
+# Bearer-auth (MiniMax) endpoints since they host their own models and
+# unknown Anthropic beta headers risk request rejection.
+_CONTEXT_1M_BETA = "context-1m-2025-08-07"

 # Fast mode beta — enables the ``speed: "fast"`` request parameter for
 # significantly higher output token throughput on Opus 4.6 (~2.5x).
@@ -170,10 +257,11 @@ _OAUTH_ONLY_BETAS = [
    "oauth-2025-04-20",
 ]

-# Claude Code identity — required for OAuth requests to be routed correctly.
-# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
-# The version must stay reasonably current — Anthropic rejects OAuth requests
-# when the spoofed user-agent version is too far behind the actual release.
+# Claude Code version — sent on OAuth token-exchange / refresh requests
+# (platform.claude.com/v1/oauth/token) as the client's user-agent. Anthropic's
+# OAuth flow validates the UA and may reject requests with a version that's
+# too old, so detecting dynamically keeps users on a current Claude Code
+# install from hitting stale-version errors during login/refresh.
 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
 _claude_code_version_cache: Optional[str] = None

@@ -181,9 +269,9 @@ _claude_code_version_cache: Optional[str] = None
 def _detect_claude_code_version() -> str:
    """Detect the installed Claude Code version, fall back to a static constant.

-    Anthropic's OAuth infrastructure validates the user-agent version and may
-    reject requests with a version that's too old.  Detecting dynamically means
-    users who keep Claude Code updated never hit stale-version 400s.
+    Used only by the OAuth token-exchange / refresh flow
+    (``platform.claude.com/v1/oauth/token``). The Messages API client no
+    longer sends a claude-cli user-agent.
    """
    import subprocess as _sp

@@ -203,12 +291,13 @@ def _detect_claude_code_version() -> str:
    return _CLAUDE_CODE_VERSION_FALLBACK


-_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
-_MCP_TOOL_PREFIX = "mcp_"
-
-
 def _get_claude_code_version() -> str:
-    """Lazily detect the installed Claude Code version when OAuth headers need it."""
+    """Lazily detect the installed Claude Code version for OAuth flow headers.
+
+    Used only on the OAuth token-exchange and refresh endpoints
+    (``platform.claude.com/v1/oauth/token``). The Messages API client does
+    not send a claude-cli user-agent.
+    """
    global _claude_code_version_cache
    if _claude_code_version_cache is None:
        _claude_code_version_cache = _detect_claude_code_version()
@@ -221,8 +310,9 @@ def _is_oauth_token(key: str) -> bool:
    Positively identifies Anthropic OAuth tokens by their key format:
    - ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
    - ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
+    - ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)

-    Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
+    Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
    and correctly return False.
    """
    if not key:
@@ -236,6 +326,9 @@ def _is_oauth_token(key: str) -> bool:
    # JWTs from Anthropic OAuth flow
    if key.startswith("eyJ"):
        return True
+    # Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
+    if key.startswith("cc-"):
+        return True
    return False


@@ -295,9 +388,14 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    that include Anthropic's ``fine-grained-tool-streaming`` beta — every
    tool-use message triggers a connection error.  Strip that beta for
    Bearer-auth endpoints while keeping all other betas intact.
+
+    The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
+    endpoints — MiniMax hosts its own models, not Claude, so the header is
+    irrelevant at best and risks request rejection at worst.
    """
    if _requires_bearer_auth(base_url):
-        return [b for b in _COMMON_BETAS if b != _TOOL_STREAMING_BETA]
+        _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
+        return [b for b in _COMMON_BETAS if b not in _stripped]
    return _COMMON_BETAS


@@ -312,6 +410,7 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =

    Returns an anthropic.Anthropic instance.
    """
+    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
@@ -328,7 +427,16 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
    }
    if normalized_base_url:
-        kwargs["base_url"] = normalized_base_url
+        # Azure Anthropic endpoints require an ``api-version`` query parameter.
+        # Pass it via default_query so the SDK appends it to every request URL
+        # without corrupting the base_url (appending it directly produces
+        # malformed paths like /anthropic?api-version=.../v1/messages).
+        _is_azure_endpoint = "azure.com" in normalized_base_url.lower()
+        if _is_azure_endpoint and "api-version" not in normalized_base_url:
+            kwargs["base_url"] = normalized_base_url.rstrip("/")
+            kwargs["default_query"] = {"api-version": "2025-04-15"}
+        else:
+            kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(normalized_base_url)

    if _is_kimi_coding_endpoint(base_url):
@@ -359,15 +467,21 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float =
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_oauth_token(api_key):
-        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
-        # Anthropic routes OAuth requests based on user-agent and headers;
-        # without Claude Code's fingerprint, requests get intermittent 500s.
-        all_betas = common_betas + _OAUTH_ONLY_BETAS
+        # OAuth access token / setup-token → Bearer auth + OAuth-only betas.
+        # The OAuth-specific beta headers are still required by Anthropic's
+        # OAuth-gated Messages API path; the Claude Code user-agent / x-app
+        # spoofing is deliberately NOT sent — Hermes identifies as itself.
+        #
+        # ``context-1m-2025-08-07`` is stripped here: Anthropic rejects
+        # OAuth requests that carry it with
+        #   "This authentication style is incompatible with the long
+        #    context beta header."
+        # Subscription-gated OAuth traffic gets the 200K default window.
+        oauth_safe_common = [b for b in common_betas if b != _CONTEXT_1M_BETA]
+        all_betas = oauth_safe_common + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
-            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
-            "x-app": "cli",
        }
    else:
        # Regular API key → x-api-key header + common betas
@@ -385,8 +499,16 @@ def build_anthropic_bedrock_client(region: str):
    Claude feature parity: prompt caching, thinking budgets, adaptive
    thinking, fast mode — features not available via the Converse API.

+    Attaches the common Anthropic beta headers as client-level defaults so
+    that Bedrock-hosted Claude models get the same enhanced features as
+    native Anthropic. The ``context-1m-2025-08-07`` beta in particular
+    unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
+    it, Bedrock caps these models at 200K even though the Anthropic API
+    serves them with 1M natively.
+
    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
    """
+    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
@@ -402,11 +524,73 @@ def build_anthropic_bedrock_client(region: str):
    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
+        default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
    )


+def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
+    """Read Claude Code OAuth credentials from the macOS Keychain.
+
+    Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
+    service name "Claude Code-credentials" rather than (or in addition to)
+    the JSON file at ~/.claude/.credentials.json.
+
+    The password field contains a JSON string with the same claudeAiOauth
+    structure as the JSON file.
+
+    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
+    """
+    if platform.system() != "Darwin":
+        return None
+
+    try:
+        # Read the "Claude Code-credentials" generic password entry
+        result = subprocess.run(
+            ["security", "find-generic-password",
+             "-s", "Claude Code-credentials",
+             "-w"],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+    except (OSError, subprocess.TimeoutExpired):
+        logger.debug("Keychain: security command not available or timed out")
+        return None
+
+    if result.returncode != 0:
+        logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
+        return None
+
+    raw = result.stdout.strip()
+    if not raw:
+        return None
+
+    try:
+        data = json.loads(raw)
+    except json.JSONDecodeError:
+        logger.debug("Keychain: credentials payload is not valid JSON")
+        return None
+
+    oauth_data = data.get("claudeAiOauth")
+    if oauth_data and isinstance(oauth_data, dict):
+        access_token = oauth_data.get("accessToken", "")
+        if access_token:
+            return {
+                "accessToken": access_token,
+                "refreshToken": oauth_data.get("refreshToken", ""),
+                "expiresAt": oauth_data.get("expiresAt", 0),
+                "source": "macos_keychain",
+            }
+
+    return None
+
+
 def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
-    """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
+    """Read refreshable Claude Code OAuth credentials.
+
+    Checks two sources in order:
+      1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
+      2. ~/.claude/.credentials.json file

    This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
    subscription flow is OAuth/setup-token based with refreshable credentials,
@@ -415,6 +599,12 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
+    # Try macOS Keychain first (covers Claude Code >=2.1.114)
+    kc_creds = _read_claude_code_credentials_from_keychain()
+    if kc_creds:
+        return kc_creds
+
+    # Fall back to JSON file
    cred_path = Path.home() / ".claude" / ".credentials.json"
    if cred_path.exists():
        try:
@@ -585,7 +775,9 @@ def _write_claude_code_credentials(
        existing["claudeAiOauth"] = oauth_data

        cred_path.parent.mkdir(parents=True, exist_ok=True)
-        cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
+        _tmp_cred = cred_path.with_suffix(".tmp")
+        _tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
+        _tmp_cred.replace(cred_path)
        # Restrict permissions (credentials file)
        cred_path.chmod(0o600)
    except (OSError, IOError) as e:
@@ -633,17 +825,45 @@ def resolve_anthropic_token() -> Optional[str]:
    """Resolve an Anthropic token from all available sources.

    Priority:
-      1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
-      2. CLAUDE_CODE_OAUTH_TOKEN env var
-      3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
+      1. Hermes credential pool (``~/.hermes/auth.json`` →
+         ``credential_pool.anthropic``) — OAuth tokens minted by Hermes'
+         own PKCE login flow. Entries are auto-refreshed when near
+         expiry. Env-sourced pool entries (``source="env:..."``) are
+         skipped here so the env-var priority logic below still runs.
+      2. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
+      3. CLAUDE_CODE_OAUTH_TOKEN env var
+      4. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
         — with automatic refresh if expired and a refresh token is available
-      4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
+      5. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)

    Returns the token string or None.
    """
+    # 1. Hermes credential pool — the live source of truth for tokens
+    #    minted via ``hermes login anthropic`` / the dashboard PKCE flow.
+    #    ``select()`` picks the best available entry and refreshes it if
+    #    it's near expiry, so callers always get a fresh token.
+    #
+    #    Skip env-sourced pool entries (``env:ANTHROPIC_TOKEN``, etc.) —
+    #    those are passthroughs of the env var, and the env-var branches
+    #    below have richer priority logic (``_prefer_refreshable_claude_code_token``)
+    #    that can upgrade a static env OAuth token to a refreshed
+    #    Claude Code token. Letting the pool win here would short-circuit
+    #    that upgrade.
+    try:
+        from agent.credential_pool import load_pool
+        pool = load_pool("anthropic")
+        entry = pool.select()
+        if entry and entry.access_token and not entry.source.startswith("env:"):
+            return entry.access_token
+    except Exception as exc:
+        # Pool lookup is best-effort — fall through to env/file sources
+        # if anything goes wrong (e.g. auth.json corruption during a
+        # concurrent write).
+        logger.debug("Credential-pool lookup failed for anthropic: %s", exc)
+
    creds = read_claude_code_credentials()

-    # 1. Hermes-managed OAuth/setup token env var
+    # 2. Hermes-managed OAuth/setup token env var
    token = os.getenv("ANTHROPIC_TOKEN", "").strip()
    if token:
        preferred = _prefer_refreshable_claude_code_token(token, creds)
@@ -651,7 +871,7 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return token

-    # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
+    # 3. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
    cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
    if cc_token:
        preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
@@ -659,12 +879,12 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return cc_token

-    # 3. Claude Code credential file
+    # 4. Claude Code credential file
    resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
    if resolved_claude_token:
        return resolved_claude_token

-    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
+    # 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    # This remains as a compatibility fallback for pre-migration Hermes configs.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if api_key:
@@ -852,6 +1072,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
 # ---------------------------------------------------------------------------


+def _is_bedrock_model_id(model: str) -> bool:
+    """Detect AWS Bedrock model IDs that use dots as namespace separators.
+
+    Bedrock model IDs come in two forms:
+    - Bare:    ``anthropic.claude-opus-4-7``
+    - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
+
+    In both cases the dots separate namespace components, not version
+    numbers, and must be preserved verbatim for the Bedrock API.
+    """
+    lower = model.lower()
+    # Regional inference-profile prefixes
+    if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
+        return True
+    # Bare Bedrock model IDs: provider.model-family
+    if lower.startswith("anthropic."):
+        return True
+    return False
+
+
 def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """Normalize a model name for the Anthropic API.

@@ -859,11 +1099,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    - Converts dots to hyphens in version numbers (OpenRouter uses dots,
      Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
      preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
+    - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
+      regional inference profiles (``us.anthropic.claude-*``) whose dots
+      are namespace separators, not version separators.
    """
    lower = model.lower()
    if lower.startswith("anthropic/"):
        model = model[len("anthropic/"):]
    if not preserve_dots:
+        # Bedrock model IDs use dots as namespace separators
+        # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
+        # These must not be converted to hyphens.  See issue #12295.
+        if _is_bedrock_model_id(model):
+            return model
        # OpenRouter uses dots for version separators (claude-opus-4.6),
        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
        model = model.replace(".", "-")
@@ -883,6 +1131,33 @@ def _sanitize_tool_id(tool_id: str) -> str:
    return sanitized or "tool_0"


+def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
+    """Normalize tool schemas before sending them to Anthropic.
+
+    Anthropic's tool schema validator rejects nullable unions such as
+    ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
+    commonly emits for optional fields. Tool optionality is represented by
+    the parent ``required`` array, so we delegate to the shared
+    ``strip_nullable_unions`` helper to collapse nullable unions to the
+    non-null branch while preserving metadata like description/default.
+
+    ``keep_nullable_hint=False`` because the Anthropic validator does not
+    recognize the OpenAPI-style ``nullable: true`` extension and strict
+    schema-to-grammar converters may reject unknown keywords.
+    """
+    if not schema:
+        return {"type": "object", "properties": {}}
+
+    from tools.schema_sanitizer import strip_nullable_unions
+
+    normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
+    if not isinstance(normalized, dict):
+        return {"type": "object", "properties": {}}
+    if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
+        normalized = {**normalized, "properties": {}}
+    return normalized
+
+
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format."""
    if not tools:
@@ -893,7 +1168,9 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
        result.append({
            "name": fn.get("name", ""),
            "description": fn.get("description", ""),
-            "input_schema": fn.get("parameters", {"type": "object", "properties": {}}),
+            "input_schema": _normalize_tool_input_schema(
+                fn.get("parameters", {"type": "object", "properties": {}})
+            ),
        })
    return result

@@ -1372,8 +1649,10 @@ def build_anthropic_kwargs(
    "max_tokens too large given prompt" errors and retry with a smaller cap
    (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).

-    When *is_oauth* is True, applies Claude Code compatibility transforms:
-    system prompt prefix, tool name prefixing, and prompt sanitization.
+    When *is_oauth* is True, enables the OAuth-only beta headers required by
+    Anthropic's subscription-gated Messages endpoint (fast-mode branch only;
+    the default headers are set by build_anthropic_client). No system-prompt
+    or tool-name rewriting is performed — Hermes identifies as itself.

    When *preserve_dots* is True, model name dots are not converted to hyphens
    (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).
@@ -1391,7 +1670,12 @@ def build_anthropic_kwargs(

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
-    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
+    # Use the resolver helper so non-positive values (negative ints,
+    # fractional floats, NaN, non-numeric) fail locally with a clear error
+    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
+    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
+        max_tokens, model, context_length=context_length
+    )

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
@@ -1401,45 +1685,11 @@ def build_anthropic_kwargs(
    if context_length and effective_max_tokens > context_length:
        effective_max_tokens = max(context_length - 1, 1)

-    # ── OAuth: Claude Code identity ──────────────────────────────────
-    if is_oauth:
-        # 1. Prepend Claude Code system prompt identity
-        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
-        if isinstance(system, list):
-            system = [cc_block] + system
-        elif isinstance(system, str) and system:
-            system = [cc_block, {"type": "text", "text": system}]
-        else:
-            system = [cc_block]
-
-        # 2. Sanitize system prompt — replace product name references
-        #    to avoid Anthropic's server-side content filters.
-        for block in system:
-            if isinstance(block, dict) and block.get("type") == "text":
-                text = block.get("text", "")
-                text = text.replace("Hermes Agent", "Claude Code")
-                text = text.replace("Hermes agent", "Claude Code")
-                text = text.replace("hermes-agent", "claude-code")
-                text = text.replace("Nous Research", "Anthropic")
-                block["text"] = text
-
-        # 3. Prefix tool names with mcp_ (Claude Code convention)
-        if anthropic_tools:
-            for tool in anthropic_tools:
-                if "name" in tool:
-                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
-
-        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
-        for msg in anthropic_messages:
-            content = msg.get("content")
-            if isinstance(content, list):
-                for block in content:
-                    if isinstance(block, dict):
-                        if block.get("type") == "tool_use" and "name" in block:
-                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
-                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
-                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
-                            pass  # tool_result uses ID, not name
+    # OAuth requests go through Anthropic's subscription-gated Messages
+    # endpoint but otherwise send the real Hermes system prompt and real
+    # Hermes tool names — the only OAuth-specific wire differences are
+    # Bearer auth and the _OAUTH_ONLY_BETAS header (applied in
+    # build_anthropic_client and the fast-mode branch below).

    kwargs: Dict[str, Any] = {
        "model": model,
@@ -1513,9 +1763,9 @@ def build_anthropic_kwargs(

    # ── Strip sampling params on 4.7+ ─────────────────────────────────
    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
-    # Callers (auxiliary_client, flush_memories, etc.) may set these for
-    # older models; drop them here as a safety net so upstream 4.6 → 4.7
-    # migrations don't require coordinated edits everywhere.
+    # Callers (auxiliary_client, etc.) may set these for older models;
+    # drop them here as a safety net so upstream 4.6 → 4.7 migrations
+    # don't require coordinated edits everywhere.
    if _forbids_sampling_params(model):
        for _sampling_key in ("temperature", "top_p", "top_k"):
            kwargs.pop(_sampling_key, None)
@@ -1530,6 +1780,9 @@ def build_anthropic_kwargs(
        # extra_headers override the client-level anthropic-beta header).
        betas = list(_common_betas_for_base_url(base_url))
        if is_oauth:
+            # Strip context-1m — incompatible with OAuth auth. See matching
+            # comment in build_anthropic_client().
+            betas = [b for b in betas if b != _CONTEXT_1M_BETA]
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
@@ -1537,109 +1790,3 @@ def build_anthropic_kwargs(
    return kwargs


-def normalize_anthropic_response(
-    response,
-    strip_tool_prefix: bool = False,
-) -> Tuple[SimpleNamespace, str]:
-    """Normalize Anthropic response to match the shape expected by AIAgent.
-
-    Returns (assistant_message, finish_reason) where assistant_message has
-    .content, .tool_calls, and .reasoning attributes.
-
-    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
-    added to tool names for OAuth Claude Code compatibility.
-    """
-    text_parts = []
-    reasoning_parts = []
-    reasoning_details = []
-    tool_calls = []
-
-    for block in response.content:
-        if block.type == "text":
-            text_parts.append(block.text)
-        elif block.type == "thinking":
-            reasoning_parts.append(block.thinking)
-            block_dict = _to_plain_data(block)
-            if isinstance(block_dict, dict):
-                reasoning_details.append(block_dict)
-        elif block.type == "tool_use":
-            name = block.name
-            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
-                name = name[len(_MCP_TOOL_PREFIX):]
-            tool_calls.append(
-                SimpleNamespace(
-                    id=block.id,
-                    type="function",
-                    function=SimpleNamespace(
-                        name=name,
-                        arguments=json.dumps(block.input),
-                    ),
-                )
-            )
-
-    # Map Anthropic stop_reason to OpenAI finish_reason.
-    # Newer stop reasons added in Claude 4.5+ / 4.7:
-    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
-    #   - model_context_window_exceeded: hit context limit (not max_tokens)
-    # Both need distinct handling upstream — a refusal should surface to the
-    # user with a clear message, and a context-window overflow should trigger
-    # compression/truncation rather than be treated as normal end-of-turn.
-    stop_reason_map = {
-        "end_turn": "stop",
-        "tool_use": "tool_calls",
-        "max_tokens": "length",
-        "stop_sequence": "stop",
-        "refusal": "content_filter",
-        "model_context_window_exceeded": "length",
-    }
-    finish_reason = stop_reason_map.get(response.stop_reason, "stop")
-
-    return (
-        SimpleNamespace(
-            content="\n".join(text_parts) if text_parts else None,
-            tool_calls=tool_calls or None,
-            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
-            reasoning_content=None,
-            reasoning_details=reasoning_details or None,
-        ),
-        finish_reason,
-    )
-
-
-def normalize_anthropic_response_v2(
-    response,
-    strip_tool_prefix: bool = False,
-) -> "NormalizedResponse":
-    """Normalize Anthropic response to NormalizedResponse.
-
-    Wraps the existing normalize_anthropic_response() and maps its output
-    to the shared transport types.  This allows incremental migration —
-    one call site at a time — without changing the original function.
-    """
-    from agent.transports.types import NormalizedResponse, build_tool_call
-
-    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
-
-    tool_calls = None
-    if assistant_msg.tool_calls:
-        tool_calls = [
-            build_tool_call(
-                id=tc.id,
-                name=tc.function.name,
-                arguments=tc.function.arguments,
-            )
-            for tc in assistant_msg.tool_calls
-        ]
-
-    provider_data = {}
-    if getattr(assistant_msg, "reasoning_details", None):
-        provider_data["reasoning_details"] = assistant_msg.reasoning_details
-
-    return NormalizedResponse(
-        content=assistant_msg.content,
-        tool_calls=tool_calls,
-        finish_reason=finish_reason,
-        reasoning=getattr(assistant_msg, "reasoning", None),
-        usage=None,  # Anthropic usage is on the raw response, not the normaliser
-        provider_data=provider_data or None,
-    )
@@ -87,6 +87,114 @@ def reset_client_cache():
    _bedrock_control_client_cache.clear()


+def invalidate_runtime_client(region: str) -> bool:
+    """Evict the cached ``bedrock-runtime`` client for a single region.
+
+    Per-region counterpart to :func:`reset_client_cache`. Used by the converse
+    call wrappers to discard clients whose underlying HTTP connection has
+    gone stale, so the next call allocates a fresh client (with a fresh
+    connection pool) instead of reusing a dead socket.
+
+    Returns True if a cached entry was evicted, False if the region was not
+    cached.
+    """
+    existed = region in _bedrock_runtime_client_cache
+    _bedrock_runtime_client_cache.pop(region, None)
+    return existed
+
+
+# ---------------------------------------------------------------------------
+# Stale-connection detection
+# ---------------------------------------------------------------------------
+#
+# boto3 caches its HTTPS connection pool inside the client object. When a
+# pooled connection is killed out from under us (NAT timeout, VPN flap,
+# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
+# one of a handful of low-level exceptions — most commonly
+# ``botocore.exceptions.ConnectionClosedError`` or
+# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
+# ``assert`` in a couple of paths (connection pool state checks, chunked
+# response readers) which bubbles up as a bare ``AssertionError`` with an
+# empty ``str(exc)``.
+#
+# In all of these cases the client is the problem, not the request: retrying
+# with the same cached client reproduces the failure until the process
+# restarts. The fix is to evict the region's cached client so the next
+# attempt builds a new one.
+
+_STALE_LIB_MODULE_PREFIXES = (
+    "urllib3.",
+    "botocore.",
+    "boto3.",
+)
+
+
+def _traceback_frames_modules(exc: BaseException):
+    """Yield ``__name__``-style module strings for each frame in exc's traceback."""
+    tb = getattr(exc, "__traceback__", None)
+    while tb is not None:
+        frame = tb.tb_frame
+        module = frame.f_globals.get("__name__", "")
+        yield module or ""
+        tb = tb.tb_next
+
+
+def is_stale_connection_error(exc: BaseException) -> bool:
+    """Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
+
+    Matches:
+      * ``botocore.exceptions.ConnectionError`` and subclasses
+        (``ConnectionClosedError``, ``EndpointConnectionError``,
+        ``ReadTimeoutError``, ``ConnectTimeoutError``).
+      * ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` /
+        ``ConnectionError`` (best-effort import — urllib3 is a transitive
+        dependency of botocore so it is always available in practice).
+      * Bare ``AssertionError`` raised from a frame inside urllib3, botocore,
+        or boto3. These are internal-invariant failures (typically triggered
+        by corrupted connection-pool state after a dropped socket) and are
+        recoverable by swapping the client.
+
+    Non-library ``AssertionError``s (from application code or tests) are
+    intentionally not matched — only library-internal asserts signal stale
+    connection state.
+    """
+    # botocore: the canonical signal — HTTPClientError is the umbrella for
+    # ConnectionClosedError, ReadTimeoutError, EndpointConnectionError,
+    # ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers
+    # the same family via a different branch of the hierarchy.
+    try:
+        from botocore.exceptions import (
+            ConnectionError as BotoConnectionError,
+            HTTPClientError,
+        )
+        botocore_errors: tuple = (BotoConnectionError, HTTPClientError)
+    except ImportError:  # pragma: no cover — botocore always present with boto3
+        botocore_errors = ()
+    if botocore_errors and isinstance(exc, botocore_errors):
+        return True
+
+    # urllib3: low-level transport failures
+    try:
+        from urllib3.exceptions import (
+            ProtocolError,
+            NewConnectionError,
+            ConnectionError as Urllib3ConnectionError,
+        )
+        urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
+    except ImportError:  # pragma: no cover
+        urllib3_errors = ()
+    if urllib3_errors and isinstance(exc, urllib3_errors):
+        return True
+
+    # Library-internal AssertionError (urllib3 / botocore / boto3)
+    if isinstance(exc, AssertionError):
+        for module in _traceback_frames_modules(exc):
+            if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES):
+                return True
+
+    return False
+
+
 # ---------------------------------------------------------------------------
 # AWS credential detection
 # ---------------------------------------------------------------------------
@@ -183,14 +291,52 @@ def has_aws_credentials(env: Optional[Dict[str, str]] = None) -> bool:
 def resolve_bedrock_region(env: Optional[Dict[str, str]] = None) -> str:
    """Resolve the AWS region for Bedrock API calls.

-    Priority: AWS_REGION → AWS_DEFAULT_REGION → us-east-1 (fallback).
+    Priority:
+      1. AWS_REGION env var
+      2. AWS_DEFAULT_REGION env var
+      3. boto3/botocore configured region (from ~/.aws/config or SSO profile)
+      4. us-east-1 (hard fallback)
+
+    The boto3 fallback is critical for EU/AP users who configure their region
+    in ~/.aws/config via a named profile rather than env vars — without it,
+    live model discovery would always return us.* profile IDs regardless of
+    the user's actual region.
    """
    env = env if env is not None else os.environ
-    return (
+    explicit = (
        env.get("AWS_REGION", "").strip()
        or env.get("AWS_DEFAULT_REGION", "").strip()
-        or "us-east-1"
    )
+    if explicit:
+        return explicit
+    try:
+        import botocore.session
+        region = botocore.session.get_session().get_config_variable("region")
+        if region:
+            return region
+    except Exception:
+        pass
+    return "us-east-1"
+
+
+def bedrock_model_ids_or_none() -> Optional[List[str]]:
+    """Live-discover Bedrock model IDs for the active region.
+
+    Returns a list of model ID strings if discovery succeeds and yields
+    at least one model, or ``None`` on failure / empty result.  Callers
+    should fall back to the static curated list when ``None`` is returned.
+
+    This helper consolidates the discover → extract-ids → fallback
+    pattern that was previously duplicated across ``provider_model_ids``,
+    ``list_authenticated_providers`` section 2, and section 3.
+    """
+    try:
+        discovered = discover_bedrock_models(resolve_bedrock_region())
+        if discovered:
+            return [m["id"] for m in discovered]
+    except Exception:
+        pass
+    return None


 # ---------------------------------------------------------------------------
@@ -787,7 +933,17 @@ def call_converse(
        guardrail_config=guardrail_config,
    )

-    response = client.converse(**kwargs)
+    try:
+        response = client.converse(**kwargs)
+    except Exception as exc:
+        if is_stale_connection_error(exc):
+            logger.warning(
+                "bedrock: stale-connection error on converse(region=%s, model=%s): "
+                "%s — evicting cached client so the next call reconnects.",
+                region, model, type(exc).__name__,
+            )
+            invalidate_runtime_client(region)
+        raise
    return normalize_converse_response(response)


@@ -819,7 +975,17 @@ def call_converse_stream(
        guardrail_config=guardrail_config,
    )

-    response = client.converse_stream(**kwargs)
+    try:
+        response = client.converse_stream(**kwargs)
+    except Exception as exc:
+        if is_stale_connection_error(exc):
+            logger.warning(
+                "bedrock: stale-connection error on converse_stream(region=%s, "
+                "model=%s): %s — evicting cached client so the next call reconnects.",
+                region, model, type(exc).__name__,
+            )
+            invalidate_runtime_client(region)
+        raise
    return normalize_converse_stream_events(response)


@@ -23,26 +23,52 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
 logger = logging.getLogger(__name__)


+# Matches Codex/Harmony tool-call serialization that occasionally leaks into
+# assistant-message content when the model fails to emit a structured
+# ``function_call`` item.  Accepts the common forms:
+#
+#   to=functions.exec_command
+#   assistant to=functions.exec_command
+#   <|channel|>commentary to=functions.exec_command
+#
+# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
+# Harmony channel prefix varies by degeneration mode.  Case-insensitive to
+# cover lowercase/uppercase ``assistant`` variants.
+_TOOL_CALL_LEAK_PATTERN = re.compile(
+    r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
+    re.IGNORECASE,
+)
+
+
 # ---------------------------------------------------------------------------
 # Multimodal content helpers
 # ---------------------------------------------------------------------------

-def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
+def _chat_content_to_responses_parts(content: Any, *, role: str = "user") -> List[Dict[str, Any]]:
    """Convert chat-style multimodal content to Responses API input parts.

    Input:  ``[{"type":"text"|"image_url", ...}]`` (native OpenAI Chat format)
-    Output: ``[{"type":"input_text"|"input_image", ...}]`` (Responses format)
+    Output: ``[{"type":"input_text"|"output_text"|"input_image", ...}]`` (Responses format)
+
+    The ``role`` parameter controls the text content type:
+    - ``"user"`` (default) → ``"input_text"``
+    - ``"assistant"`` → ``"output_text"``
+
+    The Responses API rejects ``input_text`` inside assistant messages and
+    ``output_text`` inside user messages, so callers MUST pass the correct
+    role for the message being converted.

    Returns an empty list when ``content`` is not a list or contains no
    recognized parts — callers fall back to the string path.
    """
+    text_type = "output_text" if role == "assistant" else "input_text"
    if not isinstance(content, list):
        return []
    converted: List[Dict[str, Any]] = []
    for part in content:
        if isinstance(part, str):
            if part:
-                converted.append({"type": "input_text", "text": part})
+                converted.append({"type": text_type, "text": part})
            continue
        if not isinstance(part, dict):
            continue
@@ -50,7 +76,7 @@ def _chat_content_to_responses_parts(content: Any) -> List[Dict[str, Any]]:
        if ptype in {"text", "input_text", "output_text"}:
            text = part.get("text")
            if isinstance(text, str) and text:
-                converted.append({"type": "input_text", "text": text})
+                converted.append({"type": text_type, "text": text})
            continue
        if ptype in {"image_url", "input_image"}:
            image_ref = part.get("image_url")
@@ -201,6 +227,23 @@ def _responses_tools(tools: Optional[List[Dict[str, Any]]] = None) -> Optional[L
 # Message format conversion
 # ---------------------------------------------------------------------------

+_RESPONSE_MESSAGE_STATUSES = {"completed", "incomplete", "in_progress"}
+
+
+def _normalize_responses_message_status(value: Any, *, default: str = "completed") -> str:
+    """Normalize a Responses assistant message status for replay.
+
+    The API accepts completed/incomplete/in_progress on replayed assistant
+    output messages.  Preserve those exactly (modulo case/hyphen spelling) so
+    incomplete Codex continuation turns don't get falsely marked completed.
+    """
+    if isinstance(value, str):
+        status = value.strip().lower().replace("-", "_").replace(" ", "_")
+        if status in _RESPONSE_MESSAGE_STATUSES:
+            return status
+    return default
+
+
 def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Convert internal chat-style messages to Responses input items."""
    items: List[Dict[str, Any]] = []
@@ -216,9 +259,10 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
        if role in {"user", "assistant"}:
            content = msg.get("content", "")
            if isinstance(content, list):
-                content_parts = _chat_content_to_responses_parts(content)
+                content_parts = _chat_content_to_responses_parts(content, role=role)
+                text_type = "output_text" if role == "assistant" else "input_text"
                content_text = "".join(
-                    p.get("text", "") for p in content_parts if p.get("type") == "input_text"
+                    p.get("text", "") for p in content_parts if p.get("type") == text_type
                )
            else:
                content_parts = []
@@ -245,7 +289,57 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
                                seen_item_ids.add(item_id)
                            has_codex_reasoning = True

-                if content_parts:
+                # Replay exact assistant message items (with id/phase) from
+                # previous turns so the API can maintain prefix-cache hits.
+                # OpenAI docs: "preserve and resend phase on all assistant
+                # messages — dropping it can degrade performance."
+                codex_message_items = msg.get("codex_message_items")
+                replayed_message_items = 0
+                if isinstance(codex_message_items, list):
+                    for raw_item in codex_message_items:
+                        if not isinstance(raw_item, dict):
+                            continue
+                        if raw_item.get("type") != "message" or raw_item.get("role") != "assistant":
+                            continue
+                        raw_content_parts = raw_item.get("content")
+                        if not isinstance(raw_content_parts, list):
+                            continue
+
+                        normalized_content_parts = []
+                        for part in raw_content_parts:
+                            if not isinstance(part, dict):
+                                continue
+                            part_type = str(part.get("type") or "").strip()
+                            if part_type not in {"output_text", "text"}:
+                                continue
+                            text = part.get("text", "")
+                            if text is None:
+                                text = ""
+                            if not isinstance(text, str):
+                                text = str(text)
+                            normalized_content_parts.append({"type": "output_text", "text": text})
+
+                        if not normalized_content_parts:
+                            continue
+
+                        replay_item = {
+                            "type": "message",
+                            "role": "assistant",
+                            "status": _normalize_responses_message_status(raw_item.get("status")),
+                            "content": normalized_content_parts,
+                        }
+                        item_id = raw_item.get("id")
+                        if isinstance(item_id, str) and item_id.strip():
+                            replay_item["id"] = item_id.strip()
+                        phase = raw_item.get("phase")
+                        if isinstance(phase, str) and phase.strip():
+                            replay_item["phase"] = phase.strip()
+                        items.append(replay_item)
+                        replayed_message_items += 1
+
+                if replayed_message_items > 0:
+                    pass
+                elif content_parts:
                    items.append({"role": "assistant", "content": content_parts})
                elif content_text.strip():
                    items.append({"role": "assistant", "content": content_text})
@@ -405,6 +499,47 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                normalized.append(reasoning_item)
            continue

+        if item_type == "message":
+            role = item.get("role")
+            if role != "assistant":
+                raise ValueError(f"Codex Responses input[{idx}] message items must have role='assistant'.")
+            content = item.get("content")
+            if not isinstance(content, list):
+                raise ValueError(f"Codex Responses input[{idx}] message item must have content list.")
+            normalized_content = []
+            for part_idx, part in enumerate(content):
+                if not isinstance(part, dict):
+                    raise ValueError(
+                        f"Codex Responses input[{idx}] message content[{part_idx}] must be an object."
+                    )
+                part_type = part.get("type")
+                if part_type not in {"output_text", "text"}:
+                    raise ValueError(
+                        f"Codex Responses input[{idx}] message content[{part_idx}] has unsupported type {part_type!r}."
+                    )
+                text = part.get("text", "")
+                if text is None:
+                    text = ""
+                if not isinstance(text, str):
+                    text = str(text)
+                normalized_content.append({"type": "output_text", "text": text})
+            if not normalized_content:
+                raise ValueError(f"Codex Responses input[{idx}] message item must contain at least one text part.")
+            normalized_item: Dict[str, Any] = {
+                "type": "message",
+                "role": "assistant",
+                "status": _normalize_responses_message_status(item.get("status")),
+                "content": normalized_content,
+            }
+            item_id = item.get("id")
+            if isinstance(item_id, str) and item_id.strip():
+                normalized_item["id"] = item_id.strip()
+            phase = item.get("phase")
+            if isinstance(phase, str) and phase.strip():
+                normalized_item["phase"] = phase.strip()
+            normalized.append(normalized_item)
+            continue
+
        role = item.get("role")
        if role in {"user", "assistant"}:
            content = item.get("content", "")
@@ -412,13 +547,16 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                content = ""
            if isinstance(content, list):
                # Multimodal content from ``_chat_messages_to_responses_input``
-                # is already in Responses format (``input_text`` / ``input_image``).
-                # Validate each part and pass through.
+                # is already in Responses format (``input_text`` / ``output_text``
+                # / ``input_image``).  Validate each part and pass through.
+                # Use the correct text type for the role — ``output_text`` for
+                # assistant messages, ``input_text`` for user messages.
+                text_type = "output_text" if role == "assistant" else "input_text"
                validated: List[Dict[str, Any]] = []
                for part_idx, part in enumerate(content):
                    if isinstance(part, str):
                        if part:
-                            validated.append({"type": "input_text", "text": part})
+                            validated.append({"type": text_type, "text": part})
                        continue
                    if not isinstance(part, dict):
                        raise ValueError(
@@ -429,7 +567,7 @@ def _preflight_codex_input_items(raw_items: Any) -> List[Dict[str, Any]]:
                        text = part.get("text", "")
                        if not isinstance(text, str):
                            text = str(text or "")
-                        validated.append({"type": "input_text", "text": text})
+                        validated.append({"type": text_type, "text": text})
                    elif ptype in {"input_image", "image_url"}:
                        image_ref = part.get("image_url", "")
                        detail = part.get("detail")
@@ -686,6 +824,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
    content_parts: List[str] = []
    reasoning_parts: List[str] = []
    reasoning_items_raw: List[Dict[str, Any]] = []
+    message_items_raw: List[Dict[str, Any]] = []
    tool_calls: List[Any] = []
    has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
    saw_commentary_phase = False
@@ -704,6 +843,7 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:

        if item_type == "message":
            item_phase = getattr(item, "phase", None)
+            normalized_phase = None
            if isinstance(item_phase, str):
                normalized_phase = item_phase.strip().lower()
                if normalized_phase in {"commentary", "analysis"}:
@@ -713,6 +853,18 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
            message_text = _extract_responses_message_text(item)
            if message_text:
                content_parts.append(message_text)
+                raw_message_item: Dict[str, Any] = {
+                    "type": "message",
+                    "role": "assistant",
+                    "status": _normalize_responses_message_status(item_status),
+                    "content": [{"type": "output_text", "text": message_text}],
+                }
+                item_id = getattr(item, "id", None)
+                if isinstance(item_id, str) and item_id:
+                    raw_message_item["id"] = item_id
+                if normalized_phase:
+                    raw_message_item["phase"] = normalized_phase
+                message_items_raw.append(raw_message_item)
        elif item_type == "reasoning":
            reasoning_text = _extract_responses_reasoning_text(item)
            if reasoning_text:
@@ -787,6 +939,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        if isinstance(out_text, str):
            final_text = out_text.strip()

+    # ── Tool-call leak recovery ──────────────────────────────────
+    # gpt-5.x on the Codex Responses API sometimes degenerates and emits
+    # what should be a structured `function_call` item as plain assistant
+    # text using the Harmony/Codex serialization (``to=functions.foo
+    # {json}`` or ``assistant to=functions.foo {json}``). The model
+    # intended to call a tool, but the intent never made it into
+    # ``response.output`` as a ``function_call`` item, so ``tool_calls``
+    # is empty here. If we pass this through, the parent sees a
+    # confident-looking summary with no audit trail (empty ``tool_trace``)
+    # and no tools actually ran — the Taiwan-embassy-email incident.
+    #
+    # Detection: leaked tokens always contain ``to=functions.<name>`` and
+    # the assistant message has no real tool calls. Treat it as incomplete
+    # so the existing Codex-incomplete continuation path (3 retries,
+    # handled in run_agent.py) gets a chance to re-elicit a proper
+    # ``function_call`` item. The existing loop already handles message
+    # append, dedup, and retry budget.
+    leaked_tool_call_text = False
+    if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
+        leaked_tool_call_text = True
+        logger.warning(
+            "Codex response contains leaked tool-call text in assistant content "
+            "(no structured function_call items). Treating as incomplete so the "
+            "continuation path can re-elicit a proper tool call. Leaked snippet: %r",
+            final_text[:300],
+        )
+        # Clear the text so downstream code doesn't surface the garbage as
+        # a summary. The encrypted reasoning items (if any) are preserved
+        # so the model keeps its chain-of-thought on the retry.
+        final_text = ""
+
    assistant_message = SimpleNamespace(
        content=final_text,
        tool_calls=tool_calls,
@@ -794,10 +977,13 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        reasoning_content=None,
        reasoning_details=None,
        codex_reasoning_items=reasoning_items_raw or None,
+        codex_message_items=message_items_raw or None,
    )

    if tool_calls:
        finish_reason = "tool_calls"
+    elif leaked_tool_call_text:
+        finish_reason = "incomplete"
    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
        finish_reason = "incomplete"
    elif reasoning_items_raw and not final_text:
@@ -61,9 +61,93 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"

 # Chars per token rough estimate
 _CHARS_PER_TOKEN = 4
+# Flat token cost per attached image part.  Real cost varies by provider and
+# dimensions (Anthropic ≈ width×height/750, GPT-4o up to ~1700 for
+# high-detail 2048×2048, Gemini 258/tile), but 1600 is a realistic ceiling
+# that keeps compression budgeting honest for multi-image conversations.
+# Matches Claude Code's IMAGE_TOKEN_ESTIMATE constant.
+_IMAGE_TOKEN_ESTIMATE = 1600
+# Same figure expressed in the char-budget currency the rest of the
+# compressor speaks in.  Used when accumulating message "content length"
+# for tail-cut decisions.
+_IMAGE_CHAR_EQUIVALENT = _IMAGE_TOKEN_ESTIMATE * _CHARS_PER_TOKEN
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _content_length_for_budget(raw_content: Any) -> int:
+    """Return the effective char-length of a message's content for token budgeting.
+
+    Plain strings: ``len(content)``. Multimodal lists: sum of text-part
+    ``len(text)`` plus a flat ``_IMAGE_CHAR_EQUIVALENT`` per image part
+    (``image_url`` / ``input_image`` / Anthropic-style ``image``). This
+    keeps the compressor from treating a turn with 5 attached images as
+    near-zero tokens just because the text part is empty.
+    """
+    if isinstance(raw_content, str):
+        return len(raw_content)
+    if not isinstance(raw_content, list):
+        return len(str(raw_content or ""))
+
+    total = 0
+    for p in raw_content:
+        if isinstance(p, str):
+            total += len(p)
+            continue
+        if not isinstance(p, dict):
+            total += len(str(p))
+            continue
+        ptype = p.get("type")
+        if ptype in {"image_url", "input_image", "image"}:
+            total += _IMAGE_CHAR_EQUIVALENT
+        else:
+            # text / input_text / tool_result-with-text / anything else with
+            # a text field.  Ignore the raw base64 payload inside image_url
+            # dicts — dimensions don't matter, only whether it's an image.
+            total += len(p.get("text", "") or "")
+    return total
+
+
+def _content_text_for_contains(content: Any) -> str:
+    """Return a best-effort text view of message content.
+
+    Used only for substring checks when we need to know whether we've already
+    appended a note to a message. Keeps multimodal lists intact elsewhere.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+        return "\n".join(part for part in parts if part)
+    return str(content)
+
+
+def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
+    """Append or prepend plain text to message content safely.
+
+    Compression sometimes needs to add a note or merge a summary into an
+    existing message. Message content may be plain text or a multimodal list of
+    blocks, so direct string concatenation is not always safe.
+    """
+    if content is None:
+        return text
+    if isinstance(content, str):
+        return text + content if prepend else content + text
+    if isinstance(content, list):
+        text_block = {"type": "text", "text": text}
+        return [text_block, *content] if prepend else [*content, text_block]
+    rendered = str(content)
+    return text + rendered if prepend else rendered + text
+
+
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -253,6 +337,11 @@ class ContextCompressor(ContextEngine):
        self._context_probed = False
        self._context_probe_persistable = False
        self._previous_summary = None
+        self._last_summary_error = None
+        self._last_summary_dropped_count = 0
+        self._last_summary_fallback_used = False
+        self._last_aux_model_failure_error = None
+        self._last_aux_model_failure_model = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -276,6 +365,13 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        # Recalculate token budgets for the new context length so the
+        # compressor stays calibrated after a model switch (e.g. 200K → 32K).
+        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
+        self.tail_token_budget = target_tokens
+        self.max_summary_tokens = min(
+            int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
+        )

    def __init__(
        self,
@@ -348,6 +444,18 @@ class ContextCompressor(ContextEngine):
        self._last_compression_savings_pct: float = 100.0
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
+        self._last_summary_error: Optional[str] = None
+        # When summary generation fails and a static fallback is inserted,
+        # record how many turns were unrecoverably dropped so callers
+        # (gateway hygiene, /compress) can surface a visible warning.
+        self._last_summary_dropped_count: int = 0
+        self._last_summary_fallback_used: bool = False
+        # When a user-configured summary model fails and we recover by
+        # retrying on the main model, record the failure so gateway /
+        # CLI callers can still warn the user even though compression
+        # succeeded.  Silent recovery would hide the broken config.
+        self._last_aux_model_failure_error: Optional[str] = None
+        self._last_aux_model_failure_model: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -434,7 +542,7 @@ class ContextCompressor(ContextEngine):
            for i in range(len(result) - 1, -1, -1):
                msg = result[i]
                raw_content = msg.get("content") or ""
-                content_len = sum(len(p.get("text", "")) for p in raw_content) if isinstance(raw_content, list) else len(raw_content)
+                content_len = _content_length_for_budget(raw_content)
                msg_tokens = content_len // _CHARS_PER_TOKEN + 10
                for tc in msg.get("tool_calls") or []:
                    if isinstance(tc, dict):
@@ -771,10 +879,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
            self._summary_model_fallen_back = False
+            self._last_summary_error = None
            return self._with_summary_prefix(summary)
        except RuntimeError:
            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            self._last_summary_error = "no auxiliary LLM provider configured"
            logging.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
                            "for %d seconds.",
@@ -805,13 +915,57 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                    "Falling back to main model '%s' for compression.",
                    self.summary_model, e, self.model,
                )
+                # Record the aux-model failure so callers can warn the user
+                # even if the retry-on-main succeeds — a misconfigured aux
+                # model is something the user needs to fix.
+                _err_text = str(e).strip() or e.__class__.__name__
+                if len(_err_text) > 220:
+                    _err_text = _err_text[:217].rstrip() + "..."
+                self._last_aux_model_failure_error = _err_text
+                self._last_aux_model_failure_model = self.summary_model
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(turns_to_summarize)  # retry immediately
+                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately
+
+            # Unknown-error best-effort retry on main model.  Losing N turns of
+            # context is almost always worse than one extra summary attempt, so
+            # if we haven't already fallen back and the summary model differs
+            # from the main model, try once more on main before entering
+            # cooldown.  Errors that DID match _is_model_not_found above are
+            # already handled by the fast-path retry; this branch catches
+            # everything else (400s, provider-specific "no route" strings,
+            # aggregator rejections, etc.) where auto-retry is still safer
+            # than dropping the turns.
+            if (
+                self.summary_model
+                and self.summary_model != self.model
+                and not getattr(self, "_summary_model_fallen_back", False)
+            ):
+                self._summary_model_fallen_back = True
+                logging.warning(
+                    "Summary model '%s' failed (%s). "
+                    "Retrying on main model '%s' before giving up.",
+                    self.summary_model, e, self.model,
+                )
+                # Record the aux-model failure (see 404 branch above) — user
+                # should know their configured model is broken even if main
+                # recovers the call.
+                _err_text = str(e).strip() or e.__class__.__name__
+                if len(_err_text) > 220:
+                    _err_text = _err_text[:217].rstrip() + "..."
+                self._last_aux_model_failure_error = _err_text
+                self._last_aux_model_failure_model = self.summary_model
+                self.summary_model = ""  # empty = use main model
+                self._summary_failure_cooldown_until = 0.0
+                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            err_text = str(e).strip() or e.__class__.__name__
+            if len(err_text) > 220:
+                err_text = err_text[:217].rstrip() + "..."
+            self._last_summary_error = err_text
            logging.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
@@ -1026,8 +1180,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        for i in range(n - 1, head_end - 1, -1):
            msg = messages[i]
-            content = msg.get("content") or ""
-            msg_tokens = len(content) // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
+            raw_content = msg.get("content") or ""
+            content_len = _content_length_for_budget(raw_content)
+            msg_tokens = content_len // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
            # Include tool call arguments in estimate
            for tc in msg.get("tool_calls") or []:
                if isinstance(tc, dict):
@@ -1058,6 +1213,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        return max(cut_idx, head_end + 1)

+    # ------------------------------------------------------------------
+    # ContextEngine: manual /compress preflight
+    # ------------------------------------------------------------------
+
+    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
+        """Return True if there is a non-empty middle region to compact.
+
+        Overrides the ABC default so the gateway ``/compress`` guard can
+        skip the LLM call when the transcript is still entirely inside
+        the protected head/tail.
+        """
+        compress_start = self._align_boundary_forward(messages, self.protect_first_n)
+        compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
+        return compress_start < compress_end
+
    # ------------------------------------------------------------------
    # Main compression entry point
    # ------------------------------------------------------------------
@@ -1081,6 +1251,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                related to this topic and be more aggressive about compressing
                everything else.  Inspired by Claude Code's ``/compact``.
        """
+        # Reset per-call summary failure state — callers inspect these fields
+        # after compress() returns to decide whether to surface a warning.
+        self._last_summary_dropped_count = 0
+        self._last_summary_fallback_used = False
+        self._last_summary_error = None
+        self._last_aux_model_failure_error = None
+        self._last_aux_model_failure_model = None
        n_messages = len(messages)
        # Only need head + 3 tail messages minimum (token budget decides the real tail size)
        _min_for_compress = self.protect_first_n + 3 + 1
@@ -1144,10 +1321,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content") or ""
+                existing = msg.get("content")
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in existing:
-                    msg["content"] = existing + "\n\n" + _compression_note
+                if _compression_note not in _content_text_for_contains(existing):
+                    msg["content"] = _append_text_to_content(
+                        existing,
+                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
+                    )
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1156,11 +1336,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            if not self.quiet_mode:
                logger.warning("Summary generation failed — inserting static fallback context marker")
            n_dropped = compress_end - compress_start
+            self._last_summary_dropped_count = n_dropped
+            self._last_summary_fallback_used = True
            summary = (
                f"{SUMMARY_PREFIX}\n"
-                f"Summary generation was unavailable. {n_dropped} conversation turns were "
+                f"Summary generation was unavailable. {n_dropped} message(s) were "
                f"removed to free context space but could not be summarized. The removed "
-                f"turns contained earlier work in this session. Continue based on the "
+                f"messages contained earlier work in this session. Continue based on the "
                f"recent messages below and the current state of any files or resources."
            )

@@ -1191,12 +1373,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                original = msg.get("content") or ""
-                msg["content"] = (
+                merged_prefix = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                    + original
+                )
+                msg["content"] = _append_text_to_content(
+                    msg.get("content"),
+                    merged_prefix,
+                    prepend=True,
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -78,6 +78,7 @@ class ContextEngine(ABC):
        self,
        messages: List[Dict[str, Any]],
        current_tokens: int = None,
+        focus_topic: str = None,
    ) -> List[Dict[str, Any]]:
        """Compact the message list and return the new message list.

@@ -86,6 +87,12 @@ class ContextEngine(ABC):
        context budget. The implementation is free to summarize, build a
        DAG, or do anything else — as long as the returned list is a valid
        OpenAI-format message sequence.
+
+        Args:
+            focus_topic: Optional topic string from manual ``/compress <focus>``.
+                Engines that support guided compression should prioritise
+                preserving information related to this topic.  Engines that
+                don't support it may simply ignore this argument.
        """

    # -- Optional: pre-flight check ----------------------------------------
@@ -98,6 +105,21 @@ class ContextEngine(ABC):
        """
        return False

+    # -- Optional: manual /compress preflight ------------------------------
+
+    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
+        """Quick check: is there anything in ``messages`` that can be compacted?
+
+        Used by the gateway ``/compress`` command as a preflight guard —
+        returning False lets the gateway report "nothing to compress yet"
+        without making an LLM call.
+
+        Default returns True (always attempt).  Engines with a cheap way
+        to introspect their own head/tail boundaries should override this
+        to return False when the transcript is still entirely protected.
+        """
+        return True
+
    # -- Optional: session lifecycle ---------------------------------------

    def on_session_start(self, session_id: str, **kwargs) -> None:
@@ -46,6 +46,47 @@ def _resolve_args() -> list[str]:
    return shlex.split(raw)


+def _resolve_home_dir() -> str:
+    """Return a stable HOME for child ACP processes."""
+
+    try:
+        from hermes_constants import get_subprocess_home
+
+        profile_home = get_subprocess_home()
+        if profile_home:
+            return profile_home
+    except Exception:
+        pass
+
+    home = os.environ.get("HOME", "").strip()
+    if home:
+        return home
+
+    expanded = os.path.expanduser("~")
+    if expanded and expanded != "~":
+        return expanded
+
+    try:
+        import pwd
+
+        resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
+        if resolved:
+            return resolved
+    except Exception:
+        pass
+
+    # Last resort: /tmp (writable on any POSIX system). Avoids crashing the
+    # subprocess with no HOME; callers can set HERMES_HOME explicitly if they
+    # need a different writable dir.
+    return "/tmp"
+
+
+def _build_subprocess_env() -> dict[str, str]:
+    env = os.environ.copy()
+    env["HOME"] = _resolve_home_dir()
+    return env
+
+
 def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    return {
        "jsonrpc": "2.0",
@@ -382,6 +423,7 @@ class CopilotACPClient:
                text=True,
                bufsize=1,
                cwd=self._acp_cwd,
+                env=_build_subprocess_env(),
            )
        except FileNotFoundError as exc:
            raise RuntimeError(
@@ -7,13 +7,13 @@ import random
 import threading
 import time
 import uuid
-import os
 import re
 from dataclasses import dataclass, fields, replace
 from datetime import datetime
 from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_constants import OPENROUTER_BASE_URL
+from hermes_cli.config import get_env_value
 import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
@@ -455,6 +455,125 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

+    def _sync_codex_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
+        """Sync a Codex device_code pool entry from auth.json if tokens differ.
+
+        When a Codex OAuth access token expires (or the ChatGPT account hits
+        its 5h/weekly quota), the pool entry gets marked ``STATUS_EXHAUSTED``
+        with a ``last_error_reset_at`` that can be many hours in the future.
+        Meanwhile the user may run ``hermes model`` / ``hermes auth`` which
+        performs a fresh device-code login and writes new tokens to
+        ``auth.json`` under ``_auth_store_lock``.  Without this sync the pool
+        entry stays frozen until ``last_error_reset_at`` elapses — even
+        though fresh credentials are sitting on disk — and every request
+        fails with "no available entries (all exhausted or empty)".
+
+        Mirrors the Nous/Anthropic resync paths above.  Only applies to
+        device_code-sourced entries; env/API-key-sourced entries have no
+        auth.json shadow to sync from.
+        """
+        if self.provider != "openai-codex" or entry.source != "device_code":
+            return entry
+        try:
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                state = _load_provider_state(auth_store, "openai-codex")
+            if not isinstance(state, dict):
+                return entry
+            tokens = state.get("tokens")
+            if not isinstance(tokens, dict):
+                return entry
+            store_access = tokens.get("access_token", "")
+            store_refresh = tokens.get("refresh_token", "")
+            # Adopt auth.json tokens when either side differs.  Codex refresh
+            # tokens are single-use too, so a fresh refresh_token from
+            # another process means our entry's pair is consumed/stale.
+            entry_access = entry.access_token or ""
+            entry_refresh = entry.refresh_token or ""
+            if store_access and (
+                store_access != entry_access
+                or (store_refresh and store_refresh != entry_refresh)
+            ):
+                logger.debug(
+                    "Pool entry %s: syncing Codex tokens from auth.json "
+                    "(refreshed by another process)",
+                    entry.id,
+                )
+                field_updates: Dict[str, Any] = {
+                    "access_token": store_access,
+                    "refresh_token": store_refresh or entry.refresh_token,
+                    "last_status": None,
+                    "last_status_at": None,
+                    "last_error_code": None,
+                    "last_error_reason": None,
+                    "last_error_message": None,
+                    "last_error_reset_at": None,
+                }
+                if state.get("last_refresh"):
+                    field_updates["last_refresh"] = state["last_refresh"]
+                updated = replace(entry, **field_updates)
+                self._replace_entry(entry, updated)
+                self._persist()
+                return updated
+        except Exception as exc:
+            logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
+        return entry
+
+    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
+        """Sync a Nous pool entry from auth.json if tokens differ.
+
+        Nous OAuth refresh tokens are single-use.  When another process
+        (e.g. a concurrent cron) refreshes the token via
+        ``resolve_nous_runtime_credentials``, it writes fresh tokens to
+        auth.json under ``_auth_store_lock``.  The pool entry's tokens
+        become stale.  This method detects that and adopts the newer pair,
+        avoiding a "refresh token reuse" revocation on the Nous Portal.
+        """
+        if self.provider != "nous" or entry.source != "device_code":
+            return entry
+        try:
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                state = _load_provider_state(auth_store, "nous")
+            if not state:
+                return entry
+            store_refresh = state.get("refresh_token", "")
+            store_access = state.get("access_token", "")
+            if store_refresh and store_refresh != entry.refresh_token:
+                logger.debug(
+                    "Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
+                    entry.id,
+                )
+                field_updates: Dict[str, Any] = {
+                    "access_token": store_access,
+                    "refresh_token": store_refresh,
+                    "last_status": None,
+                    "last_status_at": None,
+                    "last_error_code": None,
+                }
+                if state.get("expires_at"):
+                    field_updates["expires_at"] = state["expires_at"]
+                if state.get("agent_key"):
+                    field_updates["agent_key"] = state["agent_key"]
+                if state.get("agent_key_expires_at"):
+                    field_updates["agent_key_expires_at"] = state["agent_key_expires_at"]
+                if state.get("inference_base_url"):
+                    field_updates["inference_base_url"] = state["inference_base_url"]
+                extra_updates = dict(entry.extra)
+                for extra_key in ("obtained_at", "expires_in", "agent_key_id",
+                                  "agent_key_expires_in", "agent_key_reused",
+                                  "agent_key_obtained_at"):
+                    val = state.get(extra_key)
+                    if val is not None:
+                        extra_updates[extra_key] = val
+                updated = replace(entry, extra=extra_updates, **field_updates)
+                self._replace_entry(entry, updated)
+                self._persist()
+                return updated
+        except Exception as exc:
+            logger.debug("Failed to sync Nous entry from auth.json: %s", exc)
+        return entry
+
    def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
        """Write refreshed pool entry tokens back to auth.json providers.

@@ -561,6 +680,9 @@ class CredentialPool:
                    last_refresh=refreshed.get("last_refresh"),
                )
            elif self.provider == "nous":
+                synced = self._sync_nous_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
                nous_state = {
                    "access_token": entry.access_token,
                    "refresh_token": entry.refresh_token,
@@ -635,6 +757,26 @@ class CredentialPool:
                    # Credentials file had a valid (non-expired) token — use it directly
                    logger.debug("Credentials file has valid token, using without refresh")
                    return synced
+            # For nous: another process may have consumed the refresh token
+            # between our proactive sync and the HTTP call.  Re-sync from
+            # auth.json and adopt the fresh tokens if available.
+            if self.provider == "nous":
+                synced = self._sync_nous_entry_from_auth_store(entry)
+                if synced.refresh_token != entry.refresh_token:
+                    logger.debug("Nous refresh failed but auth.json has newer tokens — adopting")
+                    updated = replace(
+                        synced,
+                        last_status=STATUS_OK,
+                        last_status_at=None,
+                        last_error_code=None,
+                        last_error_reason=None,
+                        last_error_message=None,
+                        last_error_reset_at=None,
+                    )
+                    self._replace_entry(synced, updated)
+                    self._persist()
+                    self._sync_device_code_entry_to_auth_store(updated)
+                    return updated
            self._mark_exhausted(entry, None)
            return None

@@ -698,6 +840,29 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
+            # For nous entries, sync from auth.json before status checks.
+            # Another process may have successfully refreshed via
+            # resolve_nous_runtime_credentials(), making this entry's
+            # exhausted status stale.
+            if (self.provider == "nous"
+                    and entry.source == "device_code"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_nous_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
+            # For openai-codex entries, same pattern: the user may have
+            # re-authed via `hermes model` / `hermes auth` after a 429/401,
+            # leaving fresh tokens on disk while the pool entry is still
+            # frozen behind last_error_reset_at (can be hours in the
+            # future for ChatGPT weekly windows).
+            if (self.provider == "openai-codex"
+                    and entry.source == "device_code"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_codex_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -739,8 +904,11 @@ class CredentialPool:

        if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
            entry = min(available, key=lambda e: e.request_count)
+            # Increment usage counter so subsequent selections distribute load
+            updated = replace(entry, request_count=entry.request_count + 1)
+            self._replace_entry(entry, updated)
            self._current_id = entry.id
-            return entry
+            return updated

        if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
            entry = available[0]
@@ -1056,6 +1224,18 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                    "inference_base_url": state.get("inference_base_url"),
                    "agent_key": state.get("agent_key"),
                    "agent_key_expires_at": state.get("agent_key_expires_at"),
+                    # Carry the mint/refresh timestamps into the pool so
+                    # freshness-sensitive consumers (self-heal hooks, pool
+                    # pruning by age) can distinguish just-minted credentials
+                    # from stale ones.  Without these, fresh device_code
+                    # entries get obtained_at=None and look older than they
+                    # are (#15099).
+                    "obtained_at": state.get("obtained_at"),
+                    "expires_in": state.get("expires_in"),
+                    "agent_key_id": state.get("agent_key_id"),
+                    "agent_key_expires_in": state.get("agent_key_expires_in"),
+                    "agent_key_reused": state.get("agent_key_reused"),
+                    "agent_key_obtained_at": state.get("agent_key_obtained_at"),
                    "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
                    "label": seeded_label,
                },
@@ -1066,9 +1246,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN).  They don't live in
        # the auth store or credential pool, so we resolve them here.
        try:
-            from hermes_cli.copilot_auth import resolve_copilot_token
+            from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
            token, source = resolve_copilot_token()
            if token:
+                api_token = get_copilot_api_token(token)
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
                if not _is_suppressed(provider, source_name):
                    active_sources.add(source_name)
@@ -1080,7 +1261,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                        {
                            "source": source_name,
                            "auth_type": AUTH_TYPE_API_KEY,
-                            "access_token": token,
+                            "access_token": api_token,
                            "base_url": pconfig.inference_base_url if pconfig else "",
                            "label": source,
                        },
@@ -1168,7 +1349,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        def _is_source_suppressed(_p, _s):  # type: ignore[misc]
            return False
    if provider == "openrouter":
-        token = os.getenv("OPENROUTER_API_KEY", "").strip()
+        # Check both os.environ and ~/.hermes/.env file
+        token = (get_env_value("OPENROUTER_API_KEY") or "").strip()
        if token:
            source = "env:OPENROUTER_API_KEY"
            if _is_source_suppressed(provider, source):
@@ -1194,7 +1376,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool

    env_url = ""
    if pconfig.base_url_env_var:
-        env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+        env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/")

    env_vars = list(pconfig.api_key_env_vars)
    if provider == "anthropic":
@@ -1205,7 +1387,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
        ]

    for env_var in env_vars:
-        token = os.getenv(env_var, "").strip()
+        # Check both os.environ and ~/.hermes/.env file
+        token = (get_env_value(env_var) or "").strip()
        if not token:
            continue
        source = f"env:{env_var}"
@@ -47,7 +47,6 @@ from __future__ import annotations

 import os
 from dataclasses import dataclass, field
-from pathlib import Path
 from typing import Callable, List, Optional


@@ -0,0 +1,561 @@
+"""Curator — background skill maintenance orchestrator.
+
+The curator is an auxiliary-model task that periodically reviews agent-created
+skills and maintains the collection. It runs inactivity-triggered (no cron
+daemon): when the agent is idle and the last curator run was longer than
+``interval_hours`` ago, ``maybe_run_curator()`` spawns a forked AIAgent to do
+the review.
+
+Responsibilities:
+  - Auto-transition lifecycle states based on last_used_at timestamps
+  - Spawn a background review agent that can pin / archive / consolidate /
+    patch agent-created skills via skill_manage
+  - Persist curator state (last_run_at, paused, etc.) in .curator_state
+
+Strict invariants:
+  - Only touches agent-created skills (see tools/skill_usage.is_agent_created)
+  - Never auto-deletes — only archives. Archive is recoverable.
+  - Pinned skills bypass all auto-transitions
+  - Uses the auxiliary client; never touches the main session's prompt cache
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import tempfile
+import threading
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any, Callable, Dict, Optional
+
+from hermes_constants import get_hermes_home
+from tools import skill_usage
+
+logger = logging.getLogger(__name__)
+
+
+DEFAULT_INTERVAL_HOURS = 24 * 7  # 7 days
+DEFAULT_MIN_IDLE_HOURS = 2
+DEFAULT_STALE_AFTER_DAYS = 30
+DEFAULT_ARCHIVE_AFTER_DAYS = 90
+
+
+# ---------------------------------------------------------------------------
+# .curator_state — persistent scheduler + status
+# ---------------------------------------------------------------------------
+
+def _state_file() -> Path:
+    return get_hermes_home() / "skills" / ".curator_state"
+
+
+def _default_state() -> Dict[str, Any]:
+    return {
+        "last_run_at": None,
+        "last_run_duration_seconds": None,
+        "last_run_summary": None,
+        "paused": False,
+        "run_count": 0,
+    }
+
+
+def load_state() -> Dict[str, Any]:
+    path = _state_file()
+    if not path.exists():
+        return _default_state()
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+        if isinstance(data, dict):
+            base = _default_state()
+            base.update({k: v for k, v in data.items() if k in base or k.startswith("_")})
+            return base
+    except (OSError, json.JSONDecodeError) as e:
+        logger.debug("Failed to read curator state: %s", e)
+    return _default_state()
+
+
+def save_state(data: Dict[str, Any]) -> None:
+    path = _state_file()
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        fd, tmp = tempfile.mkstemp(dir=str(path.parent), prefix=".curator_state_", suffix=".tmp")
+        try:
+            with os.fdopen(fd, "w", encoding="utf-8") as f:
+                json.dump(data, f, indent=2, sort_keys=True, ensure_ascii=False)
+                f.flush()
+                os.fsync(f.fileno())
+            os.replace(tmp, path)
+        except BaseException:
+            try:
+                os.unlink(tmp)
+            except OSError:
+                pass
+            raise
+    except Exception as e:
+        logger.debug("Failed to save curator state: %s", e, exc_info=True)
+
+
+def set_paused(paused: bool) -> None:
+    state = load_state()
+    state["paused"] = bool(paused)
+    save_state(state)
+
+
+def is_paused() -> bool:
+    return bool(load_state().get("paused"))
+
+
+# ---------------------------------------------------------------------------
+# Config access
+# ---------------------------------------------------------------------------
+
+def _load_config() -> Dict[str, Any]:
+    """Read curator.* config from ~/.hermes/config.yaml. Tolerates missing file."""
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+    except Exception as e:
+        logger.debug("Failed to load config for curator: %s", e)
+        return {}
+    if not isinstance(cfg, dict):
+        return {}
+    cur = cfg.get("curator") or {}
+    if not isinstance(cur, dict):
+        return {}
+    return cur
+
+
+def is_enabled() -> bool:
+    """Default ON when no config says otherwise."""
+    cfg = _load_config()
+    return bool(cfg.get("enabled", True))
+
+
+def get_interval_hours() -> int:
+    cfg = _load_config()
+    try:
+        return int(cfg.get("interval_hours", DEFAULT_INTERVAL_HOURS))
+    except (TypeError, ValueError):
+        return DEFAULT_INTERVAL_HOURS
+
+
+def get_min_idle_hours() -> float:
+    cfg = _load_config()
+    try:
+        return float(cfg.get("min_idle_hours", DEFAULT_MIN_IDLE_HOURS))
+    except (TypeError, ValueError):
+        return DEFAULT_MIN_IDLE_HOURS
+
+
+def get_stale_after_days() -> int:
+    cfg = _load_config()
+    try:
+        return int(cfg.get("stale_after_days", DEFAULT_STALE_AFTER_DAYS))
+    except (TypeError, ValueError):
+        return DEFAULT_STALE_AFTER_DAYS
+
+
+def get_archive_after_days() -> int:
+    cfg = _load_config()
+    try:
+        return int(cfg.get("archive_after_days", DEFAULT_ARCHIVE_AFTER_DAYS))
+    except (TypeError, ValueError):
+        return DEFAULT_ARCHIVE_AFTER_DAYS
+
+
+# ---------------------------------------------------------------------------
+# Idle / interval check
+# ---------------------------------------------------------------------------
+
+def _parse_iso(ts: Optional[str]) -> Optional[datetime]:
+    if not ts:
+        return None
+    try:
+        return datetime.fromisoformat(ts)
+    except (TypeError, ValueError):
+        return None
+
+
+def should_run_now(now: Optional[datetime] = None) -> bool:
+    """Return True if the curator should run immediately.
+
+    Gates:
+      - curator.enabled == True
+      - not paused
+      - last_run_at missing, OR older than interval_hours
+
+    The idle check (min_idle_hours) is applied at the call site where we know
+    whether an agent is actively running — here we only enforce the static
+    gates.
+    """
+    if not is_enabled():
+        return False
+    if is_paused():
+        return False
+
+    state = load_state()
+    last = _parse_iso(state.get("last_run_at"))
+    if last is None:
+        return True
+
+    if now is None:
+        now = datetime.now(timezone.utc)
+    if last.tzinfo is None:
+        last = last.replace(tzinfo=timezone.utc)
+    interval = timedelta(hours=get_interval_hours())
+    return (now - last) >= interval
+
+
+# ---------------------------------------------------------------------------
+# Automatic state transitions (pure function, no LLM)
+# ---------------------------------------------------------------------------
+
+def apply_automatic_transitions(now: Optional[datetime] = None) -> Dict[str, int]:
+    """Walk every agent-created skill and move active/stale/archived based on
+    last_used_at. Pinned skills are never touched. Returns a counter dict
+    describing what changed."""
+    from tools import skill_usage as _u
+
+    if now is None:
+        now = datetime.now(timezone.utc)
+    stale_cutoff = now - timedelta(days=get_stale_after_days())
+    archive_cutoff = now - timedelta(days=get_archive_after_days())
+
+    counts = {"marked_stale": 0, "archived": 0, "reactivated": 0, "checked": 0}
+
+    for row in _u.agent_created_report():
+        counts["checked"] += 1
+        name = row["name"]
+        if row.get("pinned"):
+            continue
+
+        last_used = _parse_iso(row.get("last_used_at"))
+        # If never used, treat as using created_at as the anchor so new skills
+        # don't immediately archive themselves.
+        anchor = last_used or _parse_iso(row.get("created_at")) or now
+        if anchor.tzinfo is None:
+            anchor = anchor.replace(tzinfo=timezone.utc)
+
+        current = row.get("state", _u.STATE_ACTIVE)
+
+        if anchor <= archive_cutoff and current != _u.STATE_ARCHIVED:
+            ok, _msg = _u.archive_skill(name)
+            if ok:
+                counts["archived"] += 1
+        elif anchor <= stale_cutoff and current == _u.STATE_ACTIVE:
+            _u.set_state(name, _u.STATE_STALE)
+            counts["marked_stale"] += 1
+        elif anchor > stale_cutoff and current == _u.STATE_STALE:
+            # Skill got used again after being marked stale — reactivate.
+            _u.set_state(name, _u.STATE_ACTIVE)
+            counts["reactivated"] += 1
+
+    return counts
+
+
+# ---------------------------------------------------------------------------
+# Review prompt for the forked agent
+# ---------------------------------------------------------------------------
+
+CURATOR_REVIEW_PROMPT = (
+    "You are running as Hermes' background skill CURATOR. This is an "
+    "UMBRELLA-BUILDING consolidation pass, not a passive audit and not a "
+    "duplicate-finder.\n\n"
+    "The goal of the skill collection is a LIBRARY OF CLASS-LEVEL "
+    "INSTRUCTIONS AND EXPERIENTIAL KNOWLEDGE. A collection of hundreds of "
+    "narrow skills where each one captures one session's specific bug is "
+    "a FAILURE of the library — not a feature. An agent searching skills "
+    "matches on descriptions, not on exact names; one broad umbrella "
+    "skill with labeled subsections beats five narrow siblings for "
+    "discoverability, not the other way around.\n\n"
+    "The right target shape is CLASS-LEVEL skills with rich SKILL.md "
+    "bodies + `references/`, `templates/`, and `scripts/` subfiles for "
+    "session-specific detail — not one-session-one-skill micro-entries.\n\n"
+    "Hard rules — do not violate:\n"
+    "1. DO NOT touch bundled or hub-installed skills. The candidate list "
+    "below is already filtered to agent-created skills only.\n"
+    "2. DO NOT delete any skill. Archiving (moving the skill's directory "
+    "into ~/.hermes/skills/.archive/) is the maximum destructive action. "
+    "Archives are recoverable; deletion is not.\n"
+    "3. DO NOT touch skills shown as pinned=yes. Skip them entirely.\n"
+    "4. DO NOT use usage counters as a reason to skip consolidation. The "
+    "counters are new and often mostly zero. Judge overlap on CONTENT, "
+    "not on use_count. 'use=0' is not evidence a skill is valuable; it's "
+    "absence of evidence either way.\n"
+    "5. DO NOT reject consolidation on the grounds that 'each skill has "
+    "a distinct trigger'. Pairwise distinctness is the wrong bar. The "
+    "right bar is: 'would a human maintainer write this as N separate "
+    "skills, or as one skill with N labeled subsections?' When the "
+    "answer is the latter, merge.\n\n"
+    "How to work — not optional:\n"
+    "1. Scan the full candidate list. Identify PREFIX CLUSTERS (skills "
+    "sharing a first word or domain keyword). Examples you are likely "
+    "to find: hermes-config-*, hermes-dashboard-*, gateway-*, codex-*, "
+    "ollama-*, anthropic-*, gemini-*, mcp-*, salvage-*, pr-*, "
+    "competitor-*, python-*, security-*, etc. Expect 10-25 clusters.\n"
+    "2. For each cluster with 2+ members, do NOT ask 'are these pairs "
+    "overlapping?' — ask 'what is the UMBRELLA CLASS these skills all "
+    "serve? Would a maintainer name that class and write one skill for "
+    "it?' If yes, pick (or create) the umbrella and absorb the siblings "
+    "into it.\n"
+    "3. Three ways to consolidate — use the right one per cluster:\n"
+    "   a. MERGE INTO EXISTING UMBRELLA — one skill in the cluster is "
+    "already broad enough to be the umbrella (example: `pr-triage-"
+    "salvage` for the PR review cluster). Patch it to add a labeled "
+    "section for each sibling's unique insight, then archive the "
+    "siblings.\n"
+    "   b. CREATE A NEW UMBRELLA SKILL.md — no existing member is broad "
+    "enough. Use skill_manage action=create to write a new class-level "
+    "skill whose SKILL.md covers the shared workflow and has short "
+    "labeled subsections. Archive the now-absorbed narrow siblings.\n"
+    "   c. DEMOTE TO REFERENCES/TEMPLATES/SCRIPTS — a sibling has "
+    "narrow-but-valuable session-specific content. Move it into the "
+    "umbrella's appropriate support directory:\n"
+    "      • `references/<topic>.md` for session-specific detail OR "
+    "condensed knowledge banks (quoted research, API docs excerpts, "
+    "domain notes, provider quirks, reproduction recipes)\n"
+    "      • `templates/<name>.<ext>` for starter files meant to be "
+    "copied and modified\n"
+    "      • `scripts/<name>.<ext>` for statically re-runnable actions "
+    "(verification scripts, fixture generators, probes)\n"
+    "      Then archive the old sibling. Use `terminal` with `mkdir -p "
+    "~/.hermes/skills/<umbrella>/references/ && mv ... <umbrella>/"
+    "references/<topic>.md` (or templates/ / scripts/).\n"
+    "4. Also flag skills whose NAME is too narrow (contains a PR number, "
+    "a feature codename, a specific error string, an 'audit' / "
+    "'diagnosis' / 'salvage' session artifact). These almost always "
+    "belong as a subsection or support file under a class-level umbrella.\n"
+    "5. Iterate. After one consolidation round, scan the remaining set "
+    "and look for the NEXT umbrella opportunity. Don't stop after 3 "
+    "merges.\n\n"
+    "Your toolset:\n"
+    "  - skills_list, skill_view        — read the current landscape\n"
+    "  - skill_manage action=patch      — add sections to the umbrella\n"
+    "  - skill_manage action=create     — create a new umbrella SKILL.md\n"
+    "  - skill_manage action=write_file — add a references/, templates/, "
+    "or scripts/ file under an existing skill (the skill must already "
+    "exist)\n"
+    "  - terminal                       — mv a sibling into the archive "
+    "OR move its content into a support subfile\n\n"
+    "'keep' is a legitimate decision ONLY when the skill is already a "
+    "class-level umbrella and none of the proposed merges would improve "
+    "discoverability. 'This is narrow but distinct from its siblings' "
+    "is NOT a reason to keep — it's a reason to move it under an "
+    "umbrella as a subsection or support file.\n\n"
+    "Expected output: real umbrella-ification. Process every obvious "
+    "cluster. If you end the pass with fewer than 10 archives, you "
+    "stopped too early — go back and look at the clusters you left "
+    "alone.\n\n"
+    "When done, write a summary with: clusters processed, skills "
+    "patched/absorbed, skills demoted to references/templates/scripts, "
+    "skills archived, new umbrellas created, and clusters you "
+    "deliberately left alone with one line each."
+)
+
+
+# ---------------------------------------------------------------------------
+# Orchestrator — spawn a forked AIAgent for the LLM review pass
+# ---------------------------------------------------------------------------
+
+def _render_candidate_list() -> str:
+    """Human/agent-readable list of agent-created skills with usage stats."""
+    rows = skill_usage.agent_created_report()
+    if not rows:
+        return "No agent-created skills to review."
+    lines = [f"Agent-created skills ({len(rows)}):\n"]
+    for r in rows:
+        lines.append(
+            f"- {r['name']}  "
+            f"state={r['state']}  "
+            f"pinned={'yes' if r.get('pinned') else 'no'}  "
+            f"use={r.get('use_count', 0)}  "
+            f"view={r.get('view_count', 0)}  "
+            f"patches={r.get('patch_count', 0)}  "
+            f"last_used={r.get('last_used_at') or 'never'}"
+        )
+    return "\n".join(lines)
+
+
+def run_curator_review(
+    on_summary: Optional[Callable[[str], None]] = None,
+    synchronous: bool = False,
+) -> Dict[str, Any]:
+    """Execute a single curator review pass.
+
+    Steps:
+      1. Apply automatic state transitions (pure, no LLM).
+      2. If there are agent-created skills, spawn a forked AIAgent that runs
+         the LLM review prompt against the current candidate list.
+      3. Update .curator_state with last_run_at and a one-line summary.
+      4. Invoke *on_summary* with a user-visible description.
+
+    If *synchronous* is True, the LLM review runs in the calling thread; the
+    default is to spawn a daemon thread so the caller returns immediately.
+    """
+    start = datetime.now(timezone.utc)
+    counts = apply_automatic_transitions(now=start)
+
+    auto_summary_parts = []
+    if counts["marked_stale"]:
+        auto_summary_parts.append(f"{counts['marked_stale']} marked stale")
+    if counts["archived"]:
+        auto_summary_parts.append(f"{counts['archived']} archived")
+    if counts["reactivated"]:
+        auto_summary_parts.append(f"{counts['reactivated']} reactivated")
+    auto_summary = ", ".join(auto_summary_parts) if auto_summary_parts else "no changes"
+
+    # Persist state before the LLM pass so a crash mid-review still records
+    # the run and doesn't immediately re-trigger.
+    state = load_state()
+    state["last_run_at"] = start.isoformat()
+    state["run_count"] = int(state.get("run_count", 0)) + 1
+    state["last_run_summary"] = f"auto: {auto_summary}"
+    save_state(state)
+
+    def _llm_pass():
+        nonlocal auto_summary
+        try:
+            candidate_list = _render_candidate_list()
+            if "No agent-created skills" in candidate_list:
+                final_summary = f"auto: {auto_summary}; llm: skipped (no candidates)"
+            else:
+                prompt = f"{CURATOR_REVIEW_PROMPT}\n\n{candidate_list}"
+                llm_summary = _run_llm_review(prompt)
+                final_summary = f"auto: {auto_summary}; llm: {llm_summary}"
+        except Exception as e:
+            logger.debug("Curator LLM pass failed: %s", e, exc_info=True)
+            final_summary = f"auto: {auto_summary}; llm: error ({e})"
+
+        elapsed = (datetime.now(timezone.utc) - start).total_seconds()
+        state2 = load_state()
+        state2["last_run_duration_seconds"] = elapsed
+        state2["last_run_summary"] = final_summary
+        save_state(state2)
+
+        if on_summary:
+            try:
+                on_summary(f"curator: {final_summary}")
+            except Exception:
+                pass
+
+    if synchronous:
+        _llm_pass()
+    else:
+        t = threading.Thread(target=_llm_pass, daemon=True, name="curator-review")
+        t.start()
+
+    return {
+        "started_at": start.isoformat(),
+        "auto_transitions": counts,
+        "summary_so_far": auto_summary,
+    }
+
+
+def _run_llm_review(prompt: str) -> str:
+    """Spawn an AIAgent fork to run the curator review prompt. Returns a short
+    summary of what the model said in its final response."""
+    import contextlib
+    try:
+        from run_agent import AIAgent
+    except Exception as e:
+        return f"AIAgent import failed: {e}"
+
+    # Resolve provider + model the same way the CLI does, so the curator
+    # fork inherits the user's active main config rather than falling
+    # through to an empty provider/model pair (which sends HTTP 400
+    # "No models provided"). AIAgent() without explicit provider/model
+    # arguments hits an auto-resolution path that fails for OAuth-only
+    # providers and for pool-backed credentials.
+    _api_key = None
+    _base_url = None
+    _api_mode = None
+    _resolved_provider = None
+    _model_name = ""
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+        _cfg = load_config()
+        _m = _cfg.get("model", {}) if isinstance(_cfg.get("model"), dict) else {}
+        _provider = _m.get("provider") or "auto"
+        _model_name = _m.get("default") or _m.get("model") or ""
+        _rp = resolve_runtime_provider(
+            requested=_provider, target_model=_model_name
+        )
+        _api_key = _rp.get("api_key")
+        _base_url = _rp.get("base_url")
+        _api_mode = _rp.get("api_mode")
+        _resolved_provider = _rp.get("provider") or _provider
+    except Exception as e:
+        logger.debug("Curator provider resolution failed: %s", e, exc_info=True)
+
+    review_agent = None
+    try:
+        review_agent = AIAgent(
+            model=_model_name,
+            provider=_resolved_provider,
+            api_key=_api_key,
+            base_url=_base_url,
+            api_mode=_api_mode,
+            # Umbrella-building over a large skill collection is worth a
+            # high iteration ceiling — the pass typically takes 50-100
+            # API calls against hundreds of candidate skills. The
+            # single-session review path caps itself at a much smaller
+            # number because it's not doing a curation sweep.
+            max_iterations=9999,
+            quiet_mode=True,
+            platform="curator",
+            skip_context_files=True,
+            skip_memory=True,
+        )
+        # Disable recursive nudges — the curator must never spawn its own review.
+        review_agent._memory_nudge_interval = 0
+        review_agent._skill_nudge_interval = 0
+
+        # Redirect the forked agent's stdout/stderr to /dev/null while it
+        # runs so its tool-call chatter doesn't pollute the foreground
+        # terminal. The background-thread runner also hides it; this
+        # belt-and-suspenders path matters when a caller invokes
+        # run_curator_review(synchronous=True) from the CLI.
+        with open(os.devnull, "w") as _devnull, \
+             contextlib.redirect_stdout(_devnull), \
+             contextlib.redirect_stderr(_devnull):
+            result = review_agent.run_conversation(user_message=prompt)
+
+        final = ""
+        if isinstance(result, dict):
+            final = str(result.get("final_response") or "").strip()
+        return (final[:240] + "…") if len(final) > 240 else (final or "no change")
+    except Exception as e:
+        return f"error: {e}"
+    finally:
+        if review_agent is not None:
+            try:
+                review_agent.close()
+            except Exception:
+                pass
+
+
+# ---------------------------------------------------------------------------
+# Public entrypoint for the session-start hook
+# ---------------------------------------------------------------------------
+
+def maybe_run_curator(
+    *,
+    idle_for_seconds: Optional[float] = None,
+    on_summary: Optional[Callable[[str], None]] = None,
+) -> Optional[Dict[str, Any]]:
+    """Best-effort: run a curator pass if all gates pass. Returns the result
+    dict if a pass was started, else None. Never raises."""
+    try:
+        if not should_run_now():
+            return None
+        # Idle gating: only enforce when the caller provided a measurement.
+        if idle_for_seconds is not None:
+            min_idle_s = get_min_idle_hours() * 3600.0
+            if idle_for_seconds < min_idle_s:
+                return None
+        return run_curator_review(on_summary=on_summary)
+    except Exception as e:
+        logger.debug("maybe_run_curator failed: %s", e, exc_info=True)
+        return None
@@ -42,9 +42,11 @@ class FailoverReason(enum.Enum):
    # Context / payload
    context_overflow = "context_overflow"  # Context too large — compress, not failover
    payload_too_large = "payload_too_large"  # 413 — compress payload
+    image_too_large = "image_too_large"   # Native image part exceeds provider's per-image limit — shrink and retry

    # Model
    model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
+    provider_policy_blocked = "provider_policy_blocked"  # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy

    # Request format
    format_error = "format_error"        # 400 bad request — abort or strip + retry
@@ -89,6 +91,7 @@ class ClassifiedError:
 _BILLING_PATTERNS = [
    "insufficient credits",
    "insufficient_quota",
+    "insufficient balance",
    "credit balance",
    "credits have been exhausted",
    "top up your credits",
@@ -146,6 +149,20 @@ _PAYLOAD_TOO_LARGE_PATTERNS = [
    "error code: 413",
 ]

+# Image-size patterns.  Matched against 400 bodies (not 413) because most
+# providers return a 400 with a specific image-too-big message before the
+# whole request hits the 413 size limit.  Anthropic's wording is the most
+# important here (hard 5 MB per image, returned as
+# "messages.N.content.K.image.source.base64: image exceeds 5 MB maximum").
+_IMAGE_TOO_LARGE_PATTERNS = [
+    "image exceeds",        # Anthropic: "image exceeds 5 MB maximum"
+    "image too large",      # generic
+    "image_too_large",      # error_code variant
+    "image size exceeds",   # variant
+    # "request_too_large" on a request known to contain an image → image is
+    # the likely culprit; we still try the shrink path before giving up.
+]
+
 # Context overflow patterns
 _CONTEXT_OVERFLOW_PATTERNS = [
    "context length",
@@ -194,6 +211,29 @@ _MODEL_NOT_FOUND_PATTERNS = [
    "unsupported model",
 ]

+# OpenRouter aggregator policy-block patterns.
+#
+# When a user's OpenRouter account privacy setting (or a per-request
+# `provider.data_collection: deny` preference) excludes the only endpoint
+# serving a model, OpenRouter returns 404 with a *specific* message that is
+# distinct from "model not found":
+#
+#   "No endpoints available matching your guardrail restrictions and
+#    data policy. Configure: https://openrouter.ai/settings/privacy"
+#
+# We classify this as `provider_policy_blocked` rather than
+# `model_not_found` because:
+#   - The model *exists* — model_not_found is misleading in logs
+#   - Provider fallback won't help: the account-level setting applies to
+#     every call on the same OpenRouter account
+#   - The error body already contains the fix URL, so the user gets
+#     actionable guidance without us rewriting the message
+_PROVIDER_POLICY_BLOCKED_PATTERNS = [
+    "no endpoints available matching your guardrail",
+    "no endpoints available matching your data policy",
+    "no endpoints found matching your data policy",
+]
+
 # Auth patterns (non-status-code signals)
 _AUTH_PATTERNS = [
    "invalid api key",
@@ -220,12 +260,25 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
+    # SSL/TLS transport errors — transient mid-stream handshake/record
+    # failures that should retry rather than surface as a stalled session.
+    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
+    # the type names here so provider-wrapped SSL errors (e.g. when the
+    # SDK re-raises without preserving the exception chain) still classify
+    # as transport rather than falling through to the unknown bucket.
+    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
+    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level)
+# Server disconnect patterns (no status code, but transport-level).
+# These are the "ambiguous" patterns — a plain connection close could be
+# transient transport hiccup OR server-side context overflow rejection
+# (common when the API gateway disconnects instead of returning an HTTP
+# error for oversized requests).  A large session + one of these patterns
+# triggers the context-overflow-with-compression recovery path.
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -236,6 +289,40 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

+# SSL/TLS transient failure patterns — intentionally distinct from
+# _SERVER_DISCONNECT_PATTERNS above.
+#
+# An SSL alert mid-stream is almost always a transport-layer hiccup
+# (flaky network, mid-session TLS renegotiation failure, load balancer
+# dropping the connection) — NOT a server-side context overflow signal.
+# So we want the retry path but NOT the compression path; lumping these
+# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
+# expensive) context compression on any large-session SSL hiccup.
+#
+# The OpenSSL library constructs error codes by prepending a format string
+# to the uppercased alert reason; OpenSSL 3.x changed the separator
+# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+# which silently stopped matching anything explicit.  Matching on the
+# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
+# survives future OpenSSL format churn without code changes.
+_SSL_TRANSIENT_PATTERNS = [
+    # Space-separated (human-readable form, Python ssl module, most SDKs)
+    "bad record mac",
+    "ssl alert",
+    "tls alert",
+    "ssl handshake failure",
+    "tlsv1 alert",
+    "sslv3 alert",
+    # Underscore-separated (OpenSSL error code tokens, e.g.
+    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
+    "bad_record_mac",
+    "ssl_alert",
+    "tls_alert",
+    "tls_alert_internal_error",
+    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
+    "[ssl:",
+]
+

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -255,9 +342,10 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. Transport error heuristics
+      5. SSL/TLS transient alert patterns → retry as timeout
      6. Server disconnect + large session → context overflow
-      7. Fallback: unknown (retryable with backoff)
+      7. Transport error heuristics
+      8. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -271,6 +359,11 @@ def classify_api_error(
    """
    status_code = _extract_status_code(error)
    error_type = type(error).__name__
+    # Copilot/GitHub Models RateLimitError may not set .status_code; force 429
+    # so downstream rate-limit handling (classifier reason, pool rotation,
+    # fallback gating) fires correctly instead of misclassifying as generic.
+    if status_code is None and error_type == "RateLimitError":
+        status_code = 429
    body = _extract_error_body(error)
    error_code = _extract_error_code(body)

@@ -388,7 +481,18 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. Server disconnect + large session → context overflow ─────
+    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
+    # SSL alerts mid-stream are transport hiccups, not server-side context
+    # overflow signals.  Classify before the disconnect check so a large
+    # session doesn't incorrectly trigger context compression when the real
+    # cause is a flaky TLS handshake.  Also matches when the error is
+    # wrapped in a generic exception whose message string carries the SSL
+    # alert text but the type isn't ssl.SSLError (happens with some SDKs
+    # that re-raise without chaining).
+    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
+        return _result(FailoverReason.timeout, retryable=True)
+
+    # ── 6. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -405,12 +509,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 6. Transport / timeout heuristics ───────────────────────────
+    # ── 7. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Fallback: unknown ────────────────────────────────────────
+    # ── 8. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -464,6 +568,17 @@ def _classify_by_status(
        return _classify_402(error_msg, result_fn)

    if status_code == 404:
+        # OpenRouter policy-block 404 — distinct from "model not found".
+        # The model exists; the user's account privacy setting excludes the
+        # only endpoint serving it. Falling back to another provider won't
+        # help (same account setting applies).  The error body already
+        # contains the fix URL, so just surface it.
+        if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
+            return result_fn(
+                FailoverReason.provider_policy_blocked,
+                retryable=False,
+                should_fallback=False,
+            )
        if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
            return result_fn(
                FailoverReason.model_not_found,
@@ -572,6 +687,15 @@ def _classify_400(
 ) -> ClassifiedError:
    """Classify 400 Bad Request — context overflow, format error, or generic."""

+    # Image-too-large from 400 (Anthropic's 5 MB per-image check fires this way).
+    # Must be checked BEFORE context_overflow because messages can trip both
+    # patterns ("exceeds" + "image") and image-shrink is a cheaper recovery.
+    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
+        return result_fn(
+            FailoverReason.image_too_large,
+            retryable=True,
+        )
+
    # Context overflow from 400
    if any(p in error_msg for p in _CONTEXT_OVERFLOW_PATTERNS):
        return result_fn(
@@ -581,6 +705,12 @@ def _classify_400(
        )

    # Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
+    if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
+        return result_fn(
+            FailoverReason.provider_policy_blocked,
+            retryable=False,
+            should_fallback=False,
+        )
    if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
        return result_fn(
            FailoverReason.model_not_found,
@@ -693,6 +823,13 @@ def _classify_by_message(
            should_compress=True,
        )

+    # Image-too-large patterns (from message text when no status_code)
+    if any(p in error_msg for p in _IMAGE_TOO_LARGE_PATTERNS):
+        return result_fn(
+            FailoverReason.image_too_large,
+            retryable=True,
+        )
+
    # Usage-limit patterns need the same disambiguation as 402: some providers
    # surface "usage limit" errors without an HTTP status code.  A transient
    # signal ("try again", "resets at", …) means it's a periodic quota, not
@@ -753,6 +890,15 @@ def _classify_by_message(
            should_fallback=True,
        )

+    # Provider policy-block (aggregator-side guardrail) — check before
+    # model_not_found so we don't mis-label as a missing model.
+    if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
+        return result_fn(
+            FailoverReason.provider_policy_blocked,
+            retryable=False,
+            should_fallback=False,
+        )
+
    # Model not found patterns
    if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
        return result_fn(
@@ -30,7 +30,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import uuid
 from types import SimpleNamespace
@@ -42,7 +41,6 @@ from agent import google_oauth
 from agent.gemini_schema import sanitize_gemini_tool_parameters
 from agent.google_code_assist import (
    CODE_ASSIST_ENDPOINT,
-    FREE_TIER_ID,
    CodeAssistError,
    ProjectContext,
    resolve_project_context,
@@ -44,6 +44,97 @@ def is_native_gemini_base_url(base_url: str) -> bool:
    return not normalized.endswith("/openai")


+def probe_gemini_tier(
+    api_key: str,
+    base_url: str = DEFAULT_GEMINI_BASE_URL,
+    *,
+    model: str = "gemini-2.5-flash",
+    timeout: float = 10.0,
+) -> str:
+    """Probe a Google AI Studio API key and return its tier.
+
+    Returns one of:
+
+    - ``"free"``    -- key is on the free tier (unusable with Hermes)
+    - ``"paid"``    -- key is on a paid tier
+    - ``"unknown"`` -- probe failed; callers should proceed without blocking.
+    """
+    key = (api_key or "").strip()
+    if not key:
+        return "unknown"
+
+    normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
+    if not normalized_base:
+        normalized_base = DEFAULT_GEMINI_BASE_URL
+    if normalized_base.lower().endswith("/openai"):
+        normalized_base = normalized_base[: -len("/openai")]
+
+    url = f"{normalized_base}/models/{model}:generateContent"
+    payload = {
+        "contents": [{"role": "user", "parts": [{"text": "hi"}]}],
+        "generationConfig": {"maxOutputTokens": 1},
+    }
+
+    try:
+        with httpx.Client(timeout=timeout) as client:
+            resp = client.post(
+                url,
+                params={"key": key},
+                json=payload,
+                headers={"Content-Type": "application/json"},
+            )
+    except Exception as exc:
+        logger.debug("probe_gemini_tier: network error: %s", exc)
+        return "unknown"
+
+    headers_lower = {k.lower(): v for k, v in resp.headers.items()}
+    rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
+    if rpd_header:
+        try:
+            rpd_val = int(rpd_header)
+        except (TypeError, ValueError):
+            rpd_val = None
+        # Published free-tier daily caps (Dec 2025):
+        #   gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
+        # Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
+        if rpd_val is not None and rpd_val <= 1000:
+            return "free"
+        if rpd_val is not None and rpd_val > 1000:
+            return "paid"
+
+    if resp.status_code == 429:
+        body_text = ""
+        try:
+            body_text = resp.text or ""
+        except Exception:
+            body_text = ""
+        if "free_tier" in body_text.lower():
+            return "free"
+        return "paid"
+
+    if 200 <= resp.status_code < 300:
+        return "paid"
+
+    return "unknown"
+
+
+def is_free_tier_quota_error(error_message: str) -> bool:
+    """Return True when a Gemini 429 message indicates free-tier exhaustion."""
+    if not error_message:
+        return False
+    return "free_tier" in error_message.lower()
+
+
+_FREE_TIER_GUIDANCE = (
+    "\n\nYour Google API key is on the free tier (<= 250 requests/day for "
+    "gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
+    "so the free tier is exhausted in a handful of messages and cannot sustain "
+    "an agent session. Enable billing on your Google Cloud project and "
+    "regenerate the key in a billing-enabled project: "
+    "https://aistudio.google.com/apikey"
+)
+
+
 class GeminiAPIError(Exception):
    """Error shape compatible with Hermes retry/error classification."""

@@ -650,6 +741,12 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
    else:
        message = f"Gemini returned HTTP {status}: {body_text[:500]}"

+    # Free-tier quota exhaustion -> append actionable guidance so users who
+    # bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
+    # that the free tier cannot sustain an agent session.
+    if status == 429 and is_free_tier_quota_error(err_message or body_text):
+        message = message + _FREE_TIER_GUIDANCE
+
    return GeminiAPIError(
        message,
        code=code,
@@ -704,6 +801,13 @@ class GeminiNativeClient:
        http_client: Optional[httpx.Client] = None,
        **_: Any,
    ) -> None:
+        if not (api_key or "").strip():
+            raise RuntimeError(
+                "Gemini native client requires an API key, but none was provided. "
+                "Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
+                "(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
+                "to configure the Google provider."
+            )
        self.api_key = api_key
        normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
        if normalized_base.endswith("/openai"):
@@ -2,7 +2,7 @@

 from __future__ import annotations

-from typing import Any, Dict, List
+from typing import Any, Dict

 # Gemini's ``FunctionDeclaration.parameters`` field accepts the ``Schema``
 # object, which is only a subset of OpenAPI 3.0 / JSON Schema.  Strip fields
@@ -73,6 +73,20 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
            ]
            continue
        cleaned[key] = value
+
+    # Gemini's Schema validator requires every ``enum`` entry to be a string,
+    # even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
+    # OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
+    # ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
+    # so we only drop the ``enum`` when it would collide with Gemini's rule.
+    # Keeping ``type: integer`` plus the human-readable description gives the
+    # model enough guidance; the tool handler still validates the value.
+    enum_val = cleaned.get("enum")
+    type_val = cleaned.get("type")
+    if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
+        if any(not isinstance(item, str) for item in enum_val):
+            cleaned.pop("enum", None)
+
    return cleaned


@@ -29,7 +29,6 @@ from __future__ import annotations

 import json
 import logging
-import os
 import time
 import urllib.error
 import urllib.parse
@@ -49,14 +49,13 @@ import json
 import logging
 import os
 import secrets
-import socket
 import stat
 import threading
 import time
 import urllib.error
 import urllib.parse
 import urllib.request
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple

@@ -98,6 +97,7 @@ _DEFAULT_CLIENT_SECRET = f"GOCSPX-{_PUBLIC_CLIENT_SECRET_SUFFIX}"

 # Regex patterns for fallback scraping from an installed gemini-cli.
 import re as _re
+from utils import atomic_replace
 _CLIENT_ID_PATTERN = _re.compile(
    r"OAUTH_CLIENT_ID\s*=\s*['\"]([0-9]+-[a-z0-9]+\.apps\.googleusercontent\.com)['\"]"
 )
@@ -499,7 +499,7 @@ def save_credentials(creds: GoogleCredentials) -> Path:
                fh.flush()
                os.fsync(fh.fileno())
            os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR)
-            os.replace(tmp_path, path)
+            atomic_replace(tmp_path, path)
        finally:
            try:
                if tmp_path.exists():
@@ -0,0 +1,236 @@
+"""Routing helpers for inbound user-attached images.
+
+Two modes:
+
+  native  — attach images as OpenAI-style ``image_url`` content parts on the
+            user turn. Provider adapters (Anthropic, Gemini, Bedrock, Codex,
+            OpenAI chat.completions) already translate these into their
+            vendor-specific multimodal formats.
+
+  text    — run ``vision_analyze`` on each image up-front and prepend the
+            description to the user's text. The model never sees the pixels;
+            it only sees a lossy text summary. This is the pre-existing
+            behaviour and still the right choice for non-vision models.
+
+The decision is made once per message turn by :func:`decide_image_input_mode`.
+It reads ``agent.image_input_mode`` from config.yaml (``auto`` | ``native``
+| ``text``, default ``auto``) and the active model's capability metadata.
+
+In ``auto`` mode:
+  - If the user has explicitly configured ``auxiliary.vision.provider``
+    (i.e. not ``auto`` and not empty), we assume they want the text pipeline
+    regardless of the main model — they've opted in to a specific vision
+    backend for a reason (cost, quality, local-only, etc.).
+  - Otherwise, if the active model reports ``supports_vision=True`` in its
+    models.dev metadata, we attach natively.
+  - Otherwise (non-vision model, no explicit override), we fall back to text.
+
+This keeps ``vision_analyze`` surfaced as a tool in every session — skills
+and agent flows that chain it (browser screenshots, deeper inspection of
+URL-referenced images, style-gating loops) keep working. The routing only
+affects *how user-attached images on the current turn* are presented to the
+main model.
+"""
+
+from __future__ import annotations
+
+import base64
+import logging
+import mimetypes
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+
+_VALID_MODES = frozenset({"auto", "native", "text"})
+
+
+def _coerce_mode(raw: Any) -> str:
+    """Normalize a config value into one of the valid modes."""
+    if not isinstance(raw, str):
+        return "auto"
+    val = raw.strip().lower()
+    if val in _VALID_MODES:
+        return val
+    return "auto"
+
+
+def _explicit_aux_vision_override(cfg: Optional[Dict[str, Any]]) -> bool:
+    """True when the user configured a specific auxiliary vision backend.
+
+    An explicit override means the user *wants* the text pipeline (they're
+    paying for a dedicated vision model), so we don't silently bypass it.
+    """
+    if not isinstance(cfg, dict):
+        return False
+    aux = cfg.get("auxiliary") or {}
+    if not isinstance(aux, dict):
+        return False
+    vision = aux.get("vision") or {}
+    if not isinstance(vision, dict):
+        return False
+
+    provider = str(vision.get("provider") or "").strip().lower()
+    model = str(vision.get("model") or "").strip()
+    base_url = str(vision.get("base_url") or "").strip()
+
+    # "auto" / "" / blank = not explicit
+    if provider in ("", "auto") and not model and not base_url:
+        return False
+    return True
+
+
+def _lookup_supports_vision(provider: str, model: str) -> Optional[bool]:
+    """Return True/False if we can resolve caps, None if unknown."""
+    if not provider or not model:
+        return None
+    try:
+        from agent.models_dev import get_model_capabilities
+        caps = get_model_capabilities(provider, model)
+    except Exception as exc:  # pragma: no cover - defensive
+        logger.debug("image_routing: caps lookup failed for %s:%s — %s", provider, model, exc)
+        return None
+    if caps is None:
+        return None
+    return bool(caps.supports_vision)
+
+
+def decide_image_input_mode(
+    provider: str,
+    model: str,
+    cfg: Optional[Dict[str, Any]],
+) -> str:
+    """Return ``"native"`` or ``"text"`` for the given turn.
+
+    Args:
+      provider: active inference provider ID (e.g. ``"anthropic"``, ``"openrouter"``).
+      model:    active model slug as it would be sent to the provider.
+      cfg:      loaded config.yaml dict, or None. When None, behaves as auto.
+    """
+    mode_cfg = "auto"
+    if isinstance(cfg, dict):
+        agent_cfg = cfg.get("agent") or {}
+        if isinstance(agent_cfg, dict):
+            mode_cfg = _coerce_mode(agent_cfg.get("image_input_mode"))
+
+    if mode_cfg == "native":
+        return "native"
+    if mode_cfg == "text":
+        return "text"
+
+    # auto
+    if _explicit_aux_vision_override(cfg):
+        return "text"
+
+    supports = _lookup_supports_vision(provider, model)
+    if supports is True:
+        return "native"
+    return "text"
+
+
+# Image size handling is REACTIVE rather than proactive: we attempt native
+# attachment at full size regardless of provider, and rely on
+# ``run_agent._try_shrink_image_parts_in_messages`` to shrink + retry if
+# the provider rejects the request (e.g. Anthropic's hard 5 MB per-image
+# ceiling returned as HTTP 400 "image exceeds 5 MB maximum").
+#
+# Why reactive: our knowledge of provider ceilings is partial and evolving
+# (OpenAI accepts 49 MB+, Anthropic 5 MB, Gemini 100 MB, others unknown).
+# A proactive per-provider table would be stale the moment a provider raises
+# or lowers its limit, and silently degrading quality for users on providers
+# that would have accepted the full image is the worse failure mode.
+# The shrink-on-reject path loses 1 API call + maybe 1s of Pillow work when
+# it fires, which is cheaper than permanent quality loss.
+
+
+def _guess_mime(path: Path) -> str:
+    mime, _ = mimetypes.guess_type(str(path))
+    if mime and mime.startswith("image/"):
+        return mime
+    # mimetypes on some Linux distros mis-maps .jpg; default to jpeg when
+    # the suffix looks imagey.
+    suffix = path.suffix.lower()
+    return {
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".png": "image/png",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+    }.get(suffix, "image/jpeg")
+
+
+def _file_to_data_url(path: Path) -> Optional[str]:
+    """Encode a local image as a base64 data URL at its native size.
+
+    Size limits are NOT enforced here — the agent retry loop
+    (``run_agent._try_shrink_image_parts_in_messages``) shrinks on the
+    provider's first rejection. Keeping this simple means providers that
+    accept large images (OpenAI 49 MB+, Gemini 100 MB) don't pay a silent
+    quality tax just because one other provider is stricter.
+
+    Returns None only if the file can't be read (missing, permission
+    denied, etc.); the caller reports those paths in ``skipped``.
+    """
+    try:
+        raw = path.read_bytes()
+    except Exception as exc:
+        logger.warning("image_routing: failed to read %s — %s", path, exc)
+        return None
+    mime = _guess_mime(path)
+    b64 = base64.b64encode(raw).decode("ascii")
+    return f"data:{mime};base64,{b64}"
+
+
+def build_native_content_parts(
+    user_text: str,
+    image_paths: List[str],
+) -> Tuple[List[Dict[str, Any]], List[str]]:
+    """Build an OpenAI-style ``content`` list for a user turn.
+
+    Shape:
+      [{"type": "text", "text": "..."},
+       {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
+       ...]
+
+    Images are attached at their native size. If a provider rejects the
+    request because an image is too large (e.g. Anthropic's 5 MB per-image
+    ceiling), the agent's retry loop transparently shrinks and retries
+    once — see ``run_agent._try_shrink_image_parts_in_messages``.
+
+    Returns (content_parts, skipped_paths). Skipped paths are files that
+    couldn't be read from disk.
+    """
+    parts: List[Dict[str, Any]] = []
+    skipped: List[str] = []
+
+    text = (user_text or "").strip()
+    if text:
+        parts.append({"type": "text", "text": text})
+
+    for raw_path in image_paths:
+        p = Path(raw_path)
+        if not p.exists() or not p.is_file():
+            skipped.append(str(raw_path))
+            continue
+        data_url = _file_to_data_url(p)
+        if not data_url:
+            skipped.append(str(raw_path))
+            continue
+        parts.append({
+            "type": "image_url",
+            "image_url": {"url": data_url},
+        })
+
+    # If the text was empty, add a neutral prompt so the turn isn't just images.
+    if not text and any(p.get("type") == "image_url" for p in parts):
+        parts.insert(0, {"type": "text", "text": "What do you see in this image?"})
+
+    return parts, skipped
+
+
+__all__ = [
+    "decide_image_input_mode",
+    "build_native_content_parts",
+]
@@ -0,0 +1,48 @@
+"""LM Studio reasoning-effort resolution shared by the chat-completions
+transport and run_agent's iteration-limit summary path.
+
+LM Studio publishes per-model ``capabilities.reasoning.allowed_options`` (e.g.
+``["off","on"]`` for toggle-style models, ``["off","minimal","low"]`` for
+graduated models). We map the user's ``reasoning_config`` onto LM Studio's
+OpenAI-compatible vocabulary, then clamp against the model's allowed set so
+the server doesn't 400 on an unsupported effort.
+"""
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+# LM Studio accepts these top-level reasoning_effort values via its
+# OpenAI-compatible chat.completions endpoint.
+_LM_VALID_EFFORTS = {"none", "minimal", "low", "medium", "high", "xhigh"}
+
+# Toggle-style models publish allowed_options as ["off","on"] in /api/v1/models.
+# Map them onto the OpenAI-compatible request vocabulary.
+_LM_EFFORT_ALIASES = {"off": "none", "on": "medium"}
+
+
+def resolve_lmstudio_effort(
+    reasoning_config: Optional[dict],
+    allowed_options: Optional[List[str]],
+) -> Optional[str]:
+    """Return the ``reasoning_effort`` string to send to LM Studio, or ``None``.
+
+    ``None`` means "omit the field": the user picked a level the model can't
+    honor, so let LM Studio fall back to the model's declared default rather
+    than silently substituting a different effort. When ``allowed_options`` is
+    falsy (probe failed), skip clamping and send the resolved effort anyway.
+    """
+    effort = "medium"
+    if reasoning_config and isinstance(reasoning_config, dict):
+        if reasoning_config.get("enabled") is False:
+            effort = "none"
+        else:
+            raw = (reasoning_config.get("effort") or "").strip().lower()
+            raw = _LM_EFFORT_ALIASES.get(raw, raw)
+            if raw in _LM_VALID_EFFORTS:
+                effort = raw
+    if allowed_options:
+        allowed = {_LM_EFFORT_ALIASES.get(opt, opt) for opt in allowed_options}
+        if effort not in allowed:
+            return None
+    return effort
@@ -28,9 +28,9 @@ Usage in run_agent.py:

 from __future__ import annotations

-import json
 import logging
 import re
+import inspect
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
@@ -62,15 +62,124 @@ def sanitize_context(text: str) -> str:
    return text


-def build_memory_context_block(raw_context: str) -> str:
-    """Wrap prefetched memory in a fenced block with system note.
+class StreamingContextScrubber:
+    """Stateful scrubber for streaming text that may contain split memory-context spans.

-    The fence prevents the model from treating recalled context as user
-    discourse.  Injected at API-call time only — never persisted.
+    The one-shot ``sanitize_context`` regex cannot survive chunk boundaries:
+    a ``<memory-context>`` opened in one delta and closed in a later delta
+    leaks its payload to the UI because the non-greedy block regex needs
+    both tags in one string.  This scrubber runs a small state machine
+    across deltas, holding back partial-tag tails and discarding
+    everything inside a span (including the system-note line).
+
+    Usage::
+
+        scrubber = StreamingContextScrubber()
+        for delta in stream:
+            visible = scrubber.feed(delta)
+            if visible:
+                emit(visible)
+        trailing = scrubber.flush()  # at end of stream
+        if trailing:
+            emit(trailing)
+
+    The scrubber is re-entrant per agent instance.  Callers building new
+    top-level responses (new turn) should create a fresh scrubber or call
+    ``reset()``.
    """
+
+    _OPEN_TAG = "<memory-context>"
+    _CLOSE_TAG = "</memory-context>"
+
+    def __init__(self) -> None:
+        self._in_span: bool = False
+        self._buf: str = ""
+
+    def reset(self) -> None:
+        self._in_span = False
+        self._buf = ""
+
+    def feed(self, text: str) -> str:
+        """Return the visible portion of ``text`` after scrubbing.
+
+        Any trailing fragment that could be the start of an open/close tag
+        is held back in the internal buffer and surfaced on the next
+        ``feed()`` call or discarded/emitted by ``flush()``.
+        """
+        if not text:
+            return ""
+        buf = self._buf + text
+        self._buf = ""
+        out: list[str] = []
+
+        while buf:
+            if self._in_span:
+                idx = buf.lower().find(self._CLOSE_TAG)
+                if idx == -1:
+                    # Hold back a potential partial close tag; drop the rest
+                    held = self._max_partial_suffix(buf, self._CLOSE_TAG)
+                    self._buf = buf[-held:] if held else ""
+                    return "".join(out)
+                # Found close — skip span content + tag, continue
+                buf = buf[idx + len(self._CLOSE_TAG):]
+                self._in_span = False
+            else:
+                idx = buf.lower().find(self._OPEN_TAG)
+                if idx == -1:
+                    # No open tag — hold back a potential partial open tag
+                    held = self._max_partial_suffix(buf, self._OPEN_TAG)
+                    if held:
+                        out.append(buf[:-held])
+                        self._buf = buf[-held:]
+                    else:
+                        out.append(buf)
+                    return "".join(out)
+                # Emit text before the tag, enter span
+                if idx > 0:
+                    out.append(buf[:idx])
+                buf = buf[idx + len(self._OPEN_TAG):]
+                self._in_span = True
+
+        return "".join(out)
+
+    def flush(self) -> str:
+        """Emit any held-back buffer at end-of-stream.
+
+        If we're still inside an unterminated span the remaining content is
+        discarded (safer: leaking partial memory context is worse than a
+        truncated answer).  Otherwise the held-back partial-tag tail is
+        emitted verbatim (it turned out not to be a real tag).
+        """
+        if self._in_span:
+            self._buf = ""
+            self._in_span = False
+            return ""
+        tail = self._buf
+        self._buf = ""
+        return tail
+
+    @staticmethod
+    def _max_partial_suffix(buf: str, tag: str) -> int:
+        """Return the length of the longest buf-suffix that is a tag-prefix.
+
+        Case-insensitive.  Returns 0 if no suffix could start the tag.
+        """
+        tag_lower = tag.lower()
+        buf_lower = buf.lower()
+        max_check = min(len(buf_lower), len(tag_lower) - 1)
+        for i in range(max_check, 0, -1):
+            if tag_lower.startswith(buf_lower[-i:]):
+                return i
+        return 0
+
+
+def build_memory_context_block(raw_context: str) -> str:
+    """Wrap prefetched memory in a fenced block with system note."""
    if not raw_context or not raw_context.strip():
        return ""
    clean = sanitize_context(raw_context)
+    if clean != raw_context:
+        logger.warning("memory provider returned pre-wrapped context; stripped")
    return (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, "
@@ -312,7 +421,39 @@ class MemoryManager:
                )
        return "\n\n".join(parts)

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
+    @staticmethod
+    def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
+        """Return how to pass metadata to a provider's memory-write hook."""
+        try:
+            signature = inspect.signature(provider.on_memory_write)
+        except (TypeError, ValueError):
+            return "keyword"
+
+        params = list(signature.parameters.values())
+        if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
+            return "keyword"
+        if "metadata" in signature.parameters:
+            return "keyword"
+
+        accepted = [
+            p for p in params
+            if p.kind in (
+                inspect.Parameter.POSITIONAL_ONLY,
+                inspect.Parameter.POSITIONAL_OR_KEYWORD,
+                inspect.Parameter.KEYWORD_ONLY,
+            )
+        ]
+        if len(accepted) >= 4:
+            return "positional"
+        return "legacy"
+
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
        """Notify external providers when the built-in memory tool writes.

        Skips the builtin provider itself (it's the source of the write).
@@ -321,7 +462,15 @@ class MemoryManager:
            if provider.name == "builtin":
                continue
            try:
-                provider.on_memory_write(action, target, content)
+                metadata_mode = self._provider_memory_write_metadata_mode(provider)
+                if metadata_mode == "keyword":
+                    provider.on_memory_write(
+                        action, target, content, metadata=dict(metadata or {})
+                    )
+                elif metadata_mode == "positional":
+                    provider.on_memory_write(action, target, content, dict(metadata or {}))
+                else:
+                    provider.on_memory_write(action, target, content)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_memory_write failed: %s",
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
  on_pre_compress(messages) -> str       — extract before context compression
-  on_memory_write(action, target, content) — mirror built-in memory writes
+  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
 """

@@ -34,7 +34,7 @@ from __future__ import annotations

 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 logger = logging.getLogger(__name__)

@@ -220,12 +220,21 @@ class MemoryProvider(ABC):
          should all have ``env_var`` set and this method stays no-op).
        """

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
        """Called when the built-in memory tool writes an entry.

        action: 'add', 'replace', or 'remove'
        target: 'memory' or 'user'
        content: the entry content
+        metadata: structured provenance for the write, when available. Common
+          keys include ``write_origin``, ``execution_context``, ``session_id``,
+          ``parent_session_id``, ``platform``, and ``tool_name``.

        Use to mirror built-in memory writes to your backend.
        """
@@ -4,7 +4,9 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

+import ipaddress
 import logging
+import os
 import re
 import time
 from pathlib import Path
@@ -14,12 +16,31 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from hermes_cli.volcengine_byteplus import model_context_window
 from utils import base_url_host_matches, base_url_hostname
+
 from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)

+
+def _resolve_requests_verify() -> bool | str:
+    """Resolve SSL verify setting for `requests` calls from env vars.
+
+    The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
+    by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
+    and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
+    that a single env var can cover both `requests` and `httpx` callsites
+    inside the same process.
+
+    Returns either a filesystem path to a CA bundle, or True to defer to
+    the requests default (certifi).
+    """
+    for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"):
+        val = os.getenv(env_var)
+        if val and os.path.isfile(val):
+            return val
+    return True
+
 # Provider names that can appear as a "provider:" prefix before a model ID.
 # Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b")
 # are preserved so the full model name reaches cache lookups and server queries.
@@ -30,10 +51,8 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
-    "volcengine",
-    "volcengine-coding-plan",
-    "byteplus",
-    "byteplus-coding-plan",
+    "gmi",
+    "tencent-tokenhub",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -42,7 +61,9 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "ollama",
    "stepfun", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
+    "tencent", "tokenhub", "tencent-cloud", "tencentmaas",
    "arcee-ai", "arceeai",
+    "gmi-cloud", "gmicloud",
    "xai", "x-ai", "x.ai", "grok",
    "nvidia", "nim", "nvidia-nim", "nemotron",
    "qwen-portal",
@@ -55,6 +76,13 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


+# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
+# block, so without an explicit check Ollama reached over Tailscale (e.g.
+# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
+# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
+_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
+
+
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -82,9 +110,11 @@ _endpoint_model_metadata_cache_time: Dict[str, float] = {}
 _ENDPOINT_MODEL_CACHE_TTL = 300

 # Descending tiers for context length probing when the model is unknown.
-# We start at 128K (a safe default for most modern models) and step down
-# on context-length errors until one works.
+# We start at 256K (covers GPT-5.x, many current large-context models) and
+# step down on context-length errors until one works.  Tier[0] is also the
+# default fallback when no detection method succeeds.
 CONTEXT_PROBE_TIERS = [
+    256_000,
    128_000,
    64_000,
    32_000,
@@ -119,6 +149,11 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
+    # GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and
+    # ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own
+    # provider-aware branches (_resolve_codex_oauth_context_length + models.dev).
+    # This hardcoded value is only reached when every probe misses.
+    "gpt-5.5": 1050000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
@@ -129,10 +164,22 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
+    "gemma-4": 256000,  # Gemma 4 family
+    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
-    # DeepSeek
+    # DeepSeek — V4 family ships with a 1M context window. The legacy
+    # aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
+    # mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
+    # and inherit the same 1M window. The ``deepseek`` substring entry
+    # below remains as a 128K fallback for older / unknown DeepSeek model
+    # ids (e.g. via custom endpoints).
+    # https://api-docs.deepseek.com/zh-cn/quick_start/pricing
+    "deepseek-v4-pro": 1_000_000,
+    "deepseek-v4-flash": 1_000_000,
+    "deepseek-chat": 1_000_000,
+    "deepseek-reasoner": 1_000_000,
    "deepseek": 128000,
    # Meta
    "llama": 131072,
@@ -163,6 +210,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "grok": 131072,             # catch-all (grok-beta, unknown grok-*)
    # Kimi
    "kimi": 262144,
+    # Tencent — Hy3 Preview (Hunyuan) with 256K context window
+    "hy3-preview": 256000,
    # Nemotron — NVIDIA's open-weights series (128K context across all sizes)
    "nemotron": 131072,
    # Arcee
@@ -177,10 +226,12 @@ DEFAULT_CONTEXT_LENGTHS = {
    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
-    "XiaomiMiMo/MiMo-V2-Flash": 256000,
-    "mimo-v2-pro": 1000000,
-    "mimo-v2-omni": 256000,
-    "mimo-v2-flash": 256000,
+    "XiaomiMiMo/MiMo-V2-Flash": 262144,
+    "mimo-v2-pro": 1048576,
+    "mimo-v2.5-pro": 1048576,
+    "mimo-v2.5": 1048576,
+    "mimo-v2-omni": 262144,
+    "mimo-v2-flash": 262144,
    "zai-org/GLM-5": 202752,
 }

@@ -195,6 +246,7 @@ _CONTEXT_LENGTH_KEYS = (
    "max_seq_len",
    "n_ctx_train",
    "n_ctx",
+    "ctx_size",
 )

 _MAX_COMPLETION_KEYS = (
@@ -238,6 +290,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "chatgpt.com": "openai",
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
+    "open.bigmodel.cn": "zai",
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
@@ -260,9 +313,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "integrate.api.nvidia.com": "nvidia",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
+    "api.gmi-serving.com": "gmi",
+    "tokenhub.tencentmaas.com": "tencent-tokenhub",
    "ollama.com": "ollama-cloud",
-    "ark.cn-beijing.volces.com": "volcengine",
-    "ark.ap-southeast.bytepluses.com": "byteplus",
 }


@@ -289,7 +342,15 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
+    """Return True if base_url points to a local machine.
+
+    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
+    container-internal DNS names (``host.docker.internal`` et al.),
+    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
+    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
+    is included so remote-but-trusted Ollama boxes reached over a
+    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
+    """
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -304,14 +365,17 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges and link-local
-    import ipaddress
+    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
    try:
        addr = ipaddress.ip_address(host)
-        return addr.is_private or addr.is_loopback or addr.is_link_local
+        if addr.is_private or addr.is_loopback or addr.is_link_local:
+            return True
+        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
+            return True
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
+    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -322,6 +386,8 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
+            if first == 100 and 64 <= second <= 127:
+                return True
        except ValueError:
            pass
    return False
@@ -470,7 +536,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
        return _model_metadata_cache

    try:
-        response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
+        response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
        response.raise_for_status()
        data = response.json()

@@ -537,6 +603,7 @@ def fetch_endpoint_model_metadata(
                    server_url.rstrip("/") + "/api/v1/models",
                    headers=headers,
                    timeout=10,
+                    verify=_resolve_requests_verify(),
                )
                response.raise_for_status()
                payload = response.json()
@@ -558,8 +625,6 @@ def fetch_endpoint_model_metadata(
                        if isinstance(ctx, int) and ctx > 0:
                            context_length = ctx
                            break
-                    if context_length is None:
-                        context_length = _extract_context_length(model)
                    if context_length is not None:
                        entry["context_length"] = context_length

@@ -585,7 +650,7 @@ def fetch_endpoint_model_metadata(
    for candidate in candidates:
        url = candidate.rstrip("/") + "/models"
        try:
-            response = requests.get(url, headers=headers, timeout=10)
+            response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
            response.raise_for_status()
            payload = response.json()
            cache: Dict[str, Dict[str, Any]] = {}
@@ -616,9 +681,10 @@ def fetch_endpoint_model_metadata(
                try:
                    # Try /v1/props first (current llama.cpp); fall back to /props for older builds
                    base = candidate.rstrip("/").replace("/v1", "")
-                    props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
+                    _verify = _resolve_requests_verify()
+                    props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify)
                    if not props_resp.ok:
-                        props_resp = requests.get(base + "/props", headers=headers, timeout=5)
+                        props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify)
                    if props_resp.ok:
                        props = props_resp.json()
                        gen_settings = props.get("default_generation_settings", {})
@@ -642,6 +708,29 @@ def fetch_endpoint_model_metadata(
    return {}


+def _resolve_endpoint_context_length(
+    model: str,
+    base_url: str,
+    api_key: str = "",
+) -> Optional[int]:
+    """Resolve context length from an endpoint's live ``/models`` metadata."""
+    endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
+    matched = endpoint_metadata.get(model)
+    if not matched:
+        if len(endpoint_metadata) == 1:
+            matched = next(iter(endpoint_metadata.values()))
+        else:
+            for key, entry in endpoint_metadata.items():
+                if model in key or key in model:
+                    matched = entry
+                    break
+    if matched:
+        context_length = matched.get("context_length")
+        if isinstance(context_length, int):
+            return context_length
+    return None
+
+
 def _get_context_cache_path() -> Path:
    """Return path to the persistent context length cache file."""
    from hermes_constants import get_hermes_home
@@ -690,6 +779,22 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
    return cache.get(key)


+def _invalidate_cached_context_length(model: str, base_url: str) -> None:
+    """Drop a stale cache entry so it gets re-resolved on the next lookup."""
+    key = f"{model}@{base_url}"
+    cache = _load_context_cache()
+    if key not in cache:
+        return
+    del cache[key]
+    path = _get_context_cache_path()
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "w") as f:
+            yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
+    except Exception as e:
+        logger.debug("Failed to invalidate context length cache entry %s: %s", key, e)
+
+
 def get_next_probe_tier(current_length: int) -> Optional[int]:
    """Return the next lower probe tier, or None if already at minimum."""
    for tier in CONTEXT_PROBE_TIERS:
@@ -909,10 +1014,7 @@ def _query_local_context_length(model: str, base_url: str, api_key: str = "") ->
                                ctx = cfg.get("context_length")
                                if ctx and isinstance(ctx, (int, float)):
                                    return int(ctx)
-                            # Fall back to max_context_length (theoretical model max)
-                            ctx = m.get("max_context_length") or m.get("context_length")
-                            if ctx and isinstance(ctx, (int, float)):
-                                return int(ctx)
+                            break

            # LM Studio / vLLM / llama.cpp: try /v1/models/{model}
            resp = client.get(f"{server_url}/v1/models/{model}")
@@ -967,7 +1069,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
        }
-        resp = requests.get(url, headers=headers, timeout=10)
+        resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
        if resp.status_code != 200:
            return None
        data = resp.json()
@@ -981,6 +1083,116 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
    return None


+# Known ChatGPT Codex OAuth context windows (observed via live
+# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
+# `context_window` values, which are what Codex actually enforces — the
+# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
+# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
+#
+# Used as a fallback when the live probe fails (no token, network error).
+# Longest keys first so substring match picks the most specific entry.
+_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
+    "gpt-5.1-codex-max": 272_000,
+    "gpt-5.1-codex-mini": 272_000,
+    "gpt-5.3-codex": 272_000,
+    "gpt-5.2-codex": 272_000,
+    "gpt-5.4-mini": 272_000,
+    "gpt-5.5": 272_000,
+    "gpt-5.4": 272_000,
+    "gpt-5.2": 272_000,
+    "gpt-5": 272_000,
+}
+
+
+_codex_oauth_context_cache: Dict[str, int] = {}
+_codex_oauth_context_cache_time: float = 0.0
+_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600  # 1 hour
+
+
+def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
+    """Probe the ChatGPT Codex /models endpoint for per-slug context windows.
+
+    Codex OAuth imposes its own context limits that differ from the direct
+    OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
+    `context_window` field in each model entry is the authoritative source.
+
+    Returns a ``{slug: context_window}`` dict. Empty on failure.
+    """
+    global _codex_oauth_context_cache, _codex_oauth_context_cache_time
+    now = time.time()
+    if (
+        _codex_oauth_context_cache
+        and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
+    ):
+        return _codex_oauth_context_cache
+
+    try:
+        resp = requests.get(
+            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
+            headers={"Authorization": f"Bearer {access_token}"},
+            timeout=10,
+            verify=_resolve_requests_verify(),
+        )
+        if resp.status_code != 200:
+            logger.debug(
+                "Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
+                resp.status_code,
+            )
+            return {}
+        data = resp.json()
+    except Exception as exc:
+        logger.debug("Codex /models probe failed: %s", exc)
+        return {}
+
+    entries = data.get("models", []) if isinstance(data, dict) else []
+    result: Dict[str, int] = {}
+    for item in entries:
+        if not isinstance(item, dict):
+            continue
+        slug = item.get("slug")
+        ctx = item.get("context_window")
+        if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0:
+            result[slug.strip()] = ctx
+
+    if result:
+        _codex_oauth_context_cache = result
+        _codex_oauth_context_cache_time = now
+    return result
+
+
+def _resolve_codex_oauth_context_length(
+    model: str, access_token: str = ""
+) -> Optional[int]:
+    """Resolve a Codex OAuth model's real context window.
+
+    Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
+    have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
+    """
+    model_bare = _strip_provider_prefix(model).strip()
+    if not model_bare:
+        return None
+
+    if access_token:
+        live = _fetch_codex_oauth_context_lengths(access_token)
+        if model_bare in live:
+            return live[model_bare]
+        # Case-insensitive match in case casing drifts
+        model_lower = model_bare.lower()
+        for slug, ctx in live.items():
+            if slug.lower() == model_lower:
+                return ctx
+
+    # Fallback: longest-key-first substring match over hardcoded defaults.
+    model_lower = model_bare.lower()
+    for slug, ctx in sorted(
+        _CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True
+    ):
+        if slug in model_lower:
+            return ctx
+
+    return None
+
+
 def _resolve_nous_context_length(model: str) -> Optional[int]:
    """Resolve Nous Portal model context length via OpenRouter metadata.

@@ -1020,12 +1232,14 @@ def get_model_context_length(
    api_key: str = "",
    config_context_length: int | None = None,
    provider: str = "",
+    custom_providers: list | None = None,
 ) -> int:
    """Get the context length for a model.

    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing)
+    1b. AWS Bedrock static table (must precede custom-endpoint probe)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
@@ -1039,16 +1253,70 @@ def get_model_context_length(
    if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
        return config_context_length

+    # 0b. custom_providers per-model override — check before any probe.
+    # This closes the gap where /model switch and display paths used to fall
+    # back to 128K despite the user having a per-model context_length set.
+    # See #15779.
+    if custom_providers and base_url and model:
+        try:
+            from hermes_cli.config import get_custom_provider_context_length
+            cp_ctx = get_custom_provider_context_length(
+                model=model,
+                base_url=base_url,
+                custom_providers=custom_providers,
+            )
+            if cp_ctx:
+                return cp_ctx
+        except Exception:
+            pass  # fall through to probing
+
    # Normalise provider-prefixed model names (e.g. "local:model-name" →
    # "model-name") so cache lookups and server queries use the bare ID that
    # local servers actually know about.  Ollama "model:tag" colons are preserved.
    model = _strip_provider_prefix(model)

    # 1. Check persistent cache (model+provider)
-    if base_url:
+    # LM Studio is excluded — its loaded context length is transient (the
+    # user can reload the model with a different context_length at any time
+    # via /api/v1/models/load), so a stale cached value would mask reloads.
+    if base_url and provider != "lmstudio":
        cached = get_cached_context_length(model, base_url)
        if cached is not None:
-            return cached
+            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
+            # resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
+            # models.dev and persisted it. Codex OAuth caps at 272K for every
+            # slug, so any cached Codex entry at or above 400K is a leftover
+            # from the old resolution path. Drop it and fall through to the
+            # live /models probe in step 5 below.
+            if provider == "openai-codex" and cached >= 400_000:
+                logger.info(
+                    "Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
+                    "re-resolving via live /models probe",
+                    model, base_url, f"{cached:,}",
+                )
+                _invalidate_cached_context_length(model, base_url)
+            else:
+                return cached
+
+    # 1b. AWS Bedrock — use static context length table.
+    # Bedrock's ListFoundationModels API doesn't expose context window sizes,
+    # so we maintain a curated table in bedrock_adapter.py that reflects
+    # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
+    # Anthropic API).  This must run BEFORE the custom-endpoint probe at
+    # step 2 — bedrock-runtime.<region>.amazonaws.com is not in
+    # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
+    # fail the /models probe (Bedrock doesn't expose that shape), and fall
+    # back to the 128K default before reaching the original step 4b branch.
+    if provider == "bedrock" or (
+        base_url
+        and base_url_hostname(base_url).startswith("bedrock-runtime.")
+        and base_url_host_matches(base_url, "amazonaws.com")
+    ):
+        try:
+            from agent.bedrock_adapter import get_bedrock_context_length
+            return get_bedrock_context_length(model)
+        except ImportError:
+            pass  # boto3 not installed — fall through to generic resolution

    # 2. Active endpoint metadata for truly custom/unknown endpoints.
    # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
@@ -1056,28 +1324,16 @@ def get_model_context_length(
    # returns 128k) instead of the model's full context (400k).  models.dev
    # has the correct per-provider values and is checked at step 5+.
    if _is_custom_endpoint(base_url) and not _is_known_provider_base_url(base_url):
-        endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
-        matched = endpoint_metadata.get(model)
-        if not matched:
-            # Single-model servers: if only one model is loaded, use it
-            if len(endpoint_metadata) == 1:
-                matched = next(iter(endpoint_metadata.values()))
-            else:
-                # Fuzzy match: substring in either direction
-                for key, entry in endpoint_metadata.items():
-                    if model in key or key in model:
-                        matched = entry
-                        break
-        if matched:
-            context_length = matched.get("context_length")
-            if isinstance(context_length, int):
-                return context_length
+        context_length = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
+        if context_length is not None:
+            return context_length
        if not _is_known_provider_base_url(base_url):
            # 3. Try querying local server directly
            if is_local_endpoint(base_url):
                local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
                if local_ctx and local_ctx > 0:
-                    save_context_length(model, base_url, local_ctx)
+                    if provider != "lmstudio":
+                        save_context_length(model, base_url, local_ctx)
                    return local_ctx
            logger.info(
                "Could not detect context length for model %r at %s — "
@@ -1095,19 +1351,7 @@ def get_model_context_length(
        if ctx:
            return ctx

-    # 4b. AWS Bedrock — use static context length table.
-    # Bedrock's ListFoundationModels doesn't expose context window sizes,
-    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (
-        base_url
-        and base_url_hostname(base_url).startswith("bedrock-runtime.")
-        and base_url_host_matches(base_url, "amazonaws.com")
-    ):
-        try:
-            from agent.bedrock_adapter import get_bedrock_context_length
-            return get_bedrock_context_length(model)
-        except ImportError:
-            pass  # boto3 not installed — fall through to generic resolution
+    # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)

    # 5. Provider-aware lookups (before generic OpenRouter cache)
    # These are provider-specific and take priority over the generic OR cache,
@@ -1121,13 +1365,37 @@ def get_model_context_length(
            if inferred:
                effective_provider = inferred

+    # 5a. Copilot live /models API — max_prompt_tokens from the user's account.
+    # This catches account-specific models (e.g. claude-opus-4.6-1m) that
+    # don't exist in models.dev. For models that ARE in models.dev, this
+    # returns the provider-enforced limit which is what users can actually use.
+    if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
+        try:
+            from hermes_cli.models import get_copilot_model_context
+            ctx = get_copilot_model_context(model, api_key=api_key)
+            if ctx:
+                return ctx
+        except Exception:
+            pass  # Fall through to models.dev
+
    if effective_provider == "nous":
        ctx = _resolve_nous_context_length(model)
        if ctx:
            return ctx
-    if effective_provider in {"volcengine", "byteplus"}:
-        ctx = model_context_window(model)
-        if ctx:
+    if effective_provider == "openai-codex":
+        # Codex OAuth enforces lower context limits than the direct OpenAI
+        # API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
+        # on Codex). Authoritative source is Codex's own /models endpoint.
+        codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
+        if codex_ctx:
+            if base_url:
+                save_context_length(model, base_url, codex_ctx)
+            return codex_ctx
+    if effective_provider == "gmi" and base_url:
+        # GMI exposes authoritative context_length via /models, but it is not
+        # in models.dev yet. Preserve that higher-fidelity endpoint lookup.
+        ctx = _resolve_endpoint_context_length(model, base_url, api_key=api_key)
+        if ctx is not None:
            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
@@ -1135,14 +1403,10 @@ def get_model_context_length(
        if ctx:
            return ctx

-    ctx = model_context_window(model)
-    if ctx:
-        return ctx
-
    # 6. OpenRouter live API metadata (provider-unaware fallback)
    metadata = fetch_model_metadata()
    if model in metadata:
-        return metadata[model].get("context_length", 128000)
+        return metadata[model].get("context_length", DEFAULT_FALLBACK_CONTEXT)

    # 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
    # Only check `default_model in model` (is the key a substring of the input).
@@ -1159,7 +1423,8 @@ def get_model_context_length(
    if base_url and is_local_endpoint(base_url):
        local_ctx = _query_local_context_length(model, base_url, api_key=api_key)
        if local_ctx and local_ctx > 0:
-            save_context_length(model, base_url, local_ctx)
+            if provider != "lmstudio":
+                save_context_length(model, base_url, local_ctx)
            return local_ctx

    # 10. Default fallback — 128K
@@ -418,6 +418,9 @@ def list_provider_models(provider: str) -> List[str]:

    Returns an empty list if the provider is unknown or has no data.
    """
+    from hermes_cli.models import normalize_provider
+    provider = normalize_provider(provider) or provider
+    
    models = _get_provider_models(provider)
    if models is None:
        return []
@@ -0,0 +1,190 @@
+"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
+
+Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
+tool calling.  Requests that violate it fail with HTTP 400:
+
+    tools.function.parameters is not a valid moonshot flavored json schema,
+    details: <...>
+
+Known rejection modes documented at
+https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
+and MoonshotAI/kimi-cli#1595:
+
+1. Every property schema must carry a ``type``.  Standard JSON Schema allows
+   type to be omitted (the value is then unconstrained); Moonshot refuses.
+2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
+   the parent.  Presence of both causes "type should be defined in anyOf
+   items instead of the parent schema".
+
+The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
+handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
+applies at MCP registration time for all providers.
+"""
+
+from __future__ import annotations
+
+import copy
+from typing import Any, Dict, List
+
+# Keys whose values are maps of name → schema (not schemas themselves).
+# When we recurse, we walk the values of these maps as schemas, but we do
+# NOT apply the missing-type repair to the map itself.
+_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
+
+# Keys whose values are lists of schemas.
+_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
+
+# Keys whose values are a single nested schema.
+_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
+
+
+def _repair_schema(node: Any, is_schema: bool = True) -> Any:
+    """Recursively apply Moonshot repairs to a schema node.
+
+    ``is_schema=True`` means this dict is a JSON Schema node and gets the
+    missing-type + anyOf-parent repairs applied.  ``is_schema=False`` means
+    it's a container map (e.g. the value of ``properties``) and we only
+    recurse into its values.
+    """
+    if isinstance(node, list):
+        # Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
+        # every element is itself a schema.
+        return [_repair_schema(item, is_schema=True) for item in node]
+    if not isinstance(node, dict):
+        return node
+
+    # Walk the dict, deciding per-key whether recursion is into a schema
+    # node, a container map, or a scalar.
+    repaired: Dict[str, Any] = {}
+    for key, value in node.items():
+        if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
+            # Map of name → schema.  Don't treat the map itself as a schema
+            # (it has no type / properties of its own), but each value is.
+            repaired[key] = {
+                sub_key: _repair_schema(sub_val, is_schema=True)
+                for sub_key, sub_val in value.items()
+            }
+        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
+            repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
+        elif key in _SCHEMA_NODE_KEYS:
+            # items / not / additionalProperties: single nested schema.
+            # additionalProperties can also be a bool — leave those alone.
+            if isinstance(value, dict):
+                repaired[key] = _repair_schema(value, is_schema=True)
+            else:
+                repaired[key] = value
+        else:
+            # Scalars (description, title, format, enum values, etc.) pass through.
+            repaired[key] = value
+
+    if not is_schema:
+        return repaired
+
+    # Rule 2: when anyOf is present, type belongs only on the children.
+    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
+        repaired.pop("type", None)
+        return repaired
+
+    # Rule 1: property schemas without type need one.  $ref nodes are exempt
+    # — their type comes from the referenced definition.
+    if "$ref" in repaired:
+        return repaired
+    return _fill_missing_type(repaired)
+
+
+def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
+    """Infer a reasonable ``type`` if this schema node has none."""
+    if "type" in node and node["type"] not in (None, ""):
+        return node
+
+    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
+    # → type of first enum value, else fall back to ``string`` (safest scalar).
+    if "properties" in node or "required" in node or "additionalProperties" in node:
+        inferred = "object"
+    elif "items" in node or "prefixItems" in node:
+        inferred = "array"
+    elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
+        sample = node["enum"][0]
+        if isinstance(sample, bool):
+            inferred = "boolean"
+        elif isinstance(sample, int):
+            inferred = "integer"
+        elif isinstance(sample, float):
+            inferred = "number"
+        else:
+            inferred = "string"
+    else:
+        inferred = "string"
+
+    return {**node, "type": inferred}
+
+
+def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
+    """Normalize tool parameters to a Moonshot-compatible object schema.
+
+    Returns a deep-copied schema with the two flavored-JSON-Schema repairs
+    applied.  Input is not mutated.
+    """
+    if not isinstance(parameters, dict):
+        return {"type": "object", "properties": {}}
+
+    repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
+    if not isinstance(repaired, dict):
+        return {"type": "object", "properties": {}}
+
+    # Top-level must be an object schema
+    if repaired.get("type") != "object":
+        repaired["type"] = "object"
+    if "properties" not in repaired:
+        repaired["properties"] = {}
+
+    return repaired
+
+
+def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
+    if not tools:
+        return tools
+
+    sanitized: List[Dict[str, Any]] = []
+    any_change = False
+    for tool in tools:
+        if not isinstance(tool, dict):
+            sanitized.append(tool)
+            continue
+        fn = tool.get("function")
+        if not isinstance(fn, dict):
+            sanitized.append(tool)
+            continue
+        params = fn.get("parameters")
+        repaired = sanitize_moonshot_tool_parameters(params)
+        if repaired is not params:
+            any_change = True
+            new_fn = {**fn, "parameters": repaired}
+            sanitized.append({**tool, "function": new_fn})
+        else:
+            sanitized.append(tool)
+
+    return sanitized if any_change else tools
+
+
+def is_moonshot_model(model: str | None) -> bool:
+    """True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
+
+    Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
+    prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
+    Detection by model name covers Nous / OpenRouter / other aggregators that
+    route to Moonshot's inference, where the base URL is the aggregator's, not
+    ``api.moonshot.ai``.
+    """
+    if not model:
+        return False
+    bare = model.strip().lower()
+    # Last path segment (covers aggregator-prefixed slugs)
+    tail = bare.rsplit("/", 1)[-1]
+    if tail.startswith("kimi-") or tail == "kimi":
+        return True
+    # Vendor-prefixed forms commonly used on aggregators
+    if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
+        return True
+    return False
@@ -18,6 +18,7 @@ import os
 import tempfile
 import time
 from typing import Any, Mapping, Optional
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -118,7 +119,7 @@ def record_nous_rate_limit(
        try:
            with os.fdopen(fd, "w") as f:
                json.dump(state, f)
-            os.replace(tmp_path, path)
+            atomic_replace(tmp_path, path)
        except Exception:
            # Clean up temp file on failure
            try:
@@ -180,3 +181,145 @@ def format_remaining(seconds: float) -> str:
    h, remainder = divmod(s, 3600)
    m = remainder // 60
    return f"{h}h {m}m" if m else f"{h}h"
+
+
+# Buckets with reset windows shorter than this are treated as transient
+# (upstream jitter, secondary throttling) rather than a genuine quota
+# exhaustion worth a cross-session breaker trip.
+_MIN_RESET_FOR_BREAKER_SECONDS = 60.0
+
+
+def is_genuine_nous_rate_limit(
+    *,
+    headers: Optional[Mapping[str, str]] = None,
+    last_known_state: Optional[Any] = None,
+) -> bool:
+    """Decide whether a 429 from Nous Portal is a real account rate limit.
+
+    Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi,
+    MiMo, Hermes, ...) behind one endpoint.  A 429 can mean either:
+
+      (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is
+          exhausted — a genuine rate limit that will last until the
+          bucket resets.
+      (b) The upstream provider is out of capacity for a specific model
+          — transient, clears in seconds, and has nothing to do with
+          the caller's quota on Nous.
+
+    Tripping the cross-session breaker on (b) blocks ALL Nous requests
+    (and all models, since Nous is one provider key) for minutes even
+    though the caller's account is healthy and a different model would
+    have worked.  That's the bug users hit when DeepSeek V4 Pro 429s
+    trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro.
+
+    We tell the two apart by looking at:
+
+      1. The 429 response's own ``x-ratelimit-*`` headers.  Nous emits
+         the full suite on every response including 429s.  An exhausted
+         bucket (``remaining == 0`` with a reset window >= 60s) is
+         proof of (a).
+      2. The last-known-good rate-limit state captured by
+         ``_capture_rate_limits()`` on the previous successful
+         response.  If any bucket there was already near-exhausted with
+         a substantial reset window, the current 429 is almost
+         certainly (a) continuing from that condition.
+
+    If neither signal fires, we treat the 429 as (b): fail the single
+    request, let the retry loop or model-switch proceed, and do NOT
+    write the cross-session breaker file.
+
+    Returns True when the evidence points at (a).
+    """
+    # Signal 1: current 429 response headers.
+    state = _parse_buckets_from_headers(headers)
+    if _has_exhausted_bucket(state):
+        return True
+
+    # Signal 2: last-known-good state from a recent successful response.
+    # Accepts either a RateLimitState (dataclass from rate_limit_tracker)
+    # or a dict of bucket snapshots.
+    if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state):
+        return True
+
+    return False
+
+
+def _parse_buckets_from_headers(
+    headers: Optional[Mapping[str, str]],
+) -> dict[str, tuple[Optional[int], Optional[float]]]:
+    """Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers.
+
+    Returns empty dict when no rate-limit headers are present.
+    """
+    if not headers:
+        return {}
+
+    lowered = {k.lower(): v for k, v in headers.items()}
+    if not any(k.startswith("x-ratelimit-") for k in lowered):
+        return {}
+
+    def _maybe_int(raw: Optional[str]) -> Optional[int]:
+        if raw is None:
+            return None
+        try:
+            return int(float(raw))
+        except (TypeError, ValueError):
+            return None
+
+    def _maybe_float(raw: Optional[str]) -> Optional[float]:
+        if raw is None:
+            return None
+        try:
+            return float(raw)
+        except (TypeError, ValueError):
+            return None
+
+    result: dict[str, tuple[Optional[int], Optional[float]]] = {}
+    for tag in ("requests", "requests-1h", "tokens", "tokens-1h"):
+        remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}"))
+        reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}"))
+        if remaining is not None or reset is not None:
+            result[tag] = (remaining, reset)
+    return result
+
+
+def _has_exhausted_bucket(
+    buckets: Mapping[str, tuple[Optional[int], Optional[float]]],
+) -> bool:
+    """Return True when any bucket has remaining == 0 AND a meaningful reset window."""
+    for remaining, reset in buckets.values():
+        if remaining is None or remaining > 0:
+            continue
+        if reset is None:
+            continue
+        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
+            return True
+    return False
+
+
+def _has_exhausted_bucket_in_object(state: Any) -> bool:
+    """Check a RateLimitState-like object for an exhausted bucket.
+
+    Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets
+    exposed as attributes ``requests_min``, ``requests_hour``,
+    ``tokens_min``, ``tokens_hour``) and falls back gracefully for any
+    object missing those attributes.
+    """
+    for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"):
+        bucket = getattr(state, attr, None)
+        if bucket is None:
+            continue
+        limit = getattr(bucket, "limit", 0) or 0
+        remaining = getattr(bucket, "remaining", 0) or 0
+        # Prefer the adjusted "remaining_seconds_now" property when present;
+        # fall back to raw reset_seconds.
+        reset = getattr(bucket, "remaining_seconds_now", None)
+        if reset is None:
+            reset = getattr(bucket, "reset_seconds", 0.0) or 0.0
+        if limit <= 0:
+            continue
+        if remaining > 0:
+            continue
+        if reset >= _MIN_RESET_FOR_BREAKER_SECONDS:
+            return True
+    return False
@@ -0,0 +1,191 @@
+"""
+Contextual first-touch onboarding hints.
+
+Instead of blocking first-run questionnaires, show a one-time hint the *first*
+time a user hits a behavior fork — message-while-running, first long-running
+tool, etc.  Each hint is shown once per install (tracked in ``config.yaml`` under
+``onboarding.seen.<flag>``) and then never again.
+
+Keep this module tiny and dependency-free so both the CLI and gateway can import
+it without pulling in heavy modules.
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any, Mapping, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# -------------------------------------------------------------------------
+# Flag names (stable — used as config.yaml keys under onboarding.seen)
+# -------------------------------------------------------------------------
+
+BUSY_INPUT_FLAG = "busy_input_prompt"
+TOOL_PROGRESS_FLAG = "tool_progress_prompt"
+OPENCLAW_RESIDUE_FLAG = "openclaw_residue_cleanup"
+
+
+# -------------------------------------------------------------------------
+# Hint content
+# -------------------------------------------------------------------------
+
+def busy_input_hint_gateway(mode: str) -> str:
+    """Hint shown the first time a user messages while the agent is busy.
+
+    ``mode`` is the effective busy_input_mode that was just applied, so the
+    message matches reality ("I just interrupted…" vs "I just queued…").
+    """
+    if mode == "queue":
+        return (
+            "💡 First-time tip — I queued your message instead of interrupting. "
+            "Send `/busy interrupt` to make new messages stop the current task "
+            "immediately, or `/busy status` to check. This notice won't appear again."
+        )
+    if mode == "steer":
+        return (
+            "💡 First-time tip — I steered your message into the current run; "
+            "it will arrive after the next tool call instead of interrupting. "
+            "Send `/busy interrupt` or `/busy queue` to change this, or "
+            "`/busy status` to check. This notice won't appear again."
+        )
+    return (
+        "💡 First-time tip — I just interrupted my current task to answer you. "
+        "Send `/busy queue` to queue follow-ups for after the current task instead, "
+        "`/busy steer` to inject them mid-run without interrupting, or "
+        "`/busy status` to check. This notice won't appear again."
+    )
+
+
+def busy_input_hint_cli(mode: str) -> str:
+    """CLI version of the busy-input hint (plain text, no markdown)."""
+    if mode == "queue":
+        return (
+            "(tip) Your message was queued for the next turn. "
+            "Use /busy interrupt to make Enter stop the current run instead, "
+            "or /busy steer to inject mid-run. This tip only shows once."
+        )
+    if mode == "steer":
+        return (
+            "(tip) Your message was steered into the current run; it arrives "
+            "after the next tool call. Use /busy interrupt or /busy queue to "
+            "change this. This tip only shows once."
+        )
+    return (
+        "(tip) Your message interrupted the current run. "
+        "Use /busy queue to queue messages for the next turn instead, "
+        "or /busy steer to inject mid-run. This tip only shows once."
+    )
+
+
+def tool_progress_hint_gateway() -> str:
+    return (
+        "💡 First-time tip — that tool took a while and I'm streaming every step. "
+        "If the progress messages feel noisy, send `/verbose` to cycle modes "
+        "(all → new → off). This notice won't appear again."
+    )
+
+
+def tool_progress_hint_cli() -> str:
+    return (
+        "(tip) That tool ran for a while. Use /verbose to cycle tool-progress "
+        "display modes (all -> new -> off -> verbose). This tip only shows once."
+    )
+
+
+def openclaw_residue_hint_cli() -> str:
+    """Banner shown the first time Hermes starts and finds ``~/.openclaw/``.
+
+    OpenClaw-era config, memory, and skill paths in ``~/.openclaw/`` will
+    otherwise attract the agent (memory entries like ``~/.openclaw/config.yaml``
+    get carried forward and the agent dutifully reads them). ``hermes claw
+    cleanup`` renames the directory so the agent stops finding it.
+    """
+    return (
+        "Heads up — an OpenClaw workspace was detected at ~/.openclaw/.\n"
+        "After migrating, the agent can still get confused and read that "
+        "directory's config/memory instead of Hermes's.\n"
+        "Run `hermes claw cleanup` to archive it (rename → .openclaw.pre-migration). "
+        "This tip only shows once; rerun it any time with `hermes claw cleanup`."
+    )
+
+
+def detect_openclaw_residue(home: Optional[Path] = None) -> bool:
+    """Return True if an OpenClaw workspace directory is present in ``$HOME``.
+
+    Pure filesystem check — no side effects. ``home`` override exists for tests.
+    """
+    base = home or Path.home()
+    try:
+        return (base / ".openclaw").is_dir()
+    except OSError:
+        return False
+
+
+# -------------------------------------------------------------------------
+# State read / write
+# -------------------------------------------------------------------------
+
+def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]:
+    onboarding = config.get("onboarding") if isinstance(config, Mapping) else None
+    if not isinstance(onboarding, Mapping):
+        return {}
+    seen = onboarding.get("seen")
+    return seen if isinstance(seen, Mapping) else {}
+
+
+def is_seen(config: Mapping[str, Any], flag: str) -> bool:
+    """Return True if the user has already been shown this first-touch hint."""
+    return bool(_get_seen_dict(config).get(flag))
+
+
+def mark_seen(config_path: Path, flag: str) -> bool:
+    """Persist ``onboarding.seen.<flag> = True`` to ``config_path``.
+
+    Uses the atomic YAML writer so a concurrent process can't observe a
+    partially-written file.  Returns True on success, False on any error
+    (including the config file being absent — onboarding is best-effort).
+    """
+    try:
+        import yaml
+        from utils import atomic_yaml_write
+    except Exception as e:  # pragma: no cover — dependency issue
+        logger.debug("onboarding: failed to import yaml/utils: %s", e)
+        return False
+
+    try:
+        cfg: dict = {}
+        if config_path.exists():
+            with open(config_path, encoding="utf-8") as f:
+                cfg = yaml.safe_load(f) or {}
+        if not isinstance(cfg.get("onboarding"), dict):
+            cfg["onboarding"] = {}
+        seen = cfg["onboarding"].get("seen")
+        if not isinstance(seen, dict):
+            seen = {}
+            cfg["onboarding"]["seen"] = seen
+        if seen.get(flag) is True:
+            return True  # already marked — nothing to do
+        seen[flag] = True
+        atomic_yaml_write(config_path, cfg)
+        return True
+    except Exception as e:
+        logger.debug("onboarding: failed to mark flag %s: %s", flag, e)
+        return False
+
+
+__all__ = [
+    "BUSY_INPUT_FLAG",
+    "TOOL_PROGRESS_FLAG",
+    "OPENCLAW_RESIDUE_FLAG",
+    "busy_input_hint_gateway",
+    "busy_input_hint_cli",
+    "tool_progress_hint_gateway",
+    "tool_progress_hint_cli",
+    "openclaw_residue_hint_cli",
+    "detect_openclaw_residue",
+    "is_seen",
+    "mark_seen",
+]
@@ -141,6 +141,12 @@ DEFAULT_AGENT_IDENTITY = (
    "Be targeted and efficient in your exploration and investigations."
 )

+HERMES_AGENT_HELP_GUIDANCE = (
+    "If the user asks about configuring, setting up, or using Hermes Agent "
+    "itself, load the `hermes-agent` skill with skill_view(name='hermes-agent') "
+    "before answering. Docs: https://hermes-agent.nousresearch.com/docs"
+)
+
 MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
@@ -304,6 +310,10 @@ PLATFORM_HINTS = {
        "Standard markdown is automatically converted to Telegram format. "
        "Supported: **bold**, *italic*, ~~strikethrough~~, ||spoiler||, "
        "`inline code`, ```code blocks```, [links](url), and ## headers. "
+        "Telegram has NO table syntax — prefer bullet lists or labeled "
+        "key: value pairs over pipe tables (any tables you do emit are "
+        "auto-rewritten into row-group bullets, which you can produce "
+        "directly for cleaner output). "
        "You can send media files natively: to deliver a file to the user, "
        "include MEDIA:/absolute/path/to/file in your response. Images "
        "(.png, .jpg, .webp) appear as photos, audio (.ogg) sends as voice "
@@ -370,6 +380,32 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
+    "mattermost": (
+        "You are in a Mattermost workspace communicating with your user. "
+        "Mattermost renders standard Markdown — headings, bold, italic, code "
+        "blocks, and tables all work. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
+        "attachments, audio and video as file attachments. "
+        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
+    ),
+    "matrix": (
+        "You are in a Matrix room communicating with your user. "
+        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
+        "the adapter converts your Markdown to HTML for rich display. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
+        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
+        "and other files as downloadable attachments."
+    ),
+    "feishu": (
+        "You are in a Feishu (Lark) workspace communicating with your user. "
+        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
+        "links are supported. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
+        "inline, audio files as voice messages, and other files as attachments."
+    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -396,6 +432,29 @@ PLATFORM_HINTS = {
        "your response. Images are sent as native photos, and other files arrive as downloadable "
        "documents."
    ),
+    "yuanbao": (
+        "You are on Yuanbao (腾讯元宝), a Chinese AI assistant platform. "
+        "Markdown formatting is supported (code blocks, tables, bold/italic). "
+        "You CAN send media files natively — to deliver a file to the user, include "
+        "MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
+        "Yuanbao attachment: images (.jpg, .png, .webp, .gif) are sent as photos, "
+        "and other files (.pdf, .docx, .txt, .zip, etc.) arrive as downloadable documents "
+        "(max 50 MB). You can also include image URLs in markdown format ![alt](url) and "
+        "they will be downloaded and sent as native photos. "
+        "Do NOT tell the user you lack file-sending capability — use MEDIA: syntax "
+        "whenever a file delivery is appropriate.\n\n"
+        "Stickers (贴纸 / 表情包 / TIM face): Yuanbao has a built-in sticker catalogue. "
+        "When the user sends a sticker (you see '[emoji: 名称]' in their message) or asks "
+        "you to send/reply-with a 贴纸/表情/表情包, you MUST use the sticker tools:\n"
+        "  1. Call yb_search_sticker with a Chinese keyword (e.g. '666', '比心', '吃瓜', "
+        "     '捂脸', '合十') to discover matching sticker_ids.\n"
+        "  2. Call yb_send_sticker with the chosen sticker_id or name — this sends a real "
+        "     TIMFaceElem that renders as a native sticker in the chat.\n"
+        "DO NOT draw sticker-like PNGs with execute_code/Pillow/matplotlib and then send "
+        "them via MEDIA: or send_image_file. That produces a fake low-quality 'sticker' "
+        "image and is the WRONG path. Bare Unicode emoji in text is also not a substitute "
+        "— when a sticker is the right response, use yb_send_sticker."
+    ),
 }

 # ---------------------------------------------------------------------------
@@ -799,6 +858,11 @@ def build_skills_system_prompt(
            "Skills also encode the user's preferred approach, conventions, and quality standards "
            "for tasks like code review, planning, and testing — load them even for tasks you "
            "already know how to do, because the skill defines how it should be done here.\n"
+            "Whenever the user asks you to configure, set up, install, enable, disable, modify, "
+            "or troubleshoot Hermes Agent itself — its CLI, config, models, providers, tools, "
+            "skills, voice, gateway, plugins, or any feature — load the `hermes-agent` skill "
+            "first. It has the actual commands (e.g. `hermes config set …`, `hermes tools`, "
+            "`hermes setup`) so you don't have to guess or invent workarounds.\n"
            "If a skill has issues, fix it with skill_manage(action='patch').\n"
            "After difficult/iterative tasks, offer to save as a skill. "
            "If a skill you loaded was missing steps, had wrong commands, or needed "
@@ -56,8 +56,12 @@ _SENSITIVE_BODY_KEYS = frozenset({
 })

 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
-# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
-_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
+# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
+# mid-session.  OFF by default — user must opt in via
+# `security.redact_secrets: true` in config.yaml (bridged to this env var
+# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
+# in ~/.hermes/.env.
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")

 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
@@ -180,11 +184,59 @@ _PREFIX_RE = re.compile(
 )


+def mask_secret(
+    value: str,
+    *,
+    head: int = 4,
+    tail: int = 4,
+    floor: int = 12,
+    placeholder: str = "***",
+    empty: str = "",
+) -> str:
+    """Mask a secret for display, preserving ``head`` and ``tail`` characters.
+
+    Canonical helper for display-time redaction across Hermes — used by
+    ``hermes config``, ``hermes status``, ``hermes dump``, and anywhere
+    a secret needs to be shown truncated for debuggability while still
+    keeping the bulk hidden.
+
+    Args:
+        value:       The secret to mask. ``None``/empty returns ``empty``.
+        head:        Leading characters to preserve. Default 4.
+        tail:        Trailing characters to preserve. Default 4.
+        floor:       Values shorter than ``head + tail + floor_margin`` are
+                     fully masked (returns ``placeholder``). Default 12 —
+                     matches the existing config/status/dump convention.
+        placeholder: Value returned for too-short inputs. Default ``"***"``.
+        empty:       Value returned when ``value`` is falsy (None, ""). The
+                     caller can override this to e.g. ``color("(not set)",
+                     Colors.DIM)`` for user-facing display.
+
+    Examples:
+        >>> mask_secret("sk-proj-abcdef1234567890")
+        'sk-p...7890'
+        >>> mask_secret("short")                         # fully masked
+        '***'
+        >>> mask_secret("")                              # empty default
+        ''
+        >>> mask_secret("", empty="(not set)")           # empty override
+        '(not set)'
+        >>> mask_secret("long-token", head=6, tail=4, floor=18)
+        '***'
+    """
+    if not value:
+        return empty
+    if len(value) < floor:
+        return placeholder
+    return f"{value[:head]}...{value[-tail:]}"
+
+
 def _mask_token(token: str) -> str:
-    """Mask a token, preserving prefix for long tokens."""
-    if len(token) < 18:
+    """Mask a log token — conservative 18-char floor, preserves 6 prefix / 4 suffix."""
+    # Empty input: historically this returned "***" rather than "". Preserve.
+    if not token:
        return "***"
-    return f"{token[:6]}...{token[-4:]}"
+    return mask_secret(token, head=6, tail=4, floor=18)


 def _redact_query_string(query: str) -> str:
@@ -257,7 +309,7 @@ def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged.
-    Disabled when security.redact_secrets is false in config.yaml.
+    Disabled by default — enable via security.redact_secrets: true in config.yaml.
    """
    if text is None:
        return None
@@ -76,6 +76,7 @@ except ImportError:  # pragma: no cover
    fcntl = None  # type: ignore[assignment]

 from hermes_constants import get_hermes_home
+from utils import atomic_replace

 logger = logging.getLogger(__name__)

@@ -568,7 +569,7 @@ def save_allowlist(data: Dict[str, Any]) -> None:
        try:
            with os.fdopen(fd, "w") as fh:
                fh.write(json.dumps(data, indent=2, sort_keys=True))
-            os.replace(tmp_path, p)
+            atomic_replace(tmp_path, p)
        except Exception:
            try:
                os.unlink(tmp_path)
@@ -754,7 +755,11 @@ def _resolve_effective_accept(
    if env in ("1", "true", "yes", "on"):
        return True
    cfg_val = cfg.get("hooks_auto_accept", False)
-    return bool(cfg_val)
+    if isinstance(cfg_val, bool):
+        return cfg_val
+    if isinstance(cfg_val, str):
+        return cfg_val.strip().lower() in ("1", "true", "yes", "on")
+    return False


 # ---------------------------------------------------------------------------
@@ -1,154 +1,29 @@
-"""Shared slash command helpers for skills and built-in prompt-style modes.
+"""Shared slash command helpers for skills.

 Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
-can invoke skills via /skill-name commands and prompt-only built-ins like
-/plan.
+can invoke skills via /skill-name commands.
 """

 import json
 import logging
 import re
-import subprocess
-from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional

 from hermes_constants import display_hermes_home
+from agent.skill_preprocessing import (
+    expand_inline_shell as _expand_inline_shell,
+    load_skills_config as _load_skills_config,
+    substitute_template_vars as _substitute_template_vars,
+)

 logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
-_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 # Patterns for sanitizing skill names into clean hyphen-separated slugs.
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

-# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
-# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
-# left as-is so the user can debug them.
-_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
-
-# Matches inline shell snippets like:  !`date +%Y-%m-%d`
-# Non-greedy, single-line only — no newlines inside the backticks.
-_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
-
-# Cap inline-shell output so a runaway command can't blow out the context.
-_INLINE_SHELL_MAX_OUTPUT = 4000
-
-
-def _load_skills_config() -> dict:
-    """Load the ``skills`` section of config.yaml (best-effort)."""
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config() or {}
-        skills_cfg = cfg.get("skills")
-        if isinstance(skills_cfg, dict):
-            return skills_cfg
-    except Exception:
-        logger.debug("Could not read skills config", exc_info=True)
-    return {}
-
-
-def _substitute_template_vars(
-    content: str,
-    skill_dir: Path | None,
-    session_id: str | None,
-) -> str:
-    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
-
-    Only substitutes tokens for which a concrete value is available —
-    unresolved tokens are left in place so the author can spot them.
-    """
-    if not content:
-        return content
-
-    skill_dir_str = str(skill_dir) if skill_dir else None
-
-    def _replace(match: re.Match) -> str:
-        token = match.group(1)
-        if token == "HERMES_SKILL_DIR" and skill_dir_str:
-            return skill_dir_str
-        if token == "HERMES_SESSION_ID" and session_id:
-            return str(session_id)
-        return match.group(0)
-
-    return _SKILL_TEMPLATE_RE.sub(_replace, content)
-
-
-def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
-    """Execute a single inline-shell snippet and return its stdout (trimmed).
-
-    Failures return a short ``[inline-shell error: ...]`` marker instead of
-    raising, so one bad snippet can't wreck the whole skill message.
-    """
-    try:
-        completed = subprocess.run(
-            ["bash", "-c", command],
-            cwd=str(cwd) if cwd else None,
-            capture_output=True,
-            text=True,
-            timeout=max(1, int(timeout)),
-            check=False,
-        )
-    except subprocess.TimeoutExpired:
-        return f"[inline-shell timeout after {timeout}s: {command}]"
-    except FileNotFoundError:
-        return f"[inline-shell error: bash not found]"
-    except Exception as exc:
-        return f"[inline-shell error: {exc}]"
-
-    output = (completed.stdout or "").rstrip("\n")
-    if not output and completed.stderr:
-        output = completed.stderr.rstrip("\n")
-    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
-        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
-    return output
-
-
-def _expand_inline_shell(
-    content: str,
-    skill_dir: Path | None,
-    timeout: int,
-) -> str:
-    """Replace every !`cmd` snippet in ``content`` with its stdout.
-
-    Runs each snippet with the skill directory as CWD so relative paths in
-    the snippet work the way the author expects.
-    """
-    if "!`" not in content:
-        return content
-
-    def _replace(match: re.Match) -> str:
-        cmd = match.group(1).strip()
-        if not cmd:
-            return ""
-        return _run_inline_shell(cmd, skill_dir, timeout)
-
-    return _INLINE_SHELL_RE.sub(_replace, content)
-
-
-def build_plan_path(
-    user_instruction: str = "",
-    *,
-    now: datetime | None = None,
-) -> Path:
-    """Return the default workspace-relative markdown path for a /plan invocation.
-
-    Relative paths are intentional: file tools are task/backend-aware and resolve
-    them against the active working directory for local, docker, ssh, modal,
-    daytona, and similar terminal backends. That keeps the plan with the active
-    workspace instead of the Hermes host's global home directory.
-    """
-    slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
-    slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
-    if slug:
-        slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
-    slug = slug or "conversation-plan"
-    timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
-    return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
-
-
 def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
    """Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
    raw_identifier = (skill_identifier or "").strip()
@@ -167,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
        else:
            normalized = raw_identifier.lstrip("/")

-        loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
+        loaded_skill = json.loads(
+            skill_view(normalized, task_id=task_id, preprocess=False)
+        )
    except Exception:
        return None

@@ -345,7 +222,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    _skill_commands = {}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
-        from agent.skill_utils import get_external_skills_dirs
+        from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
        disabled = _get_disabled_skill_names()
        seen_names: set = set()

@@ -356,7 +233,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
        dirs_to_scan.extend(get_external_skills_dirs())

        for scan_dir in dirs_to_scan:
-            for skill_md in scan_dir.rglob("SKILL.md"):
+            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                    continue
                try:
@@ -452,7 +329,7 @@ def build_skill_invocation_message(

    loaded_skill, skill_dir, skill_name = loaded
    activation_note = (
-        f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want '
+        f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want '
        "you to follow its instructions. The full skill content is loaded below.]"
    )
    return _build_skill_message(
@@ -491,7 +368,7 @@ def build_preloaded_skills_prompt(

        loaded_skill, skill_dir, skill_name = loaded
        activation_note = (
-            f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill '
+            f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill '
            "preloaded. Treat its instructions as active guidance for the duration of this "
            "session unless the user overrides them.]"
        )
@@ -0,0 +1,131 @@
+"""Shared SKILL.md preprocessing helpers."""
+
+import logging
+import re
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
+# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
+# left as-is so the user can debug them.
+_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
+
+# Matches inline shell snippets like:  !`date +%Y-%m-%d`
+# Non-greedy, single-line only -- no newlines inside the backticks.
+_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
+
+# Cap inline-shell output so a runaway command can't blow out the context.
+_INLINE_SHELL_MAX_OUTPUT = 4000
+
+
+def load_skills_config() -> dict:
+    """Load the ``skills`` section of config.yaml (best-effort)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config() or {}
+        skills_cfg = cfg.get("skills")
+        if isinstance(skills_cfg, dict):
+            return skills_cfg
+    except Exception:
+        logger.debug("Could not read skills config", exc_info=True)
+    return {}
+
+
+def substitute_template_vars(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None,
+) -> str:
+    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
+
+    Only substitutes tokens for which a concrete value is available --
+    unresolved tokens are left in place so the author can spot them.
+    """
+    if not content:
+        return content
+
+    skill_dir_str = str(skill_dir) if skill_dir else None
+
+    def _replace(match: re.Match) -> str:
+        token = match.group(1)
+        if token == "HERMES_SKILL_DIR" and skill_dir_str:
+            return skill_dir_str
+        if token == "HERMES_SESSION_ID" and session_id:
+            return str(session_id)
+        return match.group(0)
+
+    return _SKILL_TEMPLATE_RE.sub(_replace, content)
+
+
+def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
+    """Execute a single inline-shell snippet and return its stdout (trimmed).
+
+    Failures return a short ``[inline-shell error: ...]`` marker instead of
+    raising, so one bad snippet can't wreck the whole skill message.
+    """
+    try:
+        completed = subprocess.run(
+            ["bash", "-c", command],
+            cwd=str(cwd) if cwd else None,
+            capture_output=True,
+            text=True,
+            timeout=max(1, int(timeout)),
+            check=False,
+        )
+    except subprocess.TimeoutExpired:
+        return f"[inline-shell timeout after {timeout}s: {command}]"
+    except FileNotFoundError:
+        return "[inline-shell error: bash not found]"
+    except Exception as exc:
+        return f"[inline-shell error: {exc}]"
+
+    output = (completed.stdout or "").rstrip("\n")
+    if not output and completed.stderr:
+        output = completed.stderr.rstrip("\n")
+    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
+        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
+    return output
+
+
+def expand_inline_shell(
+    content: str,
+    skill_dir: Path | None,
+    timeout: int,
+) -> str:
+    """Replace every !`cmd` snippet in ``content`` with its stdout.
+
+    Runs each snippet with the skill directory as CWD so relative paths in
+    the snippet work the way the author expects.
+    """
+    if "!`" not in content:
+        return content
+
+    def _replace(match: re.Match) -> str:
+        cmd = match.group(1).strip()
+        if not cmd:
+            return ""
+        return run_inline_shell(cmd, skill_dir, timeout)
+
+    return _INLINE_SHELL_RE.sub(_replace, content)
+
+
+def preprocess_skill_content(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None = None,
+    skills_cfg: dict | None = None,
+) -> str:
+    """Apply configured SKILL.md template and inline-shell preprocessing."""
+    if not content:
+        return content
+
+    cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
+    if cfg.get("template_vars", True):
+        content = substitute_template_vars(content, skill_dir, session_id)
+    if cfg.get("inline_shell", False):
+        timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
+        content = expand_inline_shell(content, skill_dir, timeout)
+    return content
@@ -200,6 +200,9 @@ def get_external_skills_dirs() -> List[Path]:
    if not isinstance(raw_dirs, list):
        return []

+    from hermes_constants import get_hermes_home
+
+    hermes_home = get_hermes_home()
    local_skills = get_skills_dir().resolve()
    seen: Set[Path] = set()
    result: List[Path] = []
@@ -210,7 +213,12 @@ def get_external_skills_dirs() -> List[Path]:
            continue
        # Expand ~ and environment variables
        expanded = os.path.expanduser(os.path.expandvars(entry))
-        p = Path(expanded).resolve()
+        p = Path(expanded)
+        # Resolve relative paths against HERMES_HOME, not cwd
+        if not p.is_absolute():
+            p = (hermes_home / p).resolve()
+        else:
+            p = p.resolve()
        if p == local_skills:
            continue
        if p in seen:
@@ -435,7 +443,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
    Excludes ``.git``, ``.github``, ``.hub`` directories.
    """
    matches = []
-    for root, dirs, files in os.walk(skills_dir):
+    for root, dirs, files in os.walk(skills_dir, followlinks=True):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
@@ -6,12 +6,18 @@ adds latency to the user-facing reply.

 import logging
 import threading
-from typing import Optional
+from typing import Callable, Optional

 from agent.auxiliary_client import call_llm

 logger = logging.getLogger(__name__)

+# Callback signature: (task_name, exception) -> None. Used to surface
+# auxiliary failures to the user through AIAgent._emit_auxiliary_failure
+# so silent-drops (e.g. OpenRouter 402 exhausting the fallback chain)
+# become visible instead of piling up as NULL session titles.
+FailureCallback = Callable[[str, BaseException], None]
+
 _TITLE_PROMPT = (
    "Generate a short, descriptive title (3-7 words) for a conversation that starts with the "
    "following exchange. The title should capture the main topic or intent. "
@@ -19,11 +25,23 @@ _TITLE_PROMPT = (
 )


-def generate_title(user_message: str, assistant_response: str, timeout: float = 30.0) -> Optional[str]:
+def generate_title(
+    user_message: str,
+    assistant_response: str,
+    timeout: float = 30.0,
+    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
+) -> Optional[str]:
    """Generate a session title from the first exchange.

-    Uses the auxiliary LLM client (cheapest/fastest available model).
+    Uses the main runtime's model when available, falling back to the
+    auxiliary LLM client (cheapest/fastest available model).
    Returns the title string or None on failure.
+
+    ``failure_callback`` is invoked with ``(task, exception)`` when the
+    auxiliary call raises — the caller typically wires this to
+    ``AIAgent._emit_auxiliary_failure`` so the user sees a warning instead
+    of silently accumulating untitled sessions.
    """
    # Truncate long messages to keep the request small
    user_snippet = user_message[:500] if user_message else ""
@@ -38,9 +56,10 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
        response = call_llm(
            task="title_generation",
            messages=messages,
-            max_tokens=30,
+            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
+            main_runtime=main_runtime,
        )
        title = (response.choices[0].message.content or "").strip()
        # Clean up: remove quotes, trailing punctuation, prefixes like "Title: "
@@ -52,7 +71,15 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
            title = title[:77] + "..."
        return title if title else None
    except Exception as e:
-        logger.debug("Title generation failed: %s", e)
+        # Log at WARNING so this shows up in agent.log without debug mode.
+        # Full detail at debug level for operators who need the stack.
+        logger.warning("Title generation failed: %s", e)
+        logger.debug("Title generation traceback", exc_info=True)
+        if failure_callback is not None:
+            try:
+                failure_callback("title generation", e)
+            except Exception:
+                logger.debug("Title generation failure_callback raised", exc_info=True)
        return None


@@ -61,6 +88,8 @@ def auto_title_session(
    session_id: str,
    user_message: str,
    assistant_response: str,
+    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> None:
    """Generate and set a session title if one doesn't already exist.

@@ -81,7 +110,9 @@ def auto_title_session(
    except Exception:
        return

-    title = generate_title(user_message, assistant_response)
+    title = generate_title(
+        user_message, assistant_response, failure_callback=failure_callback, main_runtime=main_runtime
+    )
    if not title:
        return

@@ -98,6 +129,8 @@ def maybe_auto_title(
    user_message: str,
    assistant_response: str,
    conversation_history: list,
+    failure_callback: Optional[FailureCallback] = None,
+    main_runtime: dict = None,
 ) -> None:
    """Fire-and-forget title generation after the first exchange.

@@ -119,6 +152,7 @@ def maybe_auto_title(
    thread = threading.Thread(
        target=auto_title_session,
        args=(session_db, session_id, user_message, assistant_response),
+        kwargs={"failure_callback": failure_callback, "main_runtime": main_runtime},
        daemon=True,
        name="auto-title",
    )
@@ -23,9 +23,14 @@ def get_transport(api_mode: str):
    This allows gradual migration — call sites can check for None
    and fall back to the legacy code path.
    """
-    if not _REGISTRY:
-        _discover_transports()
    cls = _REGISTRY.get(api_mode)
+    if cls is None:
+        # The registry can be partially populated when a specific transport
+        # module was imported directly (for example chat_completions before
+        # codex).  Discover on misses, not only when the registry is empty, so
+        # test/order-dependent imports do not make valid api_modes unavailable.
+        _discover_transports()
+        cls = _REGISTRY.get(api_mode)
    if cls is None:
        return None
    return cls()
@@ -78,23 +78,65 @@ class AnthropicTransport(ProviderTransport):
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize Anthropic response to NormalizedResponse.

-        kwargs:
-            strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names.
+        Parses content blocks (text, thinking, tool_use), maps stop_reason
+        to OpenAI finish_reason, and collects reasoning_details in provider_data.
        """
-        from agent.anthropic_adapter import normalize_anthropic_response_v2
+        import json
+        from agent.anthropic_adapter import _to_plain_data
+        from agent.transports.types import ToolCall

-        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
+        text_parts = []
+        reasoning_parts = []
+        reasoning_details = []
+        tool_calls = []
+
+        for block in response.content:
+            if block.type == "text":
+                text_parts.append(block.text)
+            elif block.type == "thinking":
+                reasoning_parts.append(block.thinking)
+                block_dict = _to_plain_data(block)
+                if isinstance(block_dict, dict):
+                    reasoning_details.append(block_dict)
+            elif block.type == "tool_use":
+                tool_calls.append(
+                    ToolCall(
+                        id=block.id,
+                        name=block.name,
+                        arguments=json.dumps(block.input),
+                    )
+                )
+
+        finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
+
+        provider_data = {}
+        if reasoning_details:
+            provider_data["reasoning_details"] = reasoning_details
+
+        return NormalizedResponse(
+            content="\n".join(text_parts) if text_parts else None,
+            tool_calls=tool_calls or None,
+            finish_reason=finish_reason,
+            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
+            usage=None,
+            provider_data=provider_data or None,
+        )

    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid."""
+        """Check Anthropic response structure is valid.
+
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
+        """
        if response is None:
            return False
        content_blocks = getattr(response, "content", None)
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return False
+            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
@@ -12,11 +12,65 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

+from agent.lmstudio_reasoning import resolve_lmstudio_effort
+from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage


+def _build_gemini_thinking_config(model: str, reasoning_config: dict | None) -> dict | None:
+    """Translate Hermes/OpenRouter-style reasoning config to Gemini thinkingConfig.
+
+    Gemini native/cloud-code adapters do not read ``extra_body.reasoning``.
+    They only inspect ``extra_body.thinking_config`` / ``thinkingConfig`` and
+    then request thought parts with ``includeThoughts`` enabled.
+    """
+    if reasoning_config is None or not isinstance(reasoning_config, dict):
+        return None
+
+    if reasoning_config.get("enabled") is False:
+        # Gemini can hide thought parts even when internal thinking still
+        # happens; omit thinkingLevel to avoid model-specific validation quirks.
+        return {"includeThoughts": False}
+
+    effort = str(reasoning_config.get("effort", "medium") or "medium").strip().lower()
+    if effort == "none":
+        return {"includeThoughts": False}
+
+    thinking_config: Dict[str, Any] = {"includeThoughts": True}
+    normalized_model = (model or "").strip().lower()
+    if normalized_model.startswith("google/"):
+        normalized_model = normalized_model.split("/", 1)[1]
+
+    # Gemini 2.5 accepts thinkingBudget; don't guess a budget from Hermes'
+    # coarse effort levels. ``includeThoughts`` alone is enough to surface
+    # thought parts without risking request validation errors.
+    if normalized_model.startswith("gemini-2.5-"):
+        return thinking_config
+
+    if effort not in {"minimal", "low", "medium", "high", "xhigh"}:
+        effort = "medium"
+
+    # Gemini 3 Flash documents low/medium/high thinking levels; Gemini 3 Pro
+    # is stricter (low/high). Clamp Hermes' wider effort set to what each
+    # family accepts so we never forward an undocumented level verbatim.
+    if normalized_model.startswith(("gemini-3", "gemini-3.1")):
+        if "flash" in normalized_model:
+            if effort in {"minimal", "low"}:
+                thinking_config["thinkingLevel"] = "low"
+            elif effort in {"high", "xhigh"}:
+                thinking_config["thinkingLevel"] = "high"
+            else:
+                thinking_config["thinkingLevel"] = "medium"
+        elif "pro" in normalized_model:
+            thinking_config["thinkingLevel"] = (
+                "high" if effort in {"high", "xhigh"} else "low"
+            )
+
+    return thinking_config
+
+
 class ChatCompletionsTransport(ProviderTransport):
    """Transport for api_mode='chat_completions'.

@@ -30,15 +84,15 @@ class ChatCompletionsTransport(ProviderTransport):
    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
        """Messages are already in OpenAI format — sanitize Codex leaks only.

-        Strips Codex Responses API fields (``codex_reasoning_items`` on the
-        message, ``call_id``/``response_item_id`` on tool_calls) that strict
-        chat-completions providers reject with 400/422.
+        Strips Codex Responses API fields (``codex_reasoning_items`` /
+        ``codex_message_items`` on the message, ``call_id``/``response_item_id``
+        on tool_calls) that strict chat-completions providers reject with 400/422.
        """
        needs_sanitize = False
        for msg in messages:
            if not isinstance(msg, dict):
                continue
-            if "codex_reasoning_items" in msg:
+            if "codex_reasoning_items" in msg or "codex_message_items" in msg:
                needs_sanitize = True
                break
            tool_calls = msg.get("tool_calls")
@@ -58,6 +112,7 @@ class ChatCompletionsTransport(ProviderTransport):
            if not isinstance(msg, dict):
                continue
            msg.pop("codex_reasoning_items", None)
+            msg.pop("codex_message_items", None)
            tool_calls = msg.get("tool_calls")
            if isinstance(tool_calls, list):
                for tc in tool_calls:
@@ -99,6 +154,7 @@ class ChatCompletionsTransport(ProviderTransport):
            is_github_models: bool
            is_nvidia_nim: bool
            is_kimi: bool
+            is_lmstudio: bool
            is_custom_provider: bool
            ollama_num_ctx: int | None
            # Provider routing
@@ -112,6 +168,7 @@ class ChatCompletionsTransport(ProviderTransport):
            # Reasoning
            supports_reasoning: bool
            github_reasoning_extra: dict | None
+            lmstudio_reasoning_options: list[str] | None  # raw allowed_options from /api/v1/models
            # Claude on OpenRouter/Nous max output
            anthropic_max_output: int | None
            # Extra
@@ -172,6 +229,11 @@ class ChatCompletionsTransport(ProviderTransport):

        # Tools
        if tools:
+            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
+            # tool parameters here keeps aggregator routes (Nous, OpenRouter,
+            # etc.) compatible, in addition to direct moonshot.ai endpoints.
+            if is_moonshot_model(model):
+                tools = sanitize_moonshot_tools(tools)
            api_kwargs["tools"] = tools

        # max_tokens resolution — priority: ephemeral > user > provider default
@@ -181,6 +243,7 @@ class ChatCompletionsTransport(ProviderTransport):
        anthropic_max_out = params.get("anthropic_max_output")
        is_nvidia_nim = params.get("is_nvidia_nim", False)
        is_kimi = params.get("is_kimi", False)
+        is_tokenhub = params.get("is_tokenhub", False)
        reasoning_config = params.get("reasoning_config")

        if ephemeral is not None and max_tokens_fn:
@@ -212,12 +275,40 @@ class ChatCompletionsTransport(ProviderTransport):
                        _kimi_effort = _e
                api_kwargs["reasoning_effort"] = _kimi_effort

+        # Tencent TokenHub: top-level reasoning_effort (unless thinking disabled)
+        if is_tokenhub:
+            _tokenhub_thinking_off = bool(
+                reasoning_config
+                and isinstance(reasoning_config, dict)
+                and reasoning_config.get("enabled") is False
+            )
+            if not _tokenhub_thinking_off:
+                _tokenhub_effort = "high"
+                if reasoning_config and isinstance(reasoning_config, dict):
+                    _e = (reasoning_config.get("effort") or "").strip().lower()
+                    if _e in ("low", "medium", "high"):
+                        _tokenhub_effort = _e
+                api_kwargs["reasoning_effort"] = _tokenhub_effort
+
+        # LM Studio: top-level reasoning_effort. Only emit when the model
+        # declares reasoning support via /api/v1/models capabilities (gated
+        # upstream by params["supports_reasoning"]). resolve_lmstudio_effort
+        # is shared with run_agent's summary path so both stay in sync.
+        if params.get("is_lmstudio", False) and params.get("supports_reasoning", False):
+            _lm_effort = resolve_lmstudio_effort(
+                reasoning_config,
+                params.get("lmstudio_reasoning_options"),
+            )
+            if _lm_effort is not None:
+                api_kwargs["reasoning_effort"] = _lm_effort
+
        # extra_body assembly
        extra_body: Dict[str, Any] = {}

        is_openrouter = params.get("is_openrouter", False)
        is_nous = params.get("is_nous", False)
        is_github_models = params.get("is_github_models", False)
+        provider_name = str(params.get("provider_name") or "").strip().lower()

        provider_prefs = params.get("provider_preferences")
        if provider_prefs and is_openrouter:
@@ -233,8 +324,9 @@ class ChatCompletionsTransport(ProviderTransport):
                "type": "enabled" if _kimi_thinking_enabled else "disabled",
            }

-        # Reasoning
-        if params.get("supports_reasoning", False):
+        # Reasoning. LM Studio is handled above via top-level reasoning_effort,
+        # so skip emitting extra_body.reasoning for it.
+        if params.get("supports_reasoning", False) and not params.get("is_lmstudio", False):
            if is_github_models:
                gh_reasoning = params.get("github_reasoning_extra")
                if gh_reasoning is not None:
@@ -270,6 +362,11 @@ class ChatCompletionsTransport(ProviderTransport):
        if is_qwen:
            extra_body["vl_high_resolution_images"] = True

+        if provider_name in {"gemini", "google-gemini-cli"}:
+            thinking_config = _build_gemini_thinking_config(model, reasoning_config)
+            if thinking_config:
+                extra_body["thinking_config"] = thinking_config
+
        # Merge any pre-built extra_body additions
        additions = params.get("extra_body_additions")
        if additions:
@@ -8,7 +8,7 @@ streaming, or the _run_codex_stream() call path.
 from typing import Any, Dict, List, Optional

 from agent.transports.base import ProviderTransport
-from agent.transports.types import NormalizedResponse, ToolCall, Usage
+from agent.transports.types import NormalizedResponse, ToolCall


 class ResponsesApiTransport(ProviderTransport):
@@ -120,6 +120,24 @@ class ResponsesApiTransport(ProviderTransport):
        if request_overrides:
            kwargs.update(request_overrides)

+        if is_codex_backend:
+            prompt_cache_key = kwargs.get("prompt_cache_key")
+            cache_scope_id = str(prompt_cache_key or session_id or "").strip()
+            if cache_scope_id:
+                existing_extra_headers = kwargs.get("extra_headers")
+                merged_extra_headers: Dict[str, str] = {}
+                if isinstance(existing_extra_headers, dict):
+                    merged_extra_headers.update(
+                        {
+                            str(key): str(value)
+                            for key, value in existing_extra_headers.items()
+                            if key and value is not None
+                        }
+                    )
+                merged_extra_headers["session_id"] = cache_scope_id
+                merged_extra_headers["x-client-request-id"] = cache_scope_id
+                kwargs["extra_headers"] = merged_extra_headers
+
        max_tokens = params.get("max_tokens")
        if max_tokens is not None and not is_codex_backend:
            kwargs["max_output_tokens"] = max_tokens
@@ -133,8 +151,6 @@ class ResponsesApiTransport(ProviderTransport):
        """Normalize Codex Responses API response to NormalizedResponse."""
        from agent.codex_responses_adapter import (
            _normalize_codex_response,
-            _extract_responses_message_text,
-            _extract_responses_reasoning_text,
        )

        # _normalize_codex_response returns (SimpleNamespace, finish_reason_str)
@@ -160,6 +176,8 @@ class ResponsesApiTransport(ProviderTransport):
        provider_data = {}
        if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items:
            provider_data["codex_reasoning_items"] = msg.codex_reasoning_items
+        if msg and hasattr(msg, "codex_message_items") and msg.codex_message_items:
+            provider_data["codex_message_items"] = msg.codex_message_items
        if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details:
            provider_data["reasoning_details"] = msg.reasoning_details

@@ -37,6 +37,44 @@ class ToolCall:
    arguments: str  # JSON string
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

+    # ── Backward compatibility ──────────────────────────────────
+    # The agent loop reads tc.function.name / tc.function.arguments
+    # throughout run_agent.py (45+ sites).  These properties let
+    # NormalizedResponse pass through without the _nr_to_assistant_message
+    # shim, while keeping ToolCall's canonical fields flat.
+    @property
+    def type(self) -> str:
+        return "function"
+
+    @property
+    def function(self) -> "ToolCall":
+        """Return self so tc.function.name / tc.function.arguments work."""
+        return self
+
+    @property
+    def call_id(self) -> Optional[str]:
+        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
+        return (self.provider_data or {}).get("call_id")
+
+    @property
+    def response_item_id(self) -> Optional[str]:
+        """Codex response_item_id from provider_data."""
+        return (self.provider_data or {}).get("response_item_id")
+
+    @property
+    def extra_content(self) -> Optional[Dict[str, Any]]:
+        """Gemini extra_content (thought_signature) from provider_data.
+
+        Gemini 3 thinking models attach ``extra_content`` with a
+        ``thought_signature`` to each tool call.  This signature must be
+        replayed on subsequent API calls — without it the API rejects the
+        request with HTTP 400.  The chat_completions transport stores this
+        in ``provider_data["extra_content"]``; this property exposes it so
+        ``_build_assistant_message`` can ``getattr(tc, "extra_content")``
+        uniformly.
+        """
+        return (self.provider_data or {}).get("extra_content")
+

@dataclass
 class Usage:
@@ -59,7 +97,7 @@ class NormalizedResponse:
    Response-level ``provider_data`` examples:

    * Anthropic: ``{"reasoning_details": [...]}``
-    * Codex: ``{"codex_reasoning_items": [...]}``
+    * Codex: ``{"codex_reasoning_items": [...], "codex_message_items": [...]}``
    * Others: ``None``
    """

@@ -70,6 +108,29 @@ class NormalizedResponse:
    usage: Optional[Usage] = None
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

+    # ── Backward compatibility ──────────────────────────────────
+    # The shim _nr_to_assistant_message() mapped these from provider_data.
+    # These properties let NormalizedResponse pass through directly.
+    @property
+    def reasoning_content(self) -> Optional[str]:
+        pd = self.provider_data or {}
+        return pd.get("reasoning_content")
+
+    @property
+    def reasoning_details(self):
+        pd = self.provider_data or {}
+        return pd.get("reasoning_details")
+
+    @property
+    def codex_reasoning_items(self):
+        pd = self.provider_data or {}
+        return pd.get("codex_reasoning_items")
+
+    @property
+    def codex_message_items(self):
+        pd = self.provider_data or {}
+        return pd.get("codex_message_items")
+

 # ---------------------------------------------------------------------------
 # Factory helpers
@@ -533,10 +533,22 @@ def normalize_usage(
        prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
        output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
        details = getattr(response_usage, "prompt_tokens_details", None)
+        # Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
+        # top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
+        # AI Gateway, Cline) expose when routing Claude models — without this
+        # fallback, cache writes are undercounted as 0 and cache reads can be
+        # missed when the proxy only surfaces them at the top level.
+        # Port of cline/cline#10266.
        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
+        if not cache_read_tokens:
+            cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
        cache_write_tokens = _to_int(
            getattr(details, "cache_write_tokens", 0) if details else 0
        )
+        if not cache_write_tokens:
+            cache_write_tokens = _to_int(
+                getattr(response_usage, "cache_creation_input_tokens", 0)
+            )
        input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)

    reasoning_tokens = 0
@@ -951,13 +951,9 @@ class BatchRunner:
                    root_logger.setLevel(original_level)
        
        # Aggregate all batch statistics and update checkpoint
-        all_completed_prompts = list(completed_prompts_set)
        total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
-        
+
        for batch_result in results:
-            # Add newly completed prompts
-            all_completed_prompts.extend(batch_result.get("completed_prompts", []))
-            
            # Aggregate tool stats
            for tool_name, stats in batch_result.get("tool_stats", {}).items():
                if tool_name not in total_tool_stats:
@@ -977,7 +973,7 @@ class BatchRunner:
        
        # Save final checkpoint (best-effort; incremental writes already happened)
        try:
-            checkpoint_data["completed_prompts"] = all_completed_prompts
+            checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
            self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
        except Exception as ckpt_err:
            print(f"âš ï¸  Warning: Failed to save final checkpoint: {ckpt_err}")
@@ -30,14 +30,13 @@ model:
  #   "ollama-cloud" - Ollama Cloud (requires: OLLAMA_API_KEY — https://ollama.com/settings)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
+  #   "lmstudio"     - LM Studio local server (optional: LM_API_KEY, defaults to http://127.0.0.1:1234/v1)
  #
  # Local servers (LM Studio, Ollama, vLLM, llama.cpp):
-  #   "custom"       - Any OpenAI-compatible endpoint. Set base_url below.
-  #   Aliases: "lmstudio", "ollama", "vllm", "llamacpp" all map to "custom".
-  #   Example for LM Studio:
-  #     provider: "lmstudio"
-  #     base_url: "http://localhost:1234/v1"
-  #   No API key needed — local servers typically ignore auth.
+  #   "custom"       - Any other OpenAI-compatible endpoint. Set base_url below.
+  #   Aliases: "ollama", "vllm", "llamacpp" all map to "custom".
+  #   LM Studio is first-class and uses provider: "lmstudio".
+  #   It works with both no-auth and auth-enabled server modes.
  #
  # Can also be overridden with --provider flag or HERMES_INFERENCE_PROVIDER env var.
  provider: "auto"
@@ -326,6 +325,16 @@ compression:
  # To pin a specific model/provider for compression summaries, use the
  # auxiliary section below (auxiliary.compression.provider / model).

+# =============================================================================
+# Anthropic prompt caching TTL
+# =============================================================================
+# When prompt caching is active (Claude via OpenRouter or native Anthropic),
+# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
+# "1h". Other values are ignored and "5m" is used.
+#
+prompt_caching:
+  cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
+
 # =============================================================================
 # Auxiliary Models (Advanced — Experimental)
 # =============================================================================
@@ -507,6 +516,13 @@ agent:
  # finish, then interrupts anything still running after this timeout.
  # 0 = no drain, interrupt immediately.
  # restart_drain_timeout: 60
+
+  # Max app-level retry attempts for API errors (connection drops, provider
+  # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
+  # to 1 if you use fallback providers and want fast failover on flaky
+  # primaries (default 3). The OpenAI SDK does its own low-level retries
+  # underneath this wrapper — this is the Hermes-level loop.
+  # api_max_retries: 3
  
  # Enable verbose logging
  verbose: false
@@ -589,6 +605,7 @@ platform_toolsets:
  signal: [hermes-signal]
  homeassistant: [hermes-homeassistant]
  qqbot: [hermes-qqbot]
+  yuanbao: [hermes-yuanbao]

 # =============================================================================
 # Gateway Platform Settings
@@ -773,9 +790,17 @@ code_execution:
 # Supports single tasks and batch mode (default 3 parallel, configurable).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
-  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # max_concurrent_children: 3                # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
+                                              # WARNING: values above 10 multiply API cost linearly.
+  # max_spawn_depth: 1                        # Delegation tree depth cap (range: 1-3, default: 1 = flat).
+                                              # Raise to 2 to allow workers to spawn their own subagents.
+                                              # Requires role="orchestrator" on intermediate agents.
  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
+  # subagent_auto_approve: false              # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
+                                              # or auto-approve "once" (true) instead of blocking on stdin.
+                                              # The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
+                                              # Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
+  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -799,7 +824,9 @@ delegation:
 # Display
 # =============================================================================
 display:
-  # Use compact banner mode
+  # Use compact banner mode (hides the ASCII-art banner, shows a single line).
+  #   true:  Compact single-line banner
+  #   false: Full ASCII banner with tool/skill summary (default)
  compact: false

  # Tool progress display level (CLI and gateway)
@@ -813,12 +840,19 @@ display:
  # Gateway-only natural mid-turn assistant updates.
  # When true, completed assistant status messages are sent as separate chat
  # messages. This is independent of tool_progress and gateway streaming.
+  #   true:  Send mid-turn assistant updates as separate messages (default)
+  #   false: Only send the final response
  interim_assistant_messages: true

-  # What Enter does when Hermes is already busy in the CLI.
+  # What Enter does when Hermes is already busy (CLI and gateway platforms).
  #   interrupt: Interrupt the current run and redirect Hermes (default)
  #   queue:     Queue your message for the next turn
-  # Ctrl+C always interrupts regardless of this setting.
+  #   steer:     Inject your message mid-run via /steer, arriving at the agent
+  #              after the next tool call — no interrupt, no role violation.
+  #              Falls back to 'queue' if the agent isn't running yet or if
+  #              images are attached (steer only carries text).
+  # Ctrl+C (or /stop in gateway) always interrupts regardless of this setting.
+  # Toggle at runtime with /busy <interrupt|queue|steer>.
  busy_input_mode: interrupt

  # Background process notifications (gateway/messaging only).
@@ -834,17 +868,22 @@ display:
  # Play terminal bell when agent finishes a response.
  # Useful for long-running tasks — your terminal will ding when the agent is done.
  # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound.
+  #   true:  Ring the terminal bell on each response
+  #   false: Silent (default)
  bell_on_complete: false

  # Show model reasoning/thinking before each response.
  # When enabled, a dim box shows the model's thought process above the response.
  # Toggle at runtime with /reasoning show or /reasoning hide.
+  #   true:  Show the reasoning box
+  #   false: Hide reasoning (default)
  show_reasoning: false

  # Stream tokens to the terminal as they arrive instead of waiting for the
  # full response. The response box opens on first token and text appears
  # line-by-line. Tool calls are still captured silently.
-  # Stream tokens to the terminal in real-time. Disable to wait for full responses.
+  #   true:  Stream tokens as they arrive (default)
+  #   false: Wait for the full response before rendering
  streaming: true

  # ───────────────────────────────────────────────────────────────────────────
@@ -854,10 +893,15 @@ display:
  # response box label, and branding text. Change at runtime with /skin <name>.
  #
  # Built-in skins:
-  #   default  — Classic Hermes gold/kawaii
-  #   ares     — Crimson/bronze war-god theme with spinner wings
-  #   mono     — Clean grayscale monochrome
-  #   slate    — Cool blue developer-focused
+  #   default        — Classic Hermes gold/kawaii
+  #   ares           — Crimson/bronze war-god theme with spinner wings
+  #   mono           — Clean grayscale monochrome
+  #   slate          — Cool blue developer-focused
+  #   daylight       — Bright light-mode theme
+  #   warm-lightmode — Warm paper-tone light-mode theme
+  #   poseidon       — Sea-green/teal Olympian theme
+  #   sisyphus       — Earthy stone-and-moss theme
+  #   charizard      — Fiery orange dragon theme
  #
  # Custom skins: drop a YAML file in ~/.hermes/skins/<name>.yaml
  # Schema (all fields optional, missing values inherit from default):
@@ -883,7 +927,7 @@ display:
  #     agent_name: "My Agent"               # Banner title and branding
  #     welcome: "Welcome message"           # Shown at CLI startup
  #     response_label: " ⚔ Agent "         # Response box header label
-  #     prompt_symbol: "⚔ ❯ "              # Prompt symbol
+  #     prompt_symbol: "⚔"                  # Prompt symbol (bare token; renderers add trailing space)
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default
@@ -16,11 +16,12 @@ import uuid
 from datetime import datetime, timedelta
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Optional, Dict, List, Any
+from typing import Optional, Dict, List, Any, Union

 logger = logging.getLogger(__name__)

 from hermes_time import now as _hermes_now
+from utils import atomic_replace

 try:
    from croniter import croniter
@@ -311,6 +312,12 @@ def compute_next_run(schedule: Dict[str, Any], last_run_at: Optional[str] = None

    elif schedule["kind"] == "cron":
        if not HAS_CRONITER:
+            logger.warning(
+                "Cannot compute next run for cron schedule %r: 'croniter' "
+                "is not installed. Install the 'cron' extra (pip install "
+                "'hermes-agent[cron]') to re-enable recurring cron jobs.",
+                schedule.get("expr"),
+            )
            return None
        cron = croniter(schedule["expr"], now)
        next_run = cron.get_next(datetime)
@@ -361,7 +368,7 @@ def save_jobs(jobs: List[Dict[str, Any]]):
            json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, JOBS_FILE)
+        atomic_replace(tmp_path, JOBS_FILE)
        _secure_file(JOBS_FILE)
    except BaseException:
        try:
@@ -371,6 +378,39 @@ def save_jobs(jobs: List[Dict[str, Any]]):
        raise


+def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
+    """Normalize and validate a cron job workdir.
+
+    Rules:
+      - Empty / None → None (feature off, preserves old behaviour).
+      - ``~`` is expanded.  Relative paths are rejected — cron jobs run detached
+        from any shell cwd, so relative paths have no stable meaning.
+      - The path must exist and be a directory at create/update time.  We do
+        NOT re-check at run time (a user might briefly unmount the dir; the
+        scheduler will just fall back to old behaviour with a logged warning).
+
+    Returns the absolute path string, or None when disabled.
+    Raises ValueError on invalid input.
+    """
+    if workdir is None:
+        return None
+    raw = str(workdir).strip()
+    if not raw:
+        return None
+    expanded = Path(raw).expanduser()
+    if not expanded.is_absolute():
+        raise ValueError(
+            f"Cron workdir must be an absolute path (got {raw!r}). "
+            f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous."
+        )
+    resolved = expanded.resolve()
+    if not resolved.exists():
+        raise ValueError(f"Cron workdir does not exist: {resolved}")
+    if not resolved.is_dir():
+        raise ValueError(f"Cron workdir is not a directory: {resolved}")
+    return str(resolved)
+
+
 def create_job(
    prompt: str,
    schedule: str,
@@ -384,6 +424,9 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
+    context_from: Optional[Union[str, List[str]]] = None,
+    enabled_toolsets: Optional[List[str]] = None,
+    workdir: Optional[str] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
@@ -403,6 +446,18 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
+        context_from: Optional job ID (or list of job IDs) whose most recent output
+                      is injected into the prompt as context before each run.
+                      Useful for chaining cron jobs: job A finds data, job B processes it.
+        enabled_toolsets: Optional list of toolset names to restrict the agent to.
+                          When set, only tools from these toolsets are loaded, reducing
+                          token overhead. When omitted, all default tools are loaded.
+        workdir: Optional absolute path.  When set, the job runs as if launched
+                from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
+                that directory are injected into the system prompt, and the
+                terminal/file/code_exec tools use it as their working directory
+                (via TERMINAL_CWD).  When unset, the old behaviour is preserved
+                (no context files injected, tools use the scheduler's cwd).

    Returns:
        The created job dict
@@ -433,6 +488,17 @@ def create_job(
    normalized_base_url = normalized_base_url or None
    normalized_script = str(script).strip() if isinstance(script, str) else None
    normalized_script = normalized_script or None
+    normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
+    normalized_toolsets = normalized_toolsets or None
+    normalized_workdir = _normalize_workdir(workdir)
+
+    # Normalize context_from: accept str or list of str, store as list or None
+    if isinstance(context_from, str):
+        context_from = [context_from.strip()] if context_from.strip() else None
+    elif isinstance(context_from, list):
+        context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
+    else:
+        context_from = None

    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
@@ -445,6 +511,7 @@ def create_job(
        "provider": normalized_provider,
        "base_url": normalized_base_url,
        "script": normalized_script,
+        "context_from": context_from,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -464,6 +531,8 @@ def create_job(
        # Delivery configuration
        "deliver": deliver,
        "origin": origin,  # Tracks where job was created for "origin" delivery
+        "enabled_toolsets": normalized_toolsets,
+        "workdir": normalized_workdir,
    }

    jobs = load_jobs()
@@ -497,6 +566,15 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
        if job["id"] != job_id:
            continue

+        # Validate / normalize workdir if present in updates.  Empty string or
+        # None both mean "clear the field" (restore old behaviour).
+        if "workdir" in updates:
+            _wd = updates["workdir"]
+            if _wd in (None, "", False):
+                updates["workdir"] = None
+            else:
+                updates["workdir"] = _normalize_workdir(_wd)
+
        updated = _apply_skill_fields({**job, **updates})
        schedule_changed = "schedule" in updates

@@ -627,10 +705,32 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None,
                # Compute next run
                job["next_run_at"] = compute_next_run(job["schedule"], now)

-                # If no next run (one-shot completed), disable
+                # If no next run, decide whether this is terminal completion
+                # (one-shot) or a transient failure (recurring schedule couldn't
+                # compute — e.g. 'croniter' missing from the runtime env).
+                # Recurring jobs must NEVER be silently disabled: that turns a
+                # missing runtime dep into "job completed" and the user's
+                # schedule quietly goes off. See issue #16265.
                if job["next_run_at"] is None:
-                    job["enabled"] = False
-                    job["state"] = "completed"
+                    kind = job.get("schedule", {}).get("kind")
+                    if kind in ("cron", "interval"):
+                        job["state"] = "error"
+                        if not job.get("last_error"):
+                            job["last_error"] = (
+                                "Failed to compute next run for recurring "
+                                "schedule (is the 'croniter' package "
+                                "installed in the gateway's Python env?)"
+                            )
+                        logger.error(
+                            "Job '%s' (%s) could not compute next_run_at; "
+                            "leaving enabled and marking state=error so the "
+                            "job is not silently disabled.",
+                            job.get("name", job["id"]),
+                            kind,
+                        )
+                    else:
+                        job["enabled"] = False
+                        job["state"] = "completed"
                elif job.get("state") != "paused":
                    job["state"] = "scheduled"

@@ -764,7 +864,7 @@ def save_job_output(job_id: str, output: str):
            f.write(output)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, output_file)
+        atomic_replace(tmp_path, output_file)
        _secure_file(output_file)
    except BaseException:
        try:
@@ -40,13 +40,44 @@ from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)

+
+def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
+    """Resolve the toolset list for a cron job.
+
+    Precedence:
+    1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
+       Keeps the agent's job-scoped toolset override intact — #6130.
+    2. Per-platform ``hermes tools`` config for the ``cron`` platform.
+       Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
+       so users can gate cron toolsets globally without recreating every job.
+    3. ``None`` on any lookup failure — AIAgent loads the full default set
+       (legacy behavior before this change, preserved as the safety net).
+
+    _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
+    ``_get_platform_tools`` for unconfigured platforms, so fresh installs
+    get cron WITHOUT ``moa`` by default (issue reported by Norbert —
+    surprise $4.63 run).
+    """
+    per_job = job.get("enabled_toolsets")
+    if per_job:
+        return per_job
+    try:
+        from hermes_cli.tools_config import _get_platform_tools  # lazy: avoid heavy import at cron module load
+        return sorted(_get_platform_tools(cfg or {}, "cron"))
+    except Exception as exc:
+        logger.warning(
+            "Cron toolset resolution failed, falling back to full default toolset: %s",
+            exc,
+        )
+        return None
+
 # Valid delivery platforms — used to validate user-supplied platform names
 # in cron delivery targets, preventing env var enumeration via crafted names.
 _KNOWN_DELIVERY_PLATFORMS = frozenset({
    "telegram", "discord", "slack", "whatsapp", "signal",
    "matrix", "mattermost", "homeassistant", "dingtalk", "feishu",
    "wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles",
-    "qqbot",
+    "qqbot", "yuanbao",
 })

 # Platforms that support a configured cron/notification home target, mapped to
@@ -167,7 +198,9 @@ def _resolve_single_delivery_target(job: dict, deliver_value: str) -> Optional[d
            if resolved:
                parsed_chat_id, parsed_thread_id, resolved_is_explicit = _parse_target_ref(platform_key, resolved)
                if resolved_is_explicit:
-                    chat_id, thread_id = parsed_chat_id, parsed_thread_id
+                    chat_id = parsed_chat_id
+                    if parsed_thread_id is not None:
+                        thread_id = parsed_thread_id
                else:
                    chat_id = resolved
        except Exception:
@@ -306,6 +339,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
        "sms": Platform.SMS,
        "bluebubbles": Platform.BLUEBUBBLES,
        "qqbot": Platform.QQBOT,
+        "yuanbao": Platform.YUANBAO,
    }

    # Optionally wrap the content with a header/footer so the user knows this
@@ -640,10 +674,51 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
                f"{prompt}"
            )

+    # Inject output from referenced cron jobs as context.
+    context_from = job.get("context_from")
+    if context_from:
+        from cron.jobs import OUTPUT_DIR
+        if isinstance(context_from, str):
+            context_from = [context_from]
+        for source_job_id in context_from:
+            # Guard against path traversal — valid job IDs are 12-char hex strings
+            if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
+                logger.warning("context_from: skipping invalid job_id %r", source_job_id)
+                continue
+            try:
+                job_output_dir = OUTPUT_DIR / source_job_id
+                if not job_output_dir.exists():
+                    continue  # silent skip — no output yet
+                output_files = sorted(
+                    job_output_dir.glob("*.md"),
+                    key=lambda f: f.stat().st_mtime,
+                    reverse=True,
+                )
+                if not output_files:
+                    continue  # silent skip — no output yet
+                latest_output = output_files[0].read_text(encoding="utf-8").strip()
+                # Truncate to 8K characters to avoid prompt bloat
+                _MAX_CONTEXT_CHARS = 8000
+                if len(latest_output) > _MAX_CONTEXT_CHARS:
+                    latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
+                if latest_output:
+                    prompt = (
+                        f"## Output from job '{source_job_id}'\n"
+                        "The following is the most recent output from a preceding "
+                        "cron job. Use it as context for your analysis.\n\n"
+                        f"```\n{latest_output}\n```\n\n"
+                        f"{prompt}"
+                    )
+                else:
+                    continue  # silent skip — empty output
+            except (OSError, PermissionError) as e:
+                logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
+                # silent skip — do not pollute the prompt with error messages
+
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
-        "[SYSTEM: You are running as a scheduled cron job. "
+        "[IMPORTANT: You are running as a scheduled cron job. "
        "DELIVERY: Your final response will be automatically delivered "
        "to the user — do NOT use send_message or try to deliver "
        "the output yourself. Just produce your report/output as your "
@@ -679,7 +754,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
            parts.append("")
        parts.extend(
            [
-                f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
+                f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]',
                "",
                content,
            ]
@@ -687,7 +762,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:

    if skipped:
        notice = (
-            f"[SYSTEM: The following skill(s) were listed for this job but could not be found "
+            f"[IMPORTANT: The following skill(s) were listed for this job but could not be found "
            f"and were skipped: {', '.join(skipped)}. "
            f"Start your response with a brief notice so the user is aware, e.g.: "
            f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']"
@@ -749,6 +824,8 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
    logger.info("Prompt: %s", prompt[:100])

+    agent = None
+
    # Mark this as a cron session so the approval system can apply cron_mode.
    # This env var is process-wide and persists for the lifetime of the
    # scheduler process — every job this process runs is a cron job.
@@ -764,6 +841,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        chat_name=origin.get("chat_name", "") if origin else "",
    )

+    # Per-job working directory.  When set (and validated at create/update
+    # time), we point TERMINAL_CWD at it so:
+    #   - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md /
+    #     .cursorrules from the job's project dir, AND
+    #   - the terminal, file, and code-exec tools run commands from there.
+    #
+    # tick() serializes workdir-jobs outside the parallel pool, so mutating
+    # os.environ["TERMINAL_CWD"] here is safe for those jobs.  For workdir-less
+    # jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
+    # (skip_context_files=True, tools use whatever cwd the scheduler has).
+    _job_workdir = (job.get("workdir") or "").strip() or None
+    if _job_workdir and not Path(_job_workdir).is_dir():
+        # Directory was removed between create-time validation and now.  Log
+        # and drop back to old behaviour rather than crashing the job.
+        logger.warning(
+            "Job '%s': configured workdir %r no longer exists — running without it",
+            job_id, _job_workdir,
+        )
+        _job_workdir = None
+    _prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_")
+    if _job_workdir:
+        os.environ["TERMINAL_CWD"] = _job_workdir
+        logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
+
    try:
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
@@ -840,6 +941,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            resolve_runtime_provider,
            format_runtime_provider_error,
        )
+        from hermes_cli.auth import AuthError
        try:
            runtime_kwargs = {
                "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
@@ -847,6 +949,28 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            if job.get("base_url"):
                runtime_kwargs["explicit_base_url"] = job.get("base_url")
            runtime = resolve_runtime_provider(**runtime_kwargs)
+        except AuthError as auth_exc:
+            # Primary provider auth failed — try fallback chain before giving up.
+            logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
+            fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
+            fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
+            runtime = None
+            for entry in fb_list:
+                if not isinstance(entry, dict):
+                    continue
+                try:
+                    fb_kwargs = {"requested": entry.get("provider")}
+                    if entry.get("base_url"):
+                        fb_kwargs["explicit_base_url"] = entry["base_url"]
+                    if entry.get("api_key"):
+                        fb_kwargs["explicit_api_key"] = entry["api_key"]
+                    runtime = resolve_runtime_provider(**fb_kwargs)
+                    logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider"))
+                    break
+                except Exception as fb_exc:
+                    logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc)
+            if runtime is None:
+                raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
        except Exception as exc:
            message = format_runtime_provider_error(exc)
            raise RuntimeError(message) from exc
@@ -886,9 +1010,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            providers_ignored=pr.get("ignore"),
            providers_order=pr.get("order"),
            provider_sort=pr.get("sort"),
+            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            skip_context_files=True,  # Don't inject SOUL.md/AGENTS.md from scheduler cwd
+            # When a workdir is configured, inject AGENTS.md / CLAUDE.md /
+            # .cursorrules from that directory; otherwise preserve the old
+            # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
+            skip_context_files=not bool(_job_workdir),
            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
@@ -972,6 +1100,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"— last activity: {_last_desc}"
            )

+        # Guard against non-dict returns from run_conversation under error conditions
+        if not isinstance(result, dict):
+            raise RuntimeError(
+                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
+            )
+
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -1021,6 +1155,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        return False, output, "", error_msg

    finally:
+        # Restore TERMINAL_CWD to whatever it was before this job ran.  We
+        # only ever mutate it when the job has a workdir; see the setup block
+        # at the top of run_job for the serialization guarantee.
+        if _job_workdir:
+            if _prior_terminal_cwd == "_UNSET_":
+                os.environ.pop("TERMINAL_CWD", None)
+            else:
+                os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
        # Clean up ContextVar session/delivery state for this job.
        clear_session_vars(_ctx_tokens)
        if _session_db:
@@ -1032,6 +1174,24 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                _session_db.close()
            except (Exception, KeyboardInterrupt) as e:
                logger.debug("Job '%s': failed to close SQLite session store: %s", job_id, e)
+        # Release subprocesses, terminal sandboxes, browser daemons, and the
+        # main OpenAI/httpx client held by this ephemeral cron agent. Without
+        # this, a gateway that ticks cron every N minutes leaks fds per job
+        # until it hits EMFILE (#10200 / "too many open files").
+        try:
+            if agent is not None:
+                agent.close()
+        except (Exception, KeyboardInterrupt) as e:
+            logger.debug("Job '%s': failed to close agent resources: %s", job_id, e)
+        # Each cron run spins up a short-lived worker thread whose event loop
+        # dies as soon as the ``ThreadPoolExecutor`` shuts down. Any async
+        # httpx clients cached under that loop are now unusable — reap them
+        # so their transports don't accumulate in the process-global cache.
+        try:
+            from agent.auxiliary_client import cleanup_stale_async_clients
+            cleanup_stale_async_clients()
+        except Exception as e:
+            logger.debug("Job '%s': failed to reap stale auxiliary clients: %s", job_id, e)


 def tick(verbose: bool = True, adapters=None, loop=None) -> int:
@@ -1148,14 +1308,39 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                mark_job_run(job["id"], False, str(e))
                return False

-        # Run all due jobs concurrently, each in its own ContextVar copy
-        # so session/delivery state stays isolated per-thread.
-        with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
-            _futures = []
-            for job in due_jobs:
-                _ctx = contextvars.copy_context()
-                _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
-            _results = [f.result() for f in _futures]
+        # Partition due jobs: those with a per-job workdir mutate
+        # os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
+        # so they MUST run sequentially to avoid corrupting each other.  Jobs
+        # without a workdir leave env untouched and stay parallel-safe.
+        workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
+        parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
+
+        _results: list = []
+
+        # Sequential pass for workdir jobs.
+        for job in workdir_jobs:
+            _ctx = contextvars.copy_context()
+            _results.append(_ctx.run(_process_job, job))
+
+        # Parallel pass for the rest — same behaviour as before.
+        if parallel_jobs:
+            with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
+                _futures = []
+                for job in parallel_jobs:
+                    _ctx = contextvars.copy_context()
+                    _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
+                _results.extend(f.result() for f in _futures)
+
+        # Best-effort sweep of MCP stdio subprocesses that survived their
+        # session teardown during this tick.  Runs AFTER every job has
+        # finished so active sessions (including live user chats) are
+        # never touched — only PIDs explicitly detected as orphans in
+        # tools.mcp_tool._run_stdio's finally block are reaped.
+        try:
+            from tools.mcp_tool import _kill_orphaned_mcp_children
+            _kill_orphaned_mcp_children()
+        except Exception as _e:
+            logger.debug("Post-tick MCP orphan cleanup failed: %s", _e)

        return sum(_results)
    finally:
@@ -0,0 +1,52 @@
+#
+# docker-compose.yml for Hermes Agent
+#
+# Usage:
+#   HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
+#
+# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
+# files created inside the container stay readable/writable on the host.
+# The entrypoint remaps the internal `hermes` user to these values via
+# usermod/groupmod + gosu.
+#
+# Security notes:
+#   - The dashboard service binds to 127.0.0.1 by default. It stores API
+#     keys; exposing it on LAN without auth is unsafe. If you want remote
+#     access, use an SSH tunnel or put it behind a reverse proxy that
+#     adds authentication — do NOT pass --insecure --host 0.0.0.0.
+#   - The gateway's API server is off unless you uncomment API_SERVER_KEY
+#     and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
+#     this on an internet-facing host.
+#
+services:
+  gateway:
+    build: .
+    image: hermes-agent
+    container_name: hermes
+    restart: unless-stopped
+    network_mode: host
+    volumes:
+      - ~/.hermes:/opt/data
+    environment:
+      - HERMES_UID=${HERMES_UID:-10000}
+      - HERMES_GID=${HERMES_GID:-10000}
+      # To expose the OpenAI-compatible API server beyond localhost,
+      # uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
+      # - API_SERVER_HOST=0.0.0.0
+      # - API_SERVER_KEY=${API_SERVER_KEY}
+    command: ["gateway", "run"]
+
+  dashboard:
+    image: hermes-agent
+    container_name: hermes-dashboard
+    restart: unless-stopped
+    network_mode: host
+    depends_on:
+      - gateway
+    volumes:
+      - ~/.hermes:/opt/data
+    environment:
+      - HERMES_UID=${HERMES_UID:-10000}
+      - HERMES_GID=${HERMES_GID:-10000}
+    # Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
+    command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
@@ -22,9 +22,18 @@ if [ "$(id -u)" = "0" ]; then
        groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
    fi

+    # Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
+    # files created by previous runs (under the old UID) become inaccessible.
+    # Always chown -R when UID was remapped; otherwise only if top-level is wrong.
    actual_hermes_uid=$(id -u hermes)
-    if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
-        echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
+    needs_chown=false
+    if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
+        needs_chown=true
+    elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
+        needs_chown=true
+    fi
+    if [ "$needs_chown" = true ]; then
+        echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
        # In rootless Podman the container's "root" is mapped to an unprivileged
        # host UID — chown will fail.  That's fine: the volume is already owned
        # by the mapped user on the host side.
@@ -32,6 +41,15 @@ if [ "$(id -u)" = "0" ]; then
            echo "Warning: chown failed (rootless container?) — continuing anyway"
    fi

+    # Ensure config.yaml is readable by the hermes runtime user even if it was
+    # edited on the host after initial ownership setup. Must run here (as root)
+    # rather than after the gosu drop, otherwise a non-root caller like
+    # `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865).
+    if [ -f "$HERMES_HOME/config.yaml" ]; then
+        chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true
+        chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true
+    fi
+
    echo "Dropping root privileges"
    exec gosu hermes "$0" "$@"
 fi
@@ -68,4 +86,19 @@ if [ -d "$INSTALL_DIR/skills" ]; then
    python3 "$INSTALL_DIR/tools/skills_sync.py"
 fi

+# Final exec: two supported invocation patterns.
+#
+#   docker run <image>                 -> exec `hermes` with no args (legacy default)
+#   docker run <image> chat -q "..."   -> exec `hermes chat -q "..."` (legacy wrap)
+#   docker run <image> sleep infinity  -> exec `sleep infinity` directly
+#   docker run <image> bash            -> exec `bash` directly
+#
+# If the first positional arg resolves to an executable on PATH, we assume the
+# caller wants to run it directly (needed by the launcher which runs long-lived
+# `sleep infinity` sandbox containers — see tools/environments/docker.py).
+# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
+# preserving the documented `docker run <image> <subcommand>` behavior.
+if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
+    exec "$@"
+fi
 exec hermes "$@"
@@ -36,6 +36,7 @@

      imports = [
        ./nix/packages.nix
+        ./nix/overlays.nix
        ./nix/nixosModules.nix
        ./nix/checks.nix
        ./nix/devShell.nix
@@ -1,85 +0,0 @@
-"""Built-in boot-md hook — run ~/.hermes/BOOT.md on gateway startup.
-
-This hook is always registered. It silently skips if no BOOT.md exists.
-To activate, create ``~/.hermes/BOOT.md`` with instructions for the
-agent to execute on every gateway restart.
-
-Example BOOT.md::
-
-    # Startup Checklist
-
-    1. Check if any cron jobs failed overnight
-    2. Send a status update to Discord #general
-    3. If there are errors in /opt/app/deploy.log, summarize them
-
-The agent runs in a background thread so it doesn't block gateway
-startup. If nothing needs attention, it replies with [SILENT] to
-suppress delivery.
-"""
-
-import logging
-import threading
-
-logger = logging.getLogger("hooks.boot-md")
-
-from hermes_constants import get_hermes_home
-HERMES_HOME = get_hermes_home()
-BOOT_FILE = HERMES_HOME / "BOOT.md"
-
-
-def _build_boot_prompt(content: str) -> str:
-    """Wrap BOOT.md content in a system-level instruction."""
-    return (
-        "You are running a startup boot checklist. Follow the BOOT.md "
-        "instructions below exactly.\n\n"
-        "---\n"
-        f"{content}\n"
-        "---\n\n"
-        "Execute each instruction. If you need to send a message to a "
-        "platform, use the send_message tool.\n"
-        "If nothing needs attention and there is nothing to report, "
-        "reply with ONLY: [SILENT]"
-    )
-
-
-def _run_boot_agent(content: str) -> None:
-    """Spawn a one-shot agent session to execute the boot instructions."""
-    try:
-        from run_agent import AIAgent
-
-        prompt = _build_boot_prompt(content)
-        agent = AIAgent(
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-            max_iterations=20,
-        )
-        result = agent.run_conversation(prompt)
-        response = result.get("final_response", "")
-        if response and "[SILENT]" not in response:
-            logger.info("boot-md completed: %s", response[:200])
-        else:
-            logger.info("boot-md completed (nothing to report)")
-    except Exception as e:
-        logger.error("boot-md agent failed: %s", e)
-
-
-async def handle(event_type: str, context: dict) -> None:
-    """Gateway startup handler — run BOOT.md if it exists."""
-    if not BOOT_FILE.exists():
-        return
-
-    content = BOOT_FILE.read_text(encoding="utf-8").strip()
-    if not content:
-        return
-
-    logger.info("Running BOOT.md (%d chars)", len(content))
-
-    # Run in a background thread so we don't block gateway startup.
-    thread = threading.Thread(
-        target=_run_boot_agent,
-        args=(content,),
-        name="boot-md",
-        daemon=True,
-    )
-    thread.start()
@@ -57,7 +57,7 @@ def _session_entry_name(origin: Dict[str, Any]) -> str:
 # Build / refresh
 # ---------------------------------------------------------------------------

-def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
+async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
    """
    Build a channel directory from connected platform adapters and session data.

@@ -72,7 +72,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
            if platform == Platform.DISCORD:
                platforms["discord"] = _build_discord(adapter)
            elif platform == Platform.SLACK:
-                platforms["slack"] = _build_slack(adapter)
+                platforms["slack"] = await _build_slack(adapter)
        except Exception as e:
            logger.warning("Channel directory: failed to build %s: %s", platform.value, e)

@@ -136,21 +136,66 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
    return channels


-def _build_slack(adapter) -> List[Dict[str, str]]:
-    """List Slack channels the bot has joined."""
-    # Slack adapter may expose a web client
-    client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None)
-    if not client:
+async def _build_slack(adapter) -> List[Dict[str, Any]]:
+    """List Slack channels the bot has joined across all workspaces.
+
+    Uses ``users.conversations`` against each workspace's web client. Pulls
+    public + private channels the bot is a member of, then merges in DMs
+    discovered from session history (IMs aren't useful to enumerate
+    proactively).
+    """
+    team_clients = getattr(adapter, "_team_clients", None) or {}
+    if not team_clients:
        return _build_from_sessions("slack")

-    try:
-        from tools.send_message_tool import _send_slack  # noqa: F401
-        # Use the Slack Web API directly if available
-    except Exception:
-        pass
+    channels: List[Dict[str, Any]] = []
+    seen_ids: set = set()

-    # Fallback to session data
-    return _build_from_sessions("slack")
+    for team_id, client in team_clients.items():
+        try:
+            cursor: Optional[str] = None
+            for _page in range(20):  # safety cap on pagination
+                response = await client.users_conversations(
+                    types="public_channel,private_channel",
+                    exclude_archived=True,
+                    limit=200,
+                    cursor=cursor,
+                )
+                if not response.get("ok"):
+                    logger.warning(
+                        "Channel directory: users.conversations not ok for team %s: %s",
+                        team_id,
+                        response.get("error", "unknown"),
+                    )
+                    break
+                for ch in response.get("channels", []):
+                    cid = ch.get("id")
+                    name = ch.get("name")
+                    if not cid or not name or cid in seen_ids:
+                        continue
+                    seen_ids.add(cid)
+                    channels.append({
+                        "id": cid,
+                        "name": name,
+                        "type": "private" if ch.get("is_private") else "channel",
+                    })
+                cursor = (response.get("response_metadata") or {}).get("next_cursor")
+                if not cursor:
+                    break
+        except Exception as e:
+            logger.warning(
+                "Channel directory: failed to list Slack channels for team %s: %s",
+                team_id, e,
+            )
+            continue
+
+    # Merge in DM/group entries discovered from session history.
+    for entry in _build_from_sessions("slack"):
+        if entry.get("id") not in seen_ids:
+            channels.append(entry)
+            seen_ids.add(entry.get("id"))
+
+    return channels


 def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]:
@@ -223,6 +268,14 @@ def resolve_channel_name(platform_name: str, name: str) -> Optional[str]:
    if not channels:
        return None

+    # 0. Exact ID match — case-sensitive, no normalization. Lets callers pass
+    # raw platform IDs (e.g. Slack "C0B0QV5434G") even when the format guard
+    # in _parse_target_ref hasn't recognized them as explicit.
+    raw = name.strip()
+    for ch in channels:
+        if ch.get("id") == raw:
+            return ch["id"]
+
    query = _normalize_channel_query(name)

    # 1. Exact name match, including the display labels shown by send_message(action="list")
@@ -67,6 +67,7 @@ class Platform(Enum):
    WEIXIN = "weixin"
    BLUEBUBBLES = "bluebubbles"
    QQBOT = "qqbot"
+    YUANBAO = "yuanbao"


@dataclass
@@ -135,7 +136,7 @@ class SessionResetPolicy:
            mode=mode if mode is not None else "both",
            at_hour=at_hour if at_hour is not None else 4,
            idle_minutes=idle_minutes if idle_minutes is not None else 1440,
-            notify=notify if notify is not None else True,
+            notify=_coerce_bool(notify, True),
            notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
        )

@@ -178,7 +179,7 @@ class PlatformConfig:
            home_channel = HomeChannel.from_dict(data["home_channel"])
        
        return cls(
-            enabled=data.get("enabled", False),
+            enabled=_coerce_bool(data.get("enabled"), False),
            token=data.get("token"),
            api_key=data.get("api_key"),
            home_channel=home_channel,
@@ -195,6 +196,14 @@ class StreamingConfig:
    edit_interval: float = 1.0    # Seconds between message edits (Telegram rate-limits at ~1/s)
    buffer_threshold: int = 40    # Chars before forcing an edit
    cursor: str = " ▉"           # Cursor shown during streaming
+    # Ported from openclaw/openclaw#72038.  When >0, the final edit for
+    # a long-running streamed response is delivered as a fresh message
+    # if the original preview has been visible for at least this many
+    # seconds, so the platform's visible timestamp reflects completion
+    # time instead of the preview creation time.  Currently applied to
+    # Telegram only (other platforms ignore the setting).  Default 60s
+    # matches the OpenClaw rollout.  Set to 0 to disable.
+    fresh_final_after_seconds: float = 60.0

    def to_dict(self) -> Dict[str, Any]:
        return {
@@ -203,6 +212,7 @@ class StreamingConfig:
            "edit_interval": self.edit_interval,
            "buffer_threshold": self.buffer_threshold,
            "cursor": self.cursor,
+            "fresh_final_after_seconds": self.fresh_final_after_seconds,
        }

    @classmethod
@@ -215,6 +225,9 @@ class StreamingConfig:
            edit_interval=float(data.get("edit_interval", 1.0)),
            buffer_threshold=int(data.get("buffer_threshold", 40)),
            cursor=data.get("cursor", " ▉"),
+            fresh_final_after_seconds=float(
+                data.get("fresh_final_after_seconds", 60.0)
+            ),
        )


@@ -314,6 +327,9 @@ class GatewayConfig:
            # QQBot uses extra dict for app credentials
            elif platform == Platform.QQBOT and config.extra.get("app_id") and config.extra.get("client_secret"):
                connected.append(platform)
+            # Yuanbao uses extra dict for app credentials
+            elif platform == Platform.YUANBAO and config.extra.get("app_id") and config.extra.get("app_secret"):
+                connected.append(platform)
            # DingTalk uses client_id/client_secret from config.extra or env vars
            elif platform == Platform.DINGTALK and (
                config.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")
@@ -435,7 +451,7 @@ class GatewayConfig:
            reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
            quick_commands=quick_commands,
            sessions_dir=sessions_dir,
-            always_log_local=data.get("always_log_local", True),
+            always_log_local=_coerce_bool(data.get("always_log_local"), True),
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
            thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@@ -550,6 +566,8 @@ def load_gateway_config() -> GatewayConfig:
                        existing = {}
                    # Deep-merge extra dicts so gateway.json defaults survive
                    merged_extra = {**existing.get("extra", {}), **plat_block.get("extra", {})}
+                    if plat_name == Platform.SLACK.value and "enabled" in plat_block:
+                        merged_extra["_enabled_explicit"] = True
                    merged = {**existing, **plat_block}
                    if merged_extra:
                        merged["extra"] = merged_extra
@@ -570,6 +588,8 @@ def load_gateway_config() -> GatewayConfig:
                    )
                if "reply_prefix" in platform_cfg:
                    bridged["reply_prefix"] = platform_cfg["reply_prefix"]
+                if "reply_in_thread" in platform_cfg:
+                    bridged["reply_in_thread"] = platform_cfg["reply_in_thread"]
                if "require_mention" in platform_cfg:
                    bridged["require_mention"] = platform_cfg["require_mention"]
                if "free_response_channels" in platform_cfg:
@@ -584,7 +604,7 @@ def load_gateway_config() -> GatewayConfig:
                    bridged["group_policy"] = platform_cfg["group_policy"]
                if "group_allow_from" in platform_cfg:
                    bridged["group_allow_from"] = platform_cfg["group_allow_from"]
-                if plat == Platform.DISCORD and "channel_skill_bindings" in platform_cfg:
+                if plat in (Platform.DISCORD, Platform.SLACK) and "channel_skill_bindings" in platform_cfg:
                    bridged["channel_skill_bindings"] = platform_cfg["channel_skill_bindings"]
                if "channel_prompts" in platform_cfg:
                    channel_prompts = platform_cfg["channel_prompts"]
@@ -592,16 +612,21 @@ def load_gateway_config() -> GatewayConfig:
                        bridged["channel_prompts"] = {str(k): v for k, v in channel_prompts.items()}
                    else:
                        bridged["channel_prompts"] = channel_prompts
-                if not bridged:
+                enabled_was_explicit = "enabled" in platform_cfg
+                if not bridged and not enabled_was_explicit:
                    continue
                plat_data = platforms_data.setdefault(plat.value, {})
                if not isinstance(plat_data, dict):
                    plat_data = {}
                    platforms_data[plat.value] = plat_data
+                if enabled_was_explicit:
+                    plat_data["enabled"] = platform_cfg["enabled"]
                extra = plat_data.setdefault("extra", {})
                if not isinstance(extra, dict):
                    extra = {}
                    plat_data["extra"] = extra
+                if plat == Platform.SLACK and enabled_was_explicit:
+                    extra["_enabled_explicit"] = True
                extra.update(bridged)

            # Slack settings → env vars (env vars take precedence)
@@ -609,6 +634,8 @@ def load_gateway_config() -> GatewayConfig:
            if isinstance(slack_cfg, dict):
                if "require_mention" in slack_cfg and not os.getenv("SLACK_REQUIRE_MENTION"):
                    os.environ["SLACK_REQUIRE_MENTION"] = str(slack_cfg["require_mention"]).lower()
+                if "strict_mention" in slack_cfg and not os.getenv("SLACK_STRICT_MENTION"):
+                    os.environ["SLACK_STRICT_MENTION"] = str(slack_cfg["strict_mention"]).lower()
                if "allow_bots" in slack_cfg and not os.getenv("SLACK_ALLOW_BOTS"):
                    os.environ["SLACK_ALLOW_BOTS"] = str(slack_cfg["allow_bots"]).lower()
                frc = slack_cfg.get("free_response_channels")
@@ -687,6 +714,11 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
+                if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
+                    gac = telegram_cfg["group_allowed_chats"]
+                    if isinstance(gac, list):
+                        gac = ",".join(str(v) for v in gac)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
                if "disable_link_previews" in telegram_cfg:
                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                    if not isinstance(plat_data, dict):
@@ -913,8 +945,20 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
    slack_token = os.getenv("SLACK_BOT_TOKEN")
    if slack_token:
        if Platform.SLACK not in config.platforms:
+            # No yaml config for Slack — env-only setup, enable it
            config.platforms[Platform.SLACK] = PlatformConfig()
-        config.platforms[Platform.SLACK].enabled = True
+            config.platforms[Platform.SLACK].enabled = True
+        else:
+            slack_config = config.platforms[Platform.SLACK]
+            enabled_was_explicit = bool(slack_config.extra.pop("_enabled_explicit", False))
+            if not slack_config.enabled and not enabled_was_explicit:
+                # Top-level Slack settings such as channel prompts should not
+                # turn an env-token setup into a disabled platform. Only an
+                # explicit slack.enabled/platforms.slack.enabled false should.
+                slack_config.enabled = True
+        # If yaml config exists, respect its enabled flag (don't override
+        # explicit enabled: false). Token is still stored so skills that
+        # send Slack messages can use it without activating the gateway adapter.
        config.platforms[Platform.SLACK].token = slack_token
    slack_home = os.getenv("SLACK_HOME_CHANNEL")
    if slack_home and Platform.SLACK in config.platforms:
@@ -1271,6 +1315,48 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("QQBOT_HOME_CHANNEL_NAME") or os.getenv(qq_home_name_env, "Home"),
            )

+    # Yuanbao — YUANBAO_APP_ID preferred
+    yuanbao_app_id = os.getenv("YUANBAO_APP_ID") or os.getenv("YUANBAO_APP_KEY")
+    yuanbao_app_secret = os.getenv("YUANBAO_APP_SECRET")
+    if yuanbao_app_id and yuanbao_app_secret:
+        if Platform.YUANBAO not in config.platforms:
+            config.platforms[Platform.YUANBAO] = PlatformConfig()
+        config.platforms[Platform.YUANBAO].enabled = True
+        extra = config.platforms[Platform.YUANBAO].extra
+        extra["app_id"] = yuanbao_app_id
+        extra["app_secret"] = yuanbao_app_secret
+        yuanbao_bot_id = os.getenv("YUANBAO_BOT_ID")
+        if yuanbao_bot_id:
+            extra["bot_id"] = yuanbao_bot_id
+        yuanbao_ws_url = os.getenv("YUANBAO_WS_URL")
+        if yuanbao_ws_url:
+            extra["ws_url"] = yuanbao_ws_url
+        yuanbao_api_domain = os.getenv("YUANBAO_API_DOMAIN")
+        if yuanbao_api_domain:
+            extra["api_domain"] = yuanbao_api_domain
+        yuanbao_route_env = os.getenv("YUANBAO_ROUTE_ENV")
+        if yuanbao_route_env:
+            extra["route_env"] = yuanbao_route_env
+        yuanbao_home = os.getenv("YUANBAO_HOME_CHANNEL")
+        if yuanbao_home:
+            config.platforms[Platform.YUANBAO].home_channel = HomeChannel(
+                platform=Platform.YUANBAO,
+                chat_id=yuanbao_home,
+                name=os.getenv("YUANBAO_HOME_CHANNEL_NAME", "Home"),
+            )
+        yuanbao_dm_policy = os.getenv("YUANBAO_DM_POLICY")
+        if yuanbao_dm_policy:
+            extra["dm_policy"] = yuanbao_dm_policy.strip().lower()
+        yuanbao_dm_allow_from = os.getenv("YUANBAO_DM_ALLOW_FROM")
+        if yuanbao_dm_allow_from:
+            extra["dm_allow_from"] = yuanbao_dm_allow_from
+        yuanbao_group_policy = os.getenv("YUANBAO_GROUP_POLICY")
+        if yuanbao_group_policy:
+            extra["group_policy"] = yuanbao_group_policy.strip().lower()
+        yuanbao_group_allow_from = os.getenv("YUANBAO_GROUP_ALLOW_FROM")
+        if yuanbao_group_allow_from:
+            extra["group_allow_from"] = yuanbao_group_allow_from
+
    # Session settings
    idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
    if idle_minutes:
@@ -79,7 +79,9 @@ _PLATFORM_DEFAULTS: dict[str, dict[str, Any]] = {
    "discord":     _TIER_HIGH,

    # Tier 2 — edit support, often customer/workspace channels
-    "slack":           _TIER_MEDIUM,
+    # Slack: tool_progress off by default — Bolt posts cannot be edited like CLI;
+    # "new"/"all" spam permanent lines in channels (hermes-agent#14663).
+    "slack":           {**_TIER_MEDIUM, "tool_progress": "off"},
    "mattermost":      _TIER_MEDIUM,
    "matrix":          _TIER_MEDIUM,
    "feishu":          _TIER_MEDIUM,
@@ -52,19 +52,13 @@ class HookRegistry:
        return list(self._loaded_hooks)

    def _register_builtin_hooks(self) -> None:
-        """Register built-in hooks that are always active."""
-        try:
-            from gateway.builtin_hooks.boot_md import handle as boot_md_handle
+        """Register built-in hooks that are always active.

-            self._handlers.setdefault("gateway:startup", []).append(boot_md_handle)
-            self._loaded_hooks.append({
-                "name": "boot-md",
-                "description": "Run ~/.hermes/BOOT.md on gateway startup",
-                "events": ["gateway:startup"],
-                "path": "(builtin)",
-            })
-        except Exception as e:
-            print(f"[hooks] Could not load built-in boot-md hook: {e}", flush=True)
+        Currently empty — no shipped built-in hooks. Kept as the extension
+        point for future always-on gateway hooks so they drop in without
+        re-plumbing discover_and_load().
+        """
+        return

    def discover_and_load(self) -> None:
        """
@@ -135,9 +129,22 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

+    def _resolve_handlers(self, event_type: str) -> List[Callable]:
+        """Return all handlers that should fire for ``event_type``.
+
+        Exact matches fire first, followed by wildcard matches (e.g.
+        ``command:*`` matches ``command:reset``).
+        """
+        handlers = list(self._handlers.get(event_type, []))
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+        return handlers
+
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event.
+        Fire all handlers registered for an event, discarding return values.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -151,16 +158,7 @@ class HookRegistry:
        if context is None:
            context = {}

-        # Collect handlers: exact match + wildcard match
-        handlers = list(self._handlers.get(event_type, []))
-
-        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-
-        for fn in handlers:
+        for fn in self._resolve_handlers(event_type):
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -168,3 +166,32 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+
+    async def emit_collect(
+        self,
+        event_type: str,
+        context: Optional[Dict[str, Any]] = None,
+    ) -> List[Any]:
+        """Fire handlers and return their non-None return values in order.
+
+        Like :meth:`emit` but captures each handler's return value. Used for
+        decision-style hooks (e.g. ``command:<name>`` policies that want to
+        allow/deny/rewrite the command before normal dispatch).
+
+        Exceptions from individual handlers are logged but do not abort the
+        remaining handlers.
+        """
+        if context is None:
+            context = {}
+
+        results: List[Any] = []
+        for fn in self._resolve_handlers(event_type):
+            try:
+                result = fn(event_type, context)
+                if asyncio.iscoroutine(result):
+                    result = await result
+                if result is not None:
+                    results.append(result)
+            except Exception as e:
+                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+        return results
@@ -28,6 +28,7 @@ def mirror_to_session(
    message_text: str,
    source_label: str = "cli",
    thread_id: Optional[str] = None,
+    user_id: Optional[str] = None,
 ) -> bool:
    """
    Append a delivery-mirror message to the target session's transcript.
@@ -39,9 +40,20 @@ def mirror_to_session(
    All errors are caught -- this is never fatal.
    """
    try:
-        session_id = _find_session_id(platform, str(chat_id), thread_id=thread_id)
+        session_id = _find_session_id(
+            platform,
+            str(chat_id),
+            thread_id=thread_id,
+            user_id=user_id,
+        )
        if not session_id:
-            logger.debug("Mirror: no session found for %s:%s:%s", platform, chat_id, thread_id)
+            logger.debug(
+                "Mirror: no session found for %s:%s:%s:%s",
+                platform,
+                chat_id,
+                thread_id,
+                user_id,
+            )
            return False

        mirror_msg = {
@@ -59,17 +71,33 @@ def mirror_to_session(
        return True

    except Exception as e:
-        logger.debug("Mirror failed for %s:%s:%s: %s", platform, chat_id, thread_id, e)
+        logger.debug(
+            "Mirror failed for %s:%s:%s:%s: %s",
+            platform,
+            chat_id,
+            thread_id,
+            user_id,
+            e,
+        )
        return False


-def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = None) -> Optional[str]:
+def _find_session_id(
+    platform: str,
+    chat_id: str,
+    thread_id: Optional[str] = None,
+    user_id: Optional[str] = None,
+) -> Optional[str]:
    """
    Find the active session_id for a platform + chat_id pair.

    Scans sessions.json entries and matches where origin.chat_id == chat_id
    on the right platform.  DM session keys don't embed the chat_id
    (e.g. "agent:main:telegram:dm"), so we check the origin dict.
+
+    When *user_id* is provided, prefer exact sender matches. If multiple
+    same-chat candidates exist and none matches the user, return None instead
+    of guessing and contaminating another participant's session.
    """
    if not _SESSIONS_INDEX.exists():
        return None
@@ -81,8 +109,7 @@ def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = Non
        return None

    platform_lower = platform.lower()
-    best_match = None
-    best_updated = ""
+    candidates = []

    for _key, entry in data.items():
        origin = entry.get("origin") or {}
@@ -96,12 +123,31 @@ def _find_session_id(platform: str, chat_id: str, thread_id: Optional[str] = Non
            origin_thread_id = origin.get("thread_id")
            if thread_id is not None and str(origin_thread_id or "") != str(thread_id):
                continue
-            updated = entry.get("updated_at", "")
-            if updated > best_updated:
-                best_updated = updated
-                best_match = entry.get("session_id")
+            candidates.append(entry)

-    return best_match
+    if not candidates:
+        return None
+
+    if user_id:
+        exact_user_matches = [
+            entry for entry in candidates
+            if str((entry.get("origin") or {}).get("user_id") or "") == str(user_id)
+        ]
+        if exact_user_matches:
+            candidates = exact_user_matches
+        elif len(candidates) > 1:
+            return None
+    elif len(candidates) > 1:
+        distinct_user_ids = {
+            str((entry.get("origin") or {}).get("user_id") or "").strip()
+            for entry in candidates
+            if str((entry.get("origin") or {}).get("user_id") or "").strip()
+        }
+        if len(distinct_user_ids) > 1:
+            return None
+
+    best_entry = max(candidates, key=lambda entry: entry.get("updated_at", ""))
+    return best_entry.get("session_id")


 def _append_to_jsonl(session_id: str, message: dict) -> None:
@@ -28,6 +28,7 @@ from pathlib import Path
 from typing import Optional

 from hermes_constants import get_hermes_dir
+from utils import atomic_replace


 # Unambiguous alphabet -- excludes 0/O, 1/I to prevent confusion
@@ -59,7 +60,7 @@ def _secure_write(path: Path, data: str) -> None:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())
-        os.replace(tmp_path, str(path))
+        atomic_replace(tmp_path, path)
        try:
            os.chmod(path, 0o600)
        except OSError:
@@ -10,10 +10,12 @@ Each adapter handles:

 from .base import BasePlatformAdapter, MessageEvent, SendResult
 from .qqbot import QQAdapter
+from .yuanbao import YuanbaoAdapter

 __all__ = [
    "BasePlatformAdapter",
    "MessageEvent",
    "SendResult",
    "QQAdapter",
+    "YuanbaoAdapter",
 ]
@@ -9,6 +9,7 @@ Exposes an HTTP server with endpoints:
 - GET  /v1/models                  — lists hermes-agent as an available model
 - POST /v1/runs                    — start a run, returns run_id immediately (202)
 - GET  /v1/runs/{run_id}/events    — SSE stream of structured lifecycle events
+- POST /v1/runs/{run_id}/stop    — interrupt a running agent
 - GET  /health                     — health check
 - GET  /health/detailed            — rich status for cross-container dashboard probing

@@ -586,6 +587,9 @@ class APIServerAdapter(BasePlatformAdapter):
        self._run_streams: Dict[str, "asyncio.Queue[Optional[Dict]]"] = {}
        # Creation timestamps for orphaned-run TTL sweep
        self._run_streams_created: Dict[str, float] = {}
+        # Active run agent/task references for stop support
+        self._active_run_agents: Dict[str, Any] = {}
+        self._active_run_tasks: Dict[str, "asyncio.Task"] = {}
        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
@@ -1204,10 +1208,12 @@ class APIServerAdapter(BasePlatformAdapter):

        If the client disconnects mid-stream, ``agent.interrupt()`` is
        called so the agent stops issuing upstream LLM calls, then the
-        asyncio task is cancelled.  When ``store=True`` the full response
-        is persisted to the ResponseStore in a ``finally`` block so GET
-        /v1/responses/{id} and ``previous_response_id`` chaining work the
-        same as the batch path.
+        asyncio task is cancelled.  When ``store=True`` an initial
+        ``in_progress`` snapshot is persisted immediately after
+        ``response.created`` and disconnects update it to an
+        ``incomplete`` snapshot so GET /v1/responses/{id} and
+        ``previous_response_id`` chaining still have something to
+        recover from.
        """
        import queue as _q

@@ -1269,6 +1275,60 @@ class APIServerAdapter(BasePlatformAdapter):
        final_response_text = ""
        agent_error: Optional[str] = None
        usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+        terminal_snapshot_persisted = False
+
+        def _persist_response_snapshot(
+            response_env: Dict[str, Any],
+            *,
+            conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
+        ) -> None:
+            if not store:
+                return
+            if conversation_history_snapshot is None:
+                conversation_history_snapshot = list(conversation_history)
+                conversation_history_snapshot.append({"role": "user", "content": user_message})
+            self._response_store.put(response_id, {
+                "response": response_env,
+                "conversation_history": conversation_history_snapshot,
+                "instructions": instructions,
+                "session_id": session_id,
+            })
+            if conversation:
+                self._response_store.set_conversation(conversation, response_id)
+
+        def _persist_incomplete_if_needed() -> None:
+            """Persist an ``incomplete`` snapshot if no terminal one was written.
+
+            Called from both the client-disconnect (``ConnectionResetError``)
+            and server-cancellation (``asyncio.CancelledError``) paths so
+            GET /v1/responses/{id} and ``previous_response_id`` chaining keep
+            working after abrupt stream termination.
+            """
+            if not store or terminal_snapshot_persisted:
+                return
+            incomplete_text = "".join(final_text_parts) or final_response_text
+            incomplete_items: List[Dict[str, Any]] = list(emitted_items)
+            if incomplete_text:
+                incomplete_items.append({
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{"type": "output_text", "text": incomplete_text}],
+                })
+            incomplete_env = _envelope("incomplete")
+            incomplete_env["output"] = incomplete_items
+            incomplete_env["usage"] = {
+                "input_tokens": usage.get("input_tokens", 0),
+                "output_tokens": usage.get("output_tokens", 0),
+                "total_tokens": usage.get("total_tokens", 0),
+            }
+            incomplete_history = list(conversation_history)
+            incomplete_history.append({"role": "user", "content": user_message})
+            if incomplete_text:
+                incomplete_history.append({"role": "assistant", "content": incomplete_text})
+            _persist_response_snapshot(
+                incomplete_env,
+                conversation_history_snapshot=incomplete_history,
+            )

        try:
            # response.created — initial envelope, status=in_progress
@@ -1278,6 +1338,7 @@ class APIServerAdapter(BasePlatformAdapter):
                "type": "response.created",
                "response": created_env,
            })
+            _persist_response_snapshot(created_env)
            last_activity = time.monotonic()

            async def _open_message_item() -> None:
@@ -1534,6 +1595,18 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
+                _failed_history = list(conversation_history)
+                _failed_history.append({"role": "user", "content": user_message})
+                if final_response_text or agent_error:
+                    _failed_history.append({
+                        "role": "assistant",
+                        "content": final_response_text or agent_error,
+                    })
+                _persist_response_snapshot(
+                    failed_env,
+                    conversation_history_snapshot=_failed_history,
+                )
+                terminal_snapshot_persisted = True
                await _write_event("response.failed", {
                    "type": "response.failed",
                    "response": failed_env,
@@ -1546,30 +1619,24 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
+                full_history = list(conversation_history)
+                full_history.append({"role": "user", "content": user_message})
+                if isinstance(result, dict) and result.get("messages"):
+                    full_history.extend(result["messages"])
+                else:
+                    full_history.append({"role": "assistant", "content": final_response_text})
+                _persist_response_snapshot(
+                    completed_env,
+                    conversation_history_snapshot=full_history,
+                )
+                terminal_snapshot_persisted = True
                await _write_event("response.completed", {
                    "type": "response.completed",
                    "response": completed_env,
                })

-                # Persist for future chaining / GET retrieval, mirroring
-                # the batch path behavior.
-                if store:
-                    full_history = list(conversation_history)
-                    full_history.append({"role": "user", "content": user_message})
-                    if isinstance(result, dict) and result.get("messages"):
-                        full_history.extend(result["messages"])
-                    else:
-                        full_history.append({"role": "assistant", "content": final_response_text})
-                    self._response_store.put(response_id, {
-                        "response": completed_env,
-                        "conversation_history": full_history,
-                        "instructions": instructions,
-                        "session_id": session_id,
-                    })
-                    if conversation:
-                        self._response_store.set_conversation(conversation, response_id)
-
        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
+            _persist_incomplete_if_needed()
            # Client disconnected — interrupt the agent so it stops
            # making upstream LLM calls, then cancel the task.
            agent = agent_ref[0] if agent_ref else None
@@ -1585,6 +1652,22 @@ class APIServerAdapter(BasePlatformAdapter):
                except (asyncio.CancelledError, Exception):
                    pass
            logger.info("SSE client disconnected; interrupted agent task %s", response_id)
+        except asyncio.CancelledError:
+            # Server-side cancellation (e.g. shutdown, request timeout) —
+            # persist an incomplete snapshot so GET /v1/responses/{id} and
+            # previous_response_id chaining still work, then re-raise so the
+            # runtime's cancellation semantics are respected.
+            _persist_incomplete_if_needed()
+            agent = agent_ref[0] if agent_ref else None
+            if agent is not None:
+                try:
+                    agent.interrupt("SSE task cancelled")
+                except Exception:
+                    pass
+            if not agent_task.done():
+                agent_task.cancel()
+            logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
+            raise

        return response

@@ -2362,6 +2445,7 @@ class APIServerAdapter(BasePlatformAdapter):
                    stream_delta_callback=_text_cb,
                    tool_progress_callback=event_cb,
                )
+                self._active_run_agents[run_id] = agent
                def _run_sync():
                    r = agent.run_conversation(
                        user_message=user_message,
@@ -2401,8 +2485,11 @@ class APIServerAdapter(BasePlatformAdapter):
                    q.put_nowait(None)
                except Exception:
                    pass
+                self._active_run_agents.pop(run_id, None)
+                self._active_run_tasks.pop(run_id, None)

        task = asyncio.create_task(_run_and_close())
+        self._active_run_tasks[run_id] = task
        try:
            self._background_tasks.add(task)
        except TypeError:
@@ -2461,6 +2548,44 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

+    async def _handle_stop_run(self, request: "web.Request") -> "web.Response":
+        """POST /v1/runs/{run_id}/stop — interrupt a running agent."""
+        auth_err = self._check_auth(request)
+        if auth_err:
+            return auth_err
+
+        run_id = request.match_info["run_id"]
+        agent = self._active_run_agents.get(run_id)
+        task = self._active_run_tasks.get(run_id)
+
+        if agent is None and task is None:
+            return web.json_response(_openai_error(f"Run not found: {run_id}", code="run_not_found"), status=404)
+
+        if agent is not None:
+            try:
+                agent.interrupt("Stop requested via API")
+            except Exception:
+                pass
+
+        if task is not None and not task.done():
+            task.cancel()
+            # Bounded wait: run_conversation() executes in the default
+            # executor thread which task.cancel() cannot preempt — we rely on
+            # agent.interrupt() above to break the loop. Cap the wait so a
+            # slow/unresponsive interrupt can't hang this handler.
+            try:
+                await asyncio.wait_for(asyncio.shield(task), timeout=5.0)
+            except asyncio.TimeoutError:
+                logger.warning(
+                    "[api_server] stop for run %s timed out after 5s; "
+                    "agent may still be finishing the current step",
+                    run_id,
+                )
+            except (asyncio.CancelledError, Exception):
+                pass
+
+        return web.json_response({"run_id": run_id, "status": "stopping"})
+
    async def _sweep_orphaned_runs(self) -> None:
        """Periodically clean up run streams that were never consumed."""
        while True:
@@ -2475,6 +2600,8 @@ class APIServerAdapter(BasePlatformAdapter):
                logger.debug("[api_server] sweeping orphaned run %s", run_id)
                self._run_streams.pop(run_id, None)
                self._run_streams_created.pop(run_id, None)
+                self._active_run_agents.pop(run_id, None)
+                self._active_run_tasks.pop(run_id, None)

    # ------------------------------------------------------------------
    # BasePlatformAdapter interface
@@ -2510,6 +2637,7 @@ class APIServerAdapter(BasePlatformAdapter):
            # Structured event streaming
            self._app.router.add_post("/v1/runs", self._handle_runs)
            self._app.router.add_get("/v1/runs/{run_id}/events", self._handle_run_events)
+            self._app.router.add_post("/v1/runs/{run_id}/stop", self._handle_stop_run)
            # Start background sweep to clean up orphaned (unconsumed) run streams
            sweep_task = asyncio.create_task(self._sweep_orphaned_runs())
            try:
@@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None:
    return None


-def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
+def _split_host_port(value: str) -> tuple[str, int | None]:
+    raw = str(value or "").strip()
+    if not raw:
+        return "", None
+    if "://" in raw:
+        parsed = urlsplit(raw)
+        return (parsed.hostname or "").lower().rstrip("."), parsed.port
+    if raw.startswith("[") and "]" in raw:
+        host, _, rest = raw[1:].partition("]")
+        port = None
+        if rest.startswith(":") and rest[1:].isdigit():
+            port = int(rest[1:])
+        return host.lower().rstrip("."), port
+    if raw.count(":") == 1:
+        host, _, maybe_port = raw.rpartition(":")
+        if maybe_port.isdigit():
+            return host.lower().rstrip("."), int(maybe_port)
+    return raw.lower().strip("[]").rstrip("."), None
+
+
+def _no_proxy_entries() -> list[str]:
+    entries: list[str] = []
+    for key in ("NO_PROXY", "no_proxy"):
+        raw = os.environ.get(key, "")
+        entries.extend(part.strip() for part in raw.split(",") if part.strip())
+    return entries
+
+
+def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
+    token = str(entry or "").strip().lower()
+    if not token:
+        return False
+    if token == "*":
+        return True
+
+    token_host, token_port = _split_host_port(token)
+    if token_port is not None and port is not None and token_port != port:
+        return False
+    if token_port is not None and port is None:
+        return False
+    if not token_host:
+        return False
+
+    try:
+        network = ipaddress.ip_network(token_host, strict=False)
+        try:
+            return ipaddress.ip_address(host) in network
+        except ValueError:
+            return False
+    except ValueError:
+        pass
+
+    try:
+        token_ip = ipaddress.ip_address(token_host)
+        try:
+            return ipaddress.ip_address(host) == token_ip
+        except ValueError:
+            return False
+    except ValueError:
+        pass
+
+    if token_host.startswith("*."):
+        suffix = token_host[1:]
+        return host.endswith(suffix)
+    if token_host.startswith("."):
+        return host == token_host[1:] or host.endswith(token_host)
+    return host == token_host or host.endswith(f".{token_host}")
+
+
+def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
+    """Return True when NO_PROXY/no_proxy matches at least one target host.
+
+    Supports exact hosts, domain suffixes, wildcard suffixes, IP literals,
+    CIDR ranges, optional host:port entries, and ``*``.
+    """
+    entries = _no_proxy_entries()
+    if not entries or not target_hosts:
+        return False
+    if isinstance(target_hosts, str):
+        candidates = [target_hosts]
+    else:
+        candidates = list(target_hosts)
+    for candidate in candidates:
+        host, port = _split_host_port(str(candidate))
+        if not host:
+            continue
+        if any(_no_proxy_entry_matches(entry, host, port) for entry in entries):
+            return True
+    return False
+
+
+def resolve_proxy_url(
+    platform_env_var: str | None = None,
+    *,
+    target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
+) -> str | None:
    """Return a proxy URL from env vars, or macOS system proxy.

    Check order:
@@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
      1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
      2. macOS system proxy via ``scutil --proxy`` (auto-detect)

-    Returns *None* if no proxy is found.
+    Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
+    of ``target_hosts``.
    """
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
+            if should_bypass_proxy(target_hosts):
+                return None
            return normalize_proxy_url(value)
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
+            if should_bypass_proxy(target_hosts):
+                return None
            return normalize_proxy_url(value)
-    return normalize_proxy_url(_detect_macos_system_proxy())
+    detected = normalize_proxy_url(_detect_macos_system_proxy())
+    if detected and should_bypass_proxy(target_hosts):
+        return None
+    return detected


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -204,9 +307,14 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """Build kwargs for standalone ``aiohttp.ClientSession`` with proxy.

    Returns ``(session_kwargs, request_kwargs)`` where:
-      - SOCKS → ``({"connector": ProxyConnector(...)}, {})``
-      - HTTP  → ``({}, {"proxy": url})``
-      - None  → ``({}, {})``
+      - With aiohttp-socks → ``({"connector": ProxyConnector(...)}, {})``
+        for *all* proxy schemes (SOCKS **and** HTTP/HTTPS).
+      - HTTP without aiohttp-socks → ``({}, {"proxy": url})``.
+      - None → ``({}, {})``.
+
+    Prefer the connector path: it works transparently with libraries
+    (like mautrix) that call ``session.request()`` without forwarding
+    per-request ``proxy=`` kwargs.

    Usage::

@@ -217,20 +325,53 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    """
    if not proxy_url:
        return {}, {}
-    if proxy_url.lower().startswith("socks"):
-        try:
-            from aiohttp_socks import ProxyConnector
+    try:
+        from aiohttp_socks import ProxyConnector

-            connector = ProxyConnector.from_url(proxy_url, rdns=True)
-            return {"connector": connector}, {}
-        except ImportError:
+        connector = ProxyConnector.from_url(proxy_url, rdns=True)
+        return {"connector": connector}, {}
+    except ImportError:
+        if proxy_url.lower().startswith("socks"):
            logger.warning(
                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
                "Run: pip install aiohttp-socks",
                proxy_url,
            )
            return {}, {}
-    return {}, {"proxy": proxy_url}
+        return {}, {"proxy": proxy_url}
+
+
+def is_host_excluded_by_no_proxy(hostname: str, no_proxy_value: str | None = None) -> bool:
+    """Return True when ``hostname`` matches a ``NO_PROXY`` entry.
+
+    Supports comma- or whitespace-separated entries with optional leading dots
+    and ``*.`` wildcards, which match both the apex domain and subdomains.
+    """
+    raw = no_proxy_value
+    if raw is None:
+        raw = os.environ.get("NO_PROXY") or os.environ.get("no_proxy") or ""
+
+    raw = raw.strip()
+    if not raw:
+        return False
+
+    lower_hostname = hostname.lower()
+    for entry in re.split(r"[\s,]+", raw):
+        normalized = entry.strip().lower()
+        if not normalized:
+            continue
+        if normalized == "*":
+            return True
+
+        if normalized.startswith("*."):
+            normalized = normalized[2:]
+        elif normalized.startswith("."):
+            normalized = normalized[1:]
+
+        if lower_hostname == normalized or lower_hostname.endswith(f".{normalized}"):
+            return True
+
+    return False


 from dataclasses import dataclass, field
@@ -590,7 +731,15 @@ SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
    ".md": "text/markdown",
    ".txt": "text/plain",
+    ".csv": "text/csv",
    ".log": "text/plain",
+    ".json": "application/json",
+    ".xml": "application/xml",
+    ".yaml": "application/yaml",
+    ".yml": "application/yaml",
+    ".toml": "application/toml",
+    ".ini": "text/plain",
+    ".cfg": "text/plain",
    ".zip": "application/zip",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
@@ -752,7 +901,45 @@ class MessageEvent:
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
-        return parts[1] if len(parts) > 1 else ""
+        args = parts[1] if len(parts) > 1 else ""
+        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
+        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
+        return args
+
+
+_PLAINTEXT_GATEWAY_RESTART_PATTERNS: tuple[re.Pattern[str], ...] = (
+    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?gateway[.!?\s]*$", re.IGNORECASE),
+    re.compile(r"^(?:please\s+)?restart\s+(?:the\s+)?hermes\s+gateway[.!?\s]*$", re.IGNORECASE),
+    re.compile(r"^(?:please\s+)?restart\s+hermes[.!?\s]*$", re.IGNORECASE),
+)
+
+
+def coerce_plaintext_gateway_command(event: "MessageEvent") -> None:
+    """Rewrite a tiny set of DM plaintext admin phrases into slash commands.
+
+    This keeps high-impact operational phrases like ``restart gateway`` out of
+    the LLM/tool path, where they can trigger a self-restart from inside the
+    currently running agent and leave the gateway stuck in ``draining`` while it
+    waits for that same agent to finish.
+
+    Scope is intentionally narrow: DM text messages only, exact restart-style
+    phrases only. Group chats keep natural-language semantics.
+    """
+    try:
+        if event is None or event.message_type != MessageType.TEXT:
+            return
+        text = (event.text or "").strip()
+        if not text or text.startswith("/"):
+            return
+        source = getattr(event, "source", None)
+        if getattr(source, "chat_type", None) != "dm":
+            return
+        for pattern in _PLAINTEXT_GATEWAY_RESTART_PATTERNS:
+            if pattern.match(text):
+                event.text = "/restart"
+                return
+    except Exception:
+        return


@dataclass 
@@ -876,6 +1063,61 @@ def resolve_channel_prompt(
    return None


+def resolve_channel_skills(
+    config_extra: dict,
+    channel_id: str,
+    parent_id: str | None = None,
+) -> list[str] | None:
+    """Resolve auto-loaded skill(s) for a channel/thread from platform config.
+
+    Looks up ``channel_skill_bindings`` in the adapter's ``config.extra`` dict.
+
+    Config format::
+
+        channel_skill_bindings:
+          - id: "C0123"          # Slack channel ID or Discord channel/forum ID
+            skills: ["skill-a", "skill-b"]
+          - id: "D0ABCDE"
+            skill: "solo-skill"  # single string also accepted
+
+    Prefers an exact match on *channel_id*; falls back to *parent_id*
+    (useful for forum threads / Slack threads inheriting the parent channel's
+    binding).
+
+    Returns a deduplicated list of skill names (order preserved), or None if
+    no match is found.
+    """
+    bindings = config_extra.get("channel_skill_bindings") or []
+    if not isinstance(bindings, list) or not bindings:
+        return None
+    ids_to_check: set[str] = set()
+    if channel_id:
+        ids_to_check.add(str(channel_id))
+    if parent_id:
+        ids_to_check.add(str(parent_id))
+    if not ids_to_check:
+        return None
+    for entry in bindings:
+        if not isinstance(entry, dict):
+            continue
+        entry_id = str(entry.get("id", ""))
+        if entry_id in ids_to_check:
+            skills = entry.get("skills") or entry.get("skill")
+            if isinstance(skills, str):
+                s = skills.strip()
+                return [s] if s else None
+            if isinstance(skills, list) and skills:
+                seen: list[str] = []
+                for name in skills:
+                    if not isinstance(name, str):
+                        continue
+                    nm = name.strip()
+                    if nm and nm not in seen:
+                        seen.append(nm)
+                return seen or None
+    return None
+
+
 class BasePlatformAdapter(ABC):
    """
    Base class for platform adapters.
@@ -897,10 +1139,16 @@ class BasePlatformAdapter(ABC):
        self._fatal_error_retryable = True
        self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
        
-        # Track active message handlers per session for interrupt support
-        # Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
+        # Track active message handlers per session for interrupt support.
+        # _active_sessions stores the per-session interrupt Event; _session_tasks
+        # maps session → the specific Task currently processing it so that
+        # session-terminating commands (/stop, /new, /reset) can cancel the
+        # right task and release the adapter-level guard deterministically.
+        # Without the owner-task map, an old task's finally block could delete
+        # a newer task's guard, leaving stale busy state.
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
+        self._session_tasks: Dict[str, asyncio.Task] = {}
        # Background message-processing tasks spawned by handle_message().
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
@@ -913,7 +1161,20 @@ class BasePlatformAdapter(ABC):
        self._post_delivery_callbacks: Dict[str, Any] = {}
        self._expected_cancelled_tasks: set[asyncio.Task] = set()
        self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None
-        # Chats where auto-TTS on voice input is disabled (set by /voice off)
+        # Auto-TTS on voice input: ``_auto_tts_default`` is the global default
+        # (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect).
+        # Per-chat overrides live in two sets populated from ``_voice_mode``:
+        #   - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on``
+        #     or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when
+        #     the global default is False.
+        #   - ``_auto_tts_disabled_chats``: chat explicitly opted out via
+        #     ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the
+        #     global default is True.
+        # The gate in _process_message() is:
+        #   fire if chat in _auto_tts_enabled_chats
+        #     OR (_auto_tts_default and chat not in _auto_tts_disabled_chats)
+        self._auto_tts_default: bool = False
+        self._auto_tts_enabled_chats: set = set()
        self._auto_tts_disabled_chats: set = set()
        # Chats where typing indicator is paused (e.g. during approval waits).
        # _keep_typing skips send_typing when the chat_id is in this set.
@@ -935,6 +1196,21 @@ class BasePlatformAdapter(ABC):
    def fatal_error_retryable(self) -> bool:
        return self._fatal_error_retryable

+    def _should_auto_tts_for_chat(self, chat_id: str) -> bool:
+        """Whether auto-TTS on voice input should fire for ``chat_id``.
+
+        Decision layers (Issue #16007):
+          1. Explicit ``/voice on`` or ``/voice tts`` → always fire (even if
+             ``voice.auto_tts`` is False).
+          2. Explicit ``/voice off`` → never fire.
+          3. Fall back to the global ``voice.auto_tts`` config default.
+        """
+        if chat_id in self._auto_tts_enabled_chats:
+            return True
+        if chat_id in self._auto_tts_disabled_chats:
+            return False
+        return bool(self._auto_tts_default)
+
    def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None:
        self._fatal_error_handler = handler

@@ -1118,6 +1394,27 @@ class BasePlatformAdapter(ABC):
        """
        return SendResult(success=False, error="Not supported")

+    async def delete_message(
+        self,
+        chat_id: str,
+        message_id: str,
+    ) -> bool:
+        """
+        Delete a previously sent message.  Optional — platforms that don't
+        support deletion return ``False`` and callers fall back to leaving
+        the message in place.
+
+        Used by the stream consumer's fresh-final cleanup path (see
+        openclaw/openclaw#72038) to remove long-lived preview messages
+        after sending the completed reply as a fresh message so the
+        platform's visible timestamp reflects completion time.
+
+        Returns ``True`` on successful deletion, ``False`` otherwise.
+        Subclasses should override for platforms with a deletion API
+        (e.g. Telegram ``deleteMessage``).
+        """
+        return False
+
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """
        Send a typing indicator.
@@ -1343,7 +1640,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1445,13 +1742,41 @@ class BasePlatformAdapter(ABC):
        the agent is waiting for dangerous-command approval).  This is critical
        for Slack's Assistant API where ``assistant_threads_setStatus`` disables
        the compose box — pausing lets the user type ``/approve`` or ``/deny``.
+
+        Each ``send_typing`` call is bounded by a ~1.5s timeout so a slow
+        network round-trip can't stall the refresh cadence.  Telegram- and
+        Discord-side typing expire after ~5s; if any individual send_typing
+        takes longer than the refresh interval, the bubble would die and
+        stay dead until that call returns.  Abandoning the slow call lets
+        the next tick fire a fresh send_typing on schedule — as long as
+        one of them succeeds within the 5s platform-side window, the bubble
+        stays visible across provider stalls / upstream API timeouts.
        """
+        # Bound each send_typing round-trip so the refresh cadence isn't
+        # gated on network health.  Must stay below ``interval`` so a slow
+        # call gets abandoned before the next scheduled tick.
+        _send_typing_timeout = max(0.25, min(1.5, interval - 0.25))
        try:
            while True:
                if stop_event is not None and stop_event.is_set():
                    return
                if chat_id not in self._typing_paused:
-                    await self.send_typing(chat_id, metadata=metadata)
+                    try:
+                        await asyncio.wait_for(
+                            self.send_typing(chat_id, metadata=metadata),
+                            timeout=_send_typing_timeout,
+                        )
+                    except asyncio.TimeoutError:
+                        # Slow network — abandon this tick, keep the loop
+                        # on schedule so the next send_typing fires fresh.
+                        pass
+                    except asyncio.CancelledError:
+                        raise
+                    except Exception as typing_err:
+                        logger.debug(
+                            "[%s] send_typing error (non-fatal): %s",
+                            self.name, typing_err,
+                        )
                if stop_event is None:
                    await asyncio.sleep(interval)
                    continue
@@ -1677,6 +2002,222 @@ class BasePlatformAdapter(ABC):
            return f"{existing_text}\n\n{new_text}".strip()
        return existing_text

+    # ------------------------------------------------------------------
+    # Session task + guard ownership helpers
+    # ------------------------------------------------------------------
+    # These were introduced together with the _session_tasks owner map to
+    # make session lifecycle reconciliation deterministic across (a) the
+    # normal completion path, (b) /stop/ /new/ /reset bypass commands,
+    # and (c) stale-lock self-heal on the next inbound message.
+
+    def _release_session_guard(
+        self,
+        session_key: str,
+        *,
+        guard: Optional[asyncio.Event] = None,
+    ) -> None:
+        """Release the adapter-level guard for a session.
+
+        When ``guard`` is provided, only release the entry if it still points
+        at that exact Event.  This lets reset-like commands swap in a temporary
+        guard while the old processing task unwinds, without having the old
+        task's cleanup accidentally clear the replacement guard.
+        """
+        current_guard = self._active_sessions.get(session_key)
+        if current_guard is None:
+            return
+        if guard is not None and current_guard is not guard:
+            return
+        del self._active_sessions[session_key]
+
+    def _session_task_is_stale(self, session_key: str) -> bool:
+        """Return True if the owner task for ``session_key`` is done/cancelled.
+
+        A lock is "stale" when the adapter still has ``_active_sessions[key]``
+        AND a known owner task in ``_session_tasks`` that has already exited.
+        When there is no owner task at all, that usually means the guard was
+        installed by some path other than handle_message() (tests sometimes
+        install guards directly) — don't treat that as stale.  The on-entry
+        self-heal only needs to handle the production split-brain case where
+        an owner task was recorded, then exited without clearing its guard.
+        """
+        task = self._session_tasks.get(session_key)
+        if task is None:
+            return False
+        done = getattr(task, "done", None)
+        return bool(done and done())
+
+    def _heal_stale_session_lock(self, session_key: str) -> bool:
+        """Clear a stale session lock if the owner task is already gone.
+
+        Returns True if a stale lock was healed.  Returns False if there is
+        no lock, or the owner task is still alive (the normal busy case).
+
+        This is the on-entry safety net sidbin's issue #11016 analysis calls
+        for: without it, a split-brain — adapter still thinks the session is
+        active, but nothing is actually processing — traps the chat in
+        infinite "Interrupting current task..." until the gateway is
+        restarted.
+        """
+        if session_key not in self._active_sessions:
+            return False
+        if not self._session_task_is_stale(session_key):
+            return False
+        logger.warning(
+            "[%s] Healing stale session lock for %s (owner task is done/absent)",
+            self.name,
+            session_key,
+        )
+        self._active_sessions.pop(session_key, None)
+        self._pending_messages.pop(session_key, None)
+        self._session_tasks.pop(session_key, None)
+        return True
+
+    def _start_session_processing(
+        self,
+        event: MessageEvent,
+        session_key: str,
+        *,
+        interrupt_event: Optional[asyncio.Event] = None,
+    ) -> bool:
+        """Spawn a background processing task under the given session guard.
+
+        Returns True on success.  If the runtime stubs ``create_task`` with a
+        non-Task sentinel (some tests do this), the guard is rolled back and
+        False is returned so the caller isn't left holding a half-installed
+        session lock.
+        """
+        guard = interrupt_event or asyncio.Event()
+        self._active_sessions[session_key] = guard
+
+        task = asyncio.create_task(self._process_message_background(event, session_key))
+        self._session_tasks[session_key] = task
+        try:
+            self._background_tasks.add(task)
+        except TypeError:
+            # Tests stub create_task() with lightweight sentinels that are not
+            # hashable and do not support lifecycle callbacks.
+            self._session_tasks.pop(session_key, None)
+            self._release_session_guard(session_key, guard=guard)
+            return False
+        if hasattr(task, "add_done_callback"):
+            task.add_done_callback(self._background_tasks.discard)
+            task.add_done_callback(self._expected_cancelled_tasks.discard)
+        return True
+
+    async def cancel_session_processing(
+        self,
+        session_key: str,
+        *,
+        release_guard: bool = True,
+        discard_pending: bool = True,
+    ) -> None:
+        """Cancel in-flight processing for a single session.
+
+        ``release_guard=False`` keeps the adapter-level session guard in place
+        so reset-like commands can finish atomically before follow-up messages
+        are allowed to start a fresh background task.
+        """
+        task = self._session_tasks.pop(session_key, None)
+        if task is not None and not task.done():
+            logger.debug(
+                "[%s] Cancelling active processing for session %s",
+                self.name,
+                session_key,
+            )
+            self._expected_cancelled_tasks.add(task)
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
+            except Exception:
+                logger.debug(
+                    "[%s] Session cancellation raised while unwinding %s",
+                    self.name,
+                    session_key,
+                    exc_info=True,
+                )
+        if discard_pending:
+            self._pending_messages.pop(session_key, None)
+        if release_guard:
+            self._release_session_guard(session_key)
+
+    async def _drain_pending_after_session_command(
+        self,
+        session_key: str,
+        command_guard: asyncio.Event,
+    ) -> None:
+        """Resume the latest queued follow-up once a session command completes.
+
+        Called at the tail of /stop, /new, and /reset dispatch.  Releases the
+        command-scoped guard, then — if a follow-up message landed while the
+        command was running — spawns a fresh processing task for it.
+        """
+        pending_event = self._pending_messages.pop(session_key, None)
+        self._release_session_guard(session_key, guard=command_guard)
+        if pending_event is None:
+            return
+        self._start_session_processing(pending_event, session_key)
+
+    async def _dispatch_active_session_command(
+        self,
+        event: MessageEvent,
+        session_key: str,
+        cmd: str,
+    ) -> None:
+        """Dispatch a reset-like bypass command while preserving guard ordering.
+
+        /stop, /new, and /reset must:
+          1. Keep the session guard installed while the runner processes the
+             command (so a racing follow-up message stays queued, not
+             dispatched as a second parallel run).
+          2. Cancel the old in-flight adapter task only AFTER the runner has
+             finished handling the command (so the runner sees consistent
+             state and its response is sent in order).
+          3. Release the command-scoped guard and drain the latest queued
+             follow-up exactly once, after 1 and 2 complete.
+        """
+        logger.debug(
+            "[%s] Command '/%s' bypassing active-session guard for %s",
+            self.name,
+            cmd,
+            session_key,
+        )
+
+        current_guard = self._active_sessions.get(session_key)
+        command_guard = asyncio.Event()
+        self._active_sessions[session_key] = command_guard
+        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+
+        try:
+            response = await self._message_handler(event)
+            # Old adapter task (if any) is cancelled AFTER the runner has
+            # fully handled the command — keeps ordering deterministic.
+            await self.cancel_session_processing(
+                session_key,
+                release_guard=False,
+                discard_pending=False,
+            )
+            if response:
+                await self._send_with_retry(
+                    chat_id=event.source.chat_id,
+                    content=response,
+                    reply_to=event.message_id,
+                    metadata=thread_meta,
+                )
+        except Exception:
+            # On failure, restore the original guard if one still exists so
+            # we don't leave the session in a half-reset state.
+            if self._active_sessions.get(session_key) is command_guard:
+                if session_key in self._session_tasks and current_guard is not None:
+                    self._active_sessions[session_key] = current_guard
+                else:
+                    self._release_session_guard(session_key, guard=command_guard)
+            raise
+
+        await self._drain_pending_after_session_command(session_key, command_guard)
+
    async def handle_message(self, event: MessageEvent) -> None:
        """
        Process an incoming message.
@@ -1687,13 +2228,23 @@ class BasePlatformAdapter(ABC):
        """
        if not self._message_handler:
            return
+
+        coerce_plaintext_gateway_command(event)
        
        session_key = build_session_key(
            event.source,
            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
        )
-        
+
+        # On-entry self-heal: if the adapter still has an _active_sessions
+        # entry for this key but the owner task has already exited (done or
+        # cancelled), the lock is stale.  Clear it and fall through to
+        # normal dispatch so the user isn't trapped behind a dead guard —
+        # this is the split-brain tail described in issue #11016.
+        if session_key in self._active_sessions:
+            self._heal_stale_session_lock(session_key)
+
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
            # Certain commands must bypass the active-session guard and be
@@ -1710,6 +2261,23 @@ class BasePlatformAdapter(ABC):
            from hermes_cli.commands import should_bypass_active_session

            if should_bypass_active_session(cmd):
+                # /stop, /new, /reset must cancel the in-flight adapter task
+                # and preserve ordering of queued follow-ups.  Route those
+                # through the dedicated handoff path that serializes
+                # cancellation + runner response + pending drain.
+                if cmd in ("stop", "new", "reset"):
+                    try:
+                        await self._dispatch_active_session_command(event, session_key, cmd)
+                    except Exception as e:
+                        logger.error(
+                            "[%s] Command '/%s' dispatch failed: %s",
+                            self.name, cmd, e, exc_info=True,
+                        )
+                    return
+
+                # Other bypass commands (/approve, /deny, /status,
+                # /background, /restart) just need direct dispatch — they
+                # don't cancel the running task.
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
@@ -1755,19 +2323,9 @@ class BasePlatformAdapter(ABC):
        # starts would also pass the _active_sessions check and spawn a
        # duplicate task.  (grammY sequentialize / aiogram EventIsolation
        # pattern — set the guard synchronously, not inside the task.)
-        self._active_sessions[session_key] = asyncio.Event()
-
-        # Spawn background task to process this message
-        task = asyncio.create_task(self._process_message_background(event, session_key))
-        try:
-            self._background_tasks.add(task)
-        except TypeError:
-            # Some tests stub create_task() with lightweight sentinels that are not
-            # hashable and do not support lifecycle callbacks.
-            return
-        if hasattr(task, "add_done_callback"):
-            task.add_done_callback(self._background_tasks.discard)
-            task.add_done_callback(self._expected_cancelled_tasks.discard)
+        # _start_session_processing installs the guard AND the owner-task
+        # mapping atomically so stale-lock detection works.
+        self._start_session_processing(event, session_key)
    
    @staticmethod
    def _get_human_delay() -> float:
@@ -1871,12 +2429,14 @@ class BasePlatformAdapter(ABC):
                    logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
                
                # Auto-TTS: if voice message, generate audio FIRST (before sending text)
-                # Skipped when the chat has voice mode disabled (/voice off)
+                # Gated via ``_should_auto_tts_for_chat``: fires when the chat has
+                # an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is
+                # True globally and no ``/voice off`` has been issued.
                _tts_path = None
-                if (event.message_type == MessageType.VOICE
+                if (self._should_auto_tts_for_chat(event.source.chat_id)
+                        and event.message_type == MessageType.VOICE
                        and text_content
-                        and not media_files
-                        and event.source.chat_id not in self._auto_tts_disabled_chats):
+                        and not media_files):
                    try:
                        from tools.tts_tool import text_to_speech_tool, check_tts_requirements
                        if check_tts_requirements():
@@ -2127,6 +2687,9 @@ class BasePlatformAdapter(ABC):
                drain_task = asyncio.create_task(
                    self._process_message_background(late_pending, session_key)
                )
+                # Hand ownership of the session to the drain task so stale-lock
+                # detection keeps working while it runs.
+                self._session_tasks[session_key] = drain_task
                try:
                    self._background_tasks.add(drain_task)
                    drain_task.add_done_callback(self._background_tasks.discard)
@@ -2136,9 +2699,14 @@ class BasePlatformAdapter(ABC):
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
-                # Clean up session tracking
-                if session_key in self._active_sessions:
-                    del self._active_sessions[session_key]
+                # Clean up session tracking.  Guard-match both deletes so a
+                # reset-like command that already swapped in its own
+                # command_guard (and cancelled us) can't be accidentally
+                # cleared by our unwind.  The command owns the session now.
+                current_task = asyncio.current_task()
+                if current_task is not None and self._session_tasks.get(session_key) is current_task:
+                    del self._session_tasks[session_key]
+                self._release_session_guard(session_key, guard=interrupt_event)
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.
@@ -2168,6 +2736,7 @@ class BasePlatformAdapter(ABC):
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
        self._expected_cancelled_tasks.clear()
+        self._session_tasks.clear()
        self._pending_messages.clear()
        self._active_sessions.clear()

@@ -2191,6 +2760,9 @@ class BasePlatformAdapter(ABC):
        user_id_alt: Optional[str] = None,
        chat_id_alt: Optional[str] = None,
        is_bot: bool = False,
+        guild_id: Optional[str] = None,
+        parent_chat_id: Optional[str] = None,
+        message_id: Optional[str] = None,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -2208,6 +2780,9 @@ class BasePlatformAdapter(ABC):
            user_id_alt=user_id_alt,
            chat_id_alt=chat_id_alt,
            is_bot=is_bot,
+            guild_id=str(guild_id) if guild_id else None,
+            parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
+            message_id=str(message_id) if message_id else None,
        )
    
    @abstractmethod
@@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str:

 class BlueBubblesAdapter(BasePlatformAdapter):
    platform = Platform.BLUEBUBBLES
+    SUPPORTS_MESSAGE_EDITING = False
    MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH

    def __init__(self, config: PlatformConfig):
@@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter):
    # Text sending
    # ------------------------------------------------------------------

+    @staticmethod
+    def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
+        # Use the base splitter but skip pagination indicators — iMessage
+        # bubbles flow naturally without "(1/3)" suffixes.
+        chunks = BasePlatformAdapter.truncate_message(content, max_length)
+        return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks]
+
    async def send(
        self,
        chat_id: str,
@@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        text = strip_markdown(content or "")
+        text = self.format_message(content)
        if not text:
            return SendResult(success=False, error="BlueBubbles send requires text")
-        chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
+        # Split on paragraph breaks first (double newlines) so each thought
+        # becomes its own iMessage bubble, then truncate any that are still
+        # too long.
+        paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
+        chunks: List[str] = []
+        for para in (paragraphs or [text]):
+            if len(para) <= self.MAX_MESSAGE_LENGTH:
+                chunks.append(para)
+            else:
+                chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
        last = SendResult(success=True)
        for chunk in chunks:
            guid = await self._resolve_chat_guid(chat_id)
@@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any
 logger = logging.getLogger(__name__)

 VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
+_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}

 try:
    import discord
@@ -304,7 +305,7 @@ class VoiceReceiver:
        encrypted = bytes(payload_with_nonce[:-4])

        try:
-            import nacl.secret  # noqa: delayed import – only in voice path
+            import nacl.secret  # noqa: E402 — delayed import, only in voice path
            box = nacl.secret.Aead(self._secret_key)
            decrypted = box.decrypt(encrypted, header, bytes(nonce))
        except Exception as e:
@@ -527,6 +528,7 @@ class DiscordAdapter(BasePlatformAdapter):
        # Reply threading mode: "off" (no replies), "first" (reply on first
        # chunk only, default), "all" (reply-reference on every chunk).
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
+        self._slash_commands: bool = self.config.extra.get("slash_commands", True)

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -744,7 +746,8 @@ class DiscordAdapter(BasePlatformAdapter):
                    )

            # Register slash commands
-            self._register_slash_commands()
+            if self._slash_commands:
+                self._register_slash_commands()

            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -800,15 +803,222 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client:
            return
        try:
-            synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
-            logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
+            sync_policy = self._get_discord_command_sync_policy()
+            if sync_policy == "off":
+                logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
+                return
+
+            if sync_policy == "bulk":
+                synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
+                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
+                return
+
+            # Discord's per-app command-management bucket is ~5 writes / 20 s,
+            # so a mass-prune-plus-upsert reconcile (e.g. 77 orphans + 30
+            # desired = 107 writes) takes several minutes of forced waits.
+            # A flat 30 s budget blew up reliably under bucket pressure and
+            # left slash commands broken for ~60 min until the bucket fully
+            # recovered. Use a wide ceiling; the cap still guards against a
+            # true hang. (#16713)
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=600)
+            logger.info(
+                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
+                self.name,
+                summary["total"],
+                summary["unchanged"],
+                summary["updated"],
+                summary["recreated"],
+                summary["created"],
+                summary["deleted"],
+            )
        except asyncio.TimeoutError:
-            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
+            logger.warning(
+                "[%s] Slash command sync timed out — Discord rate-limit bucket "
+                "may be saturated; will retry on next reconnect",
+                self.name,
+            )
        except asyncio.CancelledError:
            raise
        except Exception as e:  # pragma: no cover - defensive logging
            logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)

+    def _get_discord_command_sync_policy(self) -> str:
+        raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
+        if raw in _DISCORD_COMMAND_SYNC_POLICIES:
+            return raw
+        if raw:
+            logger.warning(
+                "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
+                self.name,
+                raw,
+            )
+        return "safe"
+
+    def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Reduce command payloads to the semantic fields Hermes manages."""
+        contexts = payload.get("contexts")
+        integration_types = payload.get("integration_types")
+        return {
+            "type": int(payload.get("type", 1) or 1),
+            "name": str(payload.get("name", "") or ""),
+            "description": str(payload.get("description", "") or ""),
+            "default_member_permissions": self._normalize_permissions(
+                payload.get("default_member_permissions")
+            ),
+            "dm_permission": bool(payload.get("dm_permission", True)),
+            "nsfw": bool(payload.get("nsfw", False)),
+            "contexts": sorted(int(c) for c in contexts) if contexts else None,
+            "integration_types": (
+                sorted(int(i) for i in integration_types) if integration_types else None
+            ),
+            "options": [
+                self._canonicalize_app_command_option(item)
+                for item in payload.get("options", []) or []
+                if isinstance(item, dict)
+            ],
+        }
+
+    @staticmethod
+    def _normalize_permissions(value: Any) -> Optional[str]:
+        """Discord emits default_member_permissions as str server-side but discord.py
+        sets it as int locally. Normalize to str-or-None so the comparison is stable."""
+        if value is None:
+            return None
+        return str(value)
+
+    def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
+        """Build a canonical-ready dict from an AppCommand.
+
+        discord.py's AppCommand.to_dict() does NOT include nsfw,
+        dm_permission, or default_member_permissions (they live only on the
+        attributes). Pull them from the attributes so the canonicalizer sees
+        the real server-side values instead of defaults — otherwise any
+        command using non-default permissions would diff on every startup.
+        """
+        payload = dict(command.to_dict())
+        nsfw = getattr(command, "nsfw", None)
+        if nsfw is not None:
+            payload["nsfw"] = bool(nsfw)
+        guild_only = getattr(command, "guild_only", None)
+        if guild_only is not None:
+            payload["dm_permission"] = not bool(guild_only)
+        default_permissions = getattr(command, "default_member_permissions", None)
+        if default_permissions is not None:
+            payload["default_member_permissions"] = getattr(
+                default_permissions, "value", default_permissions
+            )
+        return payload
+
+    def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        return {
+            "type": int(payload.get("type", 0) or 0),
+            "name": str(payload.get("name", "") or ""),
+            "description": str(payload.get("description", "") or ""),
+            "required": bool(payload.get("required", False)),
+            "autocomplete": bool(payload.get("autocomplete", False)),
+            "choices": [
+                {
+                    "name": str(choice.get("name", "") or ""),
+                    "value": choice.get("value"),
+                }
+                for choice in payload.get("choices", []) or []
+                if isinstance(choice, dict)
+            ],
+            "channel_types": list(payload.get("channel_types", []) or []),
+            "min_value": payload.get("min_value"),
+            "max_value": payload.get("max_value"),
+            "min_length": payload.get("min_length"),
+            "max_length": payload.get("max_length"),
+            "options": [
+                self._canonicalize_app_command_option(item)
+                for item in payload.get("options", []) or []
+                if isinstance(item, dict)
+            ],
+        }
+
+    def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Fields supported by discord.py's edit_global_command route."""
+        canonical = self._canonicalize_app_command_payload(payload)
+        return {
+            "name": canonical["name"],
+            "description": canonical["description"],
+            "options": canonical["options"],
+        }
+
+    async def _safe_sync_slash_commands(self) -> Dict[str, int]:
+        """Diff existing global commands and only mutate the commands that changed."""
+        if not self._client:
+            return {
+                "total": 0,
+                "unchanged": 0,
+                "updated": 0,
+                "recreated": 0,
+                "created": 0,
+                "deleted": 0,
+            }
+
+        tree = self._client.tree
+        app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
+        if not app_id:
+            raise RuntimeError("Discord application ID is unavailable for slash command sync")
+
+        desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
+        desired_by_key = {
+            (int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
+            for payload in desired_payloads
+        }
+        existing_commands = await tree.fetch_commands()
+        existing_by_key = {
+            (
+                int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
+                str(command.name or "").lower(),
+            ): command
+            for command in existing_commands
+        }
+
+        unchanged = 0
+        updated = 0
+        recreated = 0
+        created = 0
+        deleted = 0
+        http = self._client.http
+
+        for key, desired in desired_by_key.items():
+            current = existing_by_key.pop(key, None)
+            if current is None:
+                await http.upsert_global_command(app_id, desired)
+                created += 1
+                continue
+
+            current_existing_payload = self._existing_command_to_payload(current)
+            current_payload = self._canonicalize_app_command_payload(current_existing_payload)
+            desired_payload = self._canonicalize_app_command_payload(desired)
+            if current_payload == desired_payload:
+                unchanged += 1
+                continue
+
+            if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
+                await http.delete_global_command(app_id, current.id)
+                await http.upsert_global_command(app_id, desired)
+                recreated += 1
+                continue
+
+            await http.edit_global_command(app_id, current.id, desired)
+            updated += 1
+
+        for current in existing_by_key.values():
+            await http.delete_global_command(app_id, current.id)
+            deleted += 1
+
+        return {
+            "total": len(desired_payloads),
+            "unchanged": unchanged,
+            "updated": updated,
+            "recreated": recreated,
+            "created": created,
+            "deleted": deleted,
+        }
+
    async def _add_reaction(self, message: Any, emoji: str) -> bool:
        """Add an emoji reaction to a Discord message."""
        if not message or not hasattr(message, "add_reaction"):
@@ -2047,10 +2257,6 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_usage(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/usage")

-        @tree.command(name="provider", description="Show available providers")
-        async def slash_provider(interaction: discord.Interaction):
-            await self._run_simple_slash(interaction, "/provider")
-
        @tree.command(name="help", description="Show available commands")
        async def slash_help(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/help")
@@ -2120,19 +2326,46 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_background(interaction: discord.Interaction, prompt: str):
            await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~")

-        @tree.command(name="btw", description="Ephemeral side question using session context")
-        @discord.app_commands.describe(question="Your side question (no tools, not persisted)")
-        async def slash_btw(interaction: discord.Interaction, question: str):
-            await self._run_simple_slash(interaction, f"/btw {question}")
-
        # ── Auto-register any gateway-available commands not yet on the tree ──
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
+        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
+            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
+            discord_name = _name.lower()[:32]
+            desc = (_description or f"Run /{_name}")[:100]
+            has_args = bool(_args_hint)
+
+            if has_args:
+                def _make_args_handler(__name: str, __hint: str):
+                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
+                    async def _handler(interaction: discord.Interaction, args: str = ""):
+                        await self._run_simple_slash(
+                            interaction, f"/{__name} {args}".strip()
+                        )
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_args_handler(_name, _args_hint)
+            else:
+                def _make_simple_handler(__name: str):
+                    async def _handler(interaction: discord.Interaction):
+                        await self._run_simple_slash(interaction, f"/{__name}")
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_simple_handler(_name)
+
+            return discord.app_commands.Command(
+                name=discord_name,
+                description=desc,
+                callback=handler,
+            )
+
+        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

-            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2147,38 +2380,10 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                # Skip aliases that overlap with already-registered names
-                # (aliases for explicitly registered commands are handled above).
-                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
-                has_args = bool(cmd_def.args_hint)
-
-                if has_args:
-                    # Command takes optional arguments — create handler with
-                    # an optional ``args`` string parameter.
-                    def _make_args_handler(_name: str, _hint: str):
-                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
-                        async def _handler(interaction: discord.Interaction, args: str = ""):
-                            await self._run_simple_slash(
-                                interaction, f"/{_name} {args}".strip()
-                            )
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
-                else:
-                    # Parameterless command.
-                    def _make_simple_handler(_name: str):
-                        async def _handler(interaction: discord.Interaction):
-                            await self._run_simple_slash(interaction, f"/{_name}")
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_simple_handler(cmd_def.name)
-
-                auto_cmd = discord.app_commands.Command(
-                    name=discord_name,
-                    description=desc,
-                    callback=handler,
+                auto_cmd = _build_auto_slash_command(
+                    cmd_def.name,
+                    cmd_def.description,
+                    cmd_def.args_hint,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2195,6 +2400,35 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

+        # ── Plugin-registered slash commands ──
+        # Plugins register via PluginContext.register_command(); we mirror
+        # those into Discord's native slash picker so users get the same
+        # autocomplete UX as for built-in commands. No per-platform plugin
+        # API needed — plugin commands are platform-agnostic.
+        try:
+            from hermes_cli.commands import _iter_plugin_command_entries
+
+            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
+                discord_name = plugin_name.lower()[:32]
+                if discord_name in already_registered:
+                    continue
+                auto_cmd = _build_auto_slash_command(
+                    plugin_name,
+                    plugin_desc,
+                    plugin_args_hint,
+                )
+                try:
+                    tree.add_command(auto_cmd)
+                    already_registered.add(discord_name)
+                except Exception:
+                    # Silently skip commands that fail registration (e.g.
+                    # name conflict with a subcommand group).
+                    pass
+        except Exception as e:
+            logger.warning(
+                "Discord auto-register from plugin commands failed: %s", e
+            )
+
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -2456,21 +2690,8 @@ class DiscordAdapter(BasePlatformAdapter):
                skills: ["skill-a", "skill-b"]
        Also checks parent_id so forum threads inherit the forum's bindings.
        """
-        bindings = self.config.extra.get("channel_skill_bindings", [])
-        if not bindings:
-            return None
-        ids_to_check = {channel_id}
-        if parent_id:
-            ids_to_check.add(parent_id)
-        for entry in bindings:
-            entry_id = str(entry.get("id", ""))
-            if entry_id in ids_to_check:
-                skills = entry.get("skills") or entry.get("skill")
-                if isinstance(skills, str):
-                    return [skills]
-                if isinstance(skills, list) and skills:
-                    return list(dict.fromkeys(skills))  # dedup, preserve order
-        return None
+        from gateway.platforms.base import resolve_channel_skills
+        return resolve_channel_skills(self.config.extra, channel_id, parent_id)

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
        """Resolve a Discord per-channel prompt, preferring the exact channel over its parent."""
@@ -2487,7 +2708,12 @@ class DiscordAdapter(BasePlatformAdapter):
        return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")

    def _discord_free_response_channels(self) -> set:
-        """Return Discord channel IDs where no bot mention is required."""
+        """Return Discord channel IDs where no bot mention is required.
+
+        A single ``"*"`` entry (either from a list or a comma-separated
+        string) is preserved in the returned set so callers can short-circuit
+        on wildcard membership, consistent with ``allowed_channels``.
+        """
        raw = self.config.extra.get("free_response_channels")
        if raw is None:
            raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
@@ -2980,14 +3206,14 @@ class DiscordAdapter(BasePlatformAdapter):
            allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
            if allowed_channels_raw:
                allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
-                if not (channel_ids & allowed_channels):
+                if "*" not in allowed_channels and not (channel_ids & allowed_channels):
                    logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
                    return

            # Check ignored channels - never respond even when mentioned
            ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
            ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
-            if channel_ids & ignored_channels:
+            if "*" in ignored_channels or (channel_ids & ignored_channels):
                logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
                return

@@ -3001,7 +3227,11 @@ class DiscordAdapter(BasePlatformAdapter):
            voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
            current_channel_id = str(message.channel.id)
            is_voice_linked_channel = current_channel_id in voice_linked_ids
-            is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel
+            is_free_channel = (
+                "*" in free_channels
+                or bool(channel_ids & free_channels)
+                or is_voice_linked_channel
+            )

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
@@ -3024,6 +3254,7 @@ class DiscordAdapter(BasePlatformAdapter):
            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
                thread = await self._auto_create_thread(message)
                if thread:
+                    parent_channel_id = str(message.channel.id)
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
@@ -3074,6 +3305,7 @@ class DiscordAdapter(BasePlatformAdapter):
        chat_topic = self._get_effective_topic(message.channel, is_thread=is_thread)

        # Build source
+        guild = getattr(message, "guild", None)
        source = self.build_source(
            chat_id=str(effective_channel.id),
            chat_name=chat_name,
@@ -3083,6 +3315,9 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
+            guild_id=str(guild.id) if guild else None,
+            parent_chat_id=parent_channel_id,
+            message_id=str(message.id),
        )

        # Build media URLs -- download image attachments to local cache so the
@@ -3634,6 +3869,15 @@ if DISCORD_AVAILABLE:

            self.resolved = True
            model_id = interaction.data["values"][0]
+            self.clear_items()
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Switching Model",
+                    description=f"Switching to `{model_id}`...",
+                    color=discord.Color.blue(),
+                ),
+                view=None,
+            )

            try:
                result_text = await self.on_model_selected(
@@ -3644,14 +3888,13 @@ if DISCORD_AVAILABLE:
            except Exception as exc:
                result_text = f"Error switching model: {exc}"

-            self.clear_items()
-            await interaction.response.edit_message(
+            await interaction.edit_original_response(
                embed=discord.Embed(
                    title="⚙ Model Switched",
                    description=result_text,
                    color=discord.Color.green(),
                ),
-                view=self,
+                view=None,
            )

        async def _on_back(self, interaction: discord.Interaction):
@@ -28,6 +28,7 @@ from email.header import decode_header
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.mime.base import MIMEBase
+from email.utils import formatdate
 from email import encoders
 from pathlib import Path
 from typing import Any, Dict, List, Optional
@@ -504,6 +505,7 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

+        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -545,6 +547,7 @@ class EmailAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
+        **kwargs,
    ) -> SendResult:
        """Send a file as an email attachment."""
        try:
@@ -585,6 +588,7 @@ class EmailAdapter(BasePlatformAdapter):
            msg["In-Reply-To"] = original_msg_id
            msg["References"] = original_msg_id

+        msg["Date"] = formatdate(localtime=True)
        msg_id = f"<hermes-{uuid.uuid4().hex[:12]}@{self._address.split('@')[1]}>"
        msg["Message-ID"] = msg_id

@@ -14,6 +14,35 @@ Supports:
 - Interactive card button-click events routed as synthetic COMMAND events
 - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
 - Verification token validation as second auth layer (matches openclaw)
+
+Feishu identity model
+---------------------
+Feishu uses three user-ID tiers (official docs:
+https://open.feishu.cn/document/home/user-identity-introduction/introduction):
+
+  open_id  (ou_xxx)  — **App-scoped**.  The same person gets a different
+                        open_id under each Feishu app.  Always available in
+                        event payloads without extra permissions.
+  user_id  (u_xxx)   — **Tenant-scoped**.  Stable within a company but
+                        requires the ``contact:user.employee_id:readonly``
+                        scope.  May not be present.
+  union_id (on_xxx)  — **Developer-scoped**.  Same across all apps owned by
+                        one developer/ISV.  Best cross-app stable ID.
+
+For bots specifically:
+
+  app_id              — The application's canonical credential identifier.
+  bot open_id         — Returned by ``/bot/v3/info``.  This is the bot's own
+                        open_id *within its app context* and is what Feishu
+                        puts in ``mentions[].id.open_id`` when someone
+                        @-mentions the bot.  Used for mention gating only.
+
+In single-bot mode (what Hermes currently supports), open_id works as a
+de-facto unique user identifier since there is only one app context.
+
+Session-key participant isolation prefers ``union_id`` (via user_id_alt)
+over ``open_id`` (via user_id) so that sessions stay stable if the same
+user is seen through different apps in the future.
 """

 from __future__ import annotations
@@ -35,7 +64,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Sequence
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -73,7 +102,9 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
+    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
+    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -234,6 +265,8 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
+_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
+_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -277,12 +310,36 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


+@dataclass(frozen=True)
+class FeishuMentionRef:
+    name: str = ""
+    open_id: str = ""
+    is_all: bool = False
+    is_self: bool = False
+
+
+@dataclass(frozen=True)
+class _FeishuBotIdentity:
+    open_id: str = ""
+    user_id: str = ""
+    name: str = ""
+
+    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
+        # Precedence: open_id > user_id > name. IDs are authoritative when both
+        # sides have them; the next tier is only considered when either side
+        # lacks the current one.
+        if open_id and self.open_id:
+            return open_id == self.open_id
+        if user_id and self.user_id:
+            return user_id == self.user_id
+        return bool(self.name) and name == self.name
+
+
@dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)


@dataclass(frozen=True)
@@ -292,14 +349,14 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)
+    mentions: List[FeishuMentionRef] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass(frozen=True)
 class FeishuAdapterSettings:
-    app_id: str
+    app_id: str  # Canonical bot/app identifier (credential, not from event payloads)
    app_secret: str
    domain_name: str
    connection_mode: str
@@ -307,7 +364,11 @@ class FeishuAdapterSettings:
    verification_token: str
    group_policy: str
    allowed_group_users: frozenset[str]
+    # Bot's own open_id (app-scoped) — returned by /bot/v3/info.  Used only for
+    # @mention matching: Feishu puts this value in mentions[].id.open_id when
+    # a user @-mentions the bot in a group chat.
    bot_open_id: str
+    # Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
    bot_user_id: str
    bot_name: str
    dedup_cache_size: int
@@ -505,14 +566,17 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
+def parse_feishu_post_payload(
+    payload: Any,
+    *,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
-    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -523,7 +587,10 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
+            "".join(
+                _render_post_element(item, image_keys, media_refs, mentions_map)
+                for item in row
+            )
        )
        if row_text:
            parts.append(row_text)
@@ -532,7 +599,6 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
-        mentioned_ids=mentioned_ids,
    )


@@ -584,7 +650,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(element, str):
        return element
@@ -602,19 +668,21 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        mentioned_id = (
-            str(element.get("open_id", "")).strip()
-            or str(element.get("user_id", "")).strip()
-        )
-        if mentioned_id and mentioned_id not in mentioned_ids:
-            mentioned_ids.append(mentioned_id)
-        display_name = (
-            str(element.get("user_name", "")).strip()
-            or str(element.get("name", "")).strip()
-            or str(element.get("text", "")).strip()
-            or mentioned_id
-        )
-        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
+        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
+        # the real ref in mentions_map for the display name.
+        placeholder = str(element.get("user_id", "")).strip()
+        if placeholder == "@_all":
+            # Feishu SDK sometimes omits @_all from the top-level mentions
+            # payload; record it here so the caller's mention list stays complete.
+            if mentions_map is not None and "@_all" not in mentions_map:
+                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
+            return "@all"
+        ref = (mentions_map or {}).get(placeholder)
+        if ref is not None:
+            display_name = ref.name or ref.open_id or "user"
+        else:
+            display_name = str(element.get("user_name", "")).strip() or "user"
+        return f"@{_escape_markdown_text(display_name)}"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -652,8 +720,7 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        value = element.get(key)
-        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
+        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -663,7 +730,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -671,17 +738,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
+        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    return ""
@@ -692,31 +759,48 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
+def normalize_feishu_message(
+    *,
+    message_type: str,
+    raw_content: str,
+    mentions: Optional[Sequence[Any]] = None,
+    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
+) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
+    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
+        text = str(payload.get("text", "") or "")
+        # Feishu SDK sometimes omits @_all from the mentions payload even when
+        # the text literal contains it (confirmed via im.v1.message.get).
+        if "@_all" in text and "@_all" not in mentions_map:
+            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
+            text_content=_normalize_feishu_text(text, mentions_map),
+            mentions=list(mentions_map.values()),
        )
    if normalized_type == "post":
-        parsed_post = parse_feishu_post_payload(payload)
+        # The walker writes back to mentions_map if it encounters
+        # <at user_id="@_all">, so reading .values() after parsing is enough.
+        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentioned_ids=list(parsed_post.mentioned_ids),
+            mentions=list(mentions_map.values()),
            relation_kind="post",
        )
+    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT
+            or FALLBACK_IMAGE_TEXT,
+            mentions_map,
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -724,6 +808,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
+            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -735,6 +820,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
+            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -1009,8 +1095,20 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(text: str) -> str:
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
+def _normalize_feishu_text(
+    text: str,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> str:
+    def _sub(match: "re.Match[str]") -> str:
+        key = match.group(0)
+        ref = (mentions_map or {}).get(key)
+        if ref is None:
+            return " "
+        name = ref.name or ref.open_id or "user"
+        return f"@{name}"
+
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
+    cleaned = cleaned.replace("@_all", "@all")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1029,6 +1127,117 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


+# ---------------------------------------------------------------------------
+# Mention helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_mention_ids(mention: Any) -> tuple[str, str]:
+    # Returns (open_id, user_id). im.v1.message.get hands back id as a string
+    # plus id_type discriminator; event payloads hand back a nested UserId
+    # object carrying both fields.
+    mention_id = getattr(mention, "id", None)
+    if isinstance(mention_id, str):
+        id_type = str(getattr(mention, "id_type", "") or "").lower()
+        if id_type == "open_id":
+            return mention_id, ""
+        if id_type == "user_id":
+            return "", mention_id
+        return "", ""
+    if mention_id is None:
+        return "", ""
+    return (
+        str(getattr(mention_id, "open_id", "") or ""),
+        str(getattr(mention_id, "user_id", "") or ""),
+    )
+
+
+def _build_mentions_map(
+    mentions: Optional[Sequence[Any]],
+    bot: _FeishuBotIdentity,
+) -> Dict[str, FeishuMentionRef]:
+    result: Dict[str, FeishuMentionRef] = {}
+    for mention in mentions or []:
+        key = str(getattr(mention, "key", "") or "")
+        if not key:
+            continue
+        if key == "@_all":
+            result[key] = FeishuMentionRef(is_all=True)
+            continue
+        open_id, user_id = _extract_mention_ids(mention)
+        name = str(getattr(mention, "name", "") or "").strip()
+        result[key] = FeishuMentionRef(
+            name=name,
+            open_id=open_id,
+            is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
+        )
+    return result
+
+
+def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
+    parts: List[str] = []
+    seen: set = set()
+    for ref in mentions:
+        if ref.is_self:
+            continue
+        signature = (ref.is_all, ref.open_id, ref.name)
+        if signature in seen:
+            continue
+        seen.add(signature)
+        if ref.is_all:
+            parts.append("@all")
+        elif ref.open_id:
+            parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
+        else:
+            parts.append(ref.name or "unknown")
+    return f"[Mentioned: {', '.join(parts)}]" if parts else ""
+
+
+def _strip_edge_self_mentions(
+    text: str,
+    mentions: Sequence[FeishuMentionRef],
+) -> str:
+    # Leading: strip consecutive self-mentions unconditionally.
+    # Trailing: strip only when followed by whitespace/terminal punct, so
+    # mid-sentence references ("don't @Bot again") stay intact.
+    # Leading word-boundary prevents @Al from eating @Alice.
+    if not text:
+        return text
+    self_names = [
+        f"@{ref.name or ref.open_id or 'user'}"
+        for ref in mentions
+        if ref.is_self
+    ]
+    if not self_names:
+        return text
+
+    remaining = text.lstrip()
+    while True:
+        for nm in self_names:
+            if not remaining.startswith(nm):
+                continue
+            after = remaining[len(nm):]
+            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
+                continue
+            remaining = after.lstrip()
+            break
+        else:
+            break
+
+    while True:
+        i = len(remaining)
+        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
+            i -= 1
+        body = remaining[:i]
+        tail = remaining[i:]
+        for nm in self_names:
+            if body.endswith(nm):
+                remaining = body[: -len(nm)].rstrip() + tail
+                break
+        else:
+            return remaining
+
+
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -1491,6 +1700,7 @@ class FeishuAdapter(BasePlatformAdapter):
        if not self._client:
            return SendResult(success=False, error="Not connected")

+        content = self.format_message(content)
        try:
            msg_type, payload = self._build_outbound_payload(content)
            body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2470,13 +2680,22 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
+        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
+
+        if inbound_type == MessageType.TEXT:
+            text = _strip_edge_self_mentions(text, mentions)
+            if text.startswith("/"):
+                inbound_type = MessageType.COMMAND
+
+        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
+            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
            return

-        if inbound_type == MessageType.TEXT and text.startswith("/"):
-            inbound_type = MessageType.COMMAND
+        if inbound_type != MessageType.COMMAND:
+            hint = _build_mention_hint(mentions)
+            if hint:
+                text = f"{hint}\n\n{text}" if text else hint

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -2935,14 +3154,20 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
-        """Extract text and cached media from a normalized Feishu message."""
+    async def _extract_message_content(
+        self, message: Any
+    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
+        normalized = normalize_feishu_message(
+            message_type=raw_type,
+            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
+        )
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -2959,7 +3184,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types
+        return text, inbound_type, media_urls, media_types, list(normalized.mentions)

    async def _download_feishu_message_resources(
        self,
@@ -3223,10 +3448,22 @@ class FeishuAdapter(BasePlatformAdapter):
        return "group"

    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
+        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
+
+        Preference order for the primary ``user_id`` field:
+          1. user_id  (tenant-scoped, most stable — requires permission scope)
+          2. open_id  (app-scoped, always available — different per bot app)
+
+        ``user_id_alt`` carries the union_id (developer-scoped, stable across
+        all apps by the same developer).  Session-key generation prefers
+        user_id_alt when present, so participant isolation stays stable even
+        if the primary ID is the app-scoped open_id.
+        """
        open_id = getattr(sender_id, "open_id", None) or None
        user_id = getattr(sender_id, "user_id", None) or None
        union_id = getattr(sender_id, "union_id", None) or None
-        primary_id = open_id or user_id
+        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
+        primary_id = user_id or open_id
        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
        return {
            "user_id": primary_id,
@@ -3308,15 +3545,31 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
+            parent_mentions = getattr(parent, "mentions", None) if parent else None
+            text = self._extract_text_from_raw_content(
+                msg_type=msg_type,
+                raw_content=raw_content,
+                mentions=parent_mentions,
+            )
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
-        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
+    def _extract_text_from_raw_content(
+        self,
+        *,
+        msg_type: str,
+        raw_content: str,
+        mentions: Optional[Sequence[Any]] = None,
+    ) -> Optional[str]:
+        normalized = normalize_feishu_message(
+            message_type=msg_type,
+            raw_content=raw_content,
+            mentions=mentions,
+            bot=self._bot_identity(),
+        )
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3386,10 +3639,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
        )
-        if normalized.mentioned_ids:
-            return self._post_mentions_bot(normalized.mentioned_ids)
-        return False
+        return self._post_mentions_bot(normalized.mentions)

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3409,30 +3662,37 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        """Check whether any mention targets the configured or inferred bot identity."""
+        # IDs trump names: when both sides have open_id (or both user_id),
+        # match requires equal IDs. Name fallback only when either side
+        # lacks an ID.
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = getattr(mention_id, "open_id", None)
-            mention_user_id = getattr(mention_id, "user_id", None)
+            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
+            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if self._bot_open_id and mention_open_id == self._bot_open_id:
-                return True
-            if self._bot_user_id and mention_user_id == self._bot_user_id:
-                return True
+            if mention_open_id and self._bot_open_id:
+                if mention_open_id == self._bot_open_id:
+                    return True
+                continue  # IDs differ — not the bot; skip name fallback.
+            if mention_user_id and self._bot_user_id:
+                if mention_user_id == self._bot_user_id:
+                    return True
+                continue
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
-        if not mentioned_ids:
-            return False
-        if self._bot_open_id and self._bot_open_id in mentioned_ids:
-            return True
-        if self._bot_user_id and self._bot_user_id in mentioned_ids:
-            return True
-        return False
+    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
+        return any(m.is_self for m in mentions)
+
+    def _bot_identity(self) -> _FeishuBotIdentity:
+        return _FeishuBotIdentity(
+            open_id=self._bot_open_id,
+            user_id=self._bot_user_id,
+            name=self._bot_name,
+        )

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3457,14 +3717,15 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                resp = await asyncio.to_thread(
-                    self._client.request,
-                    method="GET",
-                    url="/open-apis/bot/v3/info",
-                    body=None,
-                    raw_response=True,
+                req = (
+                    BaseRequest.builder()
+                    .http_method(HttpMethod.GET)
+                    .uri("/open-apis/bot/v3/info")
+                    .token_types({AccessTokenType.TENANT})
+                    .build()
                )
-                content = getattr(resp, "content", None)
+                resp = await asyncio.to_thread(self._client.request, req)
+                content = getattr(getattr(resp, "raw", None), "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4212,6 +4473,9 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:

    Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
+
+    Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
+    that Feishu puts in @mention payloads.  It is NOT the app_id.
    """
    if FEISHU_AVAILABLE:
        return _probe_bot_sdk(app_id, app_secret, domain)
@@ -4232,12 +4496,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
+    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("bot_name"),
+        "bot_name": bot.get("app_name") or bot.get("bot_name"),
        "bot_open_id": bot.get("open_id"),
    }

@@ -4246,13 +4510,18 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        resp = client.request(
-            method="GET",
-            url="/open-apis/bot/v3/info",
-            body=None,
-            raw_response=True,
+        req = (
+            BaseRequest.builder()
+            .http_method(HttpMethod.GET)
+            .uri("/open-apis/bot/v3/info")
+            .token_types({AccessTokenType.TENANT})
+            .build()
        )
-        return _parse_bot_response(json.loads(resp.content))
+        resp = client.request(req)
+        content = getattr(getattr(resp, "raw", None), "content", None)
+        if content is None:
+            return None
+        return _parse_bot_response(json.loads(content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -974,7 +974,6 @@ def build_whole_comment_prompt(

 def _resolve_model_and_runtime() -> Tuple[str, dict]:
    """Resolve model and provider credentials, same as gateway message handling."""
-    import os
    from gateway.run import _load_gateway_config, _resolve_gateway_model

    user_config = _load_gateway_config()
@@ -11,10 +11,10 @@ import logging
 import re
 import time
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, Optional
+from typing import TYPE_CHECKING, Dict

 if TYPE_CHECKING:
-    from gateway.platforms.base import BasePlatformAdapter, MessageEvent
+    from gateway.platforms.base import MessageEvent

 logger = logging.getLogger(__name__)

@@ -57,6 +57,15 @@ class MessageDeduplicator:
        if len(self._seen) > self._max_size:
            cutoff = now - self._ttl
            self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
+            if len(self._seen) > self._max_size:
+                # TTL pruning alone does not cap the cache when every entry is
+                # still fresh. Keep the newest entries so the helper's
+                # max_size bound is enforced under sustained traffic.
+                newest = sorted(
+                    self._seen.items(),
+                    key=lambda item: item[1],
+                )[-self._max_size:]
+                self._seen = dict(newest)
        return False

    def clear(self):
@@ -11,6 +11,7 @@ Environment variables:
    MATRIX_PASSWORD             Password (alternative to access token)
    MATRIX_ENCRYPTION           Set "true" to enable E2EE
    MATRIX_DEVICE_ID            Stable device ID for E2EE persistence across restarts
+    MATRIX_PROXY                HTTP(S) or SOCKS proxy URL for Matrix traffic
    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
    MATRIX_REACTIONS        Set "false" to disable processing lifecycle reactions
@@ -18,6 +19,7 @@ Environment variables:
    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
+    MATRIX_DM_AUTO_THREAD       Auto-create threads for DM messages (default: false)
    MATRIX_RECOVERY_KEY         Recovery key for cross-signing verification after device key rotation
    MATRIX_DM_MENTION_THREADS   Create a thread when bot is @mentioned in a DM (default: false)
 """
@@ -30,6 +32,8 @@ import mimetypes
 import os
 import re
 import time
+from dataclasses import dataclass
+
 from html import escape as _html_escape
 from pathlib import Path
 from typing import Any, Dict, Optional, Set
@@ -95,11 +99,25 @@ from gateway.platforms.base import (
    MessageType,
    ProcessingOutcome,
    SendResult,
+    resolve_proxy_url,
+    proxy_kwargs_for_aiohttp,
 )
 from gateway.platforms.helpers import ThreadParticipationTracker

 logger = logging.getLogger(__name__)

+
+@dataclass
+class _MatrixApprovalPrompt:
+    """Tracks a pending Matrix reaction-based exec approval prompt."""
+
+    def __init__(self, session_key: str, chat_id: str, message_id: str, resolved: bool = False):
+        self.session_key = session_key
+        self.chat_id = chat_id
+        self.message_id = message_id
+        self.resolved = resolved
+        self.bot_reaction_events: dict[str, str] = {}  # emoji -> event_id
+
 # Matrix message size limit (4000 chars practical, spec has no hard limit
 # but clients render poorly above this).
 MAX_MESSAGE_LENGTH = 4000
@@ -114,11 +132,85 @@ _CRYPTO_DB_PATH = _STORE_DIR / "crypto.db"
 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5

+_OUTBOUND_MENTION_RE = re.compile(
+    r"(?<![\w/])(@[0-9A-Za-z._=/-]+:[0-9A-Za-z.-]+(?::\d+)?)"
+)

 _E2EE_INSTALL_HINT = (
    "Install with: pip install 'mautrix[encryption]'  (requires libolm C library)"
 )

+_MATRIX_IMAGE_FILENAME_EXTS = frozenset({
+    ".jpg",
+    ".jpeg",
+    ".png",
+    ".gif",
+    ".webp",
+    ".bmp",
+    ".svg",
+    ".heic",
+    ".heif",
+    ".avif",
+})
+
+
+def _looks_like_matrix_image_filename(text: str) -> bool:
+    """Return True when Matrix image body text is probably just a transport filename.
+
+    Matrix ``m.image`` events commonly populate ``content.body`` with the uploaded
+    filename when the user did not add a caption. Treating that raw filename as
+    user-authored text confuses downstream vision enrichment.
+    """
+    candidate = str(text or "").strip()
+    if not candidate or "\n" in candidate or candidate.endswith("/"):
+        return False
+
+    name = Path(candidate).name
+    if not name or name != candidate:
+        return False
+
+    suffix = Path(name).suffix.lower()
+    if not suffix:
+        return False
+
+    guessed_type, _ = mimetypes.guess_type(name)
+    if guessed_type and guessed_type.startswith("image/"):
+        return True
+    return suffix in _MATRIX_IMAGE_FILENAME_EXTS
+
+
+def _create_matrix_session(proxy_url: str | None):
+    """Create an ``aiohttp.ClientSession`` whose proxy applies to *all* requests.
+
+    mautrix's ``HTTPAPI._send()`` calls ``session.request()`` without forwarding
+    per-request ``proxy=`` kwargs.  For HTTP(S) proxies we use aiohttp's native
+    ``proxy=`` session parameter which sets a default for every request.  For SOCKS
+    we use ``aiohttp_socks.ProxyConnector`` (connector-level).
+    When no proxy is configured we enable ``trust_env`` so standard env vars
+    (``HTTP_PROXY`` / ``HTTPS_PROXY``) are honoured automatically.
+    """
+    import aiohttp
+
+    if not proxy_url:
+        return aiohttp.ClientSession(trust_env=True)
+
+    if proxy_url.split("://")[0].lower().startswith("socks"):
+        try:
+            from aiohttp_socks import ProxyConnector
+
+            return aiohttp.ClientSession(
+                connector=ProxyConnector.from_url(proxy_url, rdns=True),
+            )
+        except ImportError:
+            logger.warning(
+                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+                "Run: pip install aiohttp-socks",
+                proxy_url,
+            )
+            return aiohttp.ClientSession(trust_env=True)
+
+    return aiohttp.ClientSession(proxy=proxy_url)
+

 def _check_e2ee_deps() -> bool:
    """Return True if mautrix E2EE dependencies (python-olm) are available."""
@@ -260,6 +352,9 @@ class MatrixAdapter(BasePlatformAdapter):
            "1",
            "yes",
        )
+        self._dm_auto_thread: bool = os.getenv(
+            "MATRIX_DM_AUTO_THREAD", "false"
+        ).lower() in ("true", "1", "yes")
        self._dm_mention_threads: bool = os.getenv(
            "MATRIX_DM_MENTION_THREADS", "false"
        ).lower() in ("true", "1", "yes")
@@ -270,6 +365,11 @@ class MatrixAdapter(BasePlatformAdapter):
        ).lower() not in ("false", "0", "no")
        self._pending_reactions: dict[tuple[str, str], str] = {}

+        # Proxy support — resolve once at init, reuse for all HTTP traffic.
+        self._proxy_url: str | None = resolve_proxy_url(platform_env_var="MATRIX_PROXY")
+        if self._proxy_url:
+            logger.info("Matrix: proxy configured — %s", self._proxy_url)
+
        # Text batching: merge rapid successive messages (Telegram-style).
        # Matrix clients split long messages around 4000 chars.
        self._text_batch_delay_seconds = float(
@@ -281,6 +381,18 @@ class MatrixAdapter(BasePlatformAdapter):
        self._pending_text_batches: Dict[str, MessageEvent] = {}
        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}

+        # Matrix reaction-based dangerous command approvals.
+        self._approval_reaction_map = {
+            "✅": "once",
+            "❎": "deny",
+        }
+        self._approval_prompts_by_event: Dict[str, _MatrixApprovalPrompt] = {}
+        self._approval_prompt_by_session: Dict[str, str] = {}
+        allowed_users_raw = os.getenv("MATRIX_ALLOWED_USERS", "")
+        self._allowed_user_ids: Set[str] = {
+            u.strip() for u in allowed_users_raw.split(",") if u.strip()
+        }
+
    def _is_duplicate_event(self, event_id) -> bool:
        """Return True if this event was already processed. Tracks the ID otherwise."""
        if not event_id:
@@ -326,7 +438,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    )
                    return False
        except Exception as exc:
-            logger.error("Matrix: post-upload key verification failed: %s", exc)
+            logger.error("Matrix: post-upload key verification failed: %s", exc, exc_info=True)
            return False
        return True

@@ -342,6 +454,7 @@ class MatrixAdapter(BasePlatformAdapter):
            logger.error(
                "Matrix: cannot verify device keys on server: %s — refusing E2EE",
                exc,
+                exc_info=True,
            )
            return False

@@ -356,7 +469,7 @@ class MatrixAdapter(BasePlatformAdapter):
            try:
                await olm.share_keys()
            except Exception as exc:
-                logger.error("Matrix: failed to re-upload device keys: %s", exc)
+                logger.error("Matrix: failed to re-upload device keys: %s", exc, exc_info=True)
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)

@@ -396,6 +509,7 @@ class MatrixAdapter(BasePlatformAdapter):
                    "Try generating a new access token to get a fresh device.",
                    client.device_id,
                    exc,
+                    exc_info=True,
                )
                return False
            return await self._reverify_keys_after_upload(client, local_ed25519)
@@ -420,9 +534,11 @@ class MatrixAdapter(BasePlatformAdapter):
        _STORE_DIR.mkdir(parents=True, exist_ok=True)

        # Create the HTTP API layer.
+        client_session = _create_matrix_session(self._proxy_url)
        api = HTTPAPI(
            base_url=self._homeserver,
            token=self._access_token or "",
+            client_session=client_session,
        )

        # Create the client.
@@ -465,6 +581,7 @@ class MatrixAdapter(BasePlatformAdapter):
                logger.error(
                    "Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER: %s",
                    exc,
+                    exc_info=True,
                )
                await api.session.close()
                return False
@@ -532,6 +649,20 @@ class MatrixAdapter(BasePlatformAdapter):
                )
                await crypto_store.open()

+                # Bind the store to the runtime device_id before any
+                # put_account() runs. PgCryptoStore defaults _device_id
+                # to "" and its crypto_account UPSERT never updates the
+                # device_id column on conflict — so once put_account
+                # writes blank, it stays blank forever. That breaks
+                # every downstream device-scoped olm operation: peer
+                # to-device ciphertext can't find our identity key and
+                # no megolm sessions ever land. Setting _device_id here
+                # (in-memory; the on-disk row may not exist yet) makes
+                # the first put_account write the correct value.
+                # DeviceID is a NewType(str) so plain str works at runtime.
+                if client.device_id:
+                    await crypto_store.put_device_id(client.device_id)
+
                crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
                olm = OlmMachine(client, crypto_store, crypto_state)

@@ -593,6 +724,44 @@ class MatrixAdapter(BasePlatformAdapter):
                        logger.warning(
                            "Matrix: recovery key verification failed: %s", exc
                        )
+                else:
+                    # No recovery key — bootstrap cross-signing if the bot
+                    # has none yet. Without this, Element shows "Encrypted
+                    # by a device not verified by its owner" on every
+                    # message from this bot, indefinitely. mautrix's
+                    # generate_recovery_key does the full flow: generates
+                    # MSK/SSK/USK, uploads private keys to SSSS, publishes
+                    # public keys to the homeserver, and signs the current
+                    # device with the new SSK. Some homeservers require UIA
+                    # for /keys/device_signing/upload — those will need an
+                    # alternate path; Continuwuity and Synapse-with-shared-
+                    # secret accept the unauthenticated upload.
+                    try:
+                        own_xsign = await olm.get_own_cross_signing_public_keys()
+                    except Exception as exc:
+                        own_xsign = None
+                        logger.warning(
+                            "Matrix: cross-signing key lookup failed: %s", exc
+                        )
+                    if own_xsign is None:
+                        try:
+                            new_recovery_key = await olm.generate_recovery_key()
+                            logger.warning(
+                                "Matrix: bootstrapped cross-signing for %s. "
+                                "SAVE THIS RECOVERY KEY — set "
+                                "MATRIX_RECOVERY_KEY for future restarts so "
+                                "the bot can re-sign its device after key "
+                                "rotation: %s",
+                                client.mxid,
+                                new_recovery_key,
+                            )
+                        except Exception as exc:
+                            logger.warning(
+                                "Matrix: cross-signing bootstrap failed "
+                                "(non-fatal — Element will show 'not "
+                                "verified by its owner'): %s",
+                                exc,
+                            )

                client.crypto = olm
                logger.info(
@@ -650,6 +819,7 @@ class MatrixAdapter(BasePlatformAdapter):
                        await asyncio.gather(*tasks)
                except Exception as exc:
                    logger.warning("Matrix: initial sync event dispatch error: %s", exc)
+                await self._join_pending_invites(sync_data)
            else:
                logger.warning(
                    "Matrix: initial sync returned unexpected type %s",
@@ -713,17 +883,8 @@ class MatrixAdapter(BasePlatformAdapter):
        chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)

        last_event_id = None
-        for chunk in chunks:
-            msg_content: Dict[str, Any] = {
-                "msgtype": "m.text",
-                "body": chunk,
-            }
-
-            # Convert markdown to HTML for rich rendering.
-            html = self._markdown_to_html(chunk)
-            if html and html != chunk:
-                msg_content["format"] = "org.matrix.custom.html"
-                msg_content["formatted_body"] = html
+        for i, chunk in enumerate(chunks):
+            msg_content = self._build_text_message_content(chunk)

            # Reply-to support.
            if reply_to:
@@ -830,25 +991,21 @@ class MatrixAdapter(BasePlatformAdapter):
        """Edit an existing message (via m.replace)."""

        formatted = self.format_message(content)
+        new_content = self._build_text_message_content(formatted)
        msg_content: Dict[str, Any] = {
            "msgtype": "m.text",
            "body": f"* {formatted}",
-            "m.new_content": {
-                "msgtype": "m.text",
-                "body": formatted,
-            },
-            "m.relates_to": {
-                "rel_type": "m.replace",
-                "event_id": message_id,
-            },
+            "m.new_content": new_content,
        }
-
-        html = self._markdown_to_html(formatted)
-        if html and html != formatted:
-            msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
-            msg_content["m.new_content"]["formatted_body"] = html
+        if "m.mentions" in new_content:
+            msg_content["m.mentions"] = new_content["m.mentions"]
+        if "formatted_body" in new_content:
            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = f"* {html}"
+            msg_content["formatted_body"] = f'* {new_content["formatted_body"]}'
+        msg_content["m.relates_to"] = {
+            "rel_type": "m.replace",
+            "event_id": message_id,
+        }

        try:
            event_id = await self._client.send_message_event(
@@ -881,10 +1038,12 @@ class MatrixAdapter(BasePlatformAdapter):
            # Try aiohttp first (always available), fall back to httpx
            try:
                import aiohttp as _aiohttp
-
-                async with _aiohttp.ClientSession(trust_env=True) as http:
+                _sess_kw, _req_kw = proxy_kwargs_for_aiohttp(self._proxy_url)
+                async with _aiohttp.ClientSession(**_sess_kw) as http:
                    async with http.get(
-                        image_url, timeout=_aiohttp.ClientTimeout(total=30)
+                        image_url,
+                        timeout=_aiohttp.ClientTimeout(total=30),
+                        **_req_kw,
                    ) as resp:
                        resp.raise_for_status()
                        data = await resp.read()
@@ -894,8 +1053,10 @@ class MatrixAdapter(BasePlatformAdapter):
                        )
            except ImportError:
                import httpx
-
-                async with httpx.AsyncClient() as http:
+                _httpx_kw: dict = {}
+                if self._proxy_url:
+                    _httpx_kw["proxy"] = self._proxy_url
+                async with httpx.AsyncClient(**_httpx_kw) as http:
                    resp = await http.get(image_url, follow_redirects=True, timeout=30)
                    resp.raise_for_status()
                    data = resp.content
@@ -970,6 +1131,56 @@ class MatrixAdapter(BasePlatformAdapter):
            chat_id, video_path, "m.video", caption, reply_to, metadata=metadata
        )

+    async def send_exec_approval(
+        self,
+        chat_id: str,
+        command: str,
+        session_key: str,
+        description: str = "dangerous command",
+        metadata: Optional[dict] = None,
+    ) -> SendResult:
+        """Send a reaction-based exec approval prompt for Matrix."""
+        if not self._client:
+            return SendResult(success=False, error="Not connected")
+
+        cmd_preview = command[:2000] + "..." if len(command) > 2000 else command
+        text = (
+            "⚠️ **Dangerous command requires approval**\n"
+            f"```\n{cmd_preview}\n```\n"
+            f"Reason: {description}\n\n"
+            "Reply `/approve` to execute, `/approve session` to approve this pattern for the session, "
+            "`/approve always` to approve permanently, or `/deny` to cancel.\n\n"
+            "You can also click the reaction to approve:\n"
+            "✅ = /approve\n"
+            "❎ = /deny"
+        )
+
+        result = await self.send(chat_id, text, metadata=metadata)
+        if not result.success or not result.message_id:
+            return result
+
+        prompt = _MatrixApprovalPrompt(
+            session_key=session_key,
+            chat_id=chat_id,
+            message_id=result.message_id,
+        )
+        old_event = self._approval_prompt_by_session.get(session_key)
+        if old_event:
+            self._approval_prompts_by_event.pop(old_event, None)
+        self._approval_prompts_by_event[result.message_id] = prompt
+        self._approval_prompt_by_session[session_key] = result.message_id
+
+        for emoji in ("✅", "❎"):
+            try:
+                reaction_result = await self._send_reaction(chat_id, result.message_id, emoji)
+                # Save the bot's reaction event_id for later cleanup
+                if reaction_result:
+                    prompt.bot_reaction_events[emoji] = str(reaction_result)
+            except Exception as exc:
+                logger.debug("Matrix: failed to add approval reaction %s: %s", emoji, exc)
+
+        return result
+
    def format_message(self, content: str) -> str:
        """Pass-through — Matrix supports standard Markdown natively."""
        # Strip image markdown; media is uploaded separately.
@@ -1101,9 +1312,15 @@ class MatrixAdapter(BasePlatformAdapter):
        next_batch = await client.sync_store.get_next_batch()
        while not self._closing:
            try:
-                sync_data = await client.sync(
-                    since=next_batch,
-                    timeout=30000,
+                # Wrap in asyncio.wait_for to guard against TCP-level hangs
+                # that the Matrix long-poll timeout cannot catch. Long-poll
+                # is 30s, so 45s gives 15s slack for network drain.
+                sync_data = await asyncio.wait_for(
+                    client.sync(
+                        since=next_batch,
+                        timeout=30000,
+                    ),
+                    timeout=45.0,
                )

                # nio returns SyncError objects (not exceptions) for auth
@@ -1139,6 +1356,7 @@ class MatrixAdapter(BasePlatformAdapter):
                            await asyncio.gather(*tasks)
                    except Exception as exc:
                        logger.warning("Matrix: sync event dispatch error: %s", exc)
+                    await self._join_pending_invites(sync_data)

            except asyncio.CancelledError:
                return
@@ -1164,13 +1382,92 @@ class MatrixAdapter(BasePlatformAdapter):
    # Event callbacks
    # ------------------------------------------------------------------

+    def _is_self_sender(self, sender: str) -> bool:
+        """Return True if the sender refers to the bot's own account.
+
+        Matrix user IDs are byte-compared after trimming whitespace and
+        lowercasing — some homeservers normalize the localpart case
+        differently at different API surfaces, and the reply-loop tail
+        of the "hall of mirrors" bug (#15763) has been observed with the
+        bot's own account bypassing a case-sensitive equality check.
+
+        When ``self._user_id`` is empty (whoami hasn't resolved yet, or
+        login failed), we cannot prove a sender is NOT us, so we return
+        True defensively — an unidentified bot dropping its own events
+        is always preferable to falling into an echo loop.
+        """
+        own = (self._user_id or "").strip().lower()
+        if not own:
+            return True
+        return sender.strip().lower() == own
+
+    @staticmethod
+    def _is_system_or_bridge_sender(sender: str) -> bool:
+        """Return True if the sender looks like a system / bridge / appservice
+        identity rather than a real user.
+
+        Appservice namespaces on Matrix conventionally prefix bot / puppet
+        user IDs with an underscore (e.g. ``@_telegram_12345:server``,
+        ``@_discord_999:server``, ``@_slack_...:server``).  Server-notices
+        bots and bridge-controller bots on many homeservers use the same
+        pattern.
+
+        We treat these as system identities for pairing purposes: they
+        should never be offered a pairing code, because an operator
+        approving the code would hand the bridge itself permanent
+        authorization — and every outbound message relayed by the bridge
+        would then loop back into the agent as an "authorized user
+        message", which is the root of issue #15763.
+
+        Matches:
+            ``@_something:server``   — appservice namespace convention
+            ``@:server``             — malformed / empty localpart
+            ``:server``              — malformed, no leading ``@``
+        """
+        s = (sender or "").strip()
+        if not s:
+            return True
+        # Localpart is everything between leading '@' and ':'
+        if s.startswith("@"):
+            s = s[1:]
+        if ":" in s:
+            localpart, _, _ = s.partition(":")
+        else:
+            localpart = s
+        if not localpart:
+            return True
+        return localpart.startswith("_")
+
    async def _on_room_message(self, event: Any) -> None:
        """Handle incoming room message events (text, media)."""
        room_id = str(getattr(event, "room_id", ""))
        sender = str(getattr(event, "sender", ""))

-        # Ignore own messages.
-        if sender == self._user_id:
+        # Diagnostic: confirm the callback is firing at all when DEBUG is on.
+        # Helps users troubleshoot silent inbound issues like #5819, #7914, #12614.
+        logger.debug(
+            "Matrix: callback fired — event %s from %s in %s",
+            getattr(event, "event_id", "?"),
+            sender,
+            room_id,
+        )
+
+        # Ignore own messages (case-insensitive; also drops when our own
+        # user_id hasn't been resolved yet — see _is_self_sender docstring
+        # and issue #15763).
+        if self._is_self_sender(sender):
+            return
+
+        # Ignore appservice / bridge / system identities so they never
+        # trigger the pairing flow.  Once a bridge user is paired, every
+        # outbound message it relays would loop back as an authorized
+        # user message (the "hall of mirrors" in #15763).
+        if self._is_system_or_bridge_sender(sender):
+            logger.debug(
+                "Matrix: ignoring system/bridge sender %s in %s",
+                sender,
+                room_id,
+            )
            return

        # Deduplicate by event ID.
@@ -1266,6 +1563,12 @@ class MatrixAdapter(BasePlatformAdapter):
            in_bot_thread = bool(thread_id and thread_id in self._threads)
            if self._require_mention and not is_free_room and not in_bot_thread:
                if not is_mentioned:
+                    logger.debug(
+                        "Matrix: ignoring message %s in %s — no @mention "
+                        "(set MATRIX_REQUIRE_MENTION=false to disable)",
+                        event_id,
+                        room_id,
+                    )
                    return None

        # DM mention-thread.
@@ -1278,7 +1581,7 @@ class MatrixAdapter(BasePlatformAdapter):
            body = self._strip_mention(body)

        # Auto-thread.
-        if not is_dm and not thread_id and self._auto_thread:
+        if not thread_id and ((not is_dm and self._auto_thread) or (is_dm and self._dm_auto_thread)):
            thread_id = event_id
            self._threads.mark(thread_id)

@@ -1520,6 +1823,9 @@ class MatrixAdapter(BasePlatformAdapter):
            return
        body, is_dm, chat_type, thread_id, display_name, source = ctx

+        if msgtype == "m.image" and _looks_like_matrix_image_filename(body):
+            body = ""
+
        allow_http_fallback = bool(http_url) and not is_encrypted_media
        media_urls = (
            [cached_path]
@@ -1549,13 +1855,35 @@ class MatrixAdapter(BasePlatformAdapter):
            "Matrix: invited to %s — joining",
            room_id,
        )
+        await self._join_room_by_id(room_id)
+
+    async def _join_room_by_id(self, room_id: str) -> bool:
+        """Join a room by ID and refresh local caches on success."""
+        if not room_id:
+            return False
+        if room_id in self._joined_rooms:
+            return True
        try:
            await self._client.join_room(RoomID(room_id))
            self._joined_rooms.add(room_id)
            logger.info("Matrix: joined %s", room_id)
            await self._refresh_dm_cache()
+            return True
        except Exception as exc:
            logger.warning("Matrix: error joining %s: %s", room_id, exc)
+            return False
+
+    async def _join_pending_invites(self, sync_data: Dict[str, Any]) -> None:
+        """Join rooms still present in rooms.invite after sync processing."""
+        rooms = sync_data.get("rooms", {}) if isinstance(sync_data, dict) else {}
+        invites = rooms.get("invite", {})
+        if not isinstance(invites, dict):
+            return
+        for room_id in invites:
+            if room_id in self._joined_rooms:
+                continue
+            logger.info("Matrix: reconciling pending invite for %s", room_id)
+            await self._join_room_by_id(str(room_id))

    # ------------------------------------------------------------------
    # Reactions (send, receive, processing lifecycle)
@@ -1640,7 +1968,7 @@ class MatrixAdapter(BasePlatformAdapter):
    async def _on_reaction(self, event: Any) -> None:
        """Handle incoming reaction events."""
        sender = str(getattr(event, "sender", ""))
-        if sender == self._user_id:
+        if self._is_self_sender(sender):
            return
        event_id = str(getattr(event, "event_id", ""))
        if self._is_duplicate_event(event_id):
@@ -1670,6 +1998,51 @@ class MatrixAdapter(BasePlatformAdapter):
                room_id,
            )

+            # Check if this reaction resolves a pending approval prompt.
+            prompt = self._approval_prompts_by_event.get(reacts_to)
+            if prompt and not prompt.resolved:
+                if room_id != prompt.chat_id:
+                    return
+                if self._allowed_user_ids and sender not in self._allowed_user_ids:
+                    logger.info(
+                        "Matrix: ignoring approval reaction from unauthorized user %s on %s",
+                        sender, reacts_to,
+                    )
+                    return
+                choice = self._approval_reaction_map.get(key)
+                if not choice:
+                    return
+                try:
+                    from tools.approval import resolve_gateway_approval
+
+                    count = resolve_gateway_approval(prompt.session_key, choice)
+                    if count:
+                        prompt.resolved = True
+                        self._approval_prompts_by_event.pop(reacts_to, None)
+                        self._approval_prompt_by_session.pop(prompt.session_key, None)
+                        logger.info(
+                            "Matrix reaction resolved %d approval(s) for session %s "
+                            "(choice=%s, user=%s)",
+                            count, prompt.session_key, choice, sender,
+                        )
+                        # Redact bot's seed reactions, leaving only the user's
+                        await self._redact_bot_approval_reactions(room_id, prompt)
+                except Exception as exc:
+                    logger.error("Failed to resolve gateway approval from Matrix reaction: %s", exc)
+
+    async def _redact_bot_approval_reactions(
+        self,
+        room_id: str,
+        prompt: "_MatrixApprovalPrompt",
+    ) -> None:
+        """Redact the bot's seed ✅/❎ reactions, leaving only the user's reaction."""
+        for emoji, evt_id in prompt.bot_reaction_events.items():
+            try:
+                await self.redact_message(room_id, evt_id, "approval resolved")
+                logger.debug("Matrix: redacted bot reaction %s (%s)", emoji, evt_id)
+            except Exception as exc:
+                logger.debug("Matrix: failed to redact bot reaction %s: %s", emoji, exc)
+
    # ------------------------------------------------------------------
    # Text message aggregation (handles Matrix client-side splits)
    # ------------------------------------------------------------------
@@ -1895,11 +2268,7 @@ class MatrixAdapter(BasePlatformAdapter):
        if not self._client or not text:
            return SendResult(success=False, error="No client or empty text")

-        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
-        html = self._markdown_to_html(text)
-        if html and html != text:
-            msg_content["format"] = "org.matrix.custom.html"
-            msg_content["formatted_body"] = html
+        msg_content = self._build_text_message_content(text, msgtype=msgtype)

        try:
            event_id = await self._client.send_message_event(
@@ -1962,6 +2331,77 @@ class MatrixAdapter(BasePlatformAdapter):
    # Mention detection helpers
    # ------------------------------------------------------------------

+    def _build_text_message_content(self, text: str, msgtype: str = "m.text") -> Dict[str, Any]:
+        """Build Matrix text content with HTML and outbound mention metadata."""
+        msg_content: Dict[str, Any] = {"msgtype": msgtype, "body": text}
+        mention_user_ids = self._extract_outbound_mentions(text)
+        if mention_user_ids:
+            msg_content["m.mentions"] = {"user_ids": mention_user_ids}
+
+        html_source = self._inject_outbound_mention_links(text)
+        html = self._markdown_to_html(html_source)
+        if html and html != text:
+            msg_content["format"] = "org.matrix.custom.html"
+            msg_content["formatted_body"] = html
+
+        return msg_content
+
+    def _extract_outbound_mentions(self, text: str) -> list[str]:
+        """Return unique Matrix user IDs mentioned in outbound text."""
+        protected, _ = self._protect_outbound_mention_regions(text)
+        seen: Set[str] = set()
+        mentions: list[str] = []
+        for match in _OUTBOUND_MENTION_RE.finditer(protected):
+            user_id = match.group(1)
+            if user_id not in seen:
+                seen.add(user_id)
+                mentions.append(user_id)
+        return mentions
+
+    def _inject_outbound_mention_links(self, text: str) -> str:
+        """Wrap outbound Matrix mentions in markdown links outside code spans."""
+        if not text:
+            return text
+
+        protected, placeholders = self._protect_outbound_mention_regions(text)
+
+        linked = _OUTBOUND_MENTION_RE.sub(
+            lambda match: f"[{match.group(1)}](https://matrix.to/#/{match.group(1)})",
+            protected,
+        )
+
+        for idx, original in enumerate(placeholders):
+            linked = linked.replace(f"\x00MENTION_PROTECTED{idx}\x00", original)
+
+        return linked
+
+    def _protect_outbound_mention_regions(self, text: str) -> tuple[str, list[str]]:
+        """Protect markdown regions where outbound mentions should stay literal."""
+        placeholders: list[str] = []
+
+        def _protect(fragment: str) -> str:
+            idx = len(placeholders)
+            placeholders.append(fragment)
+            return f"\x00MENTION_PROTECTED{idx}\x00"
+
+        protected = re.sub(
+            r"```[\s\S]*?```",
+            lambda match: _protect(match.group(0)),
+            text or "",
+        )
+        protected = re.sub(
+            r"`[^`\n]+`",
+            lambda match: _protect(match.group(0)),
+            protected,
+        )
+        protected = re.sub(
+            r"\[[^\]]+\]\([^)]+\)",
+            lambda match: _protect(match.group(0)),
+            protected,
+        )
+
+        return protected, placeholders
+
    def _is_bot_mentioned(
        self,
        body: str,
@@ -1996,13 +2436,33 @@ class MatrixAdapter(BasePlatformAdapter):
        return False

    def _strip_mention(self, body: str) -> str:
-        """Strip the bot's full MXID (``@user:server``) from *body*.
+        """Remove explicit bot mentions from message body.

-        The bare localpart is intentionally *not* stripped — it would
-        mangle file paths like ``/home/hermes/media/file.png``.
+        Important: only strip explicit mention tokens (``@user:server`` or
+        ``@localpart``). Do NOT strip bare words matching the bot localpart,
+        otherwise normal phrases like "Hermes Agent" become "Agent".
        """
+        if not body:
+            return ""
+
+        # Strip explicit full MXID mentions.
        if self._user_id:
            body = body.replace(self._user_id, "")
+
+        # Strip explicit @localpart mentions only (not bare localpart words).
+        if self._user_id and ":" in self._user_id:
+            localpart = self._user_id.split(":")[0].lstrip("@")
+            if localpart:
+                body = re.sub(
+                    r'(?<![\w])@' + re.escape(localpart) + r'\b',
+                    '',
+                    body,
+                    flags=re.IGNORECASE,
+                )
+
+        # Normalize spacing after mention removal.
+        body = re.sub(r'[ \t]{2,}', ' ', body)
+        body = re.sub(r'\s+([,.;:!?])', r'\1', body)
        return body.strip()

    async def _get_display_name(self, room_id: str, user_id: str) -> str:
@@ -412,7 +412,6 @@ class MattermostAdapter(BasePlatformAdapter):

        import aiohttp

-        last_exc = None
        file_data = None
        ct = "application/octet-stream"
        fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
@@ -535,6 +535,9 @@ class QQAdapter(BasePlatformAdapter):
                    quick_disconnect_count = 0
                else:
                    backoff_idx += 1
+                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
+                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
+                        return

            except Exception as exc:
                if not self._running:
@@ -1954,7 +1957,7 @@ class QQAdapter(BasePlatformAdapter):
            self, openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a C2C user via REST API."""
-        msg_seq = self._next_msg_seq(reply_to or openid)
+        self._next_msg_seq(reply_to or openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -1967,7 +1970,7 @@ class QQAdapter(BasePlatformAdapter):
            self, group_openid: str, content: str, reply_to: Optional[str] = None
    ) -> SendResult:
        """Send text to a group via REST API."""
-        msg_seq = self._next_msg_seq(reply_to or group_openid)
+        self._next_msg_seq(reply_to or group_openid)
        body = self._build_text_body(content, reply_to)
        if reply_to:
            body["msg_id"] = reply_to
@@ -2132,11 +2135,6 @@ class QQAdapter(BasePlatformAdapter):

            # Route
            chat_type = self._guess_chat_type(chat_id)
-            target_path = (
-                f"/v2/users/{chat_id}/files"
-                if chat_type == "c2c"
-                else f"/v2/groups/{chat_id}/files"
-            )

            if chat_type == "guild":
                # Guild channels don't support native media upload in the same way
@@ -84,6 +84,7 @@ from gateway.platforms.telegram_network import (
    discover_fallback_ips,
    parse_fallback_ip_env,
 )
+from utils import atomic_replace


 def check_telegram_requirements() -> bool:
@@ -122,12 +123,12 @@ def _strip_mdv2(text: str) -> str:


 # ---------------------------------------------------------------------------
-# Markdown table → code block conversion
+# Markdown table → Telegram-friendly row groups
 # ---------------------------------------------------------------------------
 # Telegram's MarkdownV2 has no table syntax — '|' is just an escaped literal,
 # so pipe tables render as noisy backslash-pipe text with no alignment.
-# Wrapping the table in a fenced code block makes Telegram render it as
-# monospace preformatted text with columns intact.
+# Reformating each row into a bold heading plus bullet list keeps the content
+# readable on mobile clients while preserving the source data.

 # Matches a GFM table delimiter row: optional outer pipes, cells containing
 # only dashes (with optional leading/trailing colons for alignment) separated
@@ -144,13 +145,49 @@ def _is_table_row(line: str) -> bool:
    return bool(stripped) and '|' in stripped


+def _split_markdown_table_row(line: str) -> list[str]:
+    """Split a simple GFM table row into stripped cell values."""
+    stripped = line.strip()
+    if stripped.startswith("|"):
+        stripped = stripped[1:]
+    if stripped.endswith("|"):
+        stripped = stripped[:-1]
+    return [cell.strip() for cell in stripped.split("|")]
+
+
+def _render_table_block_for_telegram(table_block: list[str]) -> str:
+    """Render a detected GFM table as Telegram-friendly row groups."""
+    if len(table_block) < 3:
+        return "\n".join(table_block)
+
+    headers = _split_markdown_table_row(table_block[0])
+    if len(headers) < 2:
+        return "\n".join(table_block)
+
+    rendered_rows: list[str] = []
+    for index, row in enumerate(table_block[2:], start=1):
+        cells = _split_markdown_table_row(row)
+        if len(cells) < len(headers):
+            cells.extend([""] * (len(headers) - len(cells)))
+        elif len(cells) > len(headers):
+            cells = cells[: len(headers)]
+
+        heading = next((cell for cell in cells if cell), f"Row {index}")
+        rendered_rows.append(f"**{heading}**")
+        rendered_rows.extend(
+            f"• {header}: {value}" for header, value in zip(headers, cells)
+        )
+
+    return "\n\n".join(rendered_rows)
+
+
 def _wrap_markdown_tables(text: str) -> str:
-    """Wrap GFM-style pipe tables in ``` fences so Telegram renders them.
+    """Rewrite GFM-style pipe tables into Telegram-friendly bullet groups.

    Detected by a row containing '|' immediately followed by a delimiter
    row matching :data:`_TABLE_SEPARATOR_RE`.  Subsequent pipe-containing
-    non-blank lines are consumed as the table body and included in the
-    wrapped block.  Tables inside existing fenced code blocks are left
+    non-blank lines are consumed as the table body and rewritten as
+    per-row bullet groups. Tables inside existing fenced code blocks are left
    alone.
    """
    if '|' not in text or '-' not in text:
@@ -187,9 +224,7 @@ def _wrap_markdown_tables(text: str) -> str:
            while j < len(lines) and _is_table_row(lines[j]):
                table_block.append(lines[j])
                j += 1
-            out.append('```')
-            out.extend(table_block)
-            out.append('```')
+            out.append(_render_table_block_for_telegram(table_block))
            i = j
            continue

@@ -334,6 +369,49 @@ class TelegramAdapter(BasePlatformAdapter):
            return {"link_preview_options": LinkPreviewOptions(is_disabled=True)}
        return {"disable_web_page_preview": True}

+    async def _drain_polling_connections(self) -> None:
+        """Reset the httpx connection pool used for getUpdates polling.
+
+        Network errors (especially through proxies like sing-box) can leave
+        httpx connections in a half-closed state that still occupy pool slots.
+        After enough reconnect cycles the pool fills up entirely, causing
+        ``Pool timeout: All connections in the connection pool are occupied.``
+
+        We reset ONLY ``_request[0]`` (the getUpdates request) — the general
+        request (``_request[1]``) is left untouched so concurrent
+        ``send_message`` / ``edit_message`` calls are never interrupted.
+
+        Implementation note: accesses ``Bot._request[0]`` which is the
+        get-updates ``BaseRequest`` in the PTB 22.x internal tuple
+        ``(get_updates_request, general_request)``.  There is no public
+        accessor for the polling request; review if upgrading to PTB 23+.
+        """
+        if not (self._app and self._app.bot):
+            return
+        try:
+            # PTB 22.x: _request is a (get_updates, general) tuple;
+            # no public accessor exists for the polling request.
+            polling_req = self._app.bot._request[0]  # noqa: SLF001
+        except Exception:
+            return
+        try:
+            await polling_req.shutdown()
+        except Exception:
+            logger.debug(
+                "[%s] Polling request shutdown failed (non-fatal)",
+                self.name, exc_info=True,
+            )
+        try:
+            await polling_req.initialize()
+            logger.debug(
+                "[%s] Polling request pool drained before reconnect", self.name
+            )
+        except Exception:
+            logger.debug(
+                "[%s] Polling request re-initialize failed (non-fatal)",
+                self.name, exc_info=True,
+            )
+
    async def _handle_polling_network_error(self, error: Exception) -> None:
        """Reconnect polling after a transient network interruption.

@@ -379,6 +457,8 @@ class TelegramAdapter(BasePlatformAdapter):
        except Exception:
            pass

+        await self._drain_polling_connections()
+
        try:
            await self._app.updater.start_polling(
                allowed_updates=Update.ALL_TYPES,
@@ -426,6 +506,7 @@ class TelegramAdapter(BasePlatformAdapter):
            except Exception:
                pass
            await asyncio.sleep(RETRY_DELAY)
+            await self._drain_polling_connections()
            try:
                await self._app.updater.start_polling(
                    allowed_updates=Update.ALL_TYPES,
@@ -554,7 +635,7 @@ class TelegramAdapter(BasePlatformAdapter):
                        _yaml.dump(config, f, default_flow_style=False, sort_keys=False)
                        f.flush()
                        os.fsync(f.fileno())
-                    os.replace(tmp_path, config_path)
+                    atomic_replace(tmp_path, config_path)
                except BaseException:
                    try:
                        os.unlink(tmp_path)
@@ -703,7 +784,6 @@ class TelegramAdapter(BasePlatformAdapter):
                "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
            }

-            proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
@@ -714,6 +794,8 @@ class TelegramAdapter(BasePlatformAdapter):
                    ", ".join(fallback_ips),
                )

+            proxy_targets = ["api.telegram.org", *fallback_ips]
+            proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
            if fallback_ips and not proxy_url and not disable_fallback:
                logger.info(
                    "[%s] Telegram fallback IPs active: %s",
@@ -1208,6 +1290,31 @@ class TelegramAdapter(BasePlatformAdapter):
            )
            return SendResult(success=False, error=str(e))

+    async def delete_message(self, chat_id: str, message_id: str) -> bool:
+        """Delete a previously sent Telegram message.
+
+        Used by the stream consumer's fresh-final cleanup path (ported
+        from openclaw/openclaw#72038) to remove long-lived preview
+        messages after sending the completed reply as a fresh message.
+        Telegram's Bot API ``deleteMessage`` works for bot-posted
+        messages in the last 48 hours.  Failures are non-fatal — the
+        caller leaves the preview in place and logs at debug level.
+        """
+        if not self._bot:
+            return False
+        try:
+            await self._bot.delete_message(
+                chat_id=int(chat_id),
+                message_id=int(message_id),
+            )
+            return True
+        except Exception as e:
+            logger.debug(
+                "[%s] Failed to delete Telegram message %s: %s",
+                self.name, message_id, e,
+            )
+            return False
+
    async def send_update_prompt(
        self, chat_id: str, prompt: str, default: str = "",
        session_key: str = "",
@@ -2054,10 +2161,8 @@ class TelegramAdapter(BasePlatformAdapter):

        text = content

-        # 0) Pre-wrap GFM-style pipe tables in ``` fences.  Telegram can't
-        #    render tables natively, but fenced code blocks render as
-        #    monospace preformatted text with columns intact.  The wrapped
-        #    tables then flow through step (1) below as protected regions.
+        # 0) Rewrite GFM-style pipe tables into Telegram-friendly row groups
+        #    before the normal MarkdownV2 conversions run.
        text = _wrap_markdown_tables(text)

        # 1) Protect fenced code blocks (``` ... ```)
@@ -2327,6 +2432,26 @@ class TelegramAdapter(BasePlatformAdapter):
                    user = getattr(entity, "user", None)
                    if user and getattr(user, "id", None) == bot_id:
                        return True
+                elif entity_type == "bot_command" and expected:
+                    # Telegram's official group-disambiguation form for slash
+                    # commands (``/cmd@botname``) is emitted as a single
+                    # ``bot_command`` entity covering the whole span — there
+                    # is no accompanying ``mention`` entity. Treat it as a
+                    # direct address to this bot when the ``@botname`` suffix
+                    # matches. This is the form Telegram's own command menu
+                    # autocomplete produces in groups, so dropping it at the
+                    # mention gate would break /new, /reset, /help, ... for
+                    # every group that has ``require_mention`` enabled (#15415).
+                    offset = int(getattr(entity, "offset", -1))
+                    length = int(getattr(entity, "length", 0))
+                    if offset < 0 or length <= 0:
+                        continue
+                    command_text = source_text[offset:offset + length]
+                    at_index = command_text.find("@")
+                    if at_index < 0:
+                        continue
+                    if command_text[at_index:].strip().lower() == expected:
+                        return True
        return False

    def _message_matches_mention_patterns(self, message: Message) -> bool:
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
 _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]


-def _resolve_proxy_url() -> str | None:
+def _resolve_proxy_url(target_hosts=None) -> str | None:
    # Delegate to shared implementation (env vars + macOS system proxy detection)
    from gateway.platforms.base import resolve_proxy_url
-    return resolve_proxy_url("TELEGRAM_PROXY")
+    return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)


 class TelegramFallbackTransport(httpx.AsyncBaseTransport):
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):

    def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
        self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
-        proxy_url = _resolve_proxy_url()
+        proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
        if proxy_url and "proxy" not in transport_kwargs:
            transport_kwargs["proxy"] = proxy_url
        self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
@@ -508,6 +508,11 @@ class WeComAdapter(BasePlatformAdapter):
        self._remember_chat_req_id(chat_id, self._payload_req_id(payload))

        text, reply_text = self._extract_text(body)
+        # Strip leading @mention in group chats so slash commands like
+        # "@BotName /approve" are correctly recognized as "/approve".
+        # Mirrors what the Telegram adapter does (re.sub @botname).
+        if is_group and text:
+            text = re.sub(r"^@\S+\s*", "", text).strip()
        media_urls, media_types = await self._extract_media(body)
        message_type = self._derive_message_type(body, text, media_types)
        has_reply_context = bool(reply_text and (text or media_urls))
@@ -89,6 +89,7 @@ MAX_CONSECUTIVE_FAILURES = 3
 RETRY_DELAY_SECONDS = 2
 BACKOFF_DELAY_SECONDS = 30
 SESSION_EXPIRED_ERRCODE = -14
+RATE_LIMIT_ERRCODE = -2  # iLink frequency limit — backoff and retry
 MESSAGE_DEDUP_TTL_SECONDS = 300

 MEDIA_IMAGE = 1
@@ -1113,7 +1114,7 @@ async def qr_login(
 class WeixinAdapter(BasePlatformAdapter):
    """Native Hermes adapter for Weixin personal accounts."""

-    MAX_MESSAGE_LENGTH = 4000
+    MAX_MESSAGE_LENGTH = 2000

    # WeChat does not support editing sent messages — streaming must use the
    # fallback "send-final-only" path so the cursor (▉) is never left visible.
@@ -1138,10 +1139,10 @@ class WeixinAdapter(BasePlatformAdapter):
            extra.get("cdn_base_url") or os.getenv("WEIXIN_CDN_BASE_URL", WEIXIN_CDN_BASE_URL)
        ).strip().rstrip("/")
        self._send_chunk_delay_seconds = float(
-            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "0.35")
+            extra.get("send_chunk_delay_seconds") or os.getenv("WEIXIN_SEND_CHUNK_DELAY_SECONDS", "1.5")
        )
        self._send_chunk_retries = int(
-            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "2")
+            extra.get("send_chunk_retries") or os.getenv("WEIXIN_SEND_CHUNK_RETRIES", "4")
        )
        self._send_chunk_retry_delay_seconds = float(
            extra.get("send_chunk_retry_delay_seconds")
@@ -1531,6 +1532,28 @@ class WeixinAdapter(BasePlatformAdapter):
                                self.name, _safe_id(chat_id),
                            )
                            continue
+                        # Rate limit (-2) — backoff and retry
+                        is_rate_limited = (
+                            ret == RATE_LIMIT_ERRCODE
+                            or errcode == RATE_LIMIT_ERRCODE
+                        )
+                        if is_rate_limited:
+                            errmsg = resp.get("errmsg") or resp.get("msg") or "rate limited"
+                            # Record the error so we raise a descriptive
+                            # RuntimeError (instead of AssertionError) if the
+                            # loop exhausts with the server still rate-limiting.
+                            last_error = RuntimeError(
+                                f"iLink sendmessage rate limited: ret={ret} errcode={errcode} errmsg={errmsg}"
+                            )
+                            if attempt >= self._send_chunk_retries:
+                                break
+                            wait = self._send_chunk_retry_delay_seconds * 3  # 3x backoff for rate limit
+                            logger.warning(
+                                "[%s] rate limited for %s; backing off %.1fs before retry",
+                                self.name, _safe_id(chat_id), wait,
+                            )
+                            await asyncio.sleep(wait)
+                            continue
                        errmsg = resp.get("errmsg") or resp.get("msg") or "unknown error"
                        raise RuntimeError(
                            f"iLink sendmessage error: ret={ret} errcode={errcode} errmsg={errmsg}"
@@ -0,0 +1,645 @@
+"""
+yuanbao_media.py — 元宝平台媒体处理模块
+
+提供 COS 上传、文件下载、TIM 媒体消息构建等功能。
+移植自 TypeScript 版 media.ts（yuanbao-openclaw-plugin），
+使用 httpx 替代 cos-nodejs-sdk-v5，避免引入额外 SDK 依赖。
+
+COS 上传流程：
+  1. 调用 genUploadInfo 获取临时凭证（tmpSecretId/tmpSecretKey/sessionToken）
+  2. 用临时凭证通过 HMAC-SHA1 签名构建 Authorization 头
+  3. HTTP PUT 上传到 COS
+
+TIM 消息体构建：
+  - buildImageMsgBody() → TIMImageElem
+  - buildFileMsgBody()  → TIMFileElem
+"""
+
+from __future__ import annotations
+
+import hashlib
+import hmac
+import logging
+import os
+import secrets
+import struct
+import time
+import urllib.parse
+from typing import Optional, Any
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+# ============ 常量 ============
+
+UPLOAD_INFO_PATH = "/api/resource/genUploadInfo"
+DEFAULT_API_DOMAIN = "yuanbao.tencent.com"
+DEFAULT_MAX_SIZE_MB = 50
+
+# COS 加速域名后缀（优先使用全球加速）
+COS_USE_ACCELERATE = True
+
+# ============ 类型映射 ============
+
+# MIME → image_format 数字（TIM 协议字段）
+_MIME_TO_IMAGE_FORMAT: dict[str, int] = {
+    "image/jpeg": 1,
+    "image/jpg": 1,
+    "image/gif": 2,
+    "image/png": 3,
+    "image/bmp": 4,
+    "image/webp": 255,
+    "image/heic": 255,
+    "image/tiff": 255,
+}
+
+# 文件扩展名 → MIME
+_EXT_TO_MIME: dict[str, str] = {
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".png": "image/png",
+    ".gif": "image/gif",
+    ".webp": "image/webp",
+    ".bmp": "image/bmp",
+    ".heic": "image/heic",
+    ".tiff": "image/tiff",
+    ".ico": "image/x-icon",
+    ".pdf": "application/pdf",
+    ".doc": "application/msword",
+    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+    ".xls": "application/vnd.ms-excel",
+    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    ".ppt": "application/vnd.ms-powerpoint",
+    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    ".txt": "text/plain",
+    ".zip": "application/zip",
+    ".tar": "application/x-tar",
+    ".gz": "application/gzip",
+    ".mp3": "audio/mpeg",
+    ".mp4": "video/mp4",
+    ".wav": "audio/wav",
+    ".ogg": "audio/ogg",
+    ".webm": "video/webm",
+}
+
+
+# ============ 工具函数 ============
+
+def guess_mime_type(filename: str) -> str:
+    """根据文件扩展名猜测 MIME 类型。"""
+    ext = os.path.splitext(filename)[-1].lower()
+    return _EXT_TO_MIME.get(ext, "application/octet-stream")
+
+
+def is_image(filename: str, mime_type: str = "") -> bool:
+    """判断是否为图片类型。"""
+    if mime_type.startswith("image/"):
+        return True
+    ext = os.path.splitext(filename)[-1].lower()
+    return ext in {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".heic", ".tiff", ".ico"}
+
+
+def get_image_format(mime_type: str) -> int:
+    """获取 TIM 图片格式编号。"""
+    return _MIME_TO_IMAGE_FORMAT.get(mime_type.lower(), 255)
+
+
+def md5_hex(data: bytes) -> str:
+    """计算 MD5 十六进制摘要。"""
+    return hashlib.md5(data).hexdigest()
+
+
+def generate_file_id() -> str:
+    """生成随机文件 ID（32 位 hex）。"""
+    return secrets.token_hex(16)
+
+
+
+# ============ 图片尺寸解析（纯 Python，无需 Pillow） ============
+
+def parse_image_size(data: bytes) -> Optional[dict[str, int]]:
+    """
+    解析图片宽高（支持 JPEG/PNG/GIF/WebP），无需第三方依赖。
+    返回 {"width": w, "height": h} 或 None（无法识别）。
+    """
+    return (
+        _parse_png_size(data)
+        or _parse_jpeg_size(data)
+        or _parse_gif_size(data)
+        or _parse_webp_size(data)
+    )
+
+
+def _parse_png_size(buf: bytes) -> Optional[dict[str, int]]:
+    if len(buf) < 24:
+        return None
+    if buf[:4] != b"\x89PNG":
+        return None
+    w = struct.unpack(">I", buf[16:20])[0]
+    h = struct.unpack(">I", buf[20:24])[0]
+    return {"width": w, "height": h}
+
+
+def _parse_jpeg_size(buf: bytes) -> Optional[dict[str, int]]:
+    if len(buf) < 4 or buf[0] != 0xFF or buf[1] != 0xD8:
+        return None
+    i = 2
+    while i < len(buf) - 9:
+        if buf[i] != 0xFF:
+            i += 1
+            continue
+        marker = buf[i + 1]
+        if marker in (0xC0, 0xC2):
+            h = struct.unpack(">H", buf[i + 5: i + 7])[0]
+            w = struct.unpack(">H", buf[i + 7: i + 9])[0]
+            return {"width": w, "height": h}
+        if i + 3 < len(buf):
+            i += 2 + struct.unpack(">H", buf[i + 2: i + 4])[0]
+        else:
+            break
+    return None
+
+
+def _parse_gif_size(buf: bytes) -> Optional[dict[str, int]]:
+    if len(buf) < 10:
+        return None
+    sig = buf[:6].decode("ascii", errors="replace")
+    if sig not in ("GIF87a", "GIF89a"):
+        return None
+    w = struct.unpack("<H", buf[6:8])[0]
+    h = struct.unpack("<H", buf[8:10])[0]
+    return {"width": w, "height": h}
+
+
+def _parse_webp_size(buf: bytes) -> Optional[dict[str, int]]:
+    if len(buf) < 16:
+        return None
+    if buf[:4] != b"RIFF" or buf[8:12] != b"WEBP":
+        return None
+    chunk = buf[12:16].decode("ascii", errors="replace")
+    if chunk == "VP8 ":
+        if len(buf) >= 30 and buf[23] == 0x9D and buf[24] == 0x01 and buf[25] == 0x2A:
+            w = struct.unpack("<H", buf[26:28])[0] & 0x3FFF
+            h = struct.unpack("<H", buf[28:30])[0] & 0x3FFF
+            return {"width": w, "height": h}
+    elif chunk == "VP8L":
+        if len(buf) >= 25 and buf[20] == 0x2F:
+            bits = struct.unpack("<I", buf[21:25])[0]
+            w = (bits & 0x3FFF) + 1
+            h = ((bits >> 14) & 0x3FFF) + 1
+            return {"width": w, "height": h}
+    elif chunk == "VP8X":
+        if len(buf) >= 30:
+            w = (buf[24] | (buf[25] << 8) | (buf[26] << 16)) + 1
+            h = (buf[27] | (buf[28] << 8) | (buf[29] << 16)) + 1
+            return {"width": w, "height": h}
+    return None
+
+
+# ============ URL 下载 ============
+
+async def download_url(
+    url: str,
+    max_size_mb: int = DEFAULT_MAX_SIZE_MB,
+) -> tuple[bytes, str]:
+    """
+    下载 URL 内容，返回 (bytes, content_type)。
+
+    Args:
+        url:          HTTP(S) URL
+        max_size_mb:  最大允许大小（MB），超过则抛出异常
+
+    Returns:
+        (data_bytes, content_type_string)
+
+    Raises:
+        ValueError:  内容超过大小限制
+        httpx.HTTPError: 网络/HTTP 错误
+    """
+    max_bytes = max_size_mb * 1024 * 1024
+    async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+        # 先 HEAD 检查大小
+        try:
+            head = await client.head(url)
+            content_length = int(head.headers.get("content-length", 0) or 0)
+            if content_length > 0 and content_length > max_bytes:
+                raise ValueError(
+                    f"文件过大: {content_length / 1024 / 1024:.1f} MB > {max_size_mb} MB"
+                )
+        except httpx.HTTPStatusError:
+            pass  # 部分服务器不支持 HEAD，忽略
+
+        # GET 下载（流式读取，防止超限）
+        async with client.stream("GET", url) as resp:
+            resp.raise_for_status()
+
+            content_type = resp.headers.get("content-type", "").split(";")[0].strip()
+
+            chunks: list[bytes] = []
+            downloaded = 0
+            async for chunk in resp.aiter_bytes(65536):
+                downloaded += len(chunk)
+                if downloaded > max_bytes:
+                    raise ValueError(
+                        f"文件过大: 已超过 {max_size_mb} MB 限制"
+                    )
+                chunks.append(chunk)
+
+        data = b"".join(chunks)
+        return data, content_type
+
+
+# ============ COS 鉴权（HMAC-SHA1） ============
+
+def _cos_sign(
+    method: str,
+    path: str,
+    params: dict[str, str],
+    headers: dict[str, str],
+    secret_id: str,
+    secret_key: str,
+    start_time: Optional[int] = None,
+    expire_seconds: int = 3600,
+) -> str:
+    """
+    构建 COS 请求签名（q-sign-algorithm=sha1 方案）。
+    参考：https://cloud.tencent.com/document/product/436/7778
+
+    Args:
+        method:         HTTP 方法（小写，如 "put"）
+        path:           URL 路径（URL encode 后的小写）
+        params:         URL 查询参数 dict（用于签名）
+        headers:        参与签名的请求头 dict（key 需小写）
+        secret_id:      临时 SecretId（tmpSecretId）
+        secret_key:     临时 SecretKey（tmpSecretKey）
+        start_time:     签名起始 Unix 时间戳（默认 now）
+        expire_seconds: 签名有效期（秒，默认 3600）
+
+    Returns:
+        Authorization header 值（完整字符串）
+    """
+    now = int(time.time())
+    q_sign_time = f"{start_time or now};{(start_time or now) + expire_seconds}"
+
+    # Step 1: SignKey = HMAC-SHA1(SecretKey, q-sign-time)
+    sign_key = hmac.new(
+        secret_key.encode("utf-8"),
+        q_sign_time.encode("utf-8"),
+        hashlib.sha1,
+    ).hexdigest()
+
+    # Step 2: HttpString
+    # 参数和头部需按字典序排列，key 小写
+    sorted_params = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in params.items())
+    sorted_headers = sorted((k.lower(), urllib.parse.quote(str(v), safe="") ) for k, v in headers.items())
+
+    url_param_list = ";".join(k for k, _ in sorted_params)
+    url_params = "&".join(f"{k}={v}" for k, v in sorted_params)
+    header_list = ";".join(k for k, _ in sorted_headers)
+    header_str = "&".join(f"{k}={v}" for k, v in sorted_headers)
+
+    http_string = "\n".join([
+        method.lower(),
+        path,
+        url_params,
+        header_str,
+        "",
+    ])
+
+    # Step 3: StringToSign = sha1 hash of HttpString
+    sha1_of_http = hashlib.sha1(http_string.encode("utf-8")).hexdigest()
+    string_to_sign = "\n".join([
+        "sha1",
+        q_sign_time,
+        sha1_of_http,
+        "",
+    ])
+
+    # Step 4: Signature = HMAC-SHA1(SignKey, StringToSign)
+    signature = hmac.new(
+        sign_key.encode("utf-8"),
+        string_to_sign.encode("utf-8"),
+        hashlib.sha1,
+    ).hexdigest()
+
+    return (
+        f"q-sign-algorithm=sha1"
+        f"&q-ak={secret_id}"
+        f"&q-sign-time={q_sign_time}"
+        f"&q-key-time={q_sign_time}"
+        f"&q-header-list={header_list}"
+        f"&q-url-param-list={url_param_list}"
+        f"&q-signature={signature}"
+    )
+
+
+# ============ 主要公开 API ============
+
+async def get_cos_credentials(
+    app_key: str,
+    api_domain: str,
+    token: str,
+    filename: str = "file",
+    file_id: Optional[str] = None,
+    bot_id: str = "",
+    route_env: str = "",
+) -> dict:
+    """
+    调用 genUploadInfo 接口获取 COS 临时密钥及上传配置。
+
+    Args:
+        app_key:        应用 Key（用于 X-ID 头）
+        api_domain:     API 域名（如 https://bot.yuanbao.tencent.com）
+        token:          当前有效的签票 token（X-Token 头）
+        filename:       待上传的文件名（含扩展名）
+        file_id:        客户端生成的唯一文件 ID（不传则自动生成）
+        bot_id:         Bot 账号 ID（用于 X-ID 头）
+
+    Returns:
+        COS 上传配置 dict，包含以下字段：
+            bucketName         (str)  — COS Bucket 名称
+            region             (str)  — COS 地域
+            location           (str)  — 上传 Key（对象路径）
+            encryptTmpSecretId (str)  — 临时 SecretId
+            encryptTmpSecretKey(str)  — 临时 SecretKey
+            encryptToken       (str)  — SessionToken
+            startTime          (int)  — 凭证起始时间戳（Unix）
+            expiredTime        (int)  — 凭证过期时间戳（Unix）
+            resourceUrl        (str)  — 上传后的公网访问 URL
+            resourceID         (str)  — 资源 ID（可选）
+
+    Raises:
+        RuntimeError: 接口返回非 0 code 或字段缺失
+    """
+    if file_id is None:
+        file_id = generate_file_id()
+
+    upload_url = f"{api_domain.rstrip('/')}{UPLOAD_INFO_PATH}"
+
+    headers = {
+        "Content-Type": "application/json",
+        "X-Token": token,
+        "X-ID": bot_id or app_key,
+        "X-Source": "web",
+    }
+    if route_env:
+        headers["X-Route-Env"] = route_env
+    body = {
+        "fileName": filename,
+        "fileId": file_id,
+        "docFrom": "localDoc",
+        "docOpenId": "",
+    }
+
+    async with httpx.AsyncClient(timeout=15.0) as client:
+        resp = await client.post(upload_url, json=body, headers=headers)
+        resp.raise_for_status()
+        result: dict[str, Any] = resp.json()
+
+    code = result.get("code")
+    if code != 0 and code is not None:
+        raise RuntimeError(
+            f"genUploadInfo 失败: code={code}, msg={result.get('msg', '')}"
+        )
+
+    data = result.get("data") or result
+    required_fields = ["bucketName", "location"]
+    missing = [f for f in required_fields if not data.get(f)]
+    if missing:
+        raise RuntimeError(
+            f"genUploadInfo 返回字段不完整: 缺少字段 {missing}"
+        )
+
+    return data
+
+
+async def upload_to_cos(
+    file_bytes: bytes,
+    filename: str,
+    content_type: str,
+    credentials: dict,
+    bucket: str,
+    region: str,
+) -> dict:
+    """
+    通过 httpx PUT 请求将文件上传到 COS。
+    使用临时凭证（tmpSecretId/tmpSecretKey/sessionToken）构建 HMAC-SHA1 签名。
+
+    Args:
+        file_bytes:   文件二进制内容
+        filename:     文件名（用于辅助计算 MIME、UUID）
+        content_type: MIME 类型（如 "image/jpeg"）
+        credentials:  get_cos_credentials() 返回的 dict，包含：
+                        encryptTmpSecretId  → tmpSecretId
+                        encryptTmpSecretKey → tmpSecretKey
+                        encryptToken        → sessionToken
+                        location            → COS key（对象路径）
+                        resourceUrl         → 上传后公网 URL
+                        startTime           → 凭证起始时间（Unix）
+                        expiredTime         → 凭证过期时间（Unix）
+        bucket:       COS Bucket 名称（如 chatbot-1234567890）
+        region:       COS 地域（如 ap-guangzhou）
+
+    Returns:
+        上传结果 dict，包含：
+            url       (str)           — COS 公网访问 URL
+            uuid      (str)           — 文件内容 MD5
+            size      (int)           — 文件大小（字节）
+            width     (int, optional) — 图片宽度（仅图片）
+            height    (int, optional) — 图片高度（仅图片）
+
+    Raises:
+        httpx.HTTPStatusError: COS 返回非 2xx 状态
+        RuntimeError:          credentials 字段缺失
+    """
+    secret_id: str = credentials.get("encryptTmpSecretId", "")
+    secret_key: str = credentials.get("encryptTmpSecretKey", "")
+    session_token: str = credentials.get("encryptToken", "")
+    cos_key: str = credentials.get("location", "")
+    resource_url: str = credentials.get("resourceUrl", "")
+    start_time: Optional[int] = credentials.get("startTime")
+    expired_time: Optional[int] = credentials.get("expiredTime")
+
+    if not secret_id or not secret_key or not cos_key:
+        raise RuntimeError(
+            f"COS credentials 不完整: secretId={bool(secret_id)}, "
+            f"secretKey={bool(secret_key)}, location={bool(cos_key)}"
+        )
+
+    # 构建 COS 上传 URL（优先使用全球加速域名）
+    if COS_USE_ACCELERATE:
+        cos_host = f"{bucket}.cos.accelerate.myqcloud.com"
+    else:
+        cos_host = f"{bucket}.cos.{region}.myqcloud.com"
+
+    # URL encode cos_key（保留 /）
+    encoded_key = urllib.parse.quote(cos_key, safe="/")
+    cos_url = f"https://{cos_host}/{encoded_key.lstrip('/')}"
+
+    # 确定 Content-Type
+    if not content_type or content_type == "application/octet-stream":
+        if is_image(filename):
+            content_type = guess_mime_type(filename)
+        else:
+            content_type = "application/octet-stream"
+
+    # 计算文件 MD5 + size
+    file_uuid = md5_hex(file_bytes)
+    file_size = len(file_bytes)
+
+    # 参与签名的请求头
+    sign_headers = {
+        "host": cos_host,
+        "content-type": content_type,
+        "x-cos-security-token": session_token,
+    }
+
+    # 计算签名有效期
+    now = int(time.time())
+    sign_start = start_time if start_time else now
+    sign_expire = (expired_time - now) if expired_time and expired_time > now else 3600
+
+    authorization = _cos_sign(
+        method="put",
+        path=f"/{encoded_key.lstrip('/')}",
+        params={},
+        headers=sign_headers,
+        secret_id=secret_id,
+        secret_key=secret_key,
+        start_time=sign_start,
+        expire_seconds=sign_expire,
+    )
+
+    put_headers = {
+        "Authorization": authorization,
+        "Content-Type": content_type,
+        "x-cos-security-token": session_token,
+    }
+
+    logger.info(
+        "COS PUT: bucket=%s region=%s key=%s size=%d mime=%s",
+        bucket, region, cos_key, file_size, content_type,
+    )
+
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        resp = await client.put(
+            cos_url,
+            content=file_bytes,
+            headers=put_headers,
+        )
+        resp.raise_for_status()
+
+    # 解析图片尺寸（仅图片类型）
+    result: dict[str, Any] = {
+        "url": resource_url or cos_url,
+        "uuid": file_uuid,
+        "size": file_size,
+    }
+
+    if content_type.startswith("image/"):
+        size_info = parse_image_size(file_bytes)
+        if size_info:
+            result["width"] = size_info["width"]
+            result["height"] = size_info["height"]
+
+    logger.info(
+        "COS 上传成功: url=%s size=%d",
+        result["url"], file_size,
+    )
+    return result
+
+
+# ============ TIM 媒体消息构建 ============
+
+def build_image_msg_body(
+    url: str,
+    uuid: Optional[str] = None,
+    filename: Optional[str] = None,
+    size: int = 0,
+    width: int = 0,
+    height: int = 0,
+    mime_type: str = "",
+) -> list[dict]:
+    """
+    构建腾讯 IM TIMImageElem 消息体。
+    参考：https://cloud.tencent.com/document/product/269/2720
+
+    Args:
+        url:       图片公网访问 URL（COS resourceUrl）
+        uuid:      文件 UUID（MD5 或其他唯一标识）
+        filename:  文件名（uuid 为空时作为备用）
+        size:      文件大小（字节）
+        width:     图片宽度（像素）
+        height:    图片高度（像素）
+        mime_type: MIME 类型（用于确定 image_format）
+
+    Returns:
+        TIMImageElem 消息体列表（适合直接放入 msg_body）
+    """
+    _uuid = uuid or filename or _basename_from_url(url) or "image"
+    image_format = get_image_format(mime_type) if mime_type else 255
+
+    return [
+        {
+            "msg_type": "TIMImageElem",
+            "msg_content": {
+                "uuid": _uuid,
+                "image_format": image_format,
+                "image_info_array": [
+                    {
+                        "type": 1,       # 1 = 原图
+                        "size": size,
+                        "width": width,
+                        "height": height,
+                        "url": url,
+                    }
+                ],
+            },
+        }
+    ]
+
+
+def build_file_msg_body(
+    url: str,
+    filename: str,
+    uuid: Optional[str] = None,
+    size: int = 0,
+) -> list[dict]:
+    """
+    构建腾讯 IM TIMFileElem 消息体。
+    参考：https://cloud.tencent.com/document/product/269/2720
+
+    Args:
+        url:      文件公网访问 URL（COS resourceUrl）
+        filename: 文件名（含扩展名）
+        uuid:     文件 UUID（MD5 或其他唯一标识，不传则使用 filename）
+        size:     文件大小（字节）
+
+    Returns:
+        TIMFileElem 消息体列表（适合直接放入 msg_body）
+    """
+    _uuid = uuid or filename
+
+    return [
+        {
+            "msg_type": "TIMFileElem",
+            "msg_content": {
+                "uuid": _uuid,
+                "file_name": filename,
+                "file_size": size,
+                "url": url,
+            },
+        }
+    ]
+
+
+# ============ 内部工具 ============
+
+def _basename_from_url(url: str) -> str:
+    """从 URL 提取文件名。"""
+    try:
+        parsed = urllib.parse.urlparse(url)
+        return os.path.basename(parsed.path)
+    except Exception:
+        return ""
@@ -0,0 +1,558 @@
+"""
+Yuanbao sticker (TIMFaceElem) support.
+
+Ported from yuanbao-openclaw-plugin/src/sticker/.
+
+TIMFaceElem wire format:
+    {
+        "msg_type": "TIMFaceElem",
+        "msg_content": {
+            "index": 0,          # always 0 per Yuanbao convention
+            "data": "<json>",    # serialised sticker metadata
+        }
+    }
+
+The `data` field carries a JSON string with the sticker's metadata so the
+receiver can look up the correct asset in the emoji pack.
+"""
+
+from __future__ import annotations
+
+import json
+import random
+import re
+import unicodedata
+from typing import Optional
+
+# ---------------------------------------------------------------------------
+# Sticker catalogue – ported from builtin-stickers.json
+# Key   : canonical name (Chinese)
+# Value : {sticker_id, package_id, name, description, width, height, formats}
+# ---------------------------------------------------------------------------
+STICKER_MAP: dict[str, dict] = {
+    "六六六": {
+        "sticker_id": "278", "package_id": "1003", "name": "六六六",
+        "description": "666 厉害 牛 棒 绝了 好强 awesome",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "我想开了": {
+        "sticker_id": "262", "package_id": "1003", "name": "我想开了",
+        "description": "想开 佛系 释怀 顿悟 看淡了 无所谓",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "害羞": {
+        "sticker_id": "130", "package_id": "1003", "name": "害羞",
+        "description": "腼腆 不好意思 脸红 娇羞 羞涩 捂脸",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "比心": {
+        "sticker_id": "252", "package_id": "1003", "name": "比心",
+        "description": "笔芯 爱你 爱心手势 love heart 喜欢你",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "委屈": {
+        "sticker_id": "125", "package_id": "1003", "name": "委屈",
+        "description": "难过 想哭 可怜巴巴 瘪嘴 受伤 被欺负",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "亲亲": {
+        "sticker_id": "146", "package_id": "1003", "name": "亲亲",
+        "description": "么么 mua 亲一下 kiss 飞吻 啵",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "酷": {
+        "sticker_id": "131", "package_id": "1003", "name": "酷",
+        "description": "帅 墨镜 cool 高冷 有型 swagger",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "睡": {
+        "sticker_id": "145", "package_id": "1003", "name": "睡",
+        "description": "睡觉 困 zzZ 打盹 躺平 休眠 sleepy",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "发呆": {
+        "sticker_id": "152", "package_id": "1003", "name": "发呆",
+        "description": "懵 愣住 放空 呆滞 出神 脑子空白",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "可怜": {
+        "sticker_id": "157", "package_id": "1003", "name": "可怜",
+        "description": "卖萌 求饶 委屈巴巴 弱小 拜托 眼巴巴",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "摊手": {
+        "sticker_id": "200", "package_id": "1003", "name": "摊手",
+        "description": "无奈 没办法 耸肩 随便 那咋整 whatever",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "头大": {
+        "sticker_id": "213", "package_id": "1003", "name": "头大",
+        "description": "头疼 烦恼 郁闷 难搞 崩溃 一团乱",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "吓": {
+        "sticker_id": "256", "package_id": "1003", "name": "吓",
+        "description": "害怕 惊恐 震惊 吓一跳 恐怖 怂",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "吐血": {
+        "sticker_id": "203", "package_id": "1003", "name": "吐血",
+        "description": "无语 崩溃 被雷 内伤 一口老血 屮",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "哼": {
+        "sticker_id": "185", "package_id": "1003", "name": "哼",
+        "description": "傲娇 生气 不满 撇嘴 不理 赌气",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "嘿嘿": {
+        "sticker_id": "220", "package_id": "1003", "name": "嘿嘿",
+        "description": "坏笑 猥琐笑 偷笑 憨笑 得意 你懂的",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "头秃": {
+        "sticker_id": "218", "package_id": "1003", "name": "头秃",
+        "description": "程序员 加班 焦虑 没头发 秃了 肝爆",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "暗中观察": {
+        "sticker_id": "221", "package_id": "1003", "name": "暗中观察",
+        "description": "窥屏 潜水 偷偷看 角落 围观 屏住呼吸",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "我酸了": {
+        "sticker_id": "224", "package_id": "1003", "name": "我酸了",
+        "description": "嫉妒 柠檬精 羡慕 吃柠檬 眼红 恰柠檬",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "打call": {
+        "sticker_id": "246", "package_id": "1003", "name": "打call",
+        "description": "应援 加油 支持 喝彩 助威 call",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "庆祝": {
+        "sticker_id": "251", "package_id": "1003", "name": "庆祝",
+        "description": "祝贺 开心 耶 party 胜利 干杯",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "奋斗": {
+        "sticker_id": "151", "package_id": "1003", "name": "奋斗",
+        "description": "努力 加油 拼搏 冲 干劲 卷起来",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "惊讶": {
+        "sticker_id": "143", "package_id": "1003", "name": "惊讶",
+        "description": "震惊 哇 不敢相信 OMG 居然 这么离谱",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "疑问": {
+        "sticker_id": "144", "package_id": "1003", "name": "疑问",
+        "description": "问号 不懂 啥 为什么 啥情况 懵逼问",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "仔细分析": {
+        "sticker_id": "248", "package_id": "1003", "name": "仔细分析",
+        "description": "思考 推敲 认真 研究 琢磨 让我想想",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "撅嘴": {
+        "sticker_id": "184", "package_id": "1003", "name": "撅嘴",
+        "description": "嘟嘴 卖萌 不高兴 撒娇 嘴翘",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "泪奔": {
+        "sticker_id": "199", "package_id": "1003", "name": "泪奔",
+        "description": "大哭 伤心 破防 感动哭 泪流满面 呜呜",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "尊嘟假嘟": {
+        "sticker_id": "276", "package_id": "1003", "name": "尊嘟假嘟",
+        "description": "真的假的 真假 可爱问 你骗我 是不是",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "略略略": {
+        "sticker_id": "113", "package_id": "1003", "name": "略略略",
+        "description": "调皮 吐舌 不服 略 气死你 鬼脸",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "困": {
+        "sticker_id": "180", "package_id": "1003", "name": "困",
+        "description": "想睡 倦 打哈欠 睁不开眼 好困啊 sleepy",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "折磨": {
+        "sticker_id": "181", "package_id": "1003", "name": "折磨",
+        "description": "难受 痛苦 煎熬 蚌埠住了 受不了 要命",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "抠鼻": {
+        "sticker_id": "182", "package_id": "1003", "name": "抠鼻",
+        "description": "不屑 无聊 淡定 无所谓 鄙视 挖鼻",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "鼓掌": {
+        "sticker_id": "183", "package_id": "1003", "name": "鼓掌",
+        "description": "拍手 叫好 赞同 666 喝彩 掌声",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "斜眼笑": {
+        "sticker_id": "204", "package_id": "1003", "name": "斜眼笑",
+        "description": "滑稽 坏笑 doge 意味深长 阴阳怪气 嘿嘿嘿",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "辣眼睛": {
+        "sticker_id": "216", "package_id": "1003", "name": "辣眼睛",
+        "description": "看不下去 cringe 毁三观 太丑了 瞎了",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "哦哟": {
+        "sticker_id": "217", "package_id": "1003", "name": "哦哟",
+        "description": "惊讶 起哄 哇哦 有戏 不简单 哟",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "吃瓜": {
+        "sticker_id": "222", "package_id": "1003", "name": "吃瓜",
+        "description": "围观 看戏 八卦 路人 看热闹 板凳",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "狗头": {
+        "sticker_id": "225", "package_id": "1003", "name": "狗头",
+        "description": "doge 保命 开玩笑 滑稽 反讽 懂的都懂",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "敬礼": {
+        "sticker_id": "227", "package_id": "1003", "name": "敬礼",
+        "description": "salute 尊重 收到 遵命 致敬 报告",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "哦": {
+        "sticker_id": "231", "package_id": "1003", "name": "哦",
+        "description": "知道了 明白 敷衍 嗯 这样啊 收到",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "拿到红包": {
+        "sticker_id": "236", "package_id": "1003", "name": "拿到红包",
+        "description": "红包 谢谢老板 发财 开心 抢到了 欧气",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "牛吖": {
+        "sticker_id": "239", "package_id": "1003", "name": "牛吖",
+        "description": "牛 厉害 强 666 佩服 大佬",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "贴贴": {
+        "sticker_id": "272", "package_id": "1003", "name": "贴贴",
+        "description": "抱抱 亲昵 蹭蹭 亲密 靠靠 撒娇贴",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "爱心": {
+        "sticker_id": "138", "package_id": "1003", "name": "爱心",
+        "description": "心 love 喜欢你 红心 示爱 么么哒",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "晚安": {
+        "sticker_id": "170", "package_id": "1003", "name": "晚安",
+        "description": "好梦 睡了 night 早点休息 安啦 moon",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "太阳": {
+        "sticker_id": "176", "package_id": "1003", "name": "太阳",
+        "description": "晴天 早上好 阳光 morning 好天气 日",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "柠檬": {
+        "sticker_id": "266", "package_id": "1003", "name": "柠檬",
+        "description": "酸 嫉妒 柠檬精 羡慕 我酸 恰柠檬",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "大冤种": {
+        "sticker_id": "267", "package_id": "1003", "name": "大冤种",
+        "description": "倒霉 吃亏 自嘲 好心没好报 背锅 工具人",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "吐了": {
+        "sticker_id": "132", "package_id": "1003", "name": "吐了",
+        "description": "恶心 yue 受不了 嫌弃 想吐 生理不适",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "怒": {
+        "sticker_id": "134", "package_id": "1003", "name": "怒",
+        "description": "生气 愤怒 火大 暴躁 气炸 怼",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "玫瑰": {
+        "sticker_id": "165", "package_id": "1003", "name": "玫瑰",
+        "description": "花 示爱 表白 浪漫 送你花 情人节",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "凋谢": {
+        "sticker_id": "119", "package_id": "1003", "name": "凋谢",
+        "description": "花谢 失恋 难过 枯萎 心碎 凉了",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "点赞": {
+        "sticker_id": "159", "package_id": "1003", "name": "点赞",
+        "description": "赞 认同 好棒 good like 大拇指 顶",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "握手": {
+        "sticker_id": "164", "package_id": "1003", "name": "握手",
+        "description": "合作 你好 商务 hello deal 成交 友好",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "抱拳": {
+        "sticker_id": "163", "package_id": "1003", "name": "抱拳",
+        "description": "谢谢 失敬 江湖 承让 拜托 有礼",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "ok": {
+        "sticker_id": "169", "package_id": "1003", "name": "ok",
+        "description": "好的 收到 没问题 okay 行 可以 懂了",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "拳头": {
+        "sticker_id": "174", "package_id": "1003", "name": "拳头",
+        "description": "加油 干 冲 fight 力量 击拳 硬气",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "鞭炮": {
+        "sticker_id": "191", "package_id": "1003", "name": "鞭炮",
+        "description": "过年 喜庆 爆竹 春节 噼里啪啦 红",
+        "width": 128, "height": 128, "formats": "png",
+    },
+    "烟花": {
+        "sticker_id": "258", "package_id": "1003", "name": "烟花",
+        "description": "庆典 漂亮 新年 嘭 绽放 节日快乐",
+        "width": 128, "height": 128, "formats": "png",
+    },
+}
+
+
+def get_sticker_by_name(name: str) -> Optional[dict]:
+    """
+    按名称查找贴纸，支持模糊匹配。
+
+    匹配优先级：
+      1. 完全相等（name）
+      2. name 包含查询词（前缀/子串）
+      3. description 包含查询词（同义词搜索）
+      4. 通用模糊评分（与 sticker-search 同算法），命中即返回得分最高的一条
+
+    返回 sticker dict，找不到返回 None。
+    """
+    if not name:
+        return None
+
+    query = name.strip()
+
+    if query in STICKER_MAP:
+        return STICKER_MAP[query]
+
+    for key, sticker in STICKER_MAP.items():
+        if query in key or key in query:
+            return sticker
+
+    for sticker in STICKER_MAP.values():
+        desc = sticker.get("description", "")
+        if query in desc:
+            return sticker
+
+    matches = search_stickers(query, limit=1)
+    return matches[0] if matches else None
+
+
+def get_random_sticker(category: str = None) -> dict:
+    """
+    随机返回一个贴纸。
+
+    若指定 category，则在 description 中含有该关键词的贴纸里随机选取；
+    category 为 None 时从全表随机。
+    """
+    if category:
+        candidates = [
+            s for s in STICKER_MAP.values()
+            if category in s.get("description", "") or category in s.get("name", "")
+        ]
+        if candidates:
+            return random.choice(candidates)
+    return random.choice(list(STICKER_MAP.values()))
+
+
+def get_sticker_by_id(sticker_id: str) -> Optional[dict]:
+    """按 sticker_id 精确查找贴纸。"""
+    if not sticker_id:
+        return None
+    sid = str(sticker_id).strip()
+    for sticker in STICKER_MAP.values():
+        if sticker.get("sticker_id") == sid:
+            return sticker
+    return None
+
+
+# ---------------------------------------------------------------------------
+# 模糊搜索（对齐 chatbot-web yuanbao-openclaw-plugin/sticker-cache.ts.searchStickers）
+# ---------------------------------------------------------------------------
+
+_PUNCT_RE = re.compile(r"[\s\u3000\-_·.,，。!！?？\"“”'‘’、/\\]+")
+
+
+def _normalize_text(raw: str) -> str:
+    return unicodedata.normalize("NFKC", str(raw or "")).strip().lower()
+
+
+def _compact_text(raw: str) -> str:
+    return _PUNCT_RE.sub("", _normalize_text(raw))
+
+
+def _multiset_char_hit_ratio(needle: str, haystack: str) -> float:
+    if not needle:
+        return 0.0
+    bag: dict[str, int] = {}
+    for ch in haystack:
+        bag[ch] = bag.get(ch, 0) + 1
+    hits = 0
+    for ch in needle:
+        n = bag.get(ch, 0)
+        if n > 0:
+            hits += 1
+            bag[ch] = n - 1
+    return hits / len(needle)
+
+
+def _bigram_jaccard(a: str, b: str) -> float:
+    if len(a) < 2 or len(b) < 2:
+        return 0.0
+    A = {a[i:i + 2] for i in range(len(a) - 1)}
+    B = {b[i:i + 2] for i in range(len(b) - 1)}
+    inter = len(A & B)
+    union = len(A) + len(B) - inter
+    return inter / union if union else 0.0
+
+
+def _longest_subsequence_ratio(needle: str, haystack: str) -> float:
+    if not needle:
+        return 0.0
+    j = 0
+    for ch in haystack:
+        if j >= len(needle):
+            break
+        if ch == needle[j]:
+            j += 1
+    return j / len(needle)
+
+
+def _score_field(haystack: str, query: str) -> float:
+    hay = _normalize_text(haystack)
+    q = _normalize_text(query)
+    if not hay or not q:
+        return 0.0
+    hay_c = _compact_text(haystack)
+    q_c = _compact_text(query)
+    best = 0.0
+    if hay == q:
+        best = max(best, 100.0)
+    if q in hay:
+        best = max(best, 92 + min(6, len(q)))
+    if len(q) >= 2 and hay.startswith(q):
+        best = max(best, 88.0)
+    if q_c and q_c in hay_c:
+        best = max(best, 86.0)
+    best = max(best, _multiset_char_hit_ratio(q_c, hay_c) * 62)
+    best = max(best, _bigram_jaccard(q_c, hay_c) * 58)
+    best = max(best, _longest_subsequence_ratio(q_c, hay_c) * 52)
+    if len(q) == 1 and q in hay:
+        best = max(best, 68.0)
+    return best
+
+
+def search_stickers(query: str, limit: int = 10) -> list[dict]:
+    """
+    在内置贴纸表中按模糊匹配排序返回前 N 条结果。
+
+    评分综合 name/description 字段的子串、字符多重集覆盖、bigram Jaccard、子序列比例。
+    name 权重略高于 description（×0.88）。空 query 时按字典顺序返回前 N 条。
+    """
+    safe_limit = max(1, min(500, int(limit) if limit else 10))
+    if not query or not _normalize_text(query):
+        return list(STICKER_MAP.values())[:safe_limit]
+
+    scored: list[tuple[float, dict]] = []
+    for sticker in STICKER_MAP.values():
+        name_s = _score_field(sticker.get("name", ""), query)
+        desc_s = _score_field(sticker.get("description", ""), query) * 0.88
+        sid = str(sticker.get("sticker_id", "")).strip()
+        q_norm = _normalize_text(query)
+        id_s = 0.0
+        if sid and q_norm:
+            sid_norm = _normalize_text(sid)
+            if sid_norm == q_norm:
+                id_s = 100.0
+            elif q_norm in sid_norm:
+                id_s = 84.0
+        scored.append((max(name_s, desc_s, id_s), sticker))
+
+    scored.sort(key=lambda x: x[0], reverse=True)
+    top = scored[0][0] if scored else 0
+    if top <= 0:
+        return [s for _, s in scored[:safe_limit]]
+
+    if top >= 22:
+        floor = 18.0
+    elif top >= 12:
+        floor = max(10.0, top * 0.5)
+    else:
+        floor = max(6.0, top * 0.35)
+
+    filtered = [pair for pair in scored if pair[0] >= floor]
+    out = filtered if filtered else scored
+    return [s for _, s in out[:safe_limit]]
+
+
+def build_face_msg_body(
+    face_index: int,
+    face_type: int = 1,
+    data: Optional[str] = None,
+) -> list:
+    """
+    构造 TIMFaceElem 消息体。
+
+    Yuanbao 约定：
+      - index 固定传 0（服务端通过 data 字段识别具体表情）
+      - data 为 JSON 字符串，包含 sticker_id / package_id 等字段
+
+    Args:
+        face_index: 保留字段，暂时不影响 wire format（Yuanbao 固定 index=0）。
+                    当 face_index > 0 时视为旧版 QQ 表情 ID，直接放入 index。
+        face_type:  保留字段（兼容旧接口，当前未使用）。
+        data:       已序列化的 JSON 字符串；为 None 时仅传 index。
+
+    Returns:
+        符合 Yuanbao TIM 协议的 msg_body list，如::
+
+            [{"msg_type": "TIMFaceElem", "msg_content": {"index": 0, "data": "..."}}]
+    """
+    msg_content: dict = {"index": face_index}
+    if data is not None:
+        msg_content["data"] = data
+    return [{"msg_type": "TIMFaceElem", "msg_content": msg_content}]
+
+
+def build_sticker_msg_body(sticker: dict) -> list:
+    """
+    从 STICKER_MAP 中的 sticker dict 直接构造 TIMFaceElem 消息体。
+
+    这是 send_sticker() 的内部辅助，确保 data 字段与原始 JS 插件一致。
+    """
+    data_payload = json.dumps(
+        {
+            "sticker_id": sticker["sticker_id"],
+            "package_id": sticker["package_id"],
+            "width": sticker.get("width", 128),
+            "height": sticker.get("height", 128),
+            "formats": sticker.get("formats", "png"),
+            "name": sticker["name"],
+        },
+        ensure_ascii=False,
+        separators=(",", ":"),
+    )
+    return build_face_msg_body(face_index=0, data=data_payload)
@@ -0,0 +1,150 @@
+"""Gateway runtime-metadata footer.
+
+Renders a compact footer showing runtime state (model, context %, cwd) and
+appends it to the FINAL message of an agent turn when enabled.  Off by default
+to keep replies minimal.
+
+Config (``~/.hermes/config.yaml``)::
+
+    display:
+      runtime_footer:
+        enabled: true                       # off by default
+        fields: [model, context_pct, cwd]   # order shown; drop any to hide
+
+Per-platform overrides live under ``display.platforms.<platform>.runtime_footer``.
+Users can toggle the global setting with ``/footer on|off`` from both the CLI
+and any gateway platform.
+
+The footer is appended to the final response text in ``gateway/run.py`` right
+before returning the response to the adapter send path — so it only lands on
+the final message a user sees, not on tool-progress updates or streaming
+partials.  When streaming is on and the final text has already been delivered
+piecemeal, the footer is sent as a separate trailing message via
+``send_trailing_footer()``.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+_DEFAULT_FIELDS: tuple[str, ...] = ("model", "context_pct", "cwd")
+_SEP = " · "
+
+
+def _home_relative_cwd(cwd: str) -> str:
+    """Return *cwd* with ``$HOME`` collapsed to ``~``.  Empty string if unset."""
+    if not cwd:
+        return ""
+    try:
+        home = os.path.expanduser("~")
+        p = os.path.abspath(cwd)
+        if home and (p == home or p.startswith(home + os.sep)):
+            return "~" + p[len(home):]
+        return p
+    except Exception:
+        return cwd
+
+
+def _model_short(model: Optional[str]) -> str:
+    """Drop ``vendor/`` prefix for readability (``openai/gpt-5.4`` → ``gpt-5.4``)."""
+    if not model:
+        return ""
+    return model.rsplit("/", 1)[-1]
+
+
+def resolve_footer_config(
+    user_config: dict[str, Any] | None,
+    platform_key: str | None = None,
+) -> dict[str, Any]:
+    """Resolve effective runtime-footer config for *platform_key*.
+
+    Merge order (later wins):
+        1. Built-in defaults (enabled=False)
+        2. ``display.runtime_footer``
+        3. ``display.platforms.<platform_key>.runtime_footer``
+    """
+    resolved = {"enabled": False, "fields": list(_DEFAULT_FIELDS)}
+    cfg = (user_config or {}).get("display") or {}
+
+    global_cfg = cfg.get("runtime_footer")
+    if isinstance(global_cfg, dict):
+        if "enabled" in global_cfg:
+            resolved["enabled"] = bool(global_cfg.get("enabled"))
+        if isinstance(global_cfg.get("fields"), list) and global_cfg["fields"]:
+            resolved["fields"] = [str(f) for f in global_cfg["fields"]]
+
+    if platform_key:
+        platforms = cfg.get("platforms") or {}
+        plat_cfg = platforms.get(platform_key)
+        if isinstance(plat_cfg, dict):
+            plat_footer = plat_cfg.get("runtime_footer")
+            if isinstance(plat_footer, dict):
+                if "enabled" in plat_footer:
+                    resolved["enabled"] = bool(plat_footer.get("enabled"))
+                if isinstance(plat_footer.get("fields"), list) and plat_footer["fields"]:
+                    resolved["fields"] = [str(f) for f in plat_footer["fields"]]
+
+    return resolved
+
+
+def format_runtime_footer(
+    *,
+    model: Optional[str],
+    context_tokens: int,
+    context_length: Optional[int],
+    cwd: Optional[str] = None,
+    fields: Iterable[str] = _DEFAULT_FIELDS,
+) -> str:
+    """Render the footer line, or return "" if no fields have data.
+
+    Fields are skipped silently when their underlying data is missing — a
+    partially-populated footer is better than a line with ``?%`` or empty slots.
+    """
+    parts: list[str] = []
+    for field in fields:
+        if field == "model":
+            m = _model_short(model)
+            if m:
+                parts.append(m)
+        elif field == "context_pct":
+            if context_length and context_length > 0 and context_tokens >= 0:
+                pct = max(0, min(100, round((context_tokens / context_length) * 100)))
+                parts.append(f"{pct}%")
+        elif field == "cwd":
+            rel = _home_relative_cwd(cwd or os.environ.get("TERMINAL_CWD", ""))
+            if rel:
+                parts.append(rel)
+        # Unknown field names are silently ignored.
+
+    if not parts:
+        return ""
+    return _SEP.join(parts)
+
+
+def build_footer_line(
+    *,
+    user_config: dict[str, Any] | None,
+    platform_key: str | None,
+    model: Optional[str],
+    context_tokens: int,
+    context_length: Optional[int],
+    cwd: Optional[str] = None,
+) -> str:
+    """Top-level entry point used by gateway/run.py.
+
+    Returns the footer text (empty string when disabled or no data).  Callers
+    append this to the final response themselves, preserving a single blank
+    line of separation.
+    """
+    cfg = resolve_footer_config(user_config, platform_key)
+    if not cfg.get("enabled"):
+        return ""
+    return format_runtime_footer(
+        model=model,
+        context_tokens=context_tokens,
+        context_length=context_length,
+        cwd=cwd,
+        fields=cfg.get("fields") or _DEFAULT_FIELDS,
+    )
@@ -60,6 +60,10 @@ from .config import (
    SessionResetPolicy,  # noqa: F401 — re-exported via gateway/__init__.py
    HomeChannel,
 )
+from .whatsapp_identity import (
+    canonical_whatsapp_identifier,
+)
+from utils import atomic_replace


@dataclass
@@ -80,9 +84,12 @@ class SessionSource:
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
-    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
+    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
+    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
+    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
+    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    
    @property
    def description(self) -> str:
@@ -120,8 +127,14 @@ class SessionSource:
            d["user_id_alt"] = self.user_id_alt
        if self.chat_id_alt:
            d["chat_id_alt"] = self.chat_id_alt
+        if self.guild_id:
+            d["guild_id"] = self.guild_id
+        if self.parent_chat_id:
+            d["parent_chat_id"] = self.parent_chat_id
+        if self.message_id:
+            d["message_id"] = self.message_id
        return d
-    
+
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
        return cls(
@@ -135,6 +148,9 @@ class SessionSource:
            chat_topic=data.get("chat_topic"),
            user_id_alt=data.get("user_id_alt"),
            chat_id_alt=data.get("chat_id_alt"),
+            guild_id=data.get("guild_id"),
+            parent_chat_id=data.get("parent_chat_id"),
+            message_id=data.get("message_id"),
        )
    

@@ -186,6 +202,31 @@ that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
 and the LLM needs the real ID to tag users."""


+def _discord_tools_loaded() -> bool:
+    """True iff the agent will actually have Discord tools this session.
+
+    Two conditions must hold:
+      1. The `discord` or `discord_admin` toolset is enabled for the
+         Discord platform via `hermes tools` (opt-in, default OFF).
+      2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
+         at registry time, so the toolset being enabled in config is not
+         enough if the token isn't configured.
+
+    Returns False (safe default — keeps the stale-API disclaimer) on any
+    error so a bad config can't silently promise tools the agent lacks.
+    """
+    if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
+        return False
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.tools_config import _get_platform_tools
+        cfg = load_config()
+        enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
+        return "discord" in enabled or "discord_admin" in enabled
+    except Exception:
+        return False
+
+
 def build_session_context_prompt(
    context: SessionContext,
    *,
@@ -269,17 +310,57 @@ def build_session_context_prompt(
            "**Platform notes:** You are running inside Slack. "
            "You do NOT have access to Slack-specific APIs — you cannot search "
            "channel history, pin/unpin messages, manage channels, or list users. "
-            "Do not promise to perform these actions. If the user asks, explain "
-            "that you can only read messages sent directly to you and respond."
+            "Do not promise to perform these actions. The gateway may inline the "
+            "current message's Slack block/attachment payload when available, but "
+            "you still cannot call Slack APIs yourself."
        )
    elif context.source.platform == Platform.DISCORD:
+        # Inject the Discord IDs block only when the agent actually has
+        # Discord tools loaded this session — i.e. the user opted into
+        # `discord` / `discord_admin` via `hermes tools` AND the bot
+        # token is configured.  Otherwise keep the stale-API disclaimer
+        # honest so we never promise tools the agent lacks.
+        if _discord_tools_loaded():
+            src = context.source
+            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
+            if src.guild_id:
+                id_lines.append(f"  - Guild: `{src.guild_id}`")
+            if src.thread_id and src.parent_chat_id:
+                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
+                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
+            else:
+                id_lines.append(f"  - Channel: `{src.chat_id}`")
+            if src.message_id:
+                id_lines.append(f"  - Triggering message: `{src.message_id}`")
+            lines.extend(id_lines)
+        else:
+            lines.append("")
+            lines.append(
+                "**Platform notes:** You are running inside Discord. "
+                "You do NOT have access to Discord-specific APIs — you cannot search "
+                "channel history, pin messages, manage roles, or list server members. "
+                "Do not promise to perform these actions. If the user asks, explain "
+                "that you can only read messages sent directly to you and respond."
+            )
+    elif context.source.platform == Platform.BLUEBUBBLES:
        lines.append("")
        lines.append(
-            "**Platform notes:** You are running inside Discord. "
-            "You do NOT have access to Discord-specific APIs — you cannot search "
-            "channel history, pin messages, manage roles, or list server members. "
-            "Do not promise to perform these actions. If the user asks, explain "
-            "that you can only read messages sent directly to you and respond."
+            "**Platform notes:** You are responding via iMessage. "
+            "Keep responses short and conversational — think texts, not essays. "
+            "Structure longer replies as separate short thoughts, each separated "
+            "by a blank line (double newline). Each block between blank lines "
+            "will be delivered as its own iMessage bubble, so write accordingly: "
+            "one idea per bubble, 1–3 sentences each. "
+            "If the user needs a detailed answer, give the short version first "
+            "and offer to elaborate."
+        )
+    elif context.source.platform == Platform.YUANBAO:
+        lines.append("")
+        lines.append(
+            "**Platform notes:** You are running inside Yuanbao. "
+            "You CAN send private (DM) messages via the send_message tool. "
+            "Use target='yuanbao:direct:<account_id>' for DM "
+            "and target='yuanbao:group:<group_code>' for group chat."
        )

    # Connected platforms
@@ -367,11 +448,11 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
-    # Set by the background expiry watcher after it successfully flushes
-    # memories for this session.  Persisted to sessions.json so the flag
-    # survives gateway restarts (the old in-memory _pre_flushed_sessions
-    # set was lost on restart, causing redundant re-flushes).
-    memory_flushed: bool = False
+    # Set by the background expiry watcher after it finalizes an expired
+    # session (invoking on_session_finalize hooks and evicting the cached
+    # agent).  Persisted to sessions.json so the flag survives gateway
+    # restarts — prevents redundant finalization runs.
+    expiry_finalized: bool = False

    # When True the next call to get_or_create_session() will auto-reset
    # this session (create a new session_id) so the user starts fresh.
@@ -407,7 +488,7 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
-            "memory_flushed": self.memory_flushed,
+            "expiry_finalized": self.expiry_finalized,
            "suspended": self.suspended,
            "resume_pending": self.resume_pending,
            "resume_reason": self.resume_reason,
@@ -459,7 +540,7 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
-            memory_flushed=data.get("memory_flushed", False),
+            expiry_finalized=data.get("expiry_finalized", data.get("memory_flushed", False)),
            suspended=data.get("suspended", False),
            resume_pending=data.get("resume_pending", False),
            resume_reason=data.get("resume_reason"),
@@ -518,15 +599,24 @@ def build_session_key(
    """
    platform = source.platform.value
    if source.chat_type == "dm":
-        if source.chat_id:
+        dm_chat_id = source.chat_id
+        if source.platform == Platform.WHATSAPP:
+            dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
+
+        if dm_chat_id:
            if source.thread_id:
-                return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{source.chat_id}"
+                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
+            return f"agent:main:{platform}:dm:{dm_chat_id}"
        if source.thread_id:
            return f"agent:main:{platform}:dm:{source.thread_id}"
        return f"agent:main:{platform}:dm"

    participant_id = source.user_id_alt or source.user_id
+    if participant_id and source.platform == Platform.WHATSAPP:
+        # Same JID/LID-flip bug as the DM case: without canonicalisation, a
+        # single group member gets two isolated per-user sessions when the
+        # bridge reshuffles alias forms.
+        participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
    key_parts = ["agent:main", platform, source.chat_type]

    if source.chat_id:
@@ -615,7 +705,7 @@ class SessionStore:
                json.dump(data, f, indent=2)
                f.flush()
                os.fsync(f.fileno())
-            os.replace(tmp_path, sessions_file)
+            atomic_replace(tmp_path, sessions_file)
        except BaseException:
            try:
                os.unlink(tmp_path)
@@ -1151,6 +1241,7 @@ class SessionStore:
                    reasoning_content=message.get("reasoning_content") if message.get("role") == "assistant" else None,
                    reasoning_details=message.get("reasoning_details") if message.get("role") == "assistant" else None,
                    codex_reasoning_items=message.get("codex_reasoning_items") if message.get("role") == "assistant" else None,
+                    codex_message_items=message.get("codex_message_items") if message.get("role") == "assistant" else None,
                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
@@ -1166,24 +1257,11 @@ class SessionStore:
        Used by /retry, /undo, and /compress to persist modified conversation history.
        Rewrites both SQLite and legacy JSONL storage.
        """
-        # SQLite: clear old messages and re-insert
+        # SQLite: replace atomically so a mid-rewrite failure doesn't leave
+        # the session half-empty in the DB while JSONL still has history.
        if self._db:
            try:
-                self._db.clear_messages(session_id)
-                for msg in messages:
-                    role = msg.get("role", "unknown")
-                    self._db.append_message(
-                        session_id=session_id,
-                        role=role,
-                        content=msg.get("content"),
-                        tool_name=msg.get("tool_name"),
-                        tool_calls=msg.get("tool_calls"),
-                        tool_call_id=msg.get("tool_call_id"),
-                        reasoning=msg.get("reasoning") if role == "assistant" else None,
-                        reasoning_content=msg.get("reasoning_content") if role == "assistant" else None,
-                        reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
-                        codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
-                    )
+                self._db.replace_messages(session_id, messages)
            except Exception as e:
                logger.debug("Failed to rewrite transcript in DB: %s", e)
        
@@ -22,11 +22,18 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

+if sys.platform == "win32":
+    import msvcrt
+else:
+    import fcntl
+
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
+_GATEWAY_LOCK_FILENAME = "gateway.lock"
+_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -35,6 +42,14 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


+def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
+    """Return the path to the runtime gateway lock file."""
+    if pid_path is not None:
+        return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
+    home = get_hermes_home()
+    return home / _GATEWAY_LOCK_FILENAME
+
+
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -98,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]:
        return None


+def get_process_start_time(pid: int) -> Optional[int]:
+    """Public wrapper for retrieving a process start time when available."""
+    return _get_process_start_time(pid)
+
+
 def _read_process_cmdline(pid: int) -> Optional[str]:
    """Return the process command line as a space-separated string."""
    cmdline_path = Path(f"/proc/{pid}/cmdline")
@@ -121,6 +141,7 @@ def _looks_like_gateway_process(pid: int) -> bool:
        "hermes_cli.main gateway",
        "hermes_cli/main.py gateway",
        "hermes gateway",
+        "hermes-gateway",
        "gateway/run.py",
    )
    return any(pattern in cmdline for pattern in patterns)
@@ -212,16 +233,135 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


+def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
+    return _read_pid_record(lock_path or _get_gateway_lock_path())
+
+
+def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
+    if not record:
+        return None
+    try:
+        return int(record["pid"])
+    except (KeyError, TypeError, ValueError):
+        return None
+
+
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
+    """Delete a stale gateway PID file (and its sibling lock metadata).
+
+    Called from ``get_running_pid()`` after the runtime lock has already been
+    confirmed inactive, so the on-disk metadata is known to belong to a dead
+    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
+    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
+    ``--replace`` handoffs), this path force-unlinks both files so the next
+    startup sees a clean slate.
+    """
    if not cleanup_stale:
        return
    try:
-        if pid_path == _get_pid_path():
-            remove_pid_file()
-        else:
-            pid_path.unlink(missing_ok=True)
+        pid_path.unlink(missing_ok=True)
    except Exception:
        pass
+    try:
+        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
+    except Exception:
+        pass
+
+
+def _write_gateway_lock_record(handle) -> None:
+    handle.seek(0)
+    handle.truncate()
+    json.dump(_build_pid_record(), handle)
+    handle.flush()
+    try:
+        os.fsync(handle.fileno())
+    except OSError:
+        pass
+
+
+def _try_acquire_file_lock(handle) -> bool:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0, os.SEEK_END)
+            if handle.tell() == 0:
+                handle.write("\n")
+                handle.flush()
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        return True
+    except (BlockingIOError, OSError):
+        return False
+
+
+def _release_file_lock(handle) -> None:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+    except OSError:
+        pass
+
+
+def acquire_gateway_runtime_lock() -> bool:
+    """Claim the cross-process runtime lock for the gateway.
+
+    Unlike the PID file, the lock is owned by the live process itself. If the
+    process dies abruptly, the OS releases the lock automatically.
+    """
+    global _gateway_lock_handle
+    if _gateway_lock_handle is not None:
+        return True
+
+    path = _get_gateway_lock_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    handle = open(path, "a+", encoding="utf-8")
+    if not _try_acquire_file_lock(handle):
+        handle.close()
+        return False
+    _write_gateway_lock_record(handle)
+    _gateway_lock_handle = handle
+    return True
+
+
+def release_gateway_runtime_lock() -> None:
+    """Release the gateway runtime lock when owned by this process."""
+    global _gateway_lock_handle
+    handle = _gateway_lock_handle
+    if handle is None:
+        return
+    _gateway_lock_handle = None
+    _release_file_lock(handle)
+    try:
+        handle.close()
+    except OSError:
+        pass
+
+
+def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
+    """Return True when some process currently owns the gateway runtime lock."""
+    global _gateway_lock_handle
+    resolved_lock_path = lock_path or _get_gateway_lock_path()
+    if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
+        return True
+
+    if not resolved_lock_path.exists():
+        return False
+
+    handle = open(resolved_lock_path, "a+", encoding="utf-8")
+    try:
+        if _try_acquire_file_lock(handle):
+            _release_file_lock(handle)
+            return False
+        return True
+    finally:
+        try:
+            handle.close()
+        except OSError:
+            pass


 def write_pid_file() -> None:
@@ -361,7 +501,8 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
        if not stale:
            try:
                os.kill(existing_pid, 0)
-            except (ProcessLookupError, PermissionError):
+            except (ProcessLookupError, PermissionError, OSError):
+                # Windows raises OSError with WinError 87 for invalid pid check
                stale = True
            else:
                current_start = _get_process_start_time(existing_pid)
@@ -426,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None:
        pass


-def release_all_scoped_locks() -> int:
-    """Remove all scoped lock files in the lock directory.
+def release_all_scoped_locks(
+    *,
+    owner_pid: Optional[int] = None,
+    owner_start_time: Optional[int] = None,
+) -> int:
+    """Remove scoped lock files in the lock directory.

    Called during --replace to clean up stale locks left by stopped/killed
-    gateway processes that did not release their locks gracefully.
+    gateway processes that did not release their locks gracefully. When an
+    ``owner_pid`` is provided, only lock records belonging to that gateway
+    process are removed. ``owner_start_time`` further narrows the match to
+    protect against PID reuse.
+
+    When no owner is provided, preserves the legacy behavior and removes every
+    scoped lock file in the directory.
+
    Returns the number of lock files removed.
    """
    lock_dir = _get_lock_dir()
    removed = 0
    if lock_dir.exists():
        for lock_file in lock_dir.glob("*.lock"):
+            if owner_pid is not None:
+                record = _read_json_file(lock_file)
+                if not isinstance(record, dict):
+                    continue
+                try:
+                    record_pid = int(record.get("pid"))
+                except (TypeError, ValueError):
+                    continue
+                if record_pid != owner_pid:
+                    continue
+                if (
+                    owner_start_time is not None
+                    and record.get("start_time") != owner_start_time
+                ):
+                    continue
            try:
                lock_file.unlink(missing_ok=True)
                removed += 1
@@ -583,35 +750,46 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    record = _read_pid_record(resolved_pid_path)
-    if not record:
+    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
+    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
+    if not lock_active:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    try:
-        pid = int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    primary_record = _read_pid_record(resolved_pid_path)
+    fallback_record = _read_gateway_lock_record(resolved_lock_path)

-    try:
-        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-    except (ProcessLookupError, PermissionError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    for record in (primary_record, fallback_record):
+        pid = _pid_from_record(record)
+        if pid is None:
+            continue

-    recorded_start = record.get("start_time")
-    current_start = _get_process_start_time(pid)
-    if recorded_start is not None and current_start is not None and current_start != recorded_start:
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+        try:
+            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+        except ProcessLookupError:
+            continue
+        except PermissionError:
+            # The process exists but belongs to another user/service scope.
+            # With the runtime lock still held, prefer keeping it visible
+            # rather than deleting the PID file as "stale".
+            if _record_looks_like_gateway(record):
+                return pid
+            continue
+        except OSError:
+            # Windows raises OSError with WinError 87 for an invalid pid
+            # (process is definitely gone). Treat as "process doesn't exist".
+            continue

-    if not _looks_like_gateway_process(pid):
-        if not _record_looks_like_gateway(record):
-            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-            return None
+        recorded_start = record.get("start_time")
+        current_start = _get_process_start_time(pid)
+        if recorded_start is not None and current_start is not None and current_start != recorded_start:
+            continue

-    return pid
+        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
+            return pid
+
+    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+    return None


 def is_gateway_running(
@@ -44,6 +44,14 @@ class StreamConsumerConfig:
    buffer_threshold: int = 40
    cursor: str = " ▉"
    buffer_only: bool = False
+    # When >0, the final edit for a streamed response is delivered as a
+    # fresh message if the original preview has been visible for at least
+    # this many seconds.  This makes the platform's visible timestamp
+    # reflect completion time instead of first-token time for long-running
+    # responses (e.g. reasoning models that stream slowly).  Ported from
+    # openclaw/openclaw#72038.  Default 0 = always edit in place (legacy
+    # behavior).  The gateway enables this selectively per-platform.
+    fresh_final_after_seconds: float = 0.0


 class GatewayStreamConsumer:
@@ -83,14 +91,29 @@ class GatewayStreamConsumer:
        chat_id: str,
        config: Optional[StreamConsumerConfig] = None,
        metadata: Optional[dict] = None,
+        on_new_message: Optional[callable] = None,
    ):
        self.adapter = adapter
        self.chat_id = chat_id
        self.cfg = config or StreamConsumerConfig()
        self.metadata = metadata
+        # Fired whenever a fresh content bubble is created on the platform
+        # (first-send of a new message, commentary, overflow chunk, or
+        # fallback continuation). The gateway uses this to linearize the
+        # tool-progress bubble: when content resumes after a tool batch,
+        # the next tool.started should open a NEW progress bubble below
+        # the content, not edit the old bubble above it.
+        # Called with no arguments. Exceptions are swallowed.
+        self._on_new_message = on_new_message
        self._queue: queue.Queue = queue.Queue()
        self._accumulated = ""
        self._message_id: Optional[str] = None
+        # Wall-clock timestamp (time.monotonic) when ``_message_id`` was
+        # first assigned from a successful first-send.  Used by the
+        # fresh-final logic to detect long-lived previews whose edit
+        # timestamps would be stale by completion time.  Ported from
+        # openclaw/openclaw#72038.
+        self._message_created_ts: Optional[float] = None
        self._already_sent = False
        self._edit_supported = True  # Disabled when progressive edits are no longer usable
        self._last_edit_time = 0.0
@@ -132,10 +155,21 @@ class GatewayStreamConsumer:
        if text:
            self._queue.put((_COMMENTARY, text))

+    def _notify_new_message(self) -> None:
+        """Fire the on_new_message callback, swallowing any errors."""
+        cb = self._on_new_message
+        if cb is None:
+            return
+        try:
+            cb()
+        except Exception:
+            logger.debug("on_new_message callback error", exc_info=True)
+
    def _reset_segment_state(self, *, preserve_no_edit: bool = False) -> None:
        if preserve_no_edit and self._message_id == "__no_edit__":
            return
        self._message_id = None
+        self._message_created_ts = None
        self._accumulated = ""
        self._last_sent_text = ""
        self._fallback_final_send = False
@@ -514,6 +548,9 @@ class GatewayStreamConsumer:
                self._message_id = str(result.message_id)
                self._already_sent = True
                self._last_sent_text = text
+                # Fresh content bubble — close off any stale tool bubble
+                # above so the next tool starts a new bubble below.
+                self._notify_new_message()
                return str(result.message_id)
            else:
                self._edit_supported = False
@@ -646,6 +683,9 @@ class GatewayStreamConsumer:
            sent_any_chunk = True
            last_successful_chunk = chunk
            last_message_id = result.message_id or last_message_id
+            # Each fallback chunk is a fresh platform message — notify
+            # so any stale tool-progress bubble gets closed off.
+            self._notify_new_message()

        self._message_id = last_message_id
        self._already_sent = True
@@ -729,11 +769,91 @@ class GatewayStreamConsumer:
            # tool..."), not the final response. Setting already_sent would cause
            # the final response to be incorrectly suppressed when there are
            # multiple tool calls. See: https://github.com/NousResearch/hermes-agent/issues/10454
+            if result.success:
+                # Commentary counts as fresh content — close off any
+                # stale tool bubble above it so the next tool starts a
+                # new bubble below.
+                self._notify_new_message()
            return result.success
        except Exception as e:
            logger.error("Commentary send error: %s", e)
            return False

+    def _should_send_fresh_final(self) -> bool:
+        """Return True when a long-lived preview should be replaced with a
+        fresh final message instead of an edit.
+
+        Conditions:
+        - Fresh-final is enabled (``fresh_final_after_seconds > 0``).
+        - We have a real preview message id (not the ``__no_edit__`` sentinel
+          and not ``None``).
+        - The preview has been visible for at least the configured threshold.
+
+        Ported from openclaw/openclaw#72038.
+        """
+        threshold = getattr(self.cfg, "fresh_final_after_seconds", 0.0) or 0.0
+        if threshold <= 0:
+            return False
+        if not self._message_id or self._message_id == "__no_edit__":
+            return False
+        if self._message_created_ts is None:
+            return False
+        age = time.monotonic() - self._message_created_ts
+        return age >= threshold
+
+    async def _try_fresh_final(self, text: str) -> bool:
+        """Send ``text`` as a brand-new message (best-effort delete the old
+        preview) so the platform's visible timestamp reflects completion
+        time.  Returns True on successful delivery, False on any failure so
+        the caller falls back to the normal edit path.
+
+        Ported from openclaw/openclaw#72038.
+        """
+        old_message_id = self._message_id
+        try:
+            result = await self.adapter.send(
+                chat_id=self.chat_id,
+                content=text,
+                metadata=self.metadata,
+            )
+        except Exception as e:
+            logger.debug("Fresh-final send failed, falling back to edit: %s", e)
+            return False
+        if not getattr(result, "success", False):
+            return False
+        # Successful fresh send — try to delete the stale preview so the
+        # user doesn't see the old edit-stuck message underneath.  Cleanup
+        # is best-effort; platforms that don't implement ``delete_message``
+        # just leave the preview behind (still an acceptable outcome —
+        # the visible final timestamp is the important part).
+        if old_message_id and old_message_id != "__no_edit__":
+            delete_fn = getattr(self.adapter, "delete_message", None)
+            if delete_fn is not None:
+                try:
+                    await delete_fn(self.chat_id, old_message_id)
+                except Exception as e:
+                    logger.debug(
+                        "Fresh-final preview cleanup failed (%s): %s",
+                        old_message_id, e,
+                    )
+        # Adopt the new message id as the current message so subsequent
+        # callers (e.g. overflow split loops, finalize retries) see a
+        # consistent state.
+        new_message_id = getattr(result, "message_id", None)
+        if new_message_id:
+            self._message_id = new_message_id
+            self._message_created_ts = time.monotonic()
+        else:
+            # Send succeeded but platform didn't return an id — treat the
+            # delivery as final-only and fall back to "__no_edit__" so we
+            # don't try to edit something we can't address.
+            self._message_id = "__no_edit__"
+            self._message_created_ts = None
+        self._already_sent = True
+        self._last_sent_text = text
+        self._final_response_sent = True
+        return True
+
    async def _send_or_edit(self, text: str, *, finalize: bool = False) -> bool:
        """Send or edit the streaming message.

@@ -786,6 +906,22 @@ class GatewayStreamConsumer:
                        finalize and self._adapter_requires_finalize
                    ):
                        return True
+                    # Fresh-final for long-lived previews: when finalizing
+                    # the last edit in a streaming sequence, if the
+                    # original preview has been visible for at least
+                    # ``fresh_final_after_seconds``, send the completed
+                    # reply as a fresh message so the platform's visible
+                    # timestamp reflects completion time instead of the
+                    # preview creation time.  Best-effort cleanup of the
+                    # old preview follows.  Ported from
+                    # openclaw/openclaw#72038.  Gated by config so the
+                    # legacy edit-in-place path stays the default.
+                    if (
+                        finalize
+                        and self._should_send_fresh_final()
+                        and await self._try_fresh_final(text)
+                    ):
+                        return True
                    # Edit existing message
                    result = await self.adapter.edit_message(
                        chat_id=self.chat_id,
@@ -852,6 +988,10 @@ class GatewayStreamConsumer:
                if result.success:
                    if result.message_id:
                        self._message_id = result.message_id
+                        # Track when the preview first became visible to
+                        # the user so fresh-final logic can detect stale
+                        # preview timestamps on long-running responses.
+                        self._message_created_ts = time.monotonic()
                    else:
                        self._edit_supported = False
                    self._already_sent = True
@@ -863,6 +1003,11 @@ class GatewayStreamConsumer:
                        # every delta/tool boundary when platforms accept a
                        # message but do not return an editable message id.
                        self._message_id = "__no_edit__"
+                    # Notify the gateway that a fresh content bubble was
+                    # created so any accumulated tool-progress bubble above
+                    # gets closed off — the next tool fires into a new
+                    # bubble below, preserving chronological order.
+                    self._notify_new_message()
                    return True
                else:
                    # Initial send failed — disable streaming for this session
@@ -0,0 +1,155 @@
+"""Shared helpers for canonicalising WhatsApp sender identity.
+
+WhatsApp's bridge can surface the same human under two different JID shapes
+within a single conversation:
+
+- LID form: ``999999999999999@lid``
+- Phone form: ``15551234567@s.whatsapp.net``
+
+Both the authorisation path (:mod:`gateway.run`) and the session-key path
+(:mod:`gateway.session`) need to collapse these aliases to a single stable
+identity. This module is the single source of truth for that resolution so
+the two paths can never drift apart.
+
+Public helpers:
+
+- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
+  down to the bare numeric identifier.
+- :func:`canonical_whatsapp_identifier` — walk the bridge's
+  ``lid-mapping-*.json`` files and return a stable canonical identity
+  across phone/LID variants.
+- :func:`expand_whatsapp_aliases` — return the full alias set for an
+  identifier. Used by authorisation code that needs to match any known
+  form of a sender against an allow-list.
+
+Plugins that need per-sender behaviour on WhatsApp (role-based routing,
+per-contact authorisation, policy gating in a gateway hook) should use
+``canonical_whatsapp_identifier`` so their bookkeeping lines up with
+Hermes' own session keys.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from typing import Set
+
+logger = logging.getLogger(__name__)
+
+# WhatsApp JIDs are numeric (or plus-prefixed numeric) with optional
+# ``@``, ``.`` and ``:`` separators. ``\w`` is pinned to ASCII so
+# full-width digits / Unicode word chars can't sneak through.
+_SAFE_IDENTIFIER_RE = re.compile(r"^[A-Za-z0-9@.+\-]+$")
+
+from hermes_constants import get_hermes_home
+
+
+def normalize_whatsapp_identifier(value: str) -> str:
+    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
+
+    Accepts any of the identifier shapes the WhatsApp bridge may emit:
+    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
+    ``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``.
+    Returns just the numeric identifier (``"60123456789"``) suitable for
+    equality comparisons.
+
+    Useful for plugins that want to match sender IDs against
+    user-supplied config (phone numbers in ``config.yaml``) without
+    worrying about which variant the bridge happens to deliver.
+    """
+    return (
+        str(value or "")
+        .strip()
+        .replace("+", "", 1)
+        .split(":", 1)[0]
+        .split("@", 1)[0]
+    )
+
+
+def expand_whatsapp_aliases(identifier: str) -> Set[str]:
+    """Resolve WhatsApp phone/LID aliases via bridge session mapping files.
+
+    Returns the set of all identifiers transitively reachable through the
+    bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files,
+    starting from ``identifier``. The result always includes the
+    normalized input itself, so callers can safely ``in`` check against
+    the return value without a separate fallback branch.
+
+    Returns an empty set if ``identifier`` normalizes to empty.
+    """
+    normalized = normalize_whatsapp_identifier(identifier)
+    if not normalized:
+        return set()
+
+    session_dir = get_hermes_home() / "whatsapp" / "session"
+    resolved: Set[str] = set()
+    queue = [normalized]
+
+    while queue:
+        current = queue.pop(0)
+        if not current or current in resolved:
+            continue
+        # Defense-in-depth: reject identifiers that could sneak path
+        # separators / traversal segments into the ``lid-mapping-{current}``
+        # filename below. The hardcoded ``lid-mapping-`` prefix already
+        # prevents escape via pathlib's component split (an attacker can't
+        # create ``lid-mapping-..`` as a real directory in session_dir), but
+        # this keeps the identifier space to the characters WhatsApp JIDs
+        # actually use and avoids depending on that filesystem-layout
+        # invariant.
+        if not _SAFE_IDENTIFIER_RE.match(current):
+            continue
+
+        resolved.add(current)
+        for suffix in ("", "_reverse"):
+            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
+            if not mapping_path.exists():
+                continue
+            try:
+                mapped = normalize_whatsapp_identifier(
+                    json.loads(mapping_path.read_text(encoding="utf-8"))
+                )
+            except (OSError, json.JSONDecodeError) as exc:
+                logger.debug("whatsapp_identity: failed to read %s: %s", mapping_path, exc)
+                continue
+            if mapped and mapped not in resolved:
+                queue.append(mapped)
+
+    return resolved
+
+
+def canonical_whatsapp_identifier(identifier: str) -> str:
+    """Return a stable WhatsApp sender identity across phone-JID/LID variants.
+
+    WhatsApp may surface the same person under either a phone-format JID
+    (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
+    applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
+    member inside a group chat — both represent a user identity, and the
+    bridge may flip between the two for the same human.
+
+    This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
+    files, walks the mapping transitively, and picks the shortest
+    (numeric-preferred) alias as the canonical identity.
+    :func:`gateway.session.build_session_key` uses this for both WhatsApp
+    DM chat_ids and WhatsApp group participant_ids, so callers get the
+    same session-key identity Hermes itself uses.
+
+    Plugins that need per-sender behaviour (role-based routing,
+    authorisation, per-contact policy) should use this so their
+    bookkeeping lines up with Hermes' session bookkeeping even when
+    the bridge reshuffles aliases.
+
+    Returns an empty string if ``identifier`` normalizes to empty. If no
+    mapping files exist yet (fresh bridge install), returns the
+    normalized input unchanged.
+    """
+    normalized = normalize_whatsapp_identifier(identifier)
+    if not normalized:
+        return ""
+
+    # expand_whatsapp_aliases always includes `normalized` itself in the
+    # returned set, so the min() below degrades gracefully to `normalized`
+    # when no lid-mapping files are present.
+    aliases = expand_whatsapp_aliases(normalized)
+    return min(aliases, key=lambda candidate: (len(candidate), candidate))
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.10.0"
-__release_date__ = "2026.4.16"
+__version__ = "0.11.0"
+__release_date__ = "2026.4.23"
@@ -110,18 +110,40 @@ def _display_source(source: str) -> str:
    return source.split(":", 1)[1] if source.startswith("manual:") else source


+def _classify_exhausted_status(entry) -> tuple[str, bool]:
+    code = getattr(entry, "last_error_code", None)
+    reason = str(getattr(entry, "last_error_reason", "") or "").strip().lower()
+    message = str(getattr(entry, "last_error_message", "") or "").strip().lower()
+
+    if code == 429 or any(token in reason for token in ("rate_limit", "usage_limit", "quota", "exhausted")) or any(
+        token in message for token in ("rate limit", "usage limit", "quota", "too many requests")
+    ):
+        return "rate-limited", True
+
+    if code in {401, 403} or any(token in reason for token in ("invalid_token", "invalid_grant", "unauthorized", "forbidden", "auth")) or any(
+        token in message for token in ("unauthorized", "forbidden", "expired", "revoked", "invalid token", "authentication")
+    ):
+        return "auth failed", False
+
+    return "exhausted", True
+
+
+
 def _format_exhausted_status(entry) -> str:
    if entry.last_status != STATUS_EXHAUSTED:
        return ""
+    label, show_retry_window = _classify_exhausted_status(entry)
    reason = getattr(entry, "last_error_reason", None)
    reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else ""
    code = f" ({entry.last_error_code})" if entry.last_error_code else ""
+    if not show_retry_window:
+        return f" {label}{reason_text}{code} (re-auth may be required)"
    exhausted_until = _exhausted_until(entry)
    if exhausted_until is None:
-        return f" exhausted{reason_text}{code}"
+        return f" {label}{reason_text}{code}"
    remaining = max(0, int(math.ceil(exhausted_until - time.time())))
    if remaining <= 0:
-        return f" exhausted{reason_text}{code} (ready to retry)"
+        return f" {label}{reason_text}{code} (ready to retry)"
    minutes, seconds = divmod(remaining, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
@@ -133,7 +155,7 @@ def _format_exhausted_status(entry) -> str:
        wait = f"{minutes}m {seconds}s"
    else:
        wait = f"{seconds}s"
-    return f" exhausted{reason_text}{code} ({wait} left)"
+    return f" {label}{reason_text}{code} ({wait} left)"


 def auth_add_command(args) -> None:
@@ -386,6 +408,44 @@ def auth_reset_command(args) -> None:
    print(f"Reset status on {count} {provider} credentials")


+def auth_status_command(args) -> None:
+    provider = _normalize_provider(getattr(args, "provider", "") or "")
+    if not provider:
+        raise SystemExit("Provider is required. Example: `hermes auth status spotify`.")
+    status = auth_mod.get_auth_status(provider)
+    if not status.get("logged_in"):
+        reason = status.get("error")
+        if reason:
+            print(f"{provider}: logged out ({reason})")
+        else:
+            print(f"{provider}: logged out")
+        return
+
+    print(f"{provider}: logged in")
+    for key in ("auth_type", "client_id", "redirect_uri", "scope", "expires_at", "api_base_url"):
+        value = status.get(key)
+        if value:
+            print(f"  {key}: {value}")
+
+
+def auth_logout_command(args) -> None:
+    auth_mod.logout_command(SimpleNamespace(provider=getattr(args, "provider", None)))
+
+
+def auth_spotify_command(args) -> None:
+    action = str(getattr(args, "spotify_action", "") or "login").strip().lower()
+    if action in {"", "login"}:
+        auth_mod.login_spotify_command(args)
+        return
+    if action == "status":
+        auth_status_command(SimpleNamespace(provider="spotify"))
+        return
+    if action == "logout":
+        auth_logout_command(SimpleNamespace(provider="spotify"))
+        return
+    raise SystemExit(f"Unknown Spotify auth action: {action}")
+
+
 def _interactive_auth() -> None:
    """Interactive credential pool management when `hermes auth` is called bare."""
    # Show current pool status first
@@ -583,5 +643,14 @@ def auth_command(args) -> None:
    if action == "reset":
        auth_reset_command(args)
        return
+    if action == "status":
+        auth_status_command(args)
+        return
+    if action == "logout":
+        auth_logout_command(args)
+        return
+    if action == "spotify":
+        auth_spotify_command(args)
+        return
    # No subcommand — launch interactive mode
    _interactive_auth()
@@ -0,0 +1,300 @@
+"""Azure Foundry endpoint auto-detection.
+
+Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
+  - API transport (OpenAI-style ``chat_completions`` vs
+    Anthropic-style ``anthropic_messages``)
+  - Available models (best effort — Azure does not expose a deployment
+    listing via the inference API key, but Azure OpenAI v1 endpoints
+    return the resource's model catalog via ``GET /models``)
+  - Context length for each discovered/entered model, via the existing
+    :func:`agent.model_metadata.get_model_context_length` resolver.
+
+Rationale:
+
+Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
+deployment enumeration requires ARM management-plane auth.  Azure
+OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
+a ``/models`` list, but it reflects the resource's *available* models
+rather than the user's *deployed* deployment names.  In practice it is
+still a useful hint — the user picks a familiar model name and we look
+up its context length from the catalog.
+
+The detector never crashes on errors (every HTTP call is wrapped in a
+broad try/except).  Callers get a :class:`DetectionResult` with whatever
+information could be gathered, and fall back to manual entry for the
+rest.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from dataclasses import dataclass, field
+from typing import Optional
+from urllib import request as urllib_request
+from urllib.error import HTTPError, URLError
+from urllib.parse import urlparse
+
+logger = logging.getLogger(__name__)
+
+
+# Default Azure OpenAI ``api-version`` to probe with.  The v1 GA endpoint
+# accepts requests without ``api-version`` entirely, so this is only used
+# as a fallback for pre-v1 resources that still require it.
+_AZURE_OPENAI_PROBE_API_VERSIONS = (
+    "2025-04-01-preview",
+    "2024-10-21",  # oldest GA that supports /models
+)
+
+# Default Azure Anthropic ``api-version``.  Matches the value used by
+# ``agent/anthropic_adapter.py`` when building the Anthropic client.
+_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"
+
+
+@dataclass
+class DetectionResult:
+    """Everything auto-detection could gather from a base URL + API key."""
+
+    #: Detected API transport: ``"chat_completions"``,
+    #: ``"anthropic_messages"``, or ``None`` when detection failed.
+    api_mode: Optional[str] = None
+
+    #: Deployment / model IDs returned by ``/models`` (best effort).
+    #: Empty when the endpoint doesn't expose the list with an API key.
+    models: list[str] = field(default_factory=list)
+
+    #: Lowercased host from the base URL (used for display messages).
+    hostname: str = ""
+
+    #: Human-readable reason the detector chose ``api_mode``.  Useful
+    #: for explaining auto-detection to the user in the wizard.
+    reason: str = ""
+
+    #: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
+    models_probe_ok: bool = False
+
+    #: ``True`` when the URL was determined to be an Anthropic-style
+    #: endpoint (from path suffix or live probe).
+    is_anthropic: bool = False
+
+
+def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
+    """GET a URL with ``api-key`` + ``Authorization`` headers.  Return
+    ``(status_code, parsed_json_or_None)``.  Never raises."""
+    req = urllib_request.Request(url, method="GET")
+    # Azure OpenAI uses ``api-key``.  Some Azure deployments (and
+    # Anthropic-style routes) use ``Authorization: Bearer``.  Send both
+    # so we probe once per URL rather than twice.
+    req.add_header("api-key", api_key)
+    req.add_header("Authorization", f"Bearer {api_key}")
+    req.add_header("User-Agent", "hermes-agent/azure-detect")
+    try:
+        with urllib_request.urlopen(req, timeout=timeout) as resp:
+            body = resp.read()
+            try:
+                return resp.status, json.loads(body.decode("utf-8", errors="replace"))
+            except Exception:
+                return resp.status, None
+    except HTTPError as exc:
+        return exc.code, None
+    except (URLError, TimeoutError, OSError) as exc:
+        logger.debug("azure_detect: GET %s failed: %s", url, exc)
+        return 0, None
+    except Exception as exc:  # pragma: no cover — defensive
+        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
+        return 0, None
+
+
+def _strip_trailing_v1(url: str) -> str:
+    """Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
+    return re.sub(r"/v1/?$", "", url.rstrip("/"))
+
+
+def _looks_like_anthropic_path(url: str) -> bool:
+    """Return True when the URL's path ends in ``/anthropic`` or
+    contains a ``/anthropic/`` segment.  Used by Azure Foundry
+    resources that route Claude traffic through a dedicated path."""
+    try:
+        parsed = urlparse(url)
+        path = (parsed.path or "").lower().rstrip("/")
+        return path.endswith("/anthropic") or "/anthropic/" in path + "/"
+    except Exception:
+        return False
+
+
+def _extract_model_ids(payload: dict) -> list[str]:
+    """Extract a list of model IDs from an OpenAI-shaped ``/models``
+    response.  Returns ``[]`` on any shape mismatch."""
+    data = payload.get("data") if isinstance(payload, dict) else None
+    if not isinstance(data, list):
+        return []
+    ids: list[str] = []
+    for item in data:
+        if not isinstance(item, dict):
+            continue
+        # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
+        mid = item.get("id") or item.get("model") or item.get("name")
+        if isinstance(mid, str) and mid:
+            ids.append(mid)
+    return ids
+
+
+def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
+    """Probe ``<base>/models`` for an OpenAI-shaped response.
+
+    Returns ``(ok, models)``.  ``ok`` is True iff the endpoint accepted
+    us as an OpenAI-style caller (200 OK + OpenAI-shaped JSON body).
+    """
+    base_url = base_url.rstrip("/")
+
+    # Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no
+    # api-version required for GA paths, so probe without first.
+    candidates = [f"{base_url}/models"]
+    # Fallback: explicit api-version for pre-v1 resources
+    for v in _AZURE_OPENAI_PROBE_API_VERSIONS:
+        candidates.append(f"{base_url}/models?api-version={v}")
+
+    for url in candidates:
+        status, body = _http_get_json(url, api_key)
+        if status == 200 and body is not None:
+            ids = _extract_model_ids(body)
+            if ids:
+                logger.info(
+                    "azure_detect: /models probe OK at %s (%d models)",
+                    url, len(ids),
+                )
+                return True, ids
+            # 200 + empty list still counts as "OpenAI shape, no models
+            # listed" — let the user proceed with manual entry.
+            if isinstance(body, dict) and "data" in body:
+                return True, []
+    return False, []
+
+
+def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
+    """Send a zero-token request to ``<base>/v1/messages`` and check
+    whether the endpoint at least *recognises* the Anthropic Messages
+    shape (any 4xx that mentions ``messages`` or ``model``, or a 400
+    ``invalid_request`` with an Anthropic error shape).  Never completes
+    a real chat.
+    """
+    base = _strip_trailing_v1(base_url)
+    url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
+    payload = json.dumps({
+        "model": "probe",
+        "max_tokens": 1,
+        "messages": [{"role": "user", "content": "ping"}],
+    }).encode("utf-8")
+    req = urllib_request.Request(url, method="POST", data=payload)
+    req.add_header("api-key", api_key)
+    req.add_header("Authorization", f"Bearer {api_key}")
+    req.add_header("anthropic-version", "2023-06-01")
+    req.add_header("content-type", "application/json")
+    req.add_header("User-Agent", "hermes-agent/azure-detect")
+    try:
+        with urllib_request.urlopen(req, timeout=6.0) as resp:
+            # Should never 200 — "probe" isn't a real deployment.  But
+            # if it does, the endpoint definitely speaks Anthropic.
+            return resp.status < 500
+    except HTTPError as exc:
+        # 4xx with an Anthropic-shaped error body = Anthropic endpoint.
+        try:
+            body = exc.read().decode("utf-8", errors="replace")
+            lowered = body.lower()
+            if "anthropic" in lowered or '"type"' in lowered and '"error"' in lowered:
+                return True
+            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
+            # Anthropic-style calls on non-Anthropic deployments.  A
+            # 400 "model not found" IS Anthropic though.
+            if exc.code == 400 and ("messages" in lowered or "model" in lowered):
+                return True
+            return False
+        except Exception:
+            return False
+    except (URLError, TimeoutError, OSError):
+        return False
+    except Exception:  # pragma: no cover
+        return False
+
+
+def detect(base_url: str, api_key: str) -> DetectionResult:
+    """Inspect an Azure endpoint and describe its transport + models.
+
+    Call this from the wizard before asking the user to pick an API
+    mode manually.  The caller should treat the returned
+    :class:`DetectionResult` as *advisory* — if ``api_mode`` is None,
+    fall back to asking the user.
+    """
+    result = DetectionResult()
+
+    try:
+        parsed = urlparse(base_url)
+        result.hostname = (parsed.hostname or "").lower()
+    except Exception:
+        result.hostname = ""
+
+    # 1. Path sniff.  Azure Foundry exposes Anthropic-style deployments
+    #    under a dedicated ``/anthropic`` path.
+    if _looks_like_anthropic_path(base_url):
+        result.is_anthropic = True
+        result.api_mode = "anthropic_messages"
+        result.reason = "URL path ends in /anthropic → Anthropic Messages API"
+        return result
+
+    # 2. Try the OpenAI-style /models probe.  If this works, the
+    #    endpoint definitely speaks OpenAI wire.
+    ok, models = _probe_openai_models(base_url, api_key)
+    if ok:
+        result.models_probe_ok = True
+        result.models = models
+        result.api_mode = "chat_completions"
+        result.reason = (
+            f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
+            if models
+            else "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
+        )
+        return result
+
+    # 3. Fallback: probe the Anthropic Messages shape.  Slower and more
+    #    intrusive than /models, so only run it when the OpenAI probe
+    #    failed.
+    if _probe_anthropic_messages(base_url, api_key):
+        result.is_anthropic = True
+        result.api_mode = "anthropic_messages"
+        result.reason = "Endpoint accepts Anthropic Messages shape"
+        return result
+
+    # Nothing matched.  Caller falls back to manual selection.
+    result.reason = (
+        "Could not probe endpoint (private network, missing model list, or "
+        "non-standard path) — falling back to manual API-mode selection"
+    )
+    return result
+
+
+def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
+    """Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
+    that returns ``None`` when only the fallback default (128k) would
+    fire, so the wizard can distinguish "we actually know this" from
+    "we guessed."""
+    try:
+        from agent.model_metadata import (
+            DEFAULT_FALLBACK_CONTEXT,
+            get_model_context_length,
+        )
+    except Exception:
+        return None
+
+    try:
+        n = get_model_context_length(model, base_url=base_url, api_key=api_key)
+    except Exception as exc:
+        logger.debug("azure_detect: context length lookup failed: %s", exc)
+        return None
+
+    if isinstance(n, int) and n > 0 and n != DEFAULT_FALLBACK_CONTEXT:
+        return n
+    return None
+
+
+__all__ = ["DetectionResult", "detect", "lookup_context_length"]
@@ -36,12 +36,23 @@ _EXCLUDED_DIRS = {
    "__pycache__",      # bytecode caches — regenerated on import
    ".git",             # nested git dirs (profiles shouldn't have these, but safety)
    "node_modules",     # js deps if website/ somehow leaks in
+    "backups",          # prior auto-backups — don't nest backups exponentially
+    "checkpoints",      # session-local trajectory caches — regenerated per-session,
+                        # session-hash-keyed so they don't port to another machine anyway
 }

 # File-name suffixes to skip
 _EXCLUDED_SUFFIXES = (
    ".pyc",
    ".pyo",
+    # SQLite sidecar files — the backup takes a consistent snapshot of ``*.db``
+    # via ``sqlite3.backup()``, so shipping the live WAL / shared-memory /
+    # rollback-journal alongside would pair a fresh snapshot with stale sidecar
+    # state and produce a torn restore on the next open. They're transient and
+    # regenerated on first connection anyway.
+    ".db-wal",
+    ".db-shm",
+    ".db-journal",
 )

 # File names to skip (runtime state that's meaningless on another machine)
@@ -454,6 +465,12 @@ def run_import(args) -> None:
 # Critical state files to include in quick snapshots (relative to HERMES_HOME).
 # Everything else is either regeneratable (logs, cache) or managed separately
 # (skills, repo, sessions/).
+#
+# Entries may be individual files OR directories.  Directories are captured
+# recursively; missing entries are silently skipped.  Pairing data lives in
+# platform-specific JSON blobs outside state.db, so it's listed here explicitly
+# — `hermes update` snapshots this set before pulling so approved-user lists
+# are recoverable if anything goes wrong (issue #15733).
 _QUICK_STATE_FILES = (
    "state.db",
    "config.yaml",
@@ -463,6 +480,10 @@ _QUICK_STATE_FILES = (
    "gateway_state.json",
    "channel_directory.json",
    "processes.json",
+    # Pairing stores (generic + per-platform JSONs outside state.db)
+    "pairing",                          # legacy location (gateway/pairing.py)
+    "platforms/pairing",                # new location (gateway/pairing.py)
+    "feishu_comment_pairing.json",      # Feishu comment subscription pairings
 )

 _QUICK_SNAPSHOTS_DIR = "state-snapshots"
@@ -498,7 +519,27 @@ def create_quick_snapshot(

    for rel in _QUICK_STATE_FILES:
        src = home / rel
-        if not src.exists() or not src.is_file():
+        if not src.exists():
+            continue
+
+        if src.is_dir():
+            # Walk the directory and record each file individually in the
+            # manifest so restore can treat them uniformly.  Empty dirs are
+            # skipped (nothing to snapshot).
+            for sub in src.rglob("*"):
+                if not sub.is_file():
+                    continue
+                sub_rel = sub.relative_to(home).as_posix()
+                dst = snap_dir / sub_rel
+                dst.parent.mkdir(parents=True, exist_ok=True)
+                try:
+                    shutil.copy2(sub, dst)
+                    manifest[sub_rel] = dst.stat().st_size
+                except (OSError, PermissionError) as exc:
+                    logger.warning("Could not snapshot %s: %s", sub_rel, exc)
+            continue
+
+        if not src.is_file():
            continue

        dst = snap_dir / rel
@@ -653,3 +694,233 @@ def run_quick_backup(args) -> None:
        print(f"  Restore with: /snapshot restore {snap_id}")
    else:
        print("No state files found to snapshot.")
+
+
+# ---------------------------------------------------------------------------
+# Shared full-zip backup helper
+# ---------------------------------------------------------------------------
+
+def _write_full_zip_backup(out_path: Path, hermes_root: Path) -> Optional[Path]:
+    """Write a full zip snapshot of ``hermes_root`` to ``out_path``.
+
+    Uses the same exclusion rules and SQLite safe-copy as :func:`run_backup`.
+    Returns the output path on success, None on failure (nothing to back up,
+    or write error — caller should surface the outcome but not raise).
+    """
+    files_to_add: list[tuple[Path, Path]] = []
+    try:
+        for dirpath, dirnames, filenames in os.walk(hermes_root, followlinks=False):
+            dp = Path(dirpath)
+            # Prune excluded directories in-place so os.walk doesn't descend
+            dirnames[:] = [d for d in dirnames if d not in _EXCLUDED_DIRS]
+
+            for fname in filenames:
+                fpath = dp / fname
+                try:
+                    rel = fpath.relative_to(hermes_root)
+                except ValueError:
+                    continue
+
+                if _should_exclude(rel):
+                    continue
+
+                # Skip the output zip itself if it already exists inside root.
+                try:
+                    if fpath.resolve() == out_path.resolve():
+                        continue
+                except (OSError, ValueError):
+                    pass
+
+                files_to_add.append((fpath, rel))
+    except OSError as exc:
+        logger.warning("Full-zip backup: walk failed: %s", exc)
+        return None
+
+    if not files_to_add:
+        return None
+
+    try:
+        with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
+            for abs_path, rel_path in files_to_add:
+                try:
+                    if abs_path.suffix == ".db":
+                        with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                            tmp_db = Path(tmp.name)
+                        try:
+                            if _safe_copy_db(abs_path, tmp_db):
+                                zf.write(tmp_db, arcname=str(rel_path))
+                        finally:
+                            tmp_db.unlink(missing_ok=True)
+                    else:
+                        zf.write(abs_path, arcname=str(rel_path))
+                except (PermissionError, OSError, ValueError) as exc:
+                    logger.debug("Skipping %s in zip backup: %s", rel_path, exc)
+                    continue
+    except OSError as exc:
+        logger.warning("Full-zip backup: zip write failed: %s", exc)
+        # Best-effort cleanup of partial file
+        try:
+            out_path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        return None
+
+    return out_path
+
+
+# ---------------------------------------------------------------------------
+# Pre-update auto-backup
+# ---------------------------------------------------------------------------
+
+_PRE_UPDATE_BACKUPS_DIR = "backups"
+_PRE_UPDATE_PREFIX = "pre-update-"
+_PRE_UPDATE_DEFAULT_KEEP = 5
+
+
+def _pre_update_backup_dir(hermes_home: Optional[Path] = None) -> Path:
+    home = hermes_home or get_hermes_home()
+    return home / _PRE_UPDATE_BACKUPS_DIR
+
+
+def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int:
+    """Remove oldest pre-update backups beyond the keep limit.
+
+    Returns the number of files deleted.  Only touches files matching
+    ``pre-update-*.zip`` so hand-made zips dropped in the same directory
+    are never touched.
+    """
+    if keep < 0:
+        keep = 0
+    if not backup_dir.exists():
+        return 0
+
+    backups = sorted(
+        (p for p in backup_dir.iterdir()
+         if p.is_file() and p.name.startswith(_PRE_UPDATE_PREFIX) and p.suffix.lower() == ".zip"),
+        key=lambda p: p.name,
+        reverse=True,
+    )
+
+    deleted = 0
+    for p in backups[keep:]:
+        try:
+            p.unlink()
+            deleted += 1
+        except OSError as exc:
+            logger.warning("Failed to prune backup %s: %s", p.name, exc)
+
+    return deleted
+
+
+def create_pre_update_backup(
+    hermes_home: Optional[Path] = None,
+    keep: int = _PRE_UPDATE_DEFAULT_KEEP,
+) -> Optional[Path]:
+    """Create a full zip backup of HERMES_HOME under ``backups/``.
+
+    Mirrors :func:`run_backup` (same exclusion rules, same SQLite safe-copy)
+    but writes to ``<HERMES_HOME>/backups/pre-update-<timestamp>.zip`` and
+    auto-prunes old pre-update backups.
+
+    Returns the path to the created zip, or ``None`` if no files were
+    found or the backup could not be created.  Never raises — the caller
+    (``hermes update``) should continue even if the backup fails.
+    """
+    hermes_root = hermes_home or get_default_hermes_root()
+    if not hermes_root.is_dir():
+        return None
+
+    backup_dir = _pre_update_backup_dir(hermes_root)
+    try:
+        backup_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as exc:
+        logger.warning("Could not create pre-update backup dir %s: %s", backup_dir, exc)
+        return None
+
+    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    out_path = backup_dir / f"{_PRE_UPDATE_PREFIX}{stamp}.zip"
+
+    result = _write_full_zip_backup(out_path, hermes_root)
+    if result is None:
+        return None
+
+    _prune_pre_update_backups(backup_dir, keep=keep)
+    return out_path
+
+
+# ---------------------------------------------------------------------------
+# Pre-migration auto-backup (used by `hermes claw migrate`)
+# ---------------------------------------------------------------------------
+
+_PRE_MIGRATION_PREFIX = "pre-migration-"
+_PRE_MIGRATION_DEFAULT_KEEP = 5
+
+
+def _prune_pre_migration_backups(backup_dir: Path, keep: int) -> int:
+    """Remove oldest pre-migration backups beyond the keep limit.
+
+    Only touches files matching ``pre-migration-*.zip`` so other backups in
+    the same directory are never touched.
+    """
+    if keep < 0:
+        keep = 0
+    if not backup_dir.exists():
+        return 0
+
+    backups = sorted(
+        (p for p in backup_dir.iterdir()
+         if p.is_file() and p.name.startswith(_PRE_MIGRATION_PREFIX) and p.suffix.lower() == ".zip"),
+        key=lambda p: p.name,
+        reverse=True,
+    )
+
+    deleted = 0
+    for p in backups[keep:]:
+        try:
+            p.unlink()
+            deleted += 1
+        except OSError as exc:
+            logger.warning("Failed to prune pre-migration backup %s: %s", p.name, exc)
+
+    return deleted
+
+
+def create_pre_migration_backup(
+    hermes_home: Optional[Path] = None,
+    keep: int = _PRE_MIGRATION_DEFAULT_KEEP,
+) -> Optional[Path]:
+    """Create a full zip backup of HERMES_HOME under ``backups/`` before a
+    ``hermes claw migrate`` apply.
+
+    Shares implementation with :func:`create_pre_update_backup` via
+    ``_write_full_zip_backup`` — same exclusions, same SQLite safe-copy,
+    restorable with ``hermes import <archive>``.  Writes to
+    ``<HERMES_HOME>/backups/pre-migration-<timestamp>.zip`` and auto-prunes
+    old pre-migration backups.
+
+    Returns the path to the created zip, or ``None`` if nothing was found
+    to back up (fresh install) or the write failed.  Never raises — the
+    caller decides whether to abort or proceed.
+    """
+    hermes_root = hermes_home or get_default_hermes_root()
+    if not hermes_root.is_dir():
+        return None
+
+    # Reuses the shared backups/ directory so `hermes import` and the
+    # update-backup listing pick up pre-migration archives too.
+    backup_dir = _pre_update_backup_dir(hermes_root)
+    try:
+        backup_dir.mkdir(parents=True, exist_ok=True)
+    except OSError as exc:
+        logger.warning("Could not create pre-migration backup dir %s: %s", backup_dir, exc)
+        return None
+
+    stamp = datetime.now().strftime("%Y-%m-%d-%H%M%S")
+    out_path = backup_dir / f"{_PRE_MIGRATION_PREFIX}{stamp}.zip"
+
+    result = _write_full_zip_backup(out_path, hermes_root)
+    if result is None:
+        return None
+
+    _prune_pre_migration_backups(backup_dir, keep=keep)
+    return out_path
@@ -238,6 +238,52 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
    return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)}


+_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag"
+_latest_release_cache: Optional[tuple] = None  # (tag, url) once resolved
+
+
+def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]:
+    """Return ``(tag, release_url)`` for the latest git tag, or None.
+
+    Local-only — runs ``git describe --tags --abbrev=0`` against the
+    Hermes checkout. Cached per-process. Release URL always points at the
+    canonical NousResearch/hermes-agent repo (forks don't get a link).
+    """
+    global _latest_release_cache
+    if _latest_release_cache is not None:
+        return _latest_release_cache or None
+
+    repo_dir = repo_dir or _resolve_repo_dir()
+    if repo_dir is None:
+        _latest_release_cache = ()  # falsy sentinel — skip future lookups
+        return None
+
+    try:
+        result = subprocess.run(
+            ["git", "describe", "--tags", "--abbrev=0"],
+            capture_output=True,
+            text=True,
+            timeout=3,
+            cwd=str(repo_dir),
+        )
+    except Exception:
+        _latest_release_cache = ()
+        return None
+
+    if result.returncode != 0:
+        _latest_release_cache = ()
+        return None
+
+    tag = (result.stdout or "").strip()
+    if not tag:
+        _latest_release_cache = ()
+        return None
+
+    url = f"{_RELEASE_URL_BASE}/{tag}"
+    _latest_release_cache = (tag, url)
+    return _latest_release_cache
+
+
 def format_banner_version_label() -> str:
    """Return the version label shown in the startup banner title."""
    base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})"
@@ -516,12 +562,18 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    right_content = "\n".join(right_lines)
    layout_table.add_row(left_content, right_content)

-    agent_name = _skin_branding("agent_name", "Hermes Agent")
    title_color = _skin_color("banner_title", "#FFD700")
    border_color = _skin_color("banner_border", "#CD7F32")
+    version_label = format_banner_version_label()
+    release_info = get_latest_release_tag()
+    if release_info:
+        _tag, _url = release_info
+        title_markup = f"[bold {title_color}][link={_url}]{version_label}[/link][/]"
+    else:
+        title_markup = f"[bold {title_color}]{version_label}[/]"
    outer_panel = Panel(
        layout_table,
-        title=f"[bold {title_color}]{format_banner_version_label()}[/]",
+        title=title_markup,
        border_style=border_color,
        padding=(0, 2),
    )
--- a/Show More
+++ b/Show More